xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision d54c7f15a909557249a2bbb411dff69b7035512b)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <stdalign.h>
7 #include <stdint.h>
8 #include <string.h>
9 #include <stdbool.h>
10 #include <sys/queue.h>
11 
12 #include <rte_common.h>
13 #include <rte_ether.h>
14 #include <ethdev_driver.h>
15 #include <rte_eal_paging.h>
16 #include <rte_flow.h>
17 #include <rte_cycles.h>
18 #include <rte_flow_driver.h>
19 #include <rte_malloc.h>
20 #include <rte_ip.h>
21 
22 #include <mlx5_glue.h>
23 #include <mlx5_devx_cmds.h>
24 #include <mlx5_prm.h>
25 #include <mlx5_malloc.h>
26 
27 #include "mlx5_defs.h"
28 #include "mlx5.h"
29 #include "mlx5_flow.h"
30 #include "mlx5_flow_os.h"
31 #include "mlx5_rx.h"
32 #include "mlx5_tx.h"
33 #include "mlx5_common_os.h"
34 #include "rte_pmd_mlx5.h"
35 
36 /*
37  * Shared array for quick translation between port_id and vport mask/values
38  * used for HWS rules.
39  */
40 struct flow_hw_port_info mlx5_flow_hw_port_infos[RTE_MAX_ETHPORTS];
41 
42 /*
43  * A global structure to save the available REG_C_x for tags usage.
44  * The Meter color REG (ASO) and the last available one will be reserved
45  * for PMD internal usage.
46  * Since there is no "port" concept in the driver, it is assumed that the
47  * available tags set will be the minimum intersection.
48  * 3 - in FDB mode / 5 - in legacy mode
49  */
50 uint32_t mlx5_flow_hw_avl_tags_init_cnt;
51 enum modify_reg mlx5_flow_hw_avl_tags[MLX5_FLOW_HW_TAGS_MAX] = {REG_NON};
52 enum modify_reg mlx5_flow_hw_aso_tag;
53 
54 struct tunnel_default_miss_ctx {
55 	uint16_t *queue;
56 	__extension__
57 	union {
58 		struct rte_flow_action_rss action_rss;
59 		struct rte_flow_action_queue miss_queue;
60 		struct rte_flow_action_jump miss_jump;
61 		uint8_t raw[0];
62 	};
63 };
64 
65 static int
66 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
67 			     struct rte_flow *flow,
68 			     const struct rte_flow_attr *attr,
69 			     const struct rte_flow_action *app_actions,
70 			     uint32_t flow_idx,
71 			     const struct mlx5_flow_tunnel *tunnel,
72 			     struct tunnel_default_miss_ctx *ctx,
73 			     struct rte_flow_error *error);
74 static struct mlx5_flow_tunnel *
75 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id);
76 static void
77 mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel);
78 static uint32_t
79 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
80 				const struct mlx5_flow_tunnel *tunnel,
81 				uint32_t group, uint32_t *table,
82 				struct rte_flow_error *error);
83 
84 /** Device flow drivers. */
85 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
86 
87 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
88 
89 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
90 	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
91 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
92 	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
93 #endif
94 #ifdef HAVE_MLX5_HWS_SUPPORT
95 	[MLX5_FLOW_TYPE_HW] = &mlx5_flow_hw_drv_ops,
96 #endif
97 	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
98 	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
99 };
100 
101 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */
102 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
103 	(const int []){ \
104 		__VA_ARGS__, 0, \
105 	}
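
/*
 * Illustration (not used by the driver itself): MLX5_FLOW_EXPAND_RSS_NEXT(a, b)
 * expands to the compound literal (const int []){ a, b, 0, }, i.e. a
 * zero-terminated array of node indexes suitable for the .next field of
 * struct mlx5_flow_expand_node below.
 */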
106 
107 /** Node object of input graph for mlx5_flow_expand_rss(). */
108 struct mlx5_flow_expand_node {
109 	const int *const next;
110 	/**<
111 	 * List of next node indexes. A zero value is interpreted as a terminator.
112 	 */
113 	const enum rte_flow_item_type type;
114 	/**< Pattern item type of current node. */
115 	uint64_t rss_types;
116 	/**<
117 	 * RSS types bit-field associated with this node
118 	 * (see RTE_ETH_RSS_* definitions).
119 	 */
120 	uint64_t node_flags;
121 	/**<
122 	 * Bit-fields that define how the node is used in the expansion
123 	 * (see MLX5_EXPANSION_NODE_* definitions).
124 	 */
125 };
126 
127 /** Keep the same format as mlx5_flow_expand_rss to share the expansion buffer. */
128 struct mlx5_flow_expand_sqn {
129 	uint32_t entries; /**< Number of entries. */
130 	struct {
131 		struct rte_flow_item *pattern; /**< Expanded pattern array. */
132 		uint32_t priority; /**< Priority offset for each expansion. */
133 	} entry[];
134 };
135 
136 /* Optional expansion node. The expansion algorithm will not go deeper. */
137 #define MLX5_EXPANSION_NODE_OPTIONAL (UINT64_C(1) << 0)
138 
139 /* The node is not added implicitly as expansion to the flow pattern.
140  * If the node type does not match the flow pattern item type, the
141  * expansion alg will go deeper to its next items.
142  * In the current implementation, the list of next nodes indexes can
143  * have up to one node with this flag set and it has to be the last
144  * node index (before the list terminator).
145  */
146 #define MLX5_EXPANSION_NODE_EXPLICIT (UINT64_C(1) << 1)
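
/*
 * Example: MLX5_EXPANSION_OUTER_VLAN below carries this flag, so a VLAN
 * item is appended to an expanded pattern only when the user pattern
 * itself contains a VLAN item.
 */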
147 
148 /** Object returned by mlx5_flow_expand_rss(). */
149 struct mlx5_flow_expand_rss {
150 	uint32_t entries;
151 	/**< Number of valid entries in @p entry[]. */
152 	struct {
153 		struct rte_flow_item *pattern; /**< Expanded pattern array. */
154 		uint32_t priority; /**< Priority offset for each expansion. */
155 	} entry[];
156 };
157 
158 static void
159 mlx5_dbg__print_pattern(const struct rte_flow_item *item);
160 
161 static const struct mlx5_flow_expand_node *
162 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
163 		unsigned int item_idx,
164 		const struct mlx5_flow_expand_node graph[],
165 		const struct mlx5_flow_expand_node *node);
166 
167 static __rte_always_inline int
168 mlx5_need_cache_flow(const struct mlx5_priv *priv,
169 		     const struct rte_flow_attr *attr)
170 {
171 	return priv->isolated && priv->sh->config.dv_flow_en == 1 &&
172 		(attr ? !attr->group : true) &&
173 		priv->mode_info.mode == MLX5_FLOW_ENGINE_MODE_STANDBY &&
174 		(!priv->sh->config.dv_esw_en || !priv->sh->config.fdb_def_rule);
175 }
176 
177 static bool
178 mlx5_flow_is_rss_expandable_item(const struct rte_flow_item *item)
179 {
180 	switch (item->type) {
181 	case RTE_FLOW_ITEM_TYPE_ETH:
182 	case RTE_FLOW_ITEM_TYPE_VLAN:
183 	case RTE_FLOW_ITEM_TYPE_IPV4:
184 	case RTE_FLOW_ITEM_TYPE_IPV6:
185 	case RTE_FLOW_ITEM_TYPE_UDP:
186 	case RTE_FLOW_ITEM_TYPE_TCP:
187 	case RTE_FLOW_ITEM_TYPE_ESP:
188 	case RTE_FLOW_ITEM_TYPE_ICMP:
189 	case RTE_FLOW_ITEM_TYPE_ICMP6:
190 	case RTE_FLOW_ITEM_TYPE_VXLAN:
191 	case RTE_FLOW_ITEM_TYPE_NVGRE:
192 	case RTE_FLOW_ITEM_TYPE_GRE:
193 	case RTE_FLOW_ITEM_TYPE_GENEVE:
194 	case RTE_FLOW_ITEM_TYPE_MPLS:
195 	case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
196 	case RTE_FLOW_ITEM_TYPE_GRE_KEY:
197 	case RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT:
198 	case RTE_FLOW_ITEM_TYPE_GTP:
199 		return true;
200 	default:
201 		break;
202 	}
203 	return false;
204 }
205 
206 /**
207  * Network Service Header (NSH) and its next protocol values
208  * are described in RFC-8393.
209  */
210 static enum rte_flow_item_type
211 mlx5_nsh_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
212 {
213 	enum rte_flow_item_type type;
214 
215 	switch (proto_mask & proto_spec) {
216 	case 0:
217 		type = RTE_FLOW_ITEM_TYPE_VOID;
218 		break;
219 	case RTE_VXLAN_GPE_TYPE_IPV4:
220 		type = RTE_FLOW_ITEM_TYPE_IPV4;
221 		break;
222 	case RTE_VXLAN_GPE_TYPE_IPV6:
223 		type = RTE_FLOW_ITEM_TYPE_IPV6;
224 		break;
225 	case RTE_VXLAN_GPE_TYPE_ETH:
226 		type = RTE_FLOW_ITEM_TYPE_ETH;
227 		break;
228 	default:
229 		type = RTE_FLOW_ITEM_TYPE_END;
230 	}
231 	return type;
232 }
233 
234 static enum rte_flow_item_type
235 mlx5_inet_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
236 {
237 	enum rte_flow_item_type type;
238 
239 	switch (proto_mask & proto_spec) {
240 	case 0:
241 		type = RTE_FLOW_ITEM_TYPE_VOID;
242 		break;
243 	case IPPROTO_UDP:
244 		type = RTE_FLOW_ITEM_TYPE_UDP;
245 		break;
246 	case IPPROTO_TCP:
247 		type = RTE_FLOW_ITEM_TYPE_TCP;
248 		break;
249 	case IPPROTO_IPIP:
250 		type = RTE_FLOW_ITEM_TYPE_IPV4;
251 		break;
252 	case IPPROTO_IPV6:
253 		type = RTE_FLOW_ITEM_TYPE_IPV6;
254 		break;
255 	case IPPROTO_ESP:
256 		type = RTE_FLOW_ITEM_TYPE_ESP;
257 		break;
258 	default:
259 		type = RTE_FLOW_ITEM_TYPE_END;
260 	}
261 	return type;
262 }
263 
264 static enum rte_flow_item_type
265 mlx5_ethertype_to_item_type(rte_be16_t type_spec,
266 			    rte_be16_t type_mask, bool is_tunnel)
267 {
268 	enum rte_flow_item_type type;
269 
270 	switch (rte_be_to_cpu_16(type_spec & type_mask)) {
271 	case 0:
272 		type = RTE_FLOW_ITEM_TYPE_VOID;
273 		break;
274 	case RTE_ETHER_TYPE_TEB:
275 		type = is_tunnel ?
276 		       RTE_FLOW_ITEM_TYPE_ETH : RTE_FLOW_ITEM_TYPE_END;
277 		break;
278 	case RTE_ETHER_TYPE_VLAN:
279 		type = !is_tunnel ?
280 		       RTE_FLOW_ITEM_TYPE_VLAN : RTE_FLOW_ITEM_TYPE_END;
281 		break;
282 	case RTE_ETHER_TYPE_IPV4:
283 		type = RTE_FLOW_ITEM_TYPE_IPV4;
284 		break;
285 	case RTE_ETHER_TYPE_IPV6:
286 		type = RTE_FLOW_ITEM_TYPE_IPV6;
287 		break;
288 	default:
289 		type = RTE_FLOW_ITEM_TYPE_END;
290 	}
291 	return type;
292 }
293 
294 static enum rte_flow_item_type
295 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
296 {
297 #define MLX5_XSET_ITEM_MASK_SPEC(type, fld)                              \
298 	do {                                                             \
299 		const void *m = item->mask;                              \
300 		const void *s = item->spec;                              \
301 		mask = m ?                                               \
302 			((const struct rte_flow_item_##type *)m)->fld :  \
303 			rte_flow_item_##type##_mask.fld;                 \
304 		spec = ((const struct rte_flow_item_##type *)s)->fld;    \
305 	} while (0)
306 
307 	enum rte_flow_item_type ret;
308 	uint16_t spec, mask;
309 
310 	if (item == NULL || item->spec == NULL)
311 		return RTE_FLOW_ITEM_TYPE_VOID;
312 	switch (item->type) {
313 	case RTE_FLOW_ITEM_TYPE_ETH:
314 		MLX5_XSET_ITEM_MASK_SPEC(eth, hdr.ether_type);
315 		if (!mask)
316 			return RTE_FLOW_ITEM_TYPE_VOID;
317 		ret = mlx5_ethertype_to_item_type(spec, mask, false);
318 		break;
319 	case RTE_FLOW_ITEM_TYPE_VLAN:
320 		MLX5_XSET_ITEM_MASK_SPEC(vlan, hdr.eth_proto);
321 		if (!mask)
322 			return RTE_FLOW_ITEM_TYPE_VOID;
323 		ret = mlx5_ethertype_to_item_type(spec, mask, false);
324 		break;
325 	case RTE_FLOW_ITEM_TYPE_IPV4:
326 		MLX5_XSET_ITEM_MASK_SPEC(ipv4, hdr.next_proto_id);
327 		if (!mask)
328 			return RTE_FLOW_ITEM_TYPE_VOID;
329 		ret = mlx5_inet_proto_to_item_type(spec, mask);
330 		break;
331 	case RTE_FLOW_ITEM_TYPE_IPV6:
332 		MLX5_XSET_ITEM_MASK_SPEC(ipv6, hdr.proto);
333 		if (!mask)
334 			return RTE_FLOW_ITEM_TYPE_VOID;
335 		ret = mlx5_inet_proto_to_item_type(spec, mask);
336 		break;
337 	case RTE_FLOW_ITEM_TYPE_GENEVE:
338 		MLX5_XSET_ITEM_MASK_SPEC(geneve, protocol);
339 		ret = mlx5_ethertype_to_item_type(spec, mask, true);
340 		break;
341 	case RTE_FLOW_ITEM_TYPE_GRE:
342 		MLX5_XSET_ITEM_MASK_SPEC(gre, protocol);
343 		ret = mlx5_ethertype_to_item_type(spec, mask, true);
344 		break;
345 	case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
346 		MLX5_XSET_ITEM_MASK_SPEC(vxlan_gpe, hdr.proto);
347 		ret = mlx5_nsh_proto_to_item_type(spec, mask);
348 		break;
349 	default:
350 		ret = RTE_FLOW_ITEM_TYPE_VOID;
351 		break;
352 	}
353 	return ret;
354 #undef MLX5_XSET_ITEM_MASK_SPEC
355 }
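
/*
 * Example (illustrative): an ETH item whose spec/mask select ether_type
 * 0x0800 completes the pattern with RTE_FLOW_ITEM_TYPE_IPV4, so the
 * expansion can continue from the L3 node even though the user pattern
 * ended at L2.
 */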
356 
357 static const int *
358 mlx5_flow_expand_rss_skip_explicit(const struct mlx5_flow_expand_node graph[],
359 		const int *next_node)
360 {
361 	const struct mlx5_flow_expand_node *node = NULL;
362 	const int *next = next_node;
363 
364 	while (next && *next) {
365 		/*
366 		 * Skip the nodes with the MLX5_EXPANSION_NODE_EXPLICIT
367 		 * flag set, because they were not found in the flow pattern.
368 		 */
369 		node = &graph[*next];
370 		if (!(node->node_flags & MLX5_EXPANSION_NODE_EXPLICIT))
371 			break;
372 		next = node->next;
373 	}
374 	return next;
375 }
376 
377 #define MLX5_RSS_EXP_ELT_N 16
378 
379 /**
380  * Expand RSS flows into several possible flows according to the RSS hash
381  * fields requested and the driver capabilities.
382  *
383  * @param[out] buf
384  *   Buffer to store the result expansion.
385  * @param[in] size
386  *   Buffer size in bytes. If 0, @p buf can be NULL.
387  * @param[in] pattern
388  *   User flow pattern.
389  * @param[in] types
390  *   RSS types to expand (see RTE_ETH_RSS_* definitions).
391  * @param[in] graph
392  *   Input graph to expand @p pattern according to @p types.
393  * @param[in] graph_root_index
394  *   Index of root node in @p graph, typically 0.
395  *
396  * @return
397  *   A positive value representing the size of @p buf in bytes regardless of
398  *   @p size on success, a negative errno value otherwise and rte_errno is
399  *   set; the following errors are defined:
400  *
401  *   -E2BIG: the expansion graph @p graph is too deep.
402  *   -EINVAL: @p size is not large enough for the expanded pattern.
403  */
404 static int
405 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
406 		     const struct rte_flow_item *pattern, uint64_t types,
407 		     const struct mlx5_flow_expand_node graph[],
408 		     int graph_root_index)
409 {
410 	const struct rte_flow_item *item;
411 	const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
412 	const int *next_node;
413 	const int *stack[MLX5_RSS_EXP_ELT_N];
414 	int stack_pos = 0;
415 	struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N];
416 	unsigned int i, item_idx, last_expand_item_idx = 0;
417 	size_t lsize;
418 	size_t user_pattern_size = 0;
419 	void *addr = NULL;
420 	const struct mlx5_flow_expand_node *next = NULL;
421 	struct rte_flow_item missed_item;
422 	int missed = 0;
423 	int elt = 0;
424 	const struct rte_flow_item *last_expand_item = NULL;
425 
426 	memset(&missed_item, 0, sizeof(missed_item));
427 	lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
428 		MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]);
429 	if (lsize > size)
430 		return -EINVAL;
431 	buf->entry[0].priority = 0;
432 	buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N];
433 	buf->entries = 0;
434 	addr = buf->entry[0].pattern;
435 	for (item = pattern, item_idx = 0;
436 			item->type != RTE_FLOW_ITEM_TYPE_END;
437 			item++, item_idx++) {
438 		if (!mlx5_flow_is_rss_expandable_item(item)) {
439 			user_pattern_size += sizeof(*item);
440 			continue;
441 		}
442 		last_expand_item = item;
443 		last_expand_item_idx = item_idx;
444 		i = 0;
445 		while (node->next && node->next[i]) {
446 			next = &graph[node->next[i]];
447 			if (next->type == item->type)
448 				break;
449 			if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
450 				node = next;
451 				i = 0;
452 			} else {
453 				++i;
454 			}
455 		}
456 		if (next)
457 			node = next;
458 		user_pattern_size += sizeof(*item);
459 	}
460 	user_pattern_size += sizeof(*item); /* Handle END item. */
461 	lsize += user_pattern_size;
462 	if (lsize > size)
463 		return -EINVAL;
464 	/* Copy the user pattern in the first entry of the buffer. */
465 	rte_memcpy(addr, pattern, user_pattern_size);
466 	addr = (void *)(((uintptr_t)addr) + user_pattern_size);
467 	buf->entries = 1;
468 	/* Start expanding. */
469 	memset(flow_items, 0, sizeof(flow_items));
470 	user_pattern_size -= sizeof(*item);
471 	/*
472 	 * Check if the last valid item has a spec set and the pattern needs
473 	 * to be completed so that it can be used for expansion.
474 	 */
475 	missed_item.type = mlx5_flow_expand_rss_item_complete(last_expand_item);
476 	if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
477 		/* Item type END indicates expansion is not required. */
478 		return lsize;
479 	}
480 	if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
481 		next = NULL;
482 		missed = 1;
483 		i = 0;
484 		while (node->next && node->next[i]) {
485 			next = &graph[node->next[i]];
486 			if (next->type == missed_item.type) {
487 				flow_items[0].type = missed_item.type;
488 				flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
489 				break;
490 			}
491 			if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
492 				node = next;
493 				i = 0;
494 			} else {
495 				++i;
496 			}
497 			next = NULL;
498 		}
499 	}
500 	if (next && missed) {
501 		elt = 2; /* missed item + item end. */
502 		node = next;
503 		lsize += elt * sizeof(*item) + user_pattern_size;
504 		if (lsize > size)
505 			return -EINVAL;
506 		if (node->rss_types & types) {
507 			buf->entry[buf->entries].priority = 1;
508 			buf->entry[buf->entries].pattern = addr;
509 			buf->entries++;
510 			rte_memcpy(addr, buf->entry[0].pattern,
511 				   user_pattern_size);
512 			addr = (void *)(((uintptr_t)addr) + user_pattern_size);
513 			rte_memcpy(addr, flow_items, elt * sizeof(*item));
514 			addr = (void *)(((uintptr_t)addr) +
515 					elt * sizeof(*item));
516 		}
517 	} else if (last_expand_item != NULL) {
518 		node = mlx5_flow_expand_rss_adjust_node(pattern,
519 				last_expand_item_idx, graph, node);
520 	}
521 	memset(flow_items, 0, sizeof(flow_items));
522 	next_node = mlx5_flow_expand_rss_skip_explicit(graph,
523 			node->next);
524 	stack[stack_pos] = next_node;
525 	node = next_node ? &graph[*next_node] : NULL;
526 	while (node) {
527 		flow_items[stack_pos].type = node->type;
528 		if (node->rss_types & types) {
529 			size_t n;
530 			/*
531 			 * Compute the number of items to copy from the
532 			 * expansion and copy them.
533 			 * When stack_pos is 0, there is 1 element in it,
534 			 * plus the additional END item.
535 			 */
536 			elt = stack_pos + 2;
537 			flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
538 			lsize += elt * sizeof(*item) + user_pattern_size;
539 			if (lsize > size)
540 				return -EINVAL;
541 			n = elt * sizeof(*item);
542 			buf->entry[buf->entries].priority =
543 				stack_pos + 1 + missed;
544 			buf->entry[buf->entries].pattern = addr;
545 			buf->entries++;
546 			rte_memcpy(addr, buf->entry[0].pattern,
547 				   user_pattern_size);
548 			addr = (void *)(((uintptr_t)addr) +
549 					user_pattern_size);
550 			rte_memcpy(addr, &missed_item,
551 				   missed * sizeof(*item));
552 			addr = (void *)(((uintptr_t)addr) +
553 				missed * sizeof(*item));
554 			rte_memcpy(addr, flow_items, n);
555 			addr = (void *)(((uintptr_t)addr) + n);
556 		}
557 		/* Go deeper. */
558 		if (!(node->node_flags & MLX5_EXPANSION_NODE_OPTIONAL) &&
559 				node->next) {
560 			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
561 					node->next);
562 			if (stack_pos++ >= MLX5_RSS_EXP_ELT_N - 2) {
563 				rte_errno = E2BIG;
564 				return -rte_errno;
565 			}
566 			stack[stack_pos] = next_node;
567 		} else if (*(next_node + 1)) {
568 			/* Follow up with the next possibility. */
569 			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
570 					++next_node);
571 		} else if (!stack_pos) {
572 			/*
573 			 * Completing the traversal over the different paths.
574 			 * The next_node is advanced to the terminator.
575 			 */
576 			++next_node;
577 		} else {
578 			/* Move to the next path. */
579 			while (stack_pos) {
580 				next_node = stack[--stack_pos];
581 				next_node++;
582 				if (*next_node)
583 					break;
584 			}
585 			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
586 					next_node);
587 			stack[stack_pos] = next_node;
588 		}
589 		node = next_node && *next_node ? &graph[*next_node] : NULL;
590 	}
591 	return lsize;
592 }
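
/*
 * Usage sketch (hypothetical caller, for illustration only; the real
 * callers live in the flow creation path of this file): expand a pattern
 * against an expansion graph and walk the produced entries. The union
 * sizing follows the -EINVAL contract documented above; create_one_flow()
 * is a placeholder, not a real helper.
 *
 *	union {
 *		struct mlx5_flow_expand_rss buf;
 *		uint8_t buffer[4096];
 *	} expand;
 *	int ret;
 *	uint32_t i;
 *
 *	ret = mlx5_flow_expand_rss(&expand.buf, sizeof(expand.buffer),
 *				   pattern, RTE_ETH_RSS_UDP,
 *				   mlx5_support_expansion,
 *				   MLX5_EXPANSION_ROOT);
 *	if (ret < 0)
 *		return ret; (-E2BIG or -EINVAL, see above)
 *	for (i = 0; i != expand.buf.entries; ++i)
 *		create_one_flow(expand.buf.entry[i].pattern,
 *				expand.buf.entry[i].priority);
 */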
593 
594 /**
595  * Expand SQN flows into several possible flows according to the Tx queue
596  * number.
597  *
598  * @param[out] buf
599  *   Buffer to store the result expansion.
600  * @param[in] size
601  *   Buffer size in bytes. If 0, @p buf can be NULL.
602  * @param[in] pattern
603  *   User flow pattern.
604  * @param[out] sq_specs
605  *   Buffer to store the SQ specs, one per Tx queue.
606  *
607  * @return
608  *   0 on success, a negative value otherwise.
609  *
610  */
611 static int
612 mlx5_flow_expand_sqn(struct mlx5_flow_expand_sqn *buf, size_t size,
613 		     const struct rte_flow_item *pattern,
614 		     struct mlx5_rte_flow_item_sq *sq_specs)
615 {
616 	const struct rte_flow_item *item;
617 	bool port_representor = false;
618 	size_t user_pattern_size = 0;
619 	struct rte_eth_dev *dev;
620 	struct mlx5_priv *priv;
621 	void *addr = NULL;
622 	uint16_t port_id;
623 	size_t lsize;
624 	int elt = 2;
625 	uint16_t i;
626 
627 	buf->entries = 0;
628 	for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
629 		if (item->type == RTE_FLOW_ITEM_TYPE_PORT_REPRESENTOR) {
630 			const struct rte_flow_item_ethdev *pid_v = item->spec;
631 
632 			if (!pid_v)
633 				return 0;
634 			port_id = pid_v->port_id;
635 			port_representor = true;
636 		}
637 		user_pattern_size += sizeof(*item);
638 	}
639 	if (!port_representor)
640 		return 0;
641 	dev = &rte_eth_devices[port_id];
642 	priv = dev->data->dev_private;
643 	buf->entry[0].pattern = (void *)&buf->entry[priv->txqs_n];
644 	lsize = offsetof(struct mlx5_flow_expand_sqn, entry) +
645 		sizeof(buf->entry[0]) * priv->txqs_n;
646 	if (lsize + (user_pattern_size + sizeof(struct rte_flow_item) * elt) * priv->txqs_n > size)
647 		return -EINVAL;
648 	addr = buf->entry[0].pattern;
649 	for (i = 0; i != priv->txqs_n; ++i) {
650 		struct rte_flow_item pattern_add[] = {
651 			{
652 				.type = (enum rte_flow_item_type)
653 					MLX5_RTE_FLOW_ITEM_TYPE_SQ,
654 				.spec = &sq_specs[i],
655 			},
656 			{
657 				.type = RTE_FLOW_ITEM_TYPE_END,
658 			},
659 		};
660 		struct mlx5_txq_ctrl *txq = mlx5_txq_get(dev, i);
661 
662 		if (txq == NULL)
663 			return -EINVAL;
664 		buf->entry[i].pattern = addr;
665 		sq_specs[i].queue = mlx5_txq_get_sqn(txq);
666 		mlx5_txq_release(dev, i);
667 		rte_memcpy(addr, pattern, user_pattern_size);
668 		addr = (void *)(((uintptr_t)addr) + user_pattern_size);
669 		rte_memcpy(addr, pattern_add, sizeof(struct rte_flow_item) * elt);
670 		addr = (void *)(((uintptr_t)addr) + sizeof(struct rte_flow_item) * elt);
671 		buf->entries++;
672 	}
673 	return 0;
674 }
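
/*
 * Example: with priv->txqs_n == 2, buf receives two entries, each holding
 * the user pattern with an appended MLX5_RTE_FLOW_ITEM_TYPE_SQ item that
 * carries one Tx queue's SQ number, followed by an END item.
 */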
675 
676 enum mlx5_expansion {
677 	MLX5_EXPANSION_ROOT,
678 	MLX5_EXPANSION_ROOT_OUTER,
679 	MLX5_EXPANSION_OUTER_ETH,
680 	MLX5_EXPANSION_OUTER_VLAN,
681 	MLX5_EXPANSION_OUTER_IPV4,
682 	MLX5_EXPANSION_OUTER_IPV4_UDP,
683 	MLX5_EXPANSION_OUTER_IPV4_TCP,
684 	MLX5_EXPANSION_OUTER_IPV4_ESP,
685 	MLX5_EXPANSION_OUTER_IPV4_ICMP,
686 	MLX5_EXPANSION_OUTER_IPV6,
687 	MLX5_EXPANSION_OUTER_IPV6_UDP,
688 	MLX5_EXPANSION_OUTER_IPV6_TCP,
689 	MLX5_EXPANSION_OUTER_IPV6_ESP,
690 	MLX5_EXPANSION_OUTER_IPV6_ICMP6,
691 	MLX5_EXPANSION_VXLAN,
692 	MLX5_EXPANSION_STD_VXLAN,
693 	MLX5_EXPANSION_L3_VXLAN,
694 	MLX5_EXPANSION_VXLAN_GPE,
695 	MLX5_EXPANSION_GRE,
696 	MLX5_EXPANSION_NVGRE,
697 	MLX5_EXPANSION_GRE_KEY,
698 	MLX5_EXPANSION_MPLS,
699 	MLX5_EXPANSION_ETH,
700 	MLX5_EXPANSION_VLAN,
701 	MLX5_EXPANSION_IPV4,
702 	MLX5_EXPANSION_IPV4_UDP,
703 	MLX5_EXPANSION_IPV4_TCP,
704 	MLX5_EXPANSION_IPV4_ESP,
705 	MLX5_EXPANSION_IPV4_ICMP,
706 	MLX5_EXPANSION_IPV6,
707 	MLX5_EXPANSION_IPV6_UDP,
708 	MLX5_EXPANSION_IPV6_TCP,
709 	MLX5_EXPANSION_IPV6_ESP,
710 	MLX5_EXPANSION_IPV6_ICMP6,
711 	MLX5_EXPANSION_IPV6_FRAG_EXT,
712 	MLX5_EXPANSION_GTP,
713 	MLX5_EXPANSION_GENEVE,
714 };
715 
716 /** Supported expansion of items. */
717 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
718 	[MLX5_EXPANSION_ROOT] = {
719 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
720 						  MLX5_EXPANSION_IPV4,
721 						  MLX5_EXPANSION_IPV6),
722 		.type = RTE_FLOW_ITEM_TYPE_END,
723 	},
724 	[MLX5_EXPANSION_ROOT_OUTER] = {
725 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
726 						  MLX5_EXPANSION_OUTER_IPV4,
727 						  MLX5_EXPANSION_OUTER_IPV6),
728 		.type = RTE_FLOW_ITEM_TYPE_END,
729 	},
730 	[MLX5_EXPANSION_OUTER_ETH] = {
731 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
732 		.type = RTE_FLOW_ITEM_TYPE_ETH,
733 		.rss_types = 0,
734 	},
735 	[MLX5_EXPANSION_OUTER_VLAN] = {
736 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
737 						  MLX5_EXPANSION_OUTER_IPV6),
738 		.type = RTE_FLOW_ITEM_TYPE_VLAN,
739 		.node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
740 	},
741 	[MLX5_EXPANSION_OUTER_IPV4] = {
742 		.next = MLX5_FLOW_EXPAND_RSS_NEXT
743 			(MLX5_EXPANSION_OUTER_IPV4_UDP,
744 			 MLX5_EXPANSION_OUTER_IPV4_TCP,
745 			 MLX5_EXPANSION_OUTER_IPV4_ESP,
746 			 MLX5_EXPANSION_OUTER_IPV4_ICMP,
747 			 MLX5_EXPANSION_GRE,
748 			 MLX5_EXPANSION_NVGRE,
749 			 MLX5_EXPANSION_IPV4,
750 			 MLX5_EXPANSION_IPV6),
751 		.type = RTE_FLOW_ITEM_TYPE_IPV4,
752 		.rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
753 			RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
754 	},
755 	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
756 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
757 						  MLX5_EXPANSION_VXLAN_GPE,
758 						  MLX5_EXPANSION_MPLS,
759 						  MLX5_EXPANSION_GENEVE,
760 						  MLX5_EXPANSION_GTP),
761 		.type = RTE_FLOW_ITEM_TYPE_UDP,
762 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
763 	},
764 	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
765 		.type = RTE_FLOW_ITEM_TYPE_TCP,
766 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
767 	},
768 	[MLX5_EXPANSION_OUTER_IPV4_ESP] = {
769 		.type = RTE_FLOW_ITEM_TYPE_ESP,
770 		.rss_types = RTE_ETH_RSS_ESP,
771 	},
772 	[MLX5_EXPANSION_OUTER_IPV4_ICMP] = {
773 		.type = RTE_FLOW_ITEM_TYPE_ICMP,
774 	},
775 	[MLX5_EXPANSION_OUTER_IPV6] = {
776 		.next = MLX5_FLOW_EXPAND_RSS_NEXT
777 			(MLX5_EXPANSION_OUTER_IPV6_UDP,
778 			 MLX5_EXPANSION_OUTER_IPV6_TCP,
779 			 MLX5_EXPANSION_OUTER_IPV6_ESP,
780 			 MLX5_EXPANSION_OUTER_IPV6_ICMP6,
781 			 MLX5_EXPANSION_IPV4,
782 			 MLX5_EXPANSION_IPV6,
783 			 MLX5_EXPANSION_GRE,
784 			 MLX5_EXPANSION_NVGRE),
785 		.type = RTE_FLOW_ITEM_TYPE_IPV6,
786 		.rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
787 			RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
788 	},
789 	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
790 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
791 						  MLX5_EXPANSION_VXLAN_GPE,
792 						  MLX5_EXPANSION_MPLS,
793 						  MLX5_EXPANSION_GENEVE,
794 						  MLX5_EXPANSION_GTP),
795 		.type = RTE_FLOW_ITEM_TYPE_UDP,
796 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
797 	},
798 	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
799 		.type = RTE_FLOW_ITEM_TYPE_TCP,
800 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
801 	},
802 	[MLX5_EXPANSION_OUTER_IPV6_ESP] = {
803 		.type = RTE_FLOW_ITEM_TYPE_ESP,
804 		.rss_types = RTE_ETH_RSS_ESP,
805 	},
806 	[MLX5_EXPANSION_OUTER_IPV6_ICMP6] = {
807 		.type = RTE_FLOW_ITEM_TYPE_ICMP6,
808 	},
809 	[MLX5_EXPANSION_VXLAN] = {
810 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
811 						  MLX5_EXPANSION_IPV4,
812 						  MLX5_EXPANSION_IPV6),
813 		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
814 	},
815 	[MLX5_EXPANSION_STD_VXLAN] = {
816 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
817 		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
818 	},
819 	[MLX5_EXPANSION_L3_VXLAN] = {
820 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
821 						  MLX5_EXPANSION_IPV6),
822 		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
823 	},
824 	[MLX5_EXPANSION_VXLAN_GPE] = {
825 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
826 						  MLX5_EXPANSION_IPV4,
827 						  MLX5_EXPANSION_IPV6),
828 		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
829 	},
830 	[MLX5_EXPANSION_GRE] = {
831 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
832 						  MLX5_EXPANSION_IPV4,
833 						  MLX5_EXPANSION_IPV6,
834 						  MLX5_EXPANSION_GRE_KEY,
835 						  MLX5_EXPANSION_MPLS),
836 		.type = RTE_FLOW_ITEM_TYPE_GRE,
837 	},
838 	[MLX5_EXPANSION_GRE_KEY] = {
839 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
840 						  MLX5_EXPANSION_IPV6,
841 						  MLX5_EXPANSION_MPLS),
842 		.type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
843 		.node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
844 	},
845 	[MLX5_EXPANSION_NVGRE] = {
846 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
847 		.type = RTE_FLOW_ITEM_TYPE_NVGRE,
848 	},
849 	[MLX5_EXPANSION_MPLS] = {
850 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
851 						  MLX5_EXPANSION_IPV6,
852 						  MLX5_EXPANSION_ETH),
853 		.type = RTE_FLOW_ITEM_TYPE_MPLS,
854 		.node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
855 	},
856 	[MLX5_EXPANSION_ETH] = {
857 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
858 		.type = RTE_FLOW_ITEM_TYPE_ETH,
859 	},
860 	[MLX5_EXPANSION_VLAN] = {
861 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
862 						  MLX5_EXPANSION_IPV6),
863 		.type = RTE_FLOW_ITEM_TYPE_VLAN,
864 		.node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
865 	},
866 	[MLX5_EXPANSION_IPV4] = {
867 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
868 						  MLX5_EXPANSION_IPV4_TCP,
869 						  MLX5_EXPANSION_IPV4_ESP,
870 						  MLX5_EXPANSION_IPV4_ICMP),
871 		.type = RTE_FLOW_ITEM_TYPE_IPV4,
872 		.rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
873 			RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
874 	},
875 	[MLX5_EXPANSION_IPV4_UDP] = {
876 		.type = RTE_FLOW_ITEM_TYPE_UDP,
877 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
878 	},
879 	[MLX5_EXPANSION_IPV4_TCP] = {
880 		.type = RTE_FLOW_ITEM_TYPE_TCP,
881 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
882 	},
883 	[MLX5_EXPANSION_IPV4_ESP] = {
884 		.type = RTE_FLOW_ITEM_TYPE_ESP,
885 		.rss_types = RTE_ETH_RSS_ESP,
886 	},
887 	[MLX5_EXPANSION_IPV4_ICMP] = {
888 		.type = RTE_FLOW_ITEM_TYPE_ICMP,
889 	},
890 	[MLX5_EXPANSION_IPV6] = {
891 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
892 						  MLX5_EXPANSION_IPV6_TCP,
893 						  MLX5_EXPANSION_IPV6_ESP,
894 						  MLX5_EXPANSION_IPV6_ICMP6,
895 						  MLX5_EXPANSION_IPV6_FRAG_EXT),
896 		.type = RTE_FLOW_ITEM_TYPE_IPV6,
897 		.rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
898 			RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
899 	},
900 	[MLX5_EXPANSION_IPV6_UDP] = {
901 		.type = RTE_FLOW_ITEM_TYPE_UDP,
902 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
903 	},
904 	[MLX5_EXPANSION_IPV6_TCP] = {
905 		.type = RTE_FLOW_ITEM_TYPE_TCP,
906 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
907 	},
908 	[MLX5_EXPANSION_IPV6_ESP] = {
909 		.type = RTE_FLOW_ITEM_TYPE_ESP,
910 		.rss_types = RTE_ETH_RSS_ESP,
911 	},
912 	[MLX5_EXPANSION_IPV6_FRAG_EXT] = {
913 		.type = RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
914 	},
915 	[MLX5_EXPANSION_IPV6_ICMP6] = {
916 		.type = RTE_FLOW_ITEM_TYPE_ICMP6,
917 	},
918 	[MLX5_EXPANSION_GTP] = {
919 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
920 						  MLX5_EXPANSION_IPV6),
921 		.type = RTE_FLOW_ITEM_TYPE_GTP,
922 	},
923 	[MLX5_EXPANSION_GENEVE] = {
924 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
925 						  MLX5_EXPANSION_IPV4,
926 						  MLX5_EXPANSION_IPV6),
927 		.type = RTE_FLOW_ITEM_TYPE_GENEVE,
928 	},
929 };
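
/*
 * Example of what the graph above yields (illustrative): for the user
 * pattern "eth / ipv4 / end" with types == RTE_ETH_RSS_NONFRAG_IPV4_UDP,
 * mlx5_flow_expand_rss() emits the original pattern at priority offset 0
 * plus "eth / ipv4 / udp / end" at priority offset 1, because only the
 * MLX5_EXPANSION_IPV4_UDP node carries a matching rss_types bit.
 */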
930 
931 static struct rte_flow_action_handle *
932 mlx5_action_handle_create(struct rte_eth_dev *dev,
933 			  const struct rte_flow_indir_action_conf *conf,
934 			  const struct rte_flow_action *action,
935 			  struct rte_flow_error *error);
936 static int mlx5_action_handle_destroy
937 				(struct rte_eth_dev *dev,
938 				 struct rte_flow_action_handle *handle,
939 				 struct rte_flow_error *error);
940 static int mlx5_action_handle_update
941 				(struct rte_eth_dev *dev,
942 				 struct rte_flow_action_handle *handle,
943 				 const void *update,
944 				 struct rte_flow_error *error);
945 static int mlx5_action_handle_query
946 				(struct rte_eth_dev *dev,
947 				 const struct rte_flow_action_handle *handle,
948 				 void *data,
949 				 struct rte_flow_error *error);
950 static int
951 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
952 		    struct rte_flow_tunnel *app_tunnel,
953 		    struct rte_flow_action **actions,
954 		    uint32_t *num_of_actions,
955 		    struct rte_flow_error *error);
956 static int
957 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
958 		       struct rte_flow_tunnel *app_tunnel,
959 		       struct rte_flow_item **items,
960 		       uint32_t *num_of_items,
961 		       struct rte_flow_error *error);
962 static int
963 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
964 			      struct rte_flow_item *pmd_items,
965 			      uint32_t num_items, struct rte_flow_error *err);
966 static int
967 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
968 				struct rte_flow_action *pmd_actions,
969 				uint32_t num_actions,
970 				struct rte_flow_error *err);
971 static int
972 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
973 				  struct rte_mbuf *m,
974 				  struct rte_flow_restore_info *info,
975 				  struct rte_flow_error *err);
976 static struct rte_flow_item_flex_handle *
977 mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
978 			   const struct rte_flow_item_flex_conf *conf,
979 			   struct rte_flow_error *error);
980 static int
981 mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
982 			    const struct rte_flow_item_flex_handle *handle,
983 			    struct rte_flow_error *error);
984 static int
985 mlx5_flow_info_get(struct rte_eth_dev *dev,
986 		   struct rte_flow_port_info *port_info,
987 		   struct rte_flow_queue_info *queue_info,
988 		   struct rte_flow_error *error);
989 static int
990 mlx5_flow_port_configure(struct rte_eth_dev *dev,
991 			 const struct rte_flow_port_attr *port_attr,
992 			 uint16_t nb_queue,
993 			 const struct rte_flow_queue_attr *queue_attr[],
994 			 struct rte_flow_error *err);
995 
996 static struct rte_flow_pattern_template *
997 mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
998 		const struct rte_flow_pattern_template_attr *attr,
999 		const struct rte_flow_item items[],
1000 		struct rte_flow_error *error);
1001 
1002 static int
1003 mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
1004 				   struct rte_flow_pattern_template *template,
1005 				   struct rte_flow_error *error);
1006 static struct rte_flow_actions_template *
1007 mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
1008 			const struct rte_flow_actions_template_attr *attr,
1009 			const struct rte_flow_action actions[],
1010 			const struct rte_flow_action masks[],
1011 			struct rte_flow_error *error);
1012 static int
1013 mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
1014 				   struct rte_flow_actions_template *template,
1015 				   struct rte_flow_error *error);
1016 
1017 static struct rte_flow_template_table *
1018 mlx5_flow_table_create(struct rte_eth_dev *dev,
1019 		       const struct rte_flow_template_table_attr *attr,
1020 		       struct rte_flow_pattern_template *item_templates[],
1021 		       uint8_t nb_item_templates,
1022 		       struct rte_flow_actions_template *action_templates[],
1023 		       uint8_t nb_action_templates,
1024 		       struct rte_flow_error *error);
1025 static int
1026 mlx5_flow_table_destroy(struct rte_eth_dev *dev,
1027 			struct rte_flow_template_table *table,
1028 			struct rte_flow_error *error);
1029 static struct rte_flow *
1030 mlx5_flow_async_flow_create(struct rte_eth_dev *dev,
1031 			    uint32_t queue,
1032 			    const struct rte_flow_op_attr *attr,
1033 			    struct rte_flow_template_table *table,
1034 			    const struct rte_flow_item items[],
1035 			    uint8_t pattern_template_index,
1036 			    const struct rte_flow_action actions[],
1037 			    uint8_t action_template_index,
1038 			    void *user_data,
1039 			    struct rte_flow_error *error);
1040 static struct rte_flow *
1041 mlx5_flow_async_flow_create_by_index(struct rte_eth_dev *dev,
1042 			    uint32_t queue,
1043 			    const struct rte_flow_op_attr *attr,
1044 			    struct rte_flow_template_table *table,
1045 			    uint32_t rule_index,
1046 			    const struct rte_flow_action actions[],
1047 			    uint8_t action_template_index,
1048 			    void *user_data,
1049 			    struct rte_flow_error *error);
1050 static int
1051 mlx5_flow_async_flow_destroy(struct rte_eth_dev *dev,
1052 			     uint32_t queue,
1053 			     const struct rte_flow_op_attr *attr,
1054 			     struct rte_flow *flow,
1055 			     void *user_data,
1056 			     struct rte_flow_error *error);
1057 static int
1058 mlx5_flow_pull(struct rte_eth_dev *dev,
1059 	       uint32_t queue,
1060 	       struct rte_flow_op_result res[],
1061 	       uint16_t n_res,
1062 	       struct rte_flow_error *error);
1063 static int
1064 mlx5_flow_push(struct rte_eth_dev *dev,
1065 	       uint32_t queue,
1066 	       struct rte_flow_error *error);
1067 
1068 static struct rte_flow_action_handle *
1069 mlx5_flow_async_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
1070 				 const struct rte_flow_op_attr *attr,
1071 				 const struct rte_flow_indir_action_conf *conf,
1072 				 const struct rte_flow_action *action,
1073 				 void *user_data,
1074 				 struct rte_flow_error *error);
1075 
1076 static int
1077 mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
1078 				 const struct rte_flow_op_attr *attr,
1079 				 struct rte_flow_action_handle *handle,
1080 				 const void *update,
1081 				 void *user_data,
1082 				 struct rte_flow_error *error);
1083 
1084 static int
1085 mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
1086 				  const struct rte_flow_op_attr *attr,
1087 				  struct rte_flow_action_handle *handle,
1088 				  void *user_data,
1089 				  struct rte_flow_error *error);
1090 
1091 static int
1092 mlx5_flow_async_action_handle_query(struct rte_eth_dev *dev, uint32_t queue,
1093 				 const struct rte_flow_op_attr *attr,
1094 				 const struct rte_flow_action_handle *handle,
1095 				 void *data,
1096 				 void *user_data,
1097 				 struct rte_flow_error *error);
1098 
1099 static const struct rte_flow_ops mlx5_flow_ops = {
1100 	.validate = mlx5_flow_validate,
1101 	.create = mlx5_flow_create,
1102 	.destroy = mlx5_flow_destroy,
1103 	.flush = mlx5_flow_flush,
1104 	.isolate = mlx5_flow_isolate,
1105 	.query = mlx5_flow_query,
1106 	.dev_dump = mlx5_flow_dev_dump,
1107 	.get_q_aged_flows = mlx5_flow_get_q_aged_flows,
1108 	.get_aged_flows = mlx5_flow_get_aged_flows,
1109 	.action_handle_create = mlx5_action_handle_create,
1110 	.action_handle_destroy = mlx5_action_handle_destroy,
1111 	.action_handle_update = mlx5_action_handle_update,
1112 	.action_handle_query = mlx5_action_handle_query,
1113 	.tunnel_decap_set = mlx5_flow_tunnel_decap_set,
1114 	.tunnel_match = mlx5_flow_tunnel_match,
1115 	.tunnel_action_decap_release = mlx5_flow_tunnel_action_release,
1116 	.tunnel_item_release = mlx5_flow_tunnel_item_release,
1117 	.get_restore_info = mlx5_flow_tunnel_get_restore_info,
1118 	.flex_item_create = mlx5_flow_flex_item_create,
1119 	.flex_item_release = mlx5_flow_flex_item_release,
1120 	.info_get = mlx5_flow_info_get,
1121 	.pick_transfer_proxy = mlx5_flow_pick_transfer_proxy,
1122 	.configure = mlx5_flow_port_configure,
1123 	.pattern_template_create = mlx5_flow_pattern_template_create,
1124 	.pattern_template_destroy = mlx5_flow_pattern_template_destroy,
1125 	.actions_template_create = mlx5_flow_actions_template_create,
1126 	.actions_template_destroy = mlx5_flow_actions_template_destroy,
1127 	.template_table_create = mlx5_flow_table_create,
1128 	.template_table_destroy = mlx5_flow_table_destroy,
1129 	.async_create = mlx5_flow_async_flow_create,
1130 	.async_create_by_index = mlx5_flow_async_flow_create_by_index,
1131 	.async_destroy = mlx5_flow_async_flow_destroy,
1132 	.pull = mlx5_flow_pull,
1133 	.push = mlx5_flow_push,
1134 	.async_action_handle_create = mlx5_flow_async_action_handle_create,
1135 	.async_action_handle_update = mlx5_flow_async_action_handle_update,
1136 	.async_action_handle_query = mlx5_flow_async_action_handle_query,
1137 	.async_action_handle_destroy = mlx5_flow_async_action_handle_destroy,
1138 };
1139 
1140 /* Tunnel information. */
1141 struct mlx5_flow_tunnel_info {
1142 	uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
1143 	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
1144 };
1145 
1146 static struct mlx5_flow_tunnel_info tunnels_info[] = {
1147 	{
1148 		.tunnel = MLX5_FLOW_LAYER_VXLAN,
1149 		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
1150 	},
1151 	{
1152 		.tunnel = MLX5_FLOW_LAYER_GENEVE,
1153 		.ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
1154 	},
1155 	{
1156 		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
1157 		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
1158 	},
1159 	{
1160 		.tunnel = MLX5_FLOW_LAYER_GRE,
1161 		.ptype = RTE_PTYPE_TUNNEL_GRE,
1162 	},
1163 	{
1164 		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
1165 		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
1166 	},
1167 	{
1168 		.tunnel = MLX5_FLOW_LAYER_MPLS,
1169 		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
1170 	},
1171 	{
1172 		.tunnel = MLX5_FLOW_LAYER_NVGRE,
1173 		.ptype = RTE_PTYPE_TUNNEL_NVGRE,
1174 	},
1175 	{
1176 		.tunnel = MLX5_FLOW_LAYER_IPIP,
1177 		.ptype = RTE_PTYPE_TUNNEL_IP,
1178 	},
1179 	{
1180 		.tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
1181 		.ptype = RTE_PTYPE_TUNNEL_IP,
1182 	},
1183 	{
1184 		.tunnel = MLX5_FLOW_LAYER_GTP,
1185 		.ptype = RTE_PTYPE_TUNNEL_GTPU,
1186 	},
1187 };
1188 
1189 
1190 
1191 /**
1192  * Translate tag ID to register.
1193  *
1194  * @param[in] dev
1195  *   Pointer to the Ethernet device structure.
1196  * @param[in] feature
1197  *   The feature that requests the register.
1198  * @param[in] id
1199  *   The requested register ID.
1200  * @param[out] error
1201  *   Error description in case of any.
1202  *
1203  * @return
1204  *   The requested register on success, a negative errno
1205  *   value otherwise and rte_errno is set.
1206  */
1207 int
1208 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
1209 		     enum mlx5_feature_name feature,
1210 		     uint32_t id,
1211 		     struct rte_flow_error *error)
1212 {
1213 	struct mlx5_priv *priv = dev->data->dev_private;
1214 	struct mlx5_sh_config *config = &priv->sh->config;
1215 	enum modify_reg start_reg;
1216 	bool skip_mtr_reg = false;
1217 
1218 	switch (feature) {
1219 	case MLX5_HAIRPIN_RX:
1220 		return REG_B;
1221 	case MLX5_HAIRPIN_TX:
1222 		return REG_A;
1223 	case MLX5_METADATA_RX:
1224 		switch (config->dv_xmeta_en) {
1225 		case MLX5_XMETA_MODE_LEGACY:
1226 			return REG_B;
1227 		case MLX5_XMETA_MODE_META16:
1228 			return REG_C_0;
1229 		case MLX5_XMETA_MODE_META32:
1230 			return REG_C_1;
1231 		case MLX5_XMETA_MODE_META32_HWS:
1232 			return REG_C_1;
1233 		}
1234 		break;
1235 	case MLX5_METADATA_TX:
1236 		if (config->dv_flow_en == 2 && config->dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS) {
1237 			return REG_C_1;
1238 		} else {
1239 			return REG_A;
1240 		}
1241 	case MLX5_METADATA_FDB:
1242 		switch (config->dv_xmeta_en) {
1243 		case MLX5_XMETA_MODE_LEGACY:
1244 			return REG_NON;
1245 		case MLX5_XMETA_MODE_META16:
1246 			return REG_C_0;
1247 		case MLX5_XMETA_MODE_META32:
1248 			return REG_C_1;
1249 		case MLX5_XMETA_MODE_META32_HWS:
1250 			return REG_C_1;
1251 		}
1252 		break;
1253 	case MLX5_FLOW_MARK:
1254 		switch (config->dv_xmeta_en) {
1255 		case MLX5_XMETA_MODE_LEGACY:
1256 		case MLX5_XMETA_MODE_META32_HWS:
1257 			return REG_NON;
1258 		case MLX5_XMETA_MODE_META16:
1259 			return REG_C_1;
1260 		case MLX5_XMETA_MODE_META32:
1261 			return REG_C_0;
1262 		}
1263 		break;
1264 	case MLX5_MTR_ID:
1265 		/*
1266 		 * If meter color and meter id share one register, flow match
1267 		 * should use the meter color register for match.
1268 		 */
1269 		if (priv->mtr_reg_share)
1270 			return priv->mtr_color_reg;
1271 		else
1272 			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
1273 			       REG_C_3;
1274 	case MLX5_MTR_COLOR:
1275 	case MLX5_ASO_FLOW_HIT:
1276 	case MLX5_ASO_CONNTRACK:
1277 	case MLX5_SAMPLE_ID:
1278 		/* All features use the same REG_C. */
1279 		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
1280 		return priv->mtr_color_reg;
1281 	case MLX5_COPY_MARK:
1282 		/*
1283 		 * The metadata COPY_MARK register is used in the meter suffix
1284 		 * sub-flow when a meter is present. It's safe to share it.
1285 		 */
1286 		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
1287 	case MLX5_APP_TAG:
1288 		/*
1289 		 * If meter is enabled, it engages a register for color
1290 		 * match and flow match. If meter color match is not using
1291 		 * REG_C_2, the REG_C_x used by meter color match must be
1292 		 * skipped.
1293 		 * If meter is disabled, all available registers can be used.
1294 		 */
1295 		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
1296 			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
1297 		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
1298 		if (id > (uint32_t)(REG_C_7 - start_reg))
1299 			return rte_flow_error_set(error, EINVAL,
1300 						  RTE_FLOW_ERROR_TYPE_ITEM,
1301 						  NULL, "invalid tag id");
1302 		if (priv->sh->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
1303 			return rte_flow_error_set(error, ENOTSUP,
1304 						  RTE_FLOW_ERROR_TYPE_ITEM,
1305 						  NULL, "unsupported tag id");
1306 		/*
1307 		 * This case means meter is using a REG_C_x greater than 2.
1308 		 * Take care not to conflict with the meter color REG_C_x.
1309 		 * If the available index REG_C_y >= REG_C_x, skip the
1310 		 * color register.
1311 		 */
1312 		if (skip_mtr_reg && priv->sh->flow_mreg_c
1313 		    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
1314 			if (id >= (uint32_t)(REG_C_7 - start_reg))
1315 				return rte_flow_error_set(error, EINVAL,
1316 						       RTE_FLOW_ERROR_TYPE_ITEM,
1317 							NULL, "invalid tag id");
1318 			if (priv->sh->flow_mreg_c
1319 			    [id + 1 + start_reg - REG_C_0] != REG_NON)
1320 				return priv->sh->flow_mreg_c
1321 					       [id + 1 + start_reg - REG_C_0];
1322 			return rte_flow_error_set(error, ENOTSUP,
1323 						  RTE_FLOW_ERROR_TYPE_ITEM,
1324 						  NULL, "unsupported tag id");
1325 		}
1326 		return priv->sh->flow_mreg_c[id + start_reg - REG_C_0];
1327 	}
1328 	MLX5_ASSERT(false);
1329 	return rte_flow_error_set(error, EINVAL,
1330 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1331 				  NULL, "invalid feature name");
1332 }
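
/*
 * Illustration (assuming dv_xmeta_en == MLX5_XMETA_MODE_META16): the
 * mapping above gives MLX5_METADATA_RX -> REG_C_0 and MLX5_FLOW_MARK ->
 * REG_C_1, while MLX5_APP_TAG ids index into flow_mreg_c[] starting from
 * REG_C_2, or from REG_C_3/REG_C_4 when the meter color match occupies
 * REG_C_2.
 */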
1333 
1334 /**
1335  * Check extensive flow metadata register support.
1336  *
1337  * @param dev
1338  *   Pointer to rte_eth_dev structure.
1339  *
1340  * @return
1341  *   True if device supports extensive flow metadata register, otherwise false.
1342  */
1343 bool
1344 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
1345 {
1346 	struct mlx5_priv *priv = dev->data->dev_private;
1347 
1348 	/*
1349 	 * Having an available reg_c can be regarded as support for
1350 	 * extensive flow metadata registers, which implies:
1351 	 * - metadata register copy action by modify header.
1352 	 * - 16 modify header actions are supported.
1353 	 * - reg_c's are preserved across different domains (FDB and NIC) on
1354 	 *   packet loopback by flow lookup miss.
1355 	 */
1356 	return priv->sh->flow_mreg_c[2] != REG_NON;
1357 }
1358 
1359 /**
1360  * Get the lowest priority.
1361  *
1362  * @param[in] dev
1363  *   Pointer to the Ethernet device structure.
1364  * @param[in] attributes
1365  *   Pointer to device flow rule attributes.
1366  *
1367  * @return
1368  *   The value of lowest priority of flow.
1369  */
1370 uint32_t
1371 mlx5_get_lowest_priority(struct rte_eth_dev *dev,
1372 			  const struct rte_flow_attr *attr)
1373 {
1374 	struct mlx5_priv *priv = dev->data->dev_private;
1375 
1376 	if (!attr->group && !(attr->transfer && priv->fdb_def_rule))
1377 		return priv->sh->flow_max_priority - 2;
1378 	return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1;
1379 }
1380 
1381 /**
1382  * Calculate matcher priority of the flow.
1383  *
1384  * @param[in] dev
1385  *   Pointer to the Ethernet device structure.
1386  * @param[in] attr
1387  *   Pointer to device flow rule attributes.
1388  * @param[in] subpriority
1389  *   The priority based on the items.
1390  * @param[in] external
1391  *   Flow is user flow.
1392  * @return
1393  *   The matcher priority of the flow.
1394  */
1395 uint16_t
1396 mlx5_get_matcher_priority(struct rte_eth_dev *dev,
1397 			  const struct rte_flow_attr *attr,
1398 			  uint32_t subpriority, bool external)
1399 {
1400 	uint16_t priority = (uint16_t)attr->priority;
1401 	struct mlx5_priv *priv = dev->data->dev_private;
1402 
1403 	/* NIC root rules */
1404 	if (!attr->group && !attr->transfer) {
1405 		if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1406 			priority = priv->sh->flow_max_priority - 1;
1407 		return mlx5_os_flow_adjust_priority(dev, priority, subpriority);
1408 	/* FDB root rules */
1409 	} else if (attr->transfer && (!external || !priv->fdb_def_rule) &&
1410 		   attr->group == 0 &&
1411 		   attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR) {
1412 		return (priv->sh->flow_max_priority - 1) * 3;
1413 	}
1414 	if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1415 		priority = MLX5_NON_ROOT_FLOW_MAX_PRIO;
1416 	return priority * 3 + subpriority;
1417 }
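
/*
 * Worked example (illustrative): a non-root rule with attr->priority == 2
 * and subpriority == 1 yields 2 * 3 + 1 == 7; three matcher priorities are
 * reserved per flow priority so that the RSS expansion entries of one flow
 * can be ordered among themselves.
 */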
1418 
1419 /**
1420  * Verify the @p item specifications (spec, last, mask) are compatible with the
1421  * NIC capabilities.
1422  *
1423  * @param[in] item
1424  *   Item specification.
1425  * @param[in] mask
1426  *   @p item->mask or flow default bit-masks.
1427  * @param[in] nic_mask
1428  *   Bit-masks covering supported fields by the NIC to compare with user mask.
1429  * @param[in] size
1430  *   Bit-masks size in bytes.
1431  * @param[in] range_accepted
1432  *   True if range of values is accepted for specific fields, false otherwise.
1433  * @param[out] error
1434  *   Pointer to error structure.
1435  *
1436  * @return
1437  *   0 on success, a negative errno value otherwise and rte_errno is set.
1438  */
1439 int
1440 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
1441 			  const uint8_t *mask,
1442 			  const uint8_t *nic_mask,
1443 			  unsigned int size,
1444 			  bool range_accepted,
1445 			  struct rte_flow_error *error)
1446 {
1447 	unsigned int i;
1448 
1449 	MLX5_ASSERT(nic_mask);
1450 	for (i = 0; i < size; ++i)
1451 		if ((nic_mask[i] | mask[i]) != nic_mask[i])
1452 			return rte_flow_error_set(error, ENOTSUP,
1453 						  RTE_FLOW_ERROR_TYPE_ITEM,
1454 						  item,
1455 						  "mask enables non supported"
1456 						  " bits");
1457 	if (!item->spec && (item->mask || item->last))
1458 		return rte_flow_error_set(error, EINVAL,
1459 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1460 					  "mask/last without a spec is not"
1461 					  " supported");
1462 	if (item->spec && item->last && !range_accepted) {
1463 		uint8_t spec[size];
1464 		uint8_t last[size];
1465 		unsigned int i;
1466 		int ret;
1467 
1468 		for (i = 0; i < size; ++i) {
1469 			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
1470 			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
1471 		}
1472 		ret = memcmp(spec, last, size);
1473 		if (ret != 0)
1474 			return rte_flow_error_set(error, EINVAL,
1475 						  RTE_FLOW_ERROR_TYPE_ITEM,
1476 						  item,
1477 						  "range is not valid");
1478 	}
1479 	return 0;
1480 }
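
/*
 * Example: if nic_mask for a field byte is 0x0f and the user mask requests
 * 0x1f, then (nic_mask | mask) != nic_mask for that byte and validation
 * fails with ENOTSUP ("mask enables non supported bits").
 */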
1481 
1482 /**
1483  * Adjust the hash fields according to the @p flow information.
1484  *
1485  * @param[in] rss_desc
1486  *   Pointer to the RSS descriptor (struct mlx5_flow_rss_desc).
1487  * @param[in] tunnel
1488  *   1 when the hash field is for a tunnel item.
1489  * @param[in] layer_types
1490  *   RTE_ETH_RSS_* types.
1491  * @param[in] hash_fields
1492  *   Item hash fields.
1493  *
1494  * @return
1495  *   The hash fields that should be used.
1496  */
1497 uint64_t
1498 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
1499 			    int tunnel __rte_unused, uint64_t layer_types,
1500 			    uint64_t hash_fields)
1501 {
1502 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1503 	int rss_request_inner = rss_desc->level >= 2;
1504 
1505 	/* Check RSS hash level for tunnel. */
1506 	if (tunnel && rss_request_inner)
1507 		hash_fields |= IBV_RX_HASH_INNER;
1508 	else if (tunnel || rss_request_inner)
1509 		return 0;
1510 #endif
1511 	/* Check if requested layer matches RSS hash fields. */
1512 	if (!(rss_desc->types & layer_types))
1513 		return 0;
1514 	return hash_fields;
1515 }
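
/*
 * Example: with rss_desc->level >= 2 (inner RSS) on a tunnel item, the
 * IBV_RX_HASH_INNER bit is added to the hash fields; an inner-level
 * request on a non-tunnel item (or an outer-level request on a tunnel
 * item) yields 0, i.e. the layer is not hashed.
 */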
1516 
1517 /**
1518  * Look up and set the ptype in the Rx data part. Only a single ptype can be
1519  * used; if several tunnel rules are used on this queue, the tunnel ptype is
1520  * cleared.
1521  *
1522  * @param rxq_ctrl
1523  *   Rx queue to update.
1524  */
1525 static void
1526 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
1527 {
1528 	unsigned int i;
1529 	uint32_t tunnel_ptype = 0;
1530 
1531 	/* Look up for the ptype to use. */
1532 	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
1533 		if (!rxq_ctrl->flow_tunnels_n[i])
1534 			continue;
1535 		if (!tunnel_ptype) {
1536 			tunnel_ptype = tunnels_info[i].ptype;
1537 		} else {
1538 			tunnel_ptype = 0;
1539 			break;
1540 		}
1541 	}
1542 	rxq_ctrl->rxq.tunnel = tunnel_ptype;
1543 }
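
/*
 * Example: a queue referenced only by VXLAN rules reports
 * RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP; once a GRE rule also lands on
 * the same queue, two counters become non-zero and the tunnel ptype is
 * cleared to 0.
 */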
1544 
1545 /**
1546  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
1547  * flow.
1548  *
1549  * @param[in] dev
1550  *   Pointer to the Ethernet device structure.
1551  * @param[in] dev_handle
1552  *   Pointer to device flow handle structure.
1553  */
1554 void
1555 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
1556 		       struct mlx5_flow_handle *dev_handle)
1557 {
1558 	struct mlx5_priv *priv = dev->data->dev_private;
1559 	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1560 	struct mlx5_ind_table_obj *ind_tbl = NULL;
1561 	unsigned int i;
1562 
1563 	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1564 		struct mlx5_hrxq *hrxq;
1565 
1566 		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1567 			      dev_handle->rix_hrxq);
1568 		if (hrxq)
1569 			ind_tbl = hrxq->ind_table;
1570 	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1571 		struct mlx5_shared_action_rss *shared_rss;
1572 
1573 		shared_rss = mlx5_ipool_get
1574 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1575 			 dev_handle->rix_srss);
1576 		if (shared_rss)
1577 			ind_tbl = shared_rss->ind_tbl;
1578 	}
1579 	if (!ind_tbl)
1580 		return;
1581 	for (i = 0; i != ind_tbl->queues_n; ++i) {
1582 		int idx = ind_tbl->queues[i];
1583 		struct mlx5_rxq_ctrl *rxq_ctrl;
1584 
1585 		if (mlx5_is_external_rxq(dev, idx))
1586 			continue;
1587 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
1588 		MLX5_ASSERT(rxq_ctrl != NULL);
1589 		if (rxq_ctrl == NULL)
1590 			continue;
1591 		/*
1592 		 * To support metadata register copy on Tx loopback,
1593 		 * this must always be enabled (metadata may arrive
1594 		 * from another port, not only from local flows).
1595 		 */
1596 		if (tunnel) {
1597 			unsigned int j;
1598 
1599 			/* Increase the counter matching the flow. */
1600 			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1601 				if ((tunnels_info[j].tunnel &
1602 				     dev_handle->layers) ==
1603 				    tunnels_info[j].tunnel) {
1604 					rxq_ctrl->flow_tunnels_n[j]++;
1605 					break;
1606 				}
1607 			}
1608 			flow_rxq_tunnel_ptype_update(rxq_ctrl);
1609 		}
1610 	}
1611 }
1612 
1613 static void
1614 flow_rxq_mark_flag_set(struct rte_eth_dev *dev)
1615 {
1616 	struct mlx5_priv *priv = dev->data->dev_private;
1617 	struct mlx5_rxq_ctrl *rxq_ctrl;
1618 	uint16_t port_id;
1619 
1620 	if (priv->sh->shared_mark_enabled)
1621 		return;
1622 	if (priv->master || priv->representor) {
1623 		MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
1624 			struct mlx5_priv *opriv =
1625 				rte_eth_devices[port_id].data->dev_private;
1626 
1627 			if (!opriv ||
1628 			    opriv->sh != priv->sh ||
1629 			    opriv->domain_id != priv->domain_id ||
1630 			    opriv->mark_enabled)
1631 				continue;
1632 			LIST_FOREACH(rxq_ctrl, &opriv->rxqsctrl, next) {
1633 				rxq_ctrl->rxq.mark = 1;
1634 			}
1635 			opriv->mark_enabled = 1;
1636 		}
1637 	} else {
1638 		LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
1639 			rxq_ctrl->rxq.mark = 1;
1640 		}
1641 		priv->mark_enabled = 1;
1642 	}
1643 	priv->sh->shared_mark_enabled = 1;
1644 }
1645 
1646 /**
1647  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow.
1648  *
1649  * @param[in] dev
1650  *   Pointer to the Ethernet device structure.
1651  * @param[in] flow
1652  *   Pointer to flow structure.
1653  */
1654 static void
1655 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1656 {
1657 	struct mlx5_priv *priv = dev->data->dev_private;
1658 	uint32_t handle_idx;
1659 	struct mlx5_flow_handle *dev_handle;
1660 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
1661 
1662 	MLX5_ASSERT(wks);
1663 	if (wks->mark)
1664 		flow_rxq_mark_flag_set(dev);
1665 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1666 		       handle_idx, dev_handle, next)
1667 		flow_drv_rxq_flags_set(dev, dev_handle);
1668 }
1669 
1670 /**
1671  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1672  * device flow if no other flow uses it with the same kind of request.
1673  *
1674  * @param dev
1675  *   Pointer to Ethernet device.
1676  * @param[in] dev_handle
1677  *   Pointer to the device flow handle structure.
1678  */
1679 static void
1680 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
1681 			struct mlx5_flow_handle *dev_handle)
1682 {
1683 	struct mlx5_priv *priv = dev->data->dev_private;
1684 	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1685 	struct mlx5_ind_table_obj *ind_tbl = NULL;
1686 	unsigned int i;
1687 
1688 	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1689 		struct mlx5_hrxq *hrxq;
1690 
1691 		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1692 			      dev_handle->rix_hrxq);
1693 		if (hrxq)
1694 			ind_tbl = hrxq->ind_table;
1695 	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1696 		struct mlx5_shared_action_rss *shared_rss;
1697 
1698 		shared_rss = mlx5_ipool_get
1699 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1700 			 dev_handle->rix_srss);
1701 		if (shared_rss)
1702 			ind_tbl = shared_rss->ind_tbl;
1703 	}
1704 	if (!ind_tbl)
1705 		return;
1706 	MLX5_ASSERT(dev->data->dev_started);
1707 	for (i = 0; i != ind_tbl->queues_n; ++i) {
1708 		int idx = ind_tbl->queues[i];
1709 		struct mlx5_rxq_ctrl *rxq_ctrl;
1710 
1711 		if (mlx5_is_external_rxq(dev, idx))
1712 			continue;
1713 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
1714 		MLX5_ASSERT(rxq_ctrl != NULL);
1715 		if (rxq_ctrl == NULL)
1716 			continue;
1717 		if (tunnel) {
1718 			unsigned int j;
1719 
1720 			/* Decrease the counter matching the flow. */
1721 			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1722 				if ((tunnels_info[j].tunnel &
1723 				     dev_handle->layers) ==
1724 				    tunnels_info[j].tunnel) {
1725 					rxq_ctrl->flow_tunnels_n[j]--;
1726 					break;
1727 				}
1728 			}
1729 			flow_rxq_tunnel_ptype_update(rxq_ctrl);
1730 		}
1731 	}
1732 }
1733 
1734 /**
1735  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1736  * @p flow if no other flow uses it with the same kind of request.
1737  *
1738  * @param dev
1739  *   Pointer to Ethernet device.
1740  * @param[in] flow
1741  *   Pointer to the flow.
1742  */
1743 static void
1744 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1745 {
1746 	struct mlx5_priv *priv = dev->data->dev_private;
1747 	uint32_t handle_idx;
1748 	struct mlx5_flow_handle *dev_handle;
1749 
1750 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1751 		       handle_idx, dev_handle, next)
1752 		flow_drv_rxq_flags_trim(dev, dev_handle);
1753 }
1754 
1755 /**
1756  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
1757  *
1758  * @param dev
1759  *   Pointer to Ethernet device.
1760  */
1761 static void
1762 flow_rxq_flags_clear(struct rte_eth_dev *dev)
1763 {
1764 	struct mlx5_priv *priv = dev->data->dev_private;
1765 	unsigned int i;
1766 
1767 	for (i = 0; i != priv->rxqs_n; ++i) {
1768 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1769 		unsigned int j;
1770 
1771 		if (rxq == NULL || rxq->ctrl == NULL)
1772 			continue;
1773 		rxq->ctrl->rxq.mark = 0;
1774 		for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
1775 			rxq->ctrl->flow_tunnels_n[j] = 0;
1776 		rxq->ctrl->rxq.tunnel = 0;
1777 	}
1778 	priv->mark_enabled = 0;
1779 	priv->sh->shared_mark_enabled = 0;
1780 }
1781 
1782 /**
1783  * Set the Rx queue dynamic metadata (mask and offset) for a flow
1784  *
1785  * @param[in] dev
1786  *   Pointer to the Ethernet device structure.
1787  */
1788 void
1789 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
1790 {
1791 	struct mlx5_priv *priv = dev->data->dev_private;
1792 	unsigned int i;
1793 
1794 	for (i = 0; i != priv->rxqs_n; ++i) {
1795 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1796 		struct mlx5_rxq_data *data;
1797 
1798 		if (rxq == NULL || rxq->ctrl == NULL)
1799 			continue;
1800 		data = &rxq->ctrl->rxq;
1801 		if (!rte_flow_dynf_metadata_avail()) {
1802 			data->dynf_meta = 0;
1803 			data->flow_meta_mask = 0;
1804 			data->flow_meta_offset = -1;
1805 			data->flow_meta_port_mask = 0;
1806 		} else {
1807 			data->dynf_meta = 1;
1808 			data->flow_meta_mask = rte_flow_dynf_metadata_mask;
1809 			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
1810 			data->flow_meta_port_mask = priv->sh->dv_meta_mask;
1811 		}
1812 	}
1813 }
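
/*
 * Usage sketch (illustrative, not compiled as part of this file): the
 * dynamic metadata field checked above is available only after the
 * application registers it, typically before device start:
 *
 *	if (rte_flow_dynf_metadata_register() < 0)
 *		rte_exit(EXIT_FAILURE, "cannot register metadata field");
 *
 * Once the port is (re)started, this function turns dynf_meta on for
 * every configured Rx queue.
 */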
1814 
1815 /**
1816  * Return a pointer to the desired action in the list of actions.
1817  *
1818  * @param[in] actions
1819  *   The list of actions to search the action in.
1820  * @param[in] action
1821  *   The action to find.
1822  *
1823  * @return
1824  *   Pointer to the action in the list, if found. NULL otherwise.
1825  */
1826 const struct rte_flow_action *
1827 mlx5_flow_find_action(const struct rte_flow_action *actions,
1828 		      enum rte_flow_action_type action)
1829 {
1830 	if (actions == NULL)
1831 		return NULL;
1832 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
1833 		if (actions->type == action)
1834 			return actions;
1835 	return NULL;
1836 }
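
/*
 * Usage sketch (illustrative): scanning an END-terminated action array
 * for an RSS action; "actions" stands for any caller-provided list.
 *
 *	const struct rte_flow_action *act =
 *		mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
 *	const struct rte_flow_action_rss *rss =
 *		act ? (const struct rte_flow_action_rss *)act->conf : NULL;
 */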
1837 
1838 /**
1839  * Validate the flag action.
1840  *
1841  * @param[in] action_flags
1842  *   Bit-fields that holds the actions detected until now.
1843  * @param[in] attr
1844  *   Attributes of flow that includes this action.
1845  * @param[out] error
1846  *   Pointer to error structure.
1847  *
1848  * @return
1849  *   0 on success, a negative errno value otherwise and rte_errno is set.
1850  */
1851 int
1852 mlx5_flow_validate_action_flag(uint64_t action_flags,
1853 			       const struct rte_flow_attr *attr,
1854 			       struct rte_flow_error *error)
1855 {
1856 	if (action_flags & MLX5_FLOW_ACTION_MARK)
1857 		return rte_flow_error_set(error, EINVAL,
1858 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1859 					  "can't mark and flag in same flow");
1860 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
1861 		return rte_flow_error_set(error, EINVAL,
1862 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1863 					  "can't have 2 flag"
1864 					  " actions in same flow");
1865 	if (attr->egress)
1866 		return rte_flow_error_set(error, ENOTSUP,
1867 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1868 					  "flag action not supported for "
1869 					  "egress");
1870 	return 0;
1871 }
1872 
1873 /**
1874  * Validate the mark action.
1875  *
1876  * @param[in] action
1877  *   Pointer to the mark action.
1878  * @param[in] action_flags
1879  *   Bit-fields that holds the actions detected until now.
1880  * @param[in] attr
1881  *   Attributes of flow that includes this action.
1882  * @param[out] error
1883  *   Pointer to error structure.
1884  *
1885  * @return
1886  *   0 on success, a negative errno value otherwise and rte_errno is set.
1887  */
1888 int
1889 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1890 			       uint64_t action_flags,
1891 			       const struct rte_flow_attr *attr,
1892 			       struct rte_flow_error *error)
1893 {
1894 	const struct rte_flow_action_mark *mark = action->conf;
1895 
1896 	if (!mark)
1897 		return rte_flow_error_set(error, EINVAL,
1898 					  RTE_FLOW_ERROR_TYPE_ACTION,
1899 					  action,
1900 					  "configuration cannot be null");
1901 	if (mark->id >= MLX5_FLOW_MARK_MAX)
1902 		return rte_flow_error_set(error, EINVAL,
1903 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1904 					  &mark->id,
1905 					  "mark id must in 0 <= id < "
1906 					  RTE_STR(MLX5_FLOW_MARK_MAX));
1907 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
1908 		return rte_flow_error_set(error, EINVAL,
1909 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1910 					  "can't flag and mark in same flow");
1911 	if (action_flags & MLX5_FLOW_ACTION_MARK)
1912 		return rte_flow_error_set(error, EINVAL,
1913 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1914 					  "can't have 2 mark actions in same"
1915 					  " flow");
1916 	if (attr->egress)
1917 		return rte_flow_error_set(error, ENOTSUP,
1918 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1919 					  "mark action not supported for "
1920 					  "egress");
1921 	return 0;
1922 }
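
/*
 * Illustrative mark action accepted by the validation above (assuming
 * an ingress attribute and no prior flag/mark action in the rule):
 *
 *	struct rte_flow_action_mark mark_conf = { .id = 0xCAFE };
 *	struct rte_flow_action mark_act = {
 *		.type = RTE_FLOW_ACTION_TYPE_MARK,
 *		.conf = &mark_conf,
 *	};
 */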
1923 
1924 /**
1925  * Validate the drop action.
1926  *
1927  * @param[in] action_flags
1928  *   Bit-fields that holds the actions detected until now.
1929  * @param[in] attr
1930  *   Attributes of flow that includes this action.
1931  * @param[out] error
1932  *   Pointer to error structure.
1933  *
1934  * @return
1935  *   0 on success, a negative errno value otherwise and rte_errno is set.
1936  */
1937 int
1938 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1939 			       const struct rte_flow_attr *attr,
1940 			       struct rte_flow_error *error)
1941 {
1942 	if (attr->egress)
1943 		return rte_flow_error_set(error, ENOTSUP,
1944 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1945 					  "drop action not supported for "
1946 					  "egress");
1947 	return 0;
1948 }
1949 
1950 /**
1951  * Validate the queue action.
1952  *
1953  * @param[in] action
1954  *   Pointer to the queue action.
1955  * @param[in] action_flags
1956  *   Bit-fields that holds the actions detected until now.
1957  * @param[in] dev
1958  *   Pointer to the Ethernet device structure.
1959  * @param[in] attr
1960  *   Attributes of flow that includes this action.
1961  * @param[out] error
1962  *   Pointer to error structure.
1963  *
1964  * @return
1965  *   0 on success, a negative errno value otherwise and rte_errno is set.
1966  */
1967 int
1968 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1969 				uint64_t action_flags,
1970 				struct rte_eth_dev *dev,
1971 				const struct rte_flow_attr *attr,
1972 				struct rte_flow_error *error)
1973 {
1974 	struct mlx5_priv *priv = dev->data->dev_private;
1975 	const struct rte_flow_action_queue *queue = action->conf;
1976 
1977 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1978 		return rte_flow_error_set(error, EINVAL,
1979 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1980 					  "can't have 2 fate actions in"
1981 					  " same flow");
1982 	if (attr->egress)
1983 		return rte_flow_error_set(error, ENOTSUP,
1984 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1985 					  "queue action not supported for egress.");
1986 	if (mlx5_is_external_rxq(dev, queue->index))
1987 		return 0;
1988 	if (!priv->rxqs_n)
1989 		return rte_flow_error_set(error, EINVAL,
1990 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1991 					  NULL, "No Rx queues configured");
1992 	if (queue->index >= priv->rxqs_n)
1993 		return rte_flow_error_set(error, EINVAL,
1994 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1995 					  &queue->index,
1996 					  "queue index out of range");
1997 	if (mlx5_rxq_get(dev, queue->index) == NULL)
1998 		return rte_flow_error_set(error, EINVAL,
1999 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2000 					  &queue->index,
2001 					  "queue is not configured");
2002 	return 0;
2003 }
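
/*
 * Illustrative queue action passing the checks above (hypothetical
 * index 0, which must refer to a configured Rx queue on an ingress
 * rule):
 *
 *	struct rte_flow_action_queue q_conf = { .index = 0 };
 *	struct rte_flow_action q_act = {
 *		.type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *		.conf = &q_conf,
 *	};
 */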
2004 
2005 /**
2006  * Validate queue numbers for device RSS.
2007  *
2008  * @param[in] dev
2009  *   Configured device.
2010  * @param[in] queues
2011  *   Array of queue numbers.
2012  * @param[in] queues_n
2013  *   Size of the @p queues array.
2014  * @param[out] error
2015  *   On error, filled with a textual error description.
2016  * @param[out] queue_idx
2017  *   On error, filled with an offending queue index in @p queues array.
2018  *
2019  * @return
2020  *   0 on success, a negative errno code on error.
2021  */
2022 static int
2023 mlx5_validate_rss_queues(struct rte_eth_dev *dev,
2024 			 const uint16_t *queues, uint32_t queues_n,
2025 			 const char **error, uint32_t *queue_idx)
2026 {
2027 	const struct mlx5_priv *priv = dev->data->dev_private;
2028 	bool is_hairpin = false;
2029 	bool is_ext_rss = false;
2030 	uint32_t i;
2031 
2032 	for (i = 0; i != queues_n; ++i) {
2033 		struct mlx5_rxq_ctrl *rxq_ctrl;
2034 
2035 		if (mlx5_is_external_rxq(dev, queues[i])) {
2036 			is_ext_rss = true;
2037 			continue;
2038 		}
2039 		if (is_ext_rss) {
2040 			*error = "combining external and regular RSS queues is not supported";
2041 			*queue_idx = i;
2042 			return -ENOTSUP;
2043 		}
2044 		if (queues[i] >= priv->rxqs_n) {
2045 			*error = "queue index out of range";
2046 			*queue_idx = i;
2047 			return -EINVAL;
2048 		}
2049 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, queues[i]);
2050 		if (rxq_ctrl == NULL) {
2051 			*error = "queue is not configured";
2052 			*queue_idx = i;
2053 			return -EINVAL;
2054 		}
2055 		if (i == 0 && rxq_ctrl->is_hairpin)
2056 			is_hairpin = true;
2057 		if (is_hairpin != rxq_ctrl->is_hairpin) {
2058 			*error = "combining hairpin and regular RSS queues is not supported";
2059 			*queue_idx = i;
2060 			return -ENOTSUP;
2061 		}
2062 	}
2063 	return 0;
2064 }
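
/*
 * Illustrative queue sets for the checks above (hypothetical indexes,
 * "hpN" hairpin and "extN" external queues):
 *	{ 0, 1, 2, 3 }   - accepted, regular queues only;
 *	{ hp0, hp1 }     - accepted, hairpin queues only;
 *	{ 0, hp0 }       - rejected, hairpin mixed with regular;
 *	{ ext0, 1 }      - rejected, external mixed with regular.
 */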
2065 
2066 /**
2067  * Validate the RSS action.
2068  *
2069  * @param[in] dev
2070  *   Pointer to the Ethernet device structure.
2071  * @param[in] action
2072  *   Pointer to the RSS action.
2073  * @param[out] error
2074  *   Pointer to error structure.
2075  *
2076  * @return
2077  *   0 on success, a negative errno value otherwise and rte_errno is set.
2078  */
2079 int
2080 mlx5_validate_action_rss(struct rte_eth_dev *dev,
2081 			 const struct rte_flow_action *action,
2082 			 struct rte_flow_error *error)
2083 {
2084 	struct mlx5_priv *priv = dev->data->dev_private;
2085 	const struct rte_flow_action_rss *rss = action->conf;
2086 	int ret;
2087 	const char *message;
2088 	uint32_t queue_idx;
2089 
2090 	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
2091 	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
2092 		return rte_flow_error_set(error, ENOTSUP,
2093 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2094 					  &rss->func,
2095 					  "RSS hash function not supported");
2096 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
2097 	if (rss->level > 2)
2098 #else
2099 	if (rss->level > 1)
2100 #endif
2101 		return rte_flow_error_set(error, ENOTSUP,
2102 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2103 					  &rss->level,
2104 					  "tunnel RSS is not supported");
2105 	/* Allow RSS key_len 0 in case of NULL (default) RSS key. */
2106 	if (rss->key_len == 0 && rss->key != NULL)
2107 		return rte_flow_error_set(error, ENOTSUP,
2108 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2109 					  &rss->key_len,
2110 					  "RSS hash key length 0");
2111 	if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
2112 		return rte_flow_error_set(error, ENOTSUP,
2113 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2114 					  &rss->key_len,
2115 					  "RSS hash key too small");
2116 	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
2117 		return rte_flow_error_set(error, ENOTSUP,
2118 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2119 					  &rss->key_len,
2120 					  "RSS hash key too large");
2121 	if (rss->queue_num > priv->sh->dev_cap.ind_table_max_size)
2122 		return rte_flow_error_set(error, ENOTSUP,
2123 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2124 					  &rss->queue_num,
2125 					  "number of queues too large");
2126 	if (rss->types & MLX5_RSS_HF_MASK)
2127 		return rte_flow_error_set(error, ENOTSUP,
2128 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2129 					  &rss->types,
2130 					  "some RSS protocols are not"
2131 					  " supported");
2132 	if ((rss->types & (RTE_ETH_RSS_L3_SRC_ONLY | RTE_ETH_RSS_L3_DST_ONLY)) &&
2133 	    !(rss->types & RTE_ETH_RSS_IP))
2134 		return rte_flow_error_set(error, EINVAL,
2135 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2136 					  "L3 partial RSS requested but L3 RSS"
2137 					  " type not specified");
2138 	if ((rss->types & (RTE_ETH_RSS_L4_SRC_ONLY | RTE_ETH_RSS_L4_DST_ONLY)) &&
2139 	    !(rss->types & (RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP)))
2140 		return rte_flow_error_set(error, EINVAL,
2141 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2142 					  "L4 partial RSS requested but L4 RSS"
2143 					  " type not specified");
2144 	if (!priv->rxqs_n && priv->ext_rxqs == NULL)
2145 		return rte_flow_error_set(error, EINVAL,
2146 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2147 					  NULL, "No Rx queues configured");
2148 	if (!rss->queue_num)
2149 		return rte_flow_error_set(error, EINVAL,
2150 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2151 					  NULL, "No queues configured");
2152 	ret = mlx5_validate_rss_queues(dev, rss->queue, rss->queue_num,
2153 				       &message, &queue_idx);
2154 	if (ret != 0) {
2155 		return rte_flow_error_set(error, -ret,
2156 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2157 					  &rss->queue[queue_idx], message);
2158 	}
2159 	return 0;
2160 }
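
/*
 * Illustrative RSS action configuration satisfying the constraints
 * above (40-byte Toeplitz key, level <= 1, matching L3/L4 types;
 * "rss_key" contents are a placeholder):
 *
 *	static uint8_t rss_key[MLX5_RSS_HASH_KEY_LEN];
 *	uint16_t rss_queues[] = { 0, 1 };
 *	struct rte_flow_action_rss rss_conf = {
 *		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *		.level = 1,
 *		.types = RTE_ETH_RSS_IP | RTE_ETH_RSS_TCP,
 *		.key_len = MLX5_RSS_HASH_KEY_LEN,
 *		.key = rss_key,
 *		.queue_num = RTE_DIM(rss_queues),
 *		.queue = rss_queues,
 *	};
 */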
2161 
2162 /**
2163  * Validate the RSS action.
2164  *
2165  * @param[in] action
2166  *   Pointer to the RSS action.
2167  * @param[in] action_flags
2168  *   Bit-fields that holds the actions detected until now.
2169  * @param[in] dev
2170  *   Pointer to the Ethernet device structure.
2171  * @param[in] attr
2172  *   Attributes of flow that includes this action.
2173  * @param[in] item_flags
2174  *   Items that were detected.
2175  * @param[out] error
2176  *   Pointer to error structure.
2177  *
2178  * @return
2179  *   0 on success, a negative errno value otherwise and rte_errno is set.
2180  */
2181 int
2182 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
2183 			      uint64_t action_flags,
2184 			      struct rte_eth_dev *dev,
2185 			      const struct rte_flow_attr *attr,
2186 			      uint64_t item_flags,
2187 			      struct rte_flow_error *error)
2188 {
2189 	const struct rte_flow_action_rss *rss = action->conf;
2190 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2191 	int ret;
2192 
2193 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2194 		return rte_flow_error_set(error, EINVAL,
2195 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2196 					  "can't have 2 fate actions"
2197 					  " in same flow");
2198 	ret = mlx5_validate_action_rss(dev, action, error);
2199 	if (ret)
2200 		return ret;
2201 	if (attr->egress)
2202 		return rte_flow_error_set(error, ENOTSUP,
2203 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2204 					  "rss action not supported for "
2205 					  "egress");
2206 	if (rss->level > 1 && !tunnel)
2207 		return rte_flow_error_set(error, EINVAL,
2208 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2209 					  "inner RSS is not supported for "
2210 					  "non-tunnel flows");
2211 	if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
2212 	    !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
2213 		return rte_flow_error_set(error, EINVAL,
2214 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2215 					  "RSS on eCPRI is not supported now");
2216 	}
2217 	if ((item_flags & MLX5_FLOW_LAYER_MPLS) &&
2218 	    !(item_flags &
2219 	      (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3)) &&
2220 	    rss->level > 1)
2221 		return rte_flow_error_set(error, EINVAL,
2222 					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
2223 					  "MPLS inner RSS needs to specify inner L2/L3 items after MPLS in pattern");
2224 	return 0;
2225 }
2226 
2227 /**
2228  * Validate the default miss action.
2229  *
2230  * @param[in] action_flags
2231  *   Bit-fields that holds the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
2232  * @param[out] error
2233  *   Pointer to error structure.
2234  *
2235  * @return
2236  *   0 on success, a negative errno value otherwise and rte_errno is set.
2237  */
2238 int
2239 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
2240 				const struct rte_flow_attr *attr,
2241 				struct rte_flow_error *error)
2242 {
2243 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2244 		return rte_flow_error_set(error, EINVAL,
2245 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2246 					  "can't have 2 fate actions in"
2247 					  " same flow");
2248 	if (attr->egress)
2249 		return rte_flow_error_set(error, ENOTSUP,
2250 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2251 					  "default miss action not supported "
2252 					  "for egress");
2253 	if (attr->group)
2254 		return rte_flow_error_set(error, ENOTSUP,
2255 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
2256 					  "only group 0 is supported");
2257 	if (attr->transfer)
2258 		return rte_flow_error_set(error, ENOTSUP,
2259 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2260 					  NULL, "transfer is not supported");
2261 	return 0;
2262 }
2263 
2264 /**
2265  * Validate the count action.
2266  *
2267  * @param[in] dev
2268  *   Pointer to the Ethernet device structure.
2269  * @param[in] attr
2270  *   Attributes of flow that includes this action.
2271  * @param[out] error
2272  *   Pointer to error structure.
2273  *
2274  * @return
2275  *   0 on success, a negative errno value otherwise and rte_errno is set.
2276  */
2277 int
2278 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
2279 				const struct rte_flow_attr *attr,
2280 				struct rte_flow_error *error)
2281 {
2282 	if (attr->egress)
2283 		return rte_flow_error_set(error, ENOTSUP,
2284 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2285 					  "count action not supported for "
2286 					  "egress");
2287 	return 0;
2288 }
2289 
2290 /**
2291  * Validate the ASO CT action.
2292  *
2293  * @param[in] dev
2294  *   Pointer to the Ethernet device structure.
2295  * @param[in] conntrack
2296  *   Pointer to the CT action profile.
2297  * @param[out] error
2298  *   Pointer to error structure.
2299  *
2300  * @return
2301  *   0 on success, a negative errno value otherwise and rte_errno is set.
2302  */
2303 int
2304 mlx5_validate_action_ct(struct rte_eth_dev *dev,
2305 			const struct rte_flow_action_conntrack *conntrack,
2306 			struct rte_flow_error *error)
2307 {
2308 	RTE_SET_USED(dev);
2309 
2310 	if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
2311 		return rte_flow_error_set(error, EINVAL,
2312 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2313 					  "Invalid CT state");
2314 	if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
2315 		return rte_flow_error_set(error, EINVAL,
2316 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2317 					  "Invalid last TCP packet flag");
2318 	return 0;
2319 }
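
/*
 * Illustrative CT action profile within the bounds checked above
 * (state and last TCP flag both inside the valid enum ranges):
 *
 *	struct rte_flow_action_conntrack ct_conf = {
 *		.state = RTE_FLOW_CONNTRACK_STATE_ESTABLISHED,
 *		.last_index = RTE_FLOW_CONNTRACK_FLAG_ACK,
 *	};
 */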
2320 
2321 /**
2322  * Validate the level value for modify field action.
2323  *
2324  * @param[in] data
2325  *   Pointer to the rte_flow_action_modify_data structure, either src or dst.
2326  * @param[out] error
2327  *   Pointer to error structure.
2328  *
2329  * @return
2330  *   0 on success, a negative errno value otherwise and rte_errno is set.
2331  */
2332 int
2333 flow_validate_modify_field_level(const struct rte_flow_action_modify_data *data,
2334 				 struct rte_flow_error *error)
2335 {
2336 	if (data->level == 0)
2337 		return 0;
2338 	if (data->field != RTE_FLOW_FIELD_TAG)
2339 		return rte_flow_error_set(error, ENOTSUP,
2340 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2341 					  "inner header fields modification is not supported");
2342 	if (data->tag_index != 0)
2343 		return rte_flow_error_set(error, EINVAL,
2344 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2345 					  "tag array can be provided using 'level' or 'tag_index' fields, not both");
2346 	/*
2347 	 * The tag array for RTE_FLOW_FIELD_TAG type is provided using
2348 	 * 'tag_index' field. In old API, it was provided using 'level' field
2349 	 * and it is still supported for backwards compatibility.
2350 	 */
2351 	DRV_LOG(WARNING, "tag array provided in 'level' field instead of 'tag_index' field.");
2352 	return 0;
2353 }
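
/*
 * Illustrative equivalents for the compatibility path above: both
 * select tag array index 3 for RTE_FLOW_FIELD_TAG, the first via the
 * current 'tag_index' field, the second via the legacy 'level' field
 * (accepted with a warning):
 *
 *	struct rte_flow_action_modify_data d_new = {
 *		.field = RTE_FLOW_FIELD_TAG,
 *		.tag_index = 3,
 *	};
 *	struct rte_flow_action_modify_data d_old = {
 *		.field = RTE_FLOW_FIELD_TAG,
 *		.level = 3,
 *	};
 */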
2354 
2355 /**
2356  * Validate ICMP6 item.
2357  *
2358  * @param[in] item
2359  *   Item specification.
2360  * @param[in] item_flags
2361  *   Bit-fields that holds the items detected until now.
2362  * @param[in] target_protocol
2363  *   The next protocol in the previous item.
2364  * @param[out] error
2365  *   Pointer to error structure.
2366  *
2367  * @return
2368  *   0 on success, a negative errno value otherwise and rte_errno is set.
2369  */
2370 int
2371 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
2372 			       uint64_t item_flags,
2373 			       uint8_t target_protocol,
2374 			       struct rte_flow_error *error)
2375 {
2376 	const struct rte_flow_item_icmp6 *mask = item->mask;
2377 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2378 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
2379 				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2380 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2381 				      MLX5_FLOW_LAYER_OUTER_L4;
2382 	int ret;
2383 
2384 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
2385 		return rte_flow_error_set(error, EINVAL,
2386 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2387 					  "protocol filtering not compatible"
2388 					  " with ICMP6 layer");
2389 	if (!(item_flags & l3m))
2390 		return rte_flow_error_set(error, EINVAL,
2391 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2392 					  "IPv6 is mandatory to filter on"
2393 					  " ICMP6");
2394 	if (item_flags & l4m)
2395 		return rte_flow_error_set(error, EINVAL,
2396 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2397 					  "multiple L4 layers not supported");
2398 	if (!mask)
2399 		mask = &rte_flow_item_icmp6_mask;
2400 	ret = mlx5_flow_item_acceptable
2401 		(item, (const uint8_t *)mask,
2402 		 (const uint8_t *)&rte_flow_item_icmp6_mask,
2403 		 sizeof(struct rte_flow_item_icmp6),
2404 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2405 	if (ret < 0)
2406 		return ret;
2407 	return 0;
2408 }
2409 
2410 /**
2411  * Validate ICMP6 echo request/reply item.
2412  *
2413  * @param[in] item
2414  *   Item specification.
2415  * @param[in] item_flags
2416  *   Bit-fields that holds the items detected until now.
2417  * @param[in] target_protocol
2418  *   The next protocol in the previous item.
2419  * @param[out] error
2420  *   Pointer to error structure.
2421  *
2422  * @return
2423  *   0 on success, a negative errno value otherwise and rte_errno is set.
2424  */
2425 int
2426 mlx5_flow_validate_item_icmp6_echo(const struct rte_flow_item *item,
2427 				   uint64_t item_flags,
2428 				   uint8_t target_protocol,
2429 				   struct rte_flow_error *error)
2430 {
2431 	const struct rte_flow_item_icmp6_echo *mask = item->mask;
2432 	const struct rte_flow_item_icmp6_echo nic_mask = {
2433 		.hdr.base.type = 0xff,
2434 		.hdr.base.code = 0xff,
2435 		.hdr.identifier = RTE_BE16(0xffff),
2436 		.hdr.sequence = RTE_BE16(0xffff),
2437 	};
2438 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2439 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
2440 				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2441 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2442 				      MLX5_FLOW_LAYER_OUTER_L4;
2443 	int ret;
2444 
2445 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
2446 		return rte_flow_error_set(error, EINVAL,
2447 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2448 					  "protocol filtering not compatible"
2449 					  " with ICMP6 layer");
2450 	if (!(item_flags & l3m))
2451 		return rte_flow_error_set(error, EINVAL,
2452 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2453 					  "IPv6 is mandatory to filter on"
2454 					  " ICMP6");
2455 	if (item_flags & l4m)
2456 		return rte_flow_error_set(error, EINVAL,
2457 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2458 					  "multiple L4 layers not supported");
2459 	if (!mask)
2460 		mask = &nic_mask;
2461 	ret = mlx5_flow_item_acceptable
2462 		(item, (const uint8_t *)mask,
2463 		 (const uint8_t *)&nic_mask,
2464 		 sizeof(struct rte_flow_item_icmp6_echo),
2465 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2466 	if (ret < 0)
2467 		return ret;
2468 	return 0;
2469 }
2470 
2471 /**
2472  * Validate ICMP item.
2473  *
2474  * @param[in] item
2475  *   Item specification.
2476  * @param[in] item_flags
2477  *   Bit-fields that holds the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
2478  * @param[out] error
2479  *   Pointer to error structure.
2480  *
2481  * @return
2482  *   0 on success, a negative errno value otherwise and rte_errno is set.
2483  */
2484 int
2485 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
2486 			     uint64_t item_flags,
2487 			     uint8_t target_protocol,
2488 			     struct rte_flow_error *error)
2489 {
2490 	const struct rte_flow_item_icmp *mask = item->mask;
2491 	const struct rte_flow_item_icmp nic_mask = {
2492 		.hdr.icmp_type = 0xff,
2493 		.hdr.icmp_code = 0xff,
2494 		.hdr.icmp_ident = RTE_BE16(0xffff),
2495 		.hdr.icmp_seq_nb = RTE_BE16(0xffff),
2496 	};
2497 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2498 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
2499 				      MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2500 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2501 				      MLX5_FLOW_LAYER_OUTER_L4;
2502 	int ret;
2503 
2504 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
2505 		return rte_flow_error_set(error, EINVAL,
2506 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2507 					  "protocol filtering not compatible"
2508 					  " with ICMP layer");
2509 	if (!(item_flags & l3m))
2510 		return rte_flow_error_set(error, EINVAL,
2511 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2512 					  "IPv4 is mandatory to filter"
2513 					  " on ICMP");
2514 	if (item_flags & l4m)
2515 		return rte_flow_error_set(error, EINVAL,
2516 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2517 					  "multiple L4 layers not supported");
2518 	if (!mask)
2519 		mask = &nic_mask;
2520 	ret = mlx5_flow_item_acceptable
2521 		(item, (const uint8_t *)mask,
2522 		 (const uint8_t *)&nic_mask,
2523 		 sizeof(struct rte_flow_item_icmp),
2524 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2525 	if (ret < 0)
2526 		return ret;
2527 	return 0;
2528 }
2529 
2530 /**
2531  * Validate Ethernet item.
2532  *
2533  * @param[in] item
2534  *   Item specification.
2535  * @param[in] item_flags
2536  *   Bit-fields that holds the items detected until now.
 * @param[in] ext_vlan_sup
 *   Whether extended VLAN features are supported or not.
2537  * @param[out] error
2538  *   Pointer to error structure.
2539  *
2540  * @return
2541  *   0 on success, a negative errno value otherwise and rte_errno is set.
2542  */
2543 int
2544 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
2545 			    uint64_t item_flags, bool ext_vlan_sup,
2546 			    struct rte_flow_error *error)
2547 {
2548 	const struct rte_flow_item_eth *mask = item->mask;
2549 	const struct rte_flow_item_eth nic_mask = {
2550 		.hdr.dst_addr.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2551 		.hdr.src_addr.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2552 		.hdr.ether_type = RTE_BE16(0xffff),
2553 		.has_vlan = ext_vlan_sup ? 1 : 0,
2554 	};
2555 	int ret;
2556 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2557 	const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2	:
2558 				       MLX5_FLOW_LAYER_OUTER_L2;
2559 
2560 	if (item_flags & ethm)
2561 		return rte_flow_error_set(error, ENOTSUP,
2562 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2563 					  "multiple L2 layers not supported");
2564 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
2565 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
2566 		return rte_flow_error_set(error, EINVAL,
2567 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2568 					  "L2 layer should not follow "
2569 					  "L3 layers");
2570 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
2571 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
2572 		return rte_flow_error_set(error, EINVAL,
2573 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2574 					  "L2 layer should not follow VLAN");
2575 	if (item_flags & MLX5_FLOW_LAYER_GTP)
2576 		return rte_flow_error_set(error, EINVAL,
2577 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2578 					  "L2 layer should not follow GTP");
2579 	if (!mask)
2580 		mask = &rte_flow_item_eth_mask;
2581 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2582 					(const uint8_t *)&nic_mask,
2583 					sizeof(struct rte_flow_item_eth),
2584 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2585 	return ret;
2586 }
2587 
2588 /**
2589  * Validate VLAN item.
2590  *
2591  * @param[in] item
2592  *   Item specification.
2593  * @param[in] item_flags
2594  *   Bit-fields that holds the items detected until now.
2595  * @param[in] dev
2596  *   Ethernet device flow is being created on.
2597  * @param[out] error
2598  *   Pointer to error structure.
2599  *
2600  * @return
2601  *   0 on success, a negative errno value otherwise and rte_errno is set.
2602  */
2603 int
2604 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
2605 			     uint64_t item_flags,
2606 			     struct rte_eth_dev *dev,
2607 			     struct rte_flow_error *error)
2608 {
2609 	const struct rte_flow_item_vlan *spec = item->spec;
2610 	const struct rte_flow_item_vlan *mask = item->mask;
2611 	const struct rte_flow_item_vlan nic_mask = {
2612 		.hdr.vlan_tci = RTE_BE16(UINT16_MAX),
2613 		.hdr.eth_proto = RTE_BE16(UINT16_MAX),
2614 	};
2615 	uint16_t vlan_tag = 0;
2616 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2617 	int ret;
2618 	const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
2619 					MLX5_FLOW_LAYER_INNER_L4) :
2620 				       (MLX5_FLOW_LAYER_OUTER_L3 |
2621 					MLX5_FLOW_LAYER_OUTER_L4);
2622 	const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
2623 					MLX5_FLOW_LAYER_OUTER_VLAN;
2624 
2625 	if (item_flags & vlanm)
2626 		return rte_flow_error_set(error, EINVAL,
2627 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2628 					  "multiple VLAN layers not supported");
2629 	else if ((item_flags & l34m) != 0)
2630 		return rte_flow_error_set(error, EINVAL,
2631 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2632 					  "VLAN cannot follow L3/L4 layer");
2633 	if (!mask)
2634 		mask = &rte_flow_item_vlan_mask;
2635 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2636 					(const uint8_t *)&nic_mask,
2637 					sizeof(struct rte_flow_item_vlan),
2638 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2639 	if (ret)
2640 		return ret;
2641 	if (!tunnel && mask->hdr.vlan_tci != RTE_BE16(0x0fff)) {
2642 		struct mlx5_priv *priv = dev->data->dev_private;
2643 
2644 		if (priv->vmwa_context) {
2645 			/*
2646 			 * A non-NULL context means we have a virtual machine
2647 			 * with SR-IOV enabled, so we have to create a VLAN
2648 			 * interface to make the hypervisor set up the E-Switch
2649 			 * vport context correctly. We avoid creating multiple
2650 			 * VLAN interfaces, so we cannot support a VLAN tag mask.
2651 			 */
2652 			return rte_flow_error_set(error, EINVAL,
2653 						  RTE_FLOW_ERROR_TYPE_ITEM,
2654 						  item,
2655 						  "VLAN tag mask is not"
2656 						  " supported in virtual"
2657 						  " environment");
2658 		}
2659 	}
2660 	if (spec) {
2661 		vlan_tag = spec->hdr.vlan_tci;
2662 		vlan_tag &= mask->hdr.vlan_tci;
2663 	}
2664 	/*
2665 	 * From verbs perspective an empty VLAN is equivalent
2666 	 * to a packet without VLAN layer.
2667 	 */
2668 	if (!vlan_tag)
2669 		return rte_flow_error_set(error, EINVAL,
2670 					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2671 					  item->spec,
2672 					  "VLAN cannot be empty");
2673 	return 0;
2674 }
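
/*
 * Illustrative VLAN item accepted above: a non-empty TCI with the
 * plain 12-bit VID mask (wider TCI masks are refused when a VM
 * workaround context is active):
 *
 *	struct rte_flow_item_vlan vlan_spec = {
 *		.hdr.vlan_tci = RTE_BE16(100),
 *	};
 *	struct rte_flow_item_vlan vlan_mask = {
 *		.hdr.vlan_tci = RTE_BE16(0x0fff),
 *	};
 */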
2675 
2676 /**
2677  * Validate IPV4 item.
2678  *
2679  * @param[in] item
2680  *   Item specification.
2681  * @param[in] item_flags
2682  *   Bit-fields that holds the items detected until now.
2683  * @param[in] last_item
2684  *   Previous validated item in the pattern items.
2685  * @param[in] ether_type
2686  *   Type in the ethernet layer header (including dot1q).
2687  * @param[in] acc_mask
2688  *   Acceptable mask, if NULL default internal default mask
2689  *   will be used to check whether item fields are supported.
2690  * @param[in] range_accepted
2691  *   True if range of values is accepted for specific fields, false otherwise.
2692  * @param[out] error
2693  *   Pointer to error structure.
2694  *
2695  * @return
2696  *   0 on success, a negative errno value otherwise and rte_errno is set.
2697  */
2698 int
2699 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2700 			     uint64_t item_flags,
2701 			     uint64_t last_item,
2702 			     uint16_t ether_type,
2703 			     const struct rte_flow_item_ipv4 *acc_mask,
2704 			     bool range_accepted,
2705 			     struct rte_flow_error *error)
2706 {
2707 	const struct rte_flow_item_ipv4 *mask = item->mask;
2708 	const struct rte_flow_item_ipv4 *spec = item->spec;
2709 	const struct rte_flow_item_ipv4 nic_mask = {
2710 		.hdr = {
2711 			.src_addr = RTE_BE32(0xffffffff),
2712 			.dst_addr = RTE_BE32(0xffffffff),
2713 			.type_of_service = 0xff,
2714 			.next_proto_id = 0xff,
2715 		},
2716 	};
2717 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2718 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2719 				      MLX5_FLOW_LAYER_OUTER_L3;
2720 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2721 				      MLX5_FLOW_LAYER_OUTER_L4;
2722 	int ret;
2723 	uint8_t next_proto = 0xFF;
2724 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2725 				  MLX5_FLOW_LAYER_OUTER_VLAN |
2726 				  MLX5_FLOW_LAYER_INNER_VLAN);
2727 
2728 	if ((last_item & l2_vlan) && ether_type &&
2729 	    ether_type != RTE_ETHER_TYPE_IPV4)
2730 		return rte_flow_error_set(error, EINVAL,
2731 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2732 					  "IPv4 cannot follow L2/VLAN layer "
2733 					  "which ether type is not IPv4");
2734 	if (item_flags & MLX5_FLOW_LAYER_IPIP) {
2735 		if (mask && spec)
2736 			next_proto = mask->hdr.next_proto_id &
2737 				     spec->hdr.next_proto_id;
2738 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2739 			return rte_flow_error_set(error, EINVAL,
2740 						  RTE_FLOW_ERROR_TYPE_ITEM,
2741 						  item,
2742 						  "multiple tunnel "
2743 						  "not supported");
2744 	}
2745 	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
2746 		return rte_flow_error_set(error, EINVAL,
2747 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2748 					  "wrong tunnel type - IPv6 specified "
2749 					  "but IPv4 item provided");
2750 	if (item_flags & l3m)
2751 		return rte_flow_error_set(error, ENOTSUP,
2752 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2753 					  "multiple L3 layers not supported");
2754 	else if (item_flags & l4m)
2755 		return rte_flow_error_set(error, EINVAL,
2756 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2757 					  "L3 cannot follow an L4 layer.");
2758 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2759 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2760 		return rte_flow_error_set(error, EINVAL,
2761 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2762 					  "L3 cannot follow an NVGRE layer.");
2763 	if (!mask)
2764 		mask = &rte_flow_item_ipv4_mask;
2765 	else if (mask->hdr.next_proto_id != 0 &&
2766 		 mask->hdr.next_proto_id != 0xff)
2767 		return rte_flow_error_set(error, EINVAL,
2768 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2769 					  "partial mask is not supported"
2770 					  " for protocol");
2771 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2772 					acc_mask ? (const uint8_t *)acc_mask
2773 						 : (const uint8_t *)&nic_mask,
2774 					sizeof(struct rte_flow_item_ipv4),
2775 					range_accepted, error);
2776 	if (ret < 0)
2777 		return ret;
2778 	return 0;
2779 }
2780 
2781 /**
2782  * Validate IPV6 item.
2783  *
2784  * @param[in] item
2785  *   Item specification.
2786  * @param[in] item_flags
2787  *   Bit-fields that holds the items detected until now.
2788  * @param[in] last_item
2789  *   Previous validated item in the pattern items.
2790  * @param[in] ether_type
2791  *   Type in the ethernet layer header (including dot1q).
2792  * @param[in] acc_mask
2793  *   Acceptable mask, if NULL default internal default mask
2794  *   will be used to check whether item fields are supported.
2795  * @param[out] error
2796  *   Pointer to error structure.
2797  *
2798  * @return
2799  *   0 on success, a negative errno value otherwise and rte_errno is set.
2800  */
2801 int
2802 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2803 			     uint64_t item_flags,
2804 			     uint64_t last_item,
2805 			     uint16_t ether_type,
2806 			     const struct rte_flow_item_ipv6 *acc_mask,
2807 			     struct rte_flow_error *error)
2808 {
2809 	const struct rte_flow_item_ipv6 *mask = item->mask;
2810 	const struct rte_flow_item_ipv6 *spec = item->spec;
2811 	const struct rte_flow_item_ipv6 nic_mask = {
2812 		.hdr = {
2813 			.src_addr =
2814 				"\xff\xff\xff\xff\xff\xff\xff\xff"
2815 				"\xff\xff\xff\xff\xff\xff\xff\xff",
2816 			.dst_addr =
2817 				"\xff\xff\xff\xff\xff\xff\xff\xff"
2818 				"\xff\xff\xff\xff\xff\xff\xff\xff",
2819 			.vtc_flow = RTE_BE32(0xffffffff),
2820 			.proto = 0xff,
2821 		},
2822 	};
2823 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2824 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2825 				      MLX5_FLOW_LAYER_OUTER_L3;
2826 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2827 				      MLX5_FLOW_LAYER_OUTER_L4;
2828 	int ret;
2829 	uint8_t next_proto = 0xFF;
2830 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2831 				  MLX5_FLOW_LAYER_OUTER_VLAN |
2832 				  MLX5_FLOW_LAYER_INNER_VLAN);
2833 
2834 	if ((last_item & l2_vlan) && ether_type &&
2835 	    ether_type != RTE_ETHER_TYPE_IPV6)
2836 		return rte_flow_error_set(error, EINVAL,
2837 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2838 					  "IPv6 cannot follow L2/VLAN layer "
2839 					  "which ether type is not IPv6");
2840 	if (mask && mask->hdr.proto == UINT8_MAX && spec)
2841 		next_proto = spec->hdr.proto;
2842 	if (item_flags & MLX5_FLOW_LAYER_IPIP) {
2843 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2844 			return rte_flow_error_set(error, EINVAL,
2845 						  RTE_FLOW_ERROR_TYPE_ITEM,
2846 						  item,
2847 						  "multiple tunnel "
2848 						  "not supported");
2849 	}
2850 	if (next_proto == IPPROTO_HOPOPTS  ||
2851 	    next_proto == IPPROTO_ROUTING  ||
2852 	    next_proto == IPPROTO_FRAGMENT ||
2853 	    next_proto == IPPROTO_ESP	   ||
2854 	    next_proto == IPPROTO_AH	   ||
2855 	    next_proto == IPPROTO_DSTOPTS)
2856 		return rte_flow_error_set(error, EINVAL,
2857 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2858 					  "IPv6 proto (next header) should "
2859 					  "not be set as extension header");
2860 	if (item_flags & MLX5_FLOW_LAYER_IPIP)
2861 		return rte_flow_error_set(error, EINVAL,
2862 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2863 					  "wrong tunnel type - IPv4 specified "
2864 					  "but IPv6 item provided");
2865 	if (item_flags & l3m)
2866 		return rte_flow_error_set(error, ENOTSUP,
2867 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2868 					  "multiple L3 layers not supported");
2869 	else if (item_flags & l4m)
2870 		return rte_flow_error_set(error, EINVAL,
2871 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2872 					  "L3 cannot follow an L4 layer.");
2873 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2874 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2875 		return rte_flow_error_set(error, EINVAL,
2876 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2877 					  "L3 cannot follow an NVGRE layer.");
2878 	if (!mask)
2879 		mask = &rte_flow_item_ipv6_mask;
2880 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2881 					acc_mask ? (const uint8_t *)acc_mask
2882 						 : (const uint8_t *)&nic_mask,
2883 					sizeof(struct rte_flow_item_ipv6),
2884 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2885 	if (ret < 0)
2886 		return ret;
2887 	return 0;
2888 }
2889 
2890 /**
2891  * Validate UDP item.
2892  *
2893  * @param[in] item
2894  *   Item specification.
2895  * @param[in] item_flags
2896  *   Bit-fields that holds the items detected until now.
2897  * @param[in] target_protocol
2898  *   The next protocol in the previous item.
2901  * @param[out] error
2902  *   Pointer to error structure.
2903  *
2904  * @return
2905  *   0 on success, a negative errno value otherwise and rte_errno is set.
2906  */
2907 int
2908 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2909 			    uint64_t item_flags,
2910 			    uint8_t target_protocol,
2911 			    struct rte_flow_error *error)
2912 {
2913 	const struct rte_flow_item_udp *mask = item->mask;
2914 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2915 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2916 				      MLX5_FLOW_LAYER_OUTER_L3;
2917 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2918 				      MLX5_FLOW_LAYER_OUTER_L4;
2919 	int ret;
2920 
2921 	if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2922 		return rte_flow_error_set(error, EINVAL,
2923 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2924 					  "protocol filtering not compatible"
2925 					  " with UDP layer");
2926 	if (!(item_flags & l3m))
2927 		return rte_flow_error_set(error, EINVAL,
2928 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2929 					  "L3 is mandatory to filter on L4");
2930 	if (item_flags & l4m)
2931 		return rte_flow_error_set(error, EINVAL,
2932 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2933 					  "multiple L4 layers not supported");
2934 	if (!mask)
2935 		mask = &rte_flow_item_udp_mask;
2936 	ret = mlx5_flow_item_acceptable
2937 		(item, (const uint8_t *)mask,
2938 		 (const uint8_t *)&rte_flow_item_udp_mask,
2939 		 sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2940 		 error);
2941 	if (ret < 0)
2942 		return ret;
2943 	return 0;
2944 }
2945 
2946 /**
2947  * Validate TCP item.
2948  *
2949  * @param[in] item
2950  *   Item specification.
2951  * @param[in] item_flags
2952  *   Bit-fields that holds the items detected until now.
2953  * @param[in] target_protocol
2954  *   The next protocol in the previous item.
2955  * @param[out] error
2956  *   Pointer to error structure.
2957  *
2958  * @return
2959  *   0 on success, a negative errno value otherwise and rte_errno is set.
2960  */
2961 int
2962 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2963 			    uint64_t item_flags,
2964 			    uint8_t target_protocol,
2965 			    const struct rte_flow_item_tcp *flow_mask,
2966 			    struct rte_flow_error *error)
2967 {
2968 	const struct rte_flow_item_tcp *mask = item->mask;
2969 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2970 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2971 				      MLX5_FLOW_LAYER_OUTER_L3;
2972 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2973 				      MLX5_FLOW_LAYER_OUTER_L4;
2974 	int ret;
2975 
2976 	MLX5_ASSERT(flow_mask);
2977 	if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2978 		return rte_flow_error_set(error, EINVAL,
2979 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2980 					  "protocol filtering not compatible"
2981 					  " with TCP layer");
2982 	if (!(item_flags & l3m))
2983 		return rte_flow_error_set(error, EINVAL,
2984 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2985 					  "L3 is mandatory to filter on L4");
2986 	if (item_flags & l4m)
2987 		return rte_flow_error_set(error, EINVAL,
2988 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2989 					  "multiple L4 layers not supported");
2990 	if (!mask)
2991 		mask = &rte_flow_item_tcp_mask;
2992 	ret = mlx5_flow_item_acceptable
2993 		(item, (const uint8_t *)mask,
2994 		 (const uint8_t *)flow_mask,
2995 		 sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2996 		 error);
2997 	if (ret < 0)
2998 		return ret;
2999 	return 0;
3000 }
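
/*
 * Illustrative pattern ordering for the L3/L4 checks above: exactly
 * one L3 item must precede the TCP item and no other L4 item may be
 * present; with item->mask left NULL the default
 * rte_flow_item_tcp_mask is applied.
 *
 *	struct rte_flow_item pat[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_TCP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */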
3001 
3002 /**
3003  * Validate VXLAN item.
3004  *
3005  * @param[in] dev
3006  *   Pointer to the Ethernet device structure.
3007  * @param[in] udp_dport
3008  *   UDP destination port.
3009  * @param[in] item
3010  *   Item specification.
3011  * @param[in] item_flags
3012  *   Bit-fields that holds the items detected until now.
3013  * @param[in] root
3014  *   Whether the flow rule is inserted on the root table.
3015  * @param[out] error
3016  *   Pointer to error structure.
3017  *
3018  * @return
3019  *   0 on success, a negative errno value otherwise and rte_errno is set.
3020  */
3021 int
3022 mlx5_flow_validate_item_vxlan(struct rte_eth_dev *dev,
3023 			      uint16_t udp_dport,
3024 			      const struct rte_flow_item *item,
3025 			      uint64_t item_flags,
3026 			      bool root,
3027 			      struct rte_flow_error *error)
3028 {
3029 	const struct rte_flow_item_vxlan *spec = item->spec;
3030 	const struct rte_flow_item_vxlan *mask = item->mask;
3031 	int ret;
3032 	struct mlx5_priv *priv = dev->data->dev_private;
3033 	union vni {
3034 		uint32_t vlan_id;
3035 		uint8_t vni[4];
3036 	} id = { .vlan_id = 0, };
3037 	const struct rte_flow_item_vxlan nic_mask = {
3038 		.hdr.vni = "\xff\xff\xff",
3039 		.hdr.rsvd1 = 0xff,
3040 	};
3041 	const struct rte_flow_item_vxlan *valid_mask;
3042 
3043 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3044 		return rte_flow_error_set(error, ENOTSUP,
3045 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3046 					  "multiple tunnel layers not"
3047 					  " supported");
3048 	valid_mask = &rte_flow_item_vxlan_mask;
3049 	/*
3050 	 * Verify only UDPv4 is present as defined in
3051 	 * https://tools.ietf.org/html/rfc7348
3052 	 */
3053 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
3054 		return rte_flow_error_set(error, EINVAL,
3055 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3056 					  "no outer UDP layer found");
3057 	if (!mask)
3058 		mask = &rte_flow_item_vxlan_mask;
3059 
3060 	if (priv->sh->steering_format_version !=
3061 	    MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
3062 	    !udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN) {
3063 		/* non-root table */
3064 		if (!root && priv->sh->misc5_cap)
3065 			valid_mask = &nic_mask;
3066 		/* Group zero in NIC domain */
3067 		if (!root && priv->sh->tunnel_header_0_1)
3068 			valid_mask = &nic_mask;
3069 	}
3070 	ret = mlx5_flow_item_acceptable
3071 		(item, (const uint8_t *)mask,
3072 		 (const uint8_t *)valid_mask,
3073 		 sizeof(struct rte_flow_item_vxlan),
3074 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3075 	if (ret < 0)
3076 		return ret;
3077 	if (spec) {
3078 		memcpy(&id.vni[1], spec->hdr.vni, 3);
3079 		memcpy(&id.vni[1], mask->hdr.vni, 3);
3080 	}
3081 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3082 		return rte_flow_error_set(error, ENOTSUP,
3083 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3084 					  "VXLAN tunnel must be fully defined");
3085 	return 0;
3086 }
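
/*
 * Illustrative pattern satisfying the VXLAN checks above: the outer
 * L2/L3/UDP layers must be fully defined before the VXLAN item
 * ("vxlan_spec" is a hypothetical spec carrying the VNI):
 *
 *	struct rte_flow_item pat[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &vxlan_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */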
3087 
3088 /**
3089  * Validate VXLAN_GPE item.
3090  *
3091  * @param[in] item
3092  *   Item specification.
3093  * @param[in] item_flags
3094  *   Bit-fields that holds the items detected until now.
3095  * @param[in] priv
3096  *   Pointer to the private data structure.
3097  * @param[in] target_protocol
3098  *   The next protocol in the previous item.
3099  * @param[out] error
3100  *   Pointer to error structure.
3101  *
3102  * @return
3103  *   0 on success, a negative errno value otherwise and rte_errno is set.
3104  */
3105 int
3106 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
3107 				  uint64_t item_flags,
3108 				  struct rte_eth_dev *dev,
3109 				  struct rte_flow_error *error)
3110 {
3111 	struct mlx5_priv *priv = dev->data->dev_private;
3112 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
3113 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
3114 	int ret;
3115 	union vni {
3116 		uint32_t vlan_id;
3117 		uint8_t vni[4];
3118 	} id = { .vlan_id = 0, };
3119 
3120 	if (!priv->sh->config.l3_vxlan_en)
3121 		return rte_flow_error_set(error, ENOTSUP,
3122 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3123 					  "L3 VXLAN is not enabled by device"
3124 					  " parameter and/or not configured in"
3125 					  " firmware");
3126 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3127 		return rte_flow_error_set(error, ENOTSUP,
3128 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3129 					  "multiple tunnel layers not"
3130 					  " supported");
3131 	/*
3132 	 * Verify only UDPv4 is present as defined in
3133 	 * https://tools.ietf.org/html/rfc7348
3134 	 */
3135 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
3136 		return rte_flow_error_set(error, EINVAL,
3137 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3138 					  "no outer UDP layer found");
3139 	if (!mask)
3140 		mask = &rte_flow_item_vxlan_gpe_mask;
3141 	ret = mlx5_flow_item_acceptable
3142 		(item, (const uint8_t *)mask,
3143 		 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
3144 		 sizeof(struct rte_flow_item_vxlan_gpe),
3145 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3146 	if (ret < 0)
3147 		return ret;
3148 	if (spec) {
3149 		if (spec->hdr.proto)
3150 			return rte_flow_error_set(error, ENOTSUP,
3151 						  RTE_FLOW_ERROR_TYPE_ITEM,
3152 						  item,
3153 						  "VxLAN-GPE protocol"
3154 						  " not supported");
3155 		memcpy(&id.vni[1], spec->hdr.vni, 3);
3156 		memcpy(&id.vni[1], mask->hdr.vni, 3);
3157 	}
3158 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3159 		return rte_flow_error_set(error, ENOTSUP,
3160 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3161 					  "VXLAN-GPE tunnel must be fully"
3162 					  " defined");
3163 	return 0;
3164 }

3165 /**
3166  * Validate GRE Key item.
3167  *
3168  * @param[in] item
3169  *   Item specification.
3170  * @param[in] item_flags
3171  *   Bit flags to mark detected items.
3172  * @param[in] gre_item
3173  *   Pointer to gre_item
3174  * @param[out] error
3175  *   Pointer to error structure.
3176  *
3177  * @return
3178  *   0 on success, a negative errno value otherwise and rte_errno is set.
3179  */
3180 int
3181 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
3182 				uint64_t item_flags,
3183 				const struct rte_flow_item *gre_item,
3184 				struct rte_flow_error *error)
3185 {
3186 	const rte_be32_t *mask = item->mask;
3187 	int ret = 0;
3188 	rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
3189 	const struct rte_flow_item_gre *gre_spec;
3190 	const struct rte_flow_item_gre *gre_mask;
3191 
3192 	if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
3193 		return rte_flow_error_set(error, ENOTSUP,
3194 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3195 					  "Multiple GRE key not support");
3196 	if (!(item_flags & MLX5_FLOW_LAYER_GRE))
3197 		return rte_flow_error_set(error, ENOTSUP,
3198 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3199 					  "No preceding GRE header");
3200 	if (item_flags & MLX5_FLOW_LAYER_INNER)
3201 		return rte_flow_error_set(error, ENOTSUP,
3202 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3203 					  "GRE key following a wrong item");
3204 	gre_mask = gre_item->mask;
3205 	if (!gre_mask)
3206 		gre_mask = &rte_flow_item_gre_mask;
3207 	gre_spec = gre_item->spec;
3208 	if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
3209 			 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
3210 		return rte_flow_error_set(error, EINVAL,
3211 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3212 					  "Key bit must be on");
3213 
3214 	if (!mask)
3215 		mask = &gre_key_default_mask;
3216 	ret = mlx5_flow_item_acceptable
3217 		(item, (const uint8_t *)mask,
3218 		 (const uint8_t *)&gre_key_default_mask,
3219 		 sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3220 	return ret;
3221 }
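
/*
 * Illustrative GRE-with-key match accepted above: the GRE item must
 * precede the key item and, when a GRE spec is given with the K bit
 * masked, that bit (0x2000 in c_rsvd0_ver) must be set:
 *
 *	struct rte_flow_item_gre gre_spec = {
 *		.c_rsvd0_ver = RTE_BE16(0x2000),
 *	};
 *	rte_be32_t gre_key_spec = RTE_BE32(0x1234);
 */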
3222 
3223 /**
3224  * Validate GRE optional item.
3225  *
3226  * @param[in] dev
3227  *   Pointer to the Ethernet device structure.
3228  * @param[in] item
3229  *   Item specification.
3230  * @param[in] item_flags
3231  *   Bit flags to mark detected items.
3232  * @param[in] attr
3233  *   Flow rule attributes.
3234  * @param[in] gre_item
3235  *   Pointer to gre_item
3236  * @param[out] error
3237  *   Pointer to error structure.
3238  *
3239  * @return
3240  *   0 on success, a negative errno value otherwise and rte_errno is set.
3241  */
3242 int
3243 mlx5_flow_validate_item_gre_option(struct rte_eth_dev *dev,
3244 				   const struct rte_flow_item *item,
3245 				   uint64_t item_flags,
3246 				   const struct rte_flow_attr *attr,
3247 				   const struct rte_flow_item *gre_item,
3248 				   struct rte_flow_error *error)
3249 {
3250 	const struct rte_flow_item_gre *gre_spec = gre_item->spec;
3251 	const struct rte_flow_item_gre *gre_mask = gre_item->mask;
3252 	const struct rte_flow_item_gre_opt *spec = item->spec;
3253 	const struct rte_flow_item_gre_opt *mask = item->mask;
3254 	struct mlx5_priv *priv = dev->data->dev_private;
3255 	int ret = 0;
3256 	struct rte_flow_item_gre_opt nic_mask = {
3257 		.checksum_rsvd = {
3258 			.checksum = RTE_BE16(UINT16_MAX),
3259 			.reserved1 = 0x0,
3260 		},
3261 		.key = {
3262 			.key = RTE_BE32(UINT32_MAX),
3263 		},
3264 		.sequence = {
3265 			.sequence = RTE_BE32(UINT32_MAX),
3266 		},
3267 	};
3268 
3269 	if (!(item_flags & MLX5_FLOW_LAYER_GRE))
3270 		return rte_flow_error_set(error, ENOTSUP,
3271 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3272 					  "No preceding GRE header");
3273 	if (item_flags & MLX5_FLOW_LAYER_INNER)
3274 		return rte_flow_error_set(error, ENOTSUP,
3275 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3276 					  "GRE option following a wrong item");
3277 	if (!spec || !mask)
3278 		return rte_flow_error_set(error, EINVAL,
3279 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3280 					  "At least one field gre_option(checksum/key/sequence) must be specified");
3281 	if (!gre_mask)
3282 		gre_mask = &rte_flow_item_gre_mask;
3283 	if (mask->checksum_rsvd.checksum)
3284 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x8000)) &&
3285 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x8000)))
3286 			return rte_flow_error_set(error, EINVAL,
3287 						  RTE_FLOW_ERROR_TYPE_ITEM,
3288 						  item,
3289 						  "Checksum bit must be on");
3290 	if (mask->key.key)
3291 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
3292 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
3293 			return rte_flow_error_set(error, EINVAL,
3294 						  RTE_FLOW_ERROR_TYPE_ITEM,
3295 						  item, "Key bit must be on");
3296 	if (mask->sequence.sequence)
3297 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x1000)) &&
3298 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x1000)))
3299 			return rte_flow_error_set(error, EINVAL,
3300 						  RTE_FLOW_ERROR_TYPE_ITEM,
3301 						  item,
3302 						  "Sequence bit must be on");
3303 	if (mask->checksum_rsvd.checksum || mask->sequence.sequence) {
3304 		if (priv->sh->steering_format_version ==
3305 		    MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
3306 		    ((attr->group || (attr->transfer && priv->fdb_def_rule)) &&
3307 		     !priv->sh->misc5_cap) ||
3308 		    (!(priv->sh->tunnel_header_0_1 &&
3309 		       priv->sh->tunnel_header_2_3) &&
3310 		    !attr->group && (!attr->transfer || !priv->fdb_def_rule)))
3311 			return rte_flow_error_set(error, EINVAL,
3312 						  RTE_FLOW_ERROR_TYPE_ITEM,
3313 						  item,
3314 						  "Checksum/Sequence not supported");
3315 	}
3316 	ret = mlx5_flow_item_acceptable
3317 		(item, (const uint8_t *)mask,
3318 		 (const uint8_t *)&nic_mask,
3319 		 sizeof(struct rte_flow_item_gre_opt),
3320 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3321 	return ret;
3322 }
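
/*
 * Example (illustrative sketch, not part of the driver): matching the
 * GRE option key field, which the validator above accepts when the GRE
 * item sets the K bit and at least one option field is present in both
 * spec and mask. The key value is an arbitrary placeholder.
 *
 *	struct rte_flow_item_gre_opt opt_spec = {
 *		.key = { .key = RTE_BE32(0x12345678) },
 *	};
 *	struct rte_flow_item_gre_opt opt_mask = {
 *		.key = { .key = RTE_BE32(UINT32_MAX) },
 *	};
 *	struct rte_flow_item gre_opt_item = {
 *		.type = RTE_FLOW_ITEM_TYPE_GRE_OPTION,
 *		.spec = &opt_spec,
 *		.mask = &opt_mask,
 *	};
 */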
3323 
3324 /**
3325  * Validate GRE item.
3326  *
3327  * @param[in] item
3328  *   Item specification.
3329  * @param[in] item_flags
3330  *   Bit flags to mark detected items.
3331  * @param[in] target_protocol
3332  *   The next protocol in the previous item.
3333  * @param[out] error
3334  *   Pointer to error structure.
3335  *
3336  * @return
3337  *   0 on success, a negative errno value otherwise and rte_errno is set.
3338  */
3339 int
3340 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
3341 			    uint64_t item_flags,
3342 			    uint8_t target_protocol,
3343 			    struct rte_flow_error *error)
3344 {
3345 	const struct rte_flow_item_gre *spec __rte_unused = item->spec;
3346 	const struct rte_flow_item_gre *mask = item->mask;
3347 	int ret;
3348 	const struct rte_flow_item_gre nic_mask = {
3349 		.c_rsvd0_ver = RTE_BE16(0xB000),
3350 		.protocol = RTE_BE16(UINT16_MAX),
3351 	};
3352 
3353 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3354 		return rte_flow_error_set(error, EINVAL,
3355 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3356 					  "protocol filtering not compatible"
3357 					  " with this GRE layer");
3358 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3359 		return rte_flow_error_set(error, ENOTSUP,
3360 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3361 					  "multiple tunnel layers not"
3362 					  " supported");
3363 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3364 		return rte_flow_error_set(error, ENOTSUP,
3365 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3366 					  "L3 Layer is missing");
3367 	if (!mask)
3368 		mask = &rte_flow_item_gre_mask;
3369 	ret = mlx5_flow_item_acceptable
3370 		(item, (const uint8_t *)mask,
3371 		 (const uint8_t *)&nic_mask,
3372 		 sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
3373 		 error);
3374 	if (ret < 0)
3375 		return ret;
3376 #ifndef HAVE_MLX5DV_DR
3377 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
3378 	if (spec && (spec->protocol & mask->protocol))
3379 		return rte_flow_error_set(error, ENOTSUP,
3380 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3381 					  "without MPLS support the"
3382 					  " specification cannot be used for"
3383 					  " filtering");
3384 #endif
3385 #endif
3386 	return 0;
3387 }
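
/*
 * Note: the 0xB000 value in nic_mask above is the union of the GRE
 * header flags the device can match on:
 *
 *	0x8000 (C, checksum present)
 *	| 0x2000 (K, key present)
 *	| 0x1000 (S, sequence present) == 0xB000
 */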
3388 
3389 /**
3390  * Validate Geneve item.
3391  *
3392  * @param[in] item
3393  *   Item specification.
3394  * @param[in] item_flags
3395  *   Bit-fields that hold the items detected until now.
3396  * @param[in] dev
3397  *   Pointer to the rte_eth_dev structure.
3398  * @param[out] error
3399  *   Pointer to error structure.
3400  *
3401  * @return
3402  *   0 on success, a negative errno value otherwise and rte_errno is set.
3403  */
3405 int
3406 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
3407 			       uint64_t item_flags,
3408 			       struct rte_eth_dev *dev,
3409 			       struct rte_flow_error *error)
3410 {
3411 	struct mlx5_priv *priv = dev->data->dev_private;
3412 	const struct rte_flow_item_geneve *spec = item->spec;
3413 	const struct rte_flow_item_geneve *mask = item->mask;
3414 	int ret;
3415 	uint16_t gbhdr;
3416 	uint8_t opt_len = priv->sh->cdev->config.hca_attr.geneve_max_opt_len ?
3417 			  MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
3418 	const struct rte_flow_item_geneve nic_mask = {
3419 		.ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
3420 		.vni = "\xff\xff\xff",
3421 		.protocol = RTE_BE16(UINT16_MAX),
3422 	};
3423 
3424 	if (!priv->sh->cdev->config.hca_attr.tunnel_stateless_geneve_rx)
3425 		return rte_flow_error_set(error, ENOTSUP,
3426 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3427 					  "L3 Geneve is not enabled by device"
3428 					  " parameter and/or not configured in"
3429 					  " firmware");
3430 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3431 		return rte_flow_error_set(error, ENOTSUP,
3432 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3433 					  "multiple tunnel layers not"
3434 					  " supported");
3435 	/*
3436 	 * Verify an outer UDP header is present, as required by the Geneve
3437 	 * specification: RFC 8926 (https://tools.ietf.org/html/rfc8926).
3438 	 */
3439 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
3440 		return rte_flow_error_set(error, EINVAL,
3441 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3442 					  "no outer UDP layer found");
3443 	if (!mask)
3444 		mask = &rte_flow_item_geneve_mask;
3445 	ret = mlx5_flow_item_acceptable
3446 				  (item, (const uint8_t *)mask,
3447 				   (const uint8_t *)&nic_mask,
3448 				   sizeof(struct rte_flow_item_geneve),
3449 				   MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3450 	if (ret)
3451 		return ret;
3452 	if (spec) {
3453 		gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
3454 		if (MLX5_GENEVE_VER_VAL(gbhdr) ||
3455 		     MLX5_GENEVE_CRITO_VAL(gbhdr) ||
3456 		     MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
3457 			return rte_flow_error_set(error, ENOTSUP,
3458 						  RTE_FLOW_ERROR_TYPE_ITEM,
3459 						  item,
3460 						  "unsupported Geneve header"
3461 						  " fields are being used");
3462 		if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
3463 			return rte_flow_error_set
3464 					(error, ENOTSUP,
3465 					 RTE_FLOW_ERROR_TYPE_ITEM,
3466 					 item,
3467 					 "Unsupported Geneve options length");
3468 	}
3469 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3470 		return rte_flow_error_set
3471 				    (error, ENOTSUP,
3472 				     RTE_FLOW_ERROR_TYPE_ITEM, item,
3473 				     "Geneve tunnel must be fully defined");
3474 	return 0;
3475 }
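
/*
 * Example (illustrative sketch, not part of the driver): a fully
 * defined Geneve tunnel pattern as required by the checks above, i.e.
 * outer L2/L3/UDP present and no prior tunnel layer. The VNI value is
 * an arbitrary placeholder and default masks are assumed.
 *
 *	struct rte_flow_item_geneve geneve_spec = {
 *		.vni = "\x00\x12\x34",
 *	};
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_GENEVE, .spec = &geneve_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */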
3476 
3477 /**
3478  * Validate Geneve TLV option item.
3479  *
3480  * @param[in] item
3481  *   Item specification.
3482  * @param[in] last_item
3483  *   Previous validated item in the pattern items.
3484  * @param[in] geneve_item
3485  *   Previous GENEVE item specification.
3486  * @param[in] dev
3487  *   Pointer to the rte_eth_dev structure.
3488  * @param[out] error
3489  *   Pointer to error structure.
3490  *
3491  * @return
3492  *   0 on success, a negative errno value otherwise and rte_errno is set.
3493  */
3494 int
3495 mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item,
3496 				   uint64_t last_item,
3497 				   const struct rte_flow_item *geneve_item,
3498 				   struct rte_eth_dev *dev,
3499 				   struct rte_flow_error *error)
3500 {
3501 	struct mlx5_priv *priv = dev->data->dev_private;
3502 	struct mlx5_dev_ctx_shared *sh = priv->sh;
3503 	struct mlx5_geneve_tlv_option_resource *geneve_opt_resource;
3504 	struct mlx5_hca_attr *hca_attr = &sh->cdev->config.hca_attr;
3505 	uint8_t data_max_supported =
3506 			hca_attr->max_geneve_tlv_option_data_len * 4;
3507 	const struct rte_flow_item_geneve *geneve_spec;
3508 	const struct rte_flow_item_geneve *geneve_mask;
3509 	const struct rte_flow_item_geneve_opt *spec = item->spec;
3510 	const struct rte_flow_item_geneve_opt *mask = item->mask;
3511 	unsigned int i;
3512 	unsigned int data_len;
3513 	uint8_t tlv_option_len;
3514 	uint16_t optlen_m, optlen_v;
3515 	const struct rte_flow_item_geneve_opt full_mask = {
3516 		.option_class = RTE_BE16(0xffff),
3517 		.option_type = 0xff,
3518 		.option_len = 0x1f,
3519 	};
3520 
3521 	if (!mask)
3522 		mask = &rte_flow_item_geneve_opt_mask;
3523 	if (!spec)
3524 		return rte_flow_error_set
3525 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3526 			"Geneve TLV opt class/type/length must be specified");
3527 	if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK)
3528 		return rte_flow_error_set
3529 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3530 			"Geneve TLV opt length exceeds the limit (31)");
3531 	/* Check if class, type and length masks are full. */
3532 	if (full_mask.option_class != mask->option_class ||
3533 	    full_mask.option_type != mask->option_type ||
3534 	    full_mask.option_len != (mask->option_len & full_mask.option_len))
3535 		return rte_flow_error_set
3536 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3537 			"Geneve TLV opt class/type/length masks must be full");
3538 	/* Check if length is supported */
3539 	if ((uint32_t)spec->option_len >
3540 			hca_attr->max_geneve_tlv_option_data_len)
3541 		return rte_flow_error_set
3542 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3543 			"Geneve TLV opt length not supported");
3544 	if (hca_attr->max_geneve_tlv_options > 1)
3545 		DRV_LOG(DEBUG,
3546 			"max_geneve_tlv_options supports more than 1 option");
3547 	/* Check GENEVE item preceding. */
3548 	if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE))
3549 		return rte_flow_error_set
3550 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3551 			"Geneve opt item must be preceded with Geneve item");
3552 	geneve_spec = geneve_item->spec;
3553 	geneve_mask = geneve_item->mask ? geneve_item->mask :
3554 					  &rte_flow_item_geneve_mask;
3555 	/* Check if GENEVE TLV option size doesn't exceed option length */
3556 	if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 ||
3557 			    geneve_spec->ver_opt_len_o_c_rsvd0)) {
3558 		tlv_option_len = spec->option_len & mask->option_len;
3559 		optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0);
3560 		optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v);
3561 		optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0);
3562 		optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m);
3563 		if ((optlen_v & optlen_m) <= tlv_option_len)
3564 			return rte_flow_error_set
3565 				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3566 				 "GENEVE TLV option length exceeds optlen");
3567 	}
3568 	/* Check if length is 0 or data is 0. */
3569 	if (spec->data == NULL || spec->option_len == 0)
3570 		return rte_flow_error_set
3571 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3572 			"Geneve TLV opt with zero data/length not supported");
3573 	/* Check not all data & mask are 0. */
3574 	data_len = spec->option_len * 4;
3575 	if (mask->data == NULL) {
3576 		for (i = 0; i < data_len; i++)
3577 			if (spec->data[i])
3578 				break;
3579 		if (i == data_len)
3580 			return rte_flow_error_set(error, ENOTSUP,
3581 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3582 				"Can't match on Geneve option data 0");
3583 	} else {
3584 		for (i = 0; i < data_len; i++)
3585 			if (spec->data[i] & mask->data[i])
3586 				break;
3587 		if (i == data_len)
3588 			return rte_flow_error_set(error, ENOTSUP,
3589 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3590 				"Can't match on Geneve option data and mask 0");
3591 		/* Check data mask supported. */
3592 		for (i = data_max_supported; i < data_len ; i++)
3593 			if (mask->data[i])
3594 				return rte_flow_error_set(error, ENOTSUP,
3595 					RTE_FLOW_ERROR_TYPE_ITEM, item,
3596 					"Data mask is of unsupported size");
3597 	}
3598 	/* Check GENEVE option is supported in NIC. */
3599 	if (!hca_attr->geneve_tlv_opt)
3600 		return rte_flow_error_set
3601 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3602 			"Geneve TLV opt not supported");
3603 	/* Check if we already have geneve option with different type/class. */
3604 	rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
3605 	geneve_opt_resource = sh->geneve_tlv_option_resource;
3606 	if (geneve_opt_resource != NULL)
3607 		if (geneve_opt_resource->option_class != spec->option_class ||
3608 		    geneve_opt_resource->option_type != spec->option_type ||
3609 		    geneve_opt_resource->length != spec->option_len) {
3610 			rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3611 			return rte_flow_error_set(error, ENOTSUP,
3612 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3613 				"Only one Geneve TLV option supported");
3614 		}
3615 	rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3616 	return 0;
3617 }
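
/*
 * Example (illustrative sketch, not part of the driver): a Geneve TLV
 * option match that passes the checks above, i.e. full class/type/
 * length masks and non-zero length and data. The class, type and data
 * values are arbitrary placeholders; option_len counts 4-byte words.
 *
 *	rte_be32_t opt_data[] = { RTE_BE32(0x1) };
 *	rte_be32_t opt_data_mask[] = { RTE_BE32(UINT32_MAX) };
 *	struct rte_flow_item_geneve_opt opt_spec = {
 *		.option_class = RTE_BE16(0x0102),
 *		.option_type = 0x42,
 *		.option_len = 1,
 *		.data = opt_data,
 *	};
 *	struct rte_flow_item_geneve_opt opt_mask = {
 *		.option_class = RTE_BE16(0xffff),
 *		.option_type = 0xff,
 *		.option_len = 0x1f,
 *		.data = opt_data_mask,
 *	};
 */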
3618 
3619 /**
3620  * Validate MPLS item.
3621  *
3622  * @param[in] dev
3623  *   Pointer to the rte_eth_dev structure.
3624  * @param[in] item
3625  *   Item specification.
3626  * @param[in] item_flags
3627  *   Bit-fields that holds the items detected until now.
3628  * @param[in] prev_layer
3629  *   The protocol layer indicated in previous item.
3630  * @param[out] error
3631  *   Pointer to error structure.
3632  *
3633  * @return
3634  *   0 on success, a negative errno value otherwise and rte_errno is set.
3635  */
3636 int
3637 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
3638 			     const struct rte_flow_item *item __rte_unused,
3639 			     uint64_t item_flags __rte_unused,
3640 			     uint64_t prev_layer __rte_unused,
3641 			     struct rte_flow_error *error)
3642 {
3643 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
3644 	const struct rte_flow_item_mpls *mask = item->mask;
3645 	struct mlx5_priv *priv = dev->data->dev_private;
3646 	int ret;
3647 
3648 	if (!priv->sh->dev_cap.mpls_en)
3649 		return rte_flow_error_set(error, ENOTSUP,
3650 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3651 					  "MPLS not supported or"
3652 					  " disabled in firmware"
3653 					  " configuration.");
3654 	/* MPLS over UDP or GRE is allowed. */
3655 	if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L4_UDP |
3656 			    MLX5_FLOW_LAYER_GRE |
3657 			    MLX5_FLOW_LAYER_GRE_KEY)))
3658 		return rte_flow_error_set(error, EINVAL,
3659 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3660 					  "protocol filtering not compatible"
3661 					  " with MPLS layer");
3662 	/* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
3663 	if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
3664 	    !(item_flags & MLX5_FLOW_LAYER_GRE))
3665 		return rte_flow_error_set(error, ENOTSUP,
3666 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3667 					  "multiple tunnel layers not"
3668 					  " supported");
3669 	if (!mask)
3670 		mask = &rte_flow_item_mpls_mask;
3671 	ret = mlx5_flow_item_acceptable
3672 		(item, (const uint8_t *)mask,
3673 		 (const uint8_t *)&rte_flow_item_mpls_mask,
3674 		 sizeof(struct rte_flow_item_mpls),
3675 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3676 	if (ret < 0)
3677 		return ret;
3678 	return 0;
3679 #else
3680 	return rte_flow_error_set(error, ENOTSUP,
3681 				  RTE_FLOW_ERROR_TYPE_ITEM, item,
3682 				  "MPLS is not supported by Verbs, please"
3683 				  " update.");
3684 #endif
3685 }
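
/*
 * Example (illustrative sketch, not part of the driver): MPLS-over-GRE,
 * one of the encapsulations accepted above (the others being MPLS over
 * UDP or after a GRE key item).
 *
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_GRE },
 *		{ .type = RTE_FLOW_ITEM_TYPE_MPLS },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */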
3686 
3687 /**
3688  * Validate NVGRE item.
3689  *
3690  * @param[in] item
3691  *   Item specification.
3692  * @param[in] item_flags
3693  *   Bit flags to mark detected items.
3694  * @param[in] target_protocol
3695  *   The next protocol in the previous item.
3696  * @param[out] error
3697  *   Pointer to error structure.
3698  *
3699  * @return
3700  *   0 on success, a negative errno value otherwise and rte_errno is set.
3701  */
3702 int
3703 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
3704 			      uint64_t item_flags,
3705 			      uint8_t target_protocol,
3706 			      struct rte_flow_error *error)
3707 {
3708 	const struct rte_flow_item_nvgre *mask = item->mask;
3709 	int ret;
3710 
3711 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3712 		return rte_flow_error_set(error, EINVAL,
3713 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3714 					  "protocol filtering not compatible"
3715 					  " with this GRE layer");
3716 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3717 		return rte_flow_error_set(error, ENOTSUP,
3718 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3719 					  "multiple tunnel layers not"
3720 					  " supported");
3721 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3722 		return rte_flow_error_set(error, ENOTSUP,
3723 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3724 					  "L3 Layer is missing");
3725 	if (!mask)
3726 		mask = &rte_flow_item_nvgre_mask;
3727 	ret = mlx5_flow_item_acceptable
3728 		(item, (const uint8_t *)mask,
3729 		 (const uint8_t *)&rte_flow_item_nvgre_mask,
3730 		 sizeof(struct rte_flow_item_nvgre),
3731 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3732 	if (ret < 0)
3733 		return ret;
3734 	return 0;
3735 }
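
/*
 * Example (illustrative sketch, not part of the driver): an NVGRE match
 * accepted above; like GRE it requires an outer L3 layer and no prior
 * tunnel layer. The TNI value is an arbitrary placeholder.
 *
 *	struct rte_flow_item_nvgre nvgre_spec = {
 *		.tni = "\x00\x12\x34",
 *	};
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_NVGRE, .spec = &nvgre_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */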
3736 
3737 /**
3738  * Validate eCPRI item.
3739  *
3740  * @param[in] item
3741  *   Item specification.
3742  * @param[in] item_flags
3743  *   Bit-fields that holds the items detected until now.
3744  * @param[in] last_item
3745  *   Previous validated item in the pattern items.
3746  * @param[in] ether_type
3747  *   Type in the ethernet layer header (including dot1q).
3748  * @param[in] acc_mask
3749  *   Acceptable mask, if NULL the default internal mask
3750  *   will be used to check whether item fields are supported.
3751  * @param[out] error
3752  *   Pointer to error structure.
3753  *
3754  * @return
3755  *   0 on success, a negative errno value otherwise and rte_errno is set.
3756  */
3757 int
3758 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
3759 			      uint64_t item_flags,
3760 			      uint64_t last_item,
3761 			      uint16_t ether_type,
3762 			      const struct rte_flow_item_ecpri *acc_mask,
3763 			      struct rte_flow_error *error)
3764 {
3765 	const struct rte_flow_item_ecpri *mask = item->mask;
3766 	const struct rte_flow_item_ecpri nic_mask = {
3767 		.hdr = {
3768 			.common = {
3769 				.u32 =
3770 				RTE_BE32(((const struct rte_ecpri_common_hdr) {
3771 					.type = 0xFF,
3772 					}).u32),
3773 			},
3774 			.dummy[0] = 0xFFFFFFFF,
3775 		},
3776 	};
3777 	const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
3778 					MLX5_FLOW_LAYER_OUTER_VLAN);
3779 	struct rte_flow_item_ecpri mask_lo;
3780 
3781 	if (!(last_item & outer_l2_vlan) &&
3782 	    last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP)
3783 		return rte_flow_error_set(error, EINVAL,
3784 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3785 					  "eCPRI can only follow L2/VLAN layer or UDP layer");
3786 	if ((last_item & outer_l2_vlan) && ether_type &&
3787 	    ether_type != RTE_ETHER_TYPE_ECPRI)
3788 		return rte_flow_error_set(error, EINVAL,
3789 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3790 					  "eCPRI cannot follow an L2/VLAN layer whose ether type is not 0xAEFE");
3791 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3792 		return rte_flow_error_set(error, EINVAL,
3793 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3794 					  "eCPRI with tunnel is not supported right now");
3795 	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
3796 		return rte_flow_error_set(error, ENOTSUP,
3797 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3798 					  "multiple L3 layers not supported");
3799 	else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
3800 		return rte_flow_error_set(error, EINVAL,
3801 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3802 					  "eCPRI cannot coexist with a TCP layer");
3803 	/* Per the specification, eCPRI can be carried over a UDP layer. */
3804 	else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
3805 		return rte_flow_error_set(error, EINVAL,
3806 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3807 					  "eCPRI over UDP layer is not supported yet");
3808 	/* Mask for type field in common header could be zero. */
3809 	if (!mask)
3810 		mask = &rte_flow_item_ecpri_mask;
3811 	mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
3812 	/* Input mask is in big-endian format. */
3813 	if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
3814 		return rte_flow_error_set(error, EINVAL,
3815 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3816 					  "partial mask is not supported for protocol");
3817 	else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
3818 		return rte_flow_error_set(error, EINVAL,
3819 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3820 					  "message header mask must be after a type mask");
3821 	return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
3822 					 acc_mask ? (const uint8_t *)acc_mask
3823 						  : (const uint8_t *)&nic_mask,
3824 					 sizeof(struct rte_flow_item_ecpri),
3825 					 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3826 }
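
/*
 * Example (illustrative sketch, not part of the driver): an eCPRI over
 * Ethernet match that satisfies the ordering checks above. The type
 * mask must be either 0 or 0xff; spec and mask are built with the same
 * big-endian conversion idiom as the nic_mask above.
 *
 *	struct rte_flow_item_ecpri ecpri_spec = {
 *		.hdr.common.u32 = rte_cpu_to_be_32(
 *			((const struct rte_ecpri_common_hdr){
 *				.type = RTE_ECPRI_MSG_TYPE_IQ_DATA,
 *			}).u32),
 *	};
 *	struct rte_flow_item_ecpri ecpri_mask = {
 *		.hdr.common.u32 = rte_cpu_to_be_32(
 *			((const struct rte_ecpri_common_hdr){
 *				.type = 0xff,
 *			}).u32),
 *	};
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH }, // ether type 0xAEFE
 *		{ .type = RTE_FLOW_ITEM_TYPE_ECPRI,
 *		  .spec = &ecpri_spec, .mask = &ecpri_mask },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */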
3827 
3828 static int
3829 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
3830 		   const struct rte_flow_attr *attr __rte_unused,
3831 		   const struct rte_flow_item items[] __rte_unused,
3832 		   const struct rte_flow_action actions[] __rte_unused,
3833 		   bool external __rte_unused,
3834 		   int hairpin __rte_unused,
3835 		   struct rte_flow_error *error)
3836 {
3837 	return rte_flow_error_set(error, ENOTSUP,
3838 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3839 }
3840 
3841 static struct mlx5_flow *
3842 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
3843 		  const struct rte_flow_attr *attr __rte_unused,
3844 		  const struct rte_flow_item items[] __rte_unused,
3845 		  const struct rte_flow_action actions[] __rte_unused,
3846 		  struct rte_flow_error *error)
3847 {
3848 	rte_flow_error_set(error, ENOTSUP,
3849 			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3850 	return NULL;
3851 }
3852 
3853 static int
3854 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
3855 		    struct mlx5_flow *dev_flow __rte_unused,
3856 		    const struct rte_flow_attr *attr __rte_unused,
3857 		    const struct rte_flow_item items[] __rte_unused,
3858 		    const struct rte_flow_action actions[] __rte_unused,
3859 		    struct rte_flow_error *error)
3860 {
3861 	return rte_flow_error_set(error, ENOTSUP,
3862 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3863 }
3864 
3865 static int
3866 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
3867 		struct rte_flow *flow __rte_unused,
3868 		struct rte_flow_error *error)
3869 {
3870 	return rte_flow_error_set(error, ENOTSUP,
3871 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3872 }
3873 
3874 static void
3875 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
3876 		 struct rte_flow *flow __rte_unused)
3877 {
3878 }
3879 
3880 static void
3881 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
3882 		  struct rte_flow *flow __rte_unused)
3883 {
3884 }
3885 
3886 static int
3887 flow_null_query(struct rte_eth_dev *dev __rte_unused,
3888 		struct rte_flow *flow __rte_unused,
3889 		const struct rte_flow_action *actions __rte_unused,
3890 		void *data __rte_unused,
3891 		struct rte_flow_error *error)
3892 {
3893 	return rte_flow_error_set(error, ENOTSUP,
3894 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3895 }
3896 
3897 static int
3898 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused,
3899 		      uint32_t domains __rte_unused,
3900 		      uint32_t flags __rte_unused)
3901 {
3902 	return 0;
3903 }
3904 
3905 int
3906 flow_null_get_aged_flows(struct rte_eth_dev *dev,
3907 		    void **context __rte_unused,
3908 		    uint32_t nb_contexts __rte_unused,
3909 		    struct rte_flow_error *error __rte_unused)
3910 {
3911 	DRV_LOG(ERR, "port %u get aged flows is not supported.",
3912 		dev->data->port_id);
3913 	return -ENOTSUP;
3914 }
3915 
3916 uint32_t
3917 flow_null_counter_allocate(struct rte_eth_dev *dev)
3918 {
3919 	DRV_LOG(ERR, "port %u counter allocate is not supported.",
3920 		dev->data->port_id);
3921 	return 0;
3922 }
3923 
3924 void
3925 flow_null_counter_free(struct rte_eth_dev *dev,
3926 			uint32_t counter __rte_unused)
3927 {
3928 	DRV_LOG(ERR, "port %u counter free is not supported.",
3929 		 dev->data->port_id);
3930 }
3931 
3932 int
3933 flow_null_counter_query(struct rte_eth_dev *dev,
3934 			uint32_t counter __rte_unused,
3935 			bool clear __rte_unused,
3936 			uint64_t *pkts __rte_unused,
3937 			uint64_t *bytes __rte_unused,
3938 			void **action __rte_unused)
3939 {
3940 	DRV_LOG(ERR, "port %u counter query is not supported.",
3941 		 dev->data->port_id);
3942 	return -ENOTSUP;
3943 }
3944 
3945 /* Void driver to protect from null pointer reference. */
3946 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
3947 	.validate = flow_null_validate,
3948 	.prepare = flow_null_prepare,
3949 	.translate = flow_null_translate,
3950 	.apply = flow_null_apply,
3951 	.remove = flow_null_remove,
3952 	.destroy = flow_null_destroy,
3953 	.query = flow_null_query,
3954 	.sync_domain = flow_null_sync_domain,
3955 	.get_aged_flows = flow_null_get_aged_flows,
3956 	.counter_alloc = flow_null_counter_allocate,
3957 	.counter_free = flow_null_counter_free,
3958 	.counter_query = flow_null_counter_query
3959 };
3960 
3961 /**
3962  * Select flow driver type according to flow attributes and device
3963  * configuration.
3964  *
3965  * @param[in] dev
3966  *   Pointer to the dev structure.
3967  * @param[in] attr
3968  *   Pointer to the flow attributes.
3969  *
3970  * @return
3971  *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
3972  */
3973 static enum mlx5_flow_drv_type
3974 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
3975 {
3976 	struct mlx5_priv *priv = dev->data->dev_private;
3977 	/* The OS can determine first a specific flow type (DV, VERBS) */
3978 	enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
3979 
3980 	if (type != MLX5_FLOW_TYPE_MAX)
3981 		return type;
3982 	/*
3983 	 * Currently when dv_flow_en == 2, only HW steering engine is
3984 	 * supported. New engines can also be chosen here if ready.
3985 	 */
3986 	if (priv->sh->config.dv_flow_en == 2)
3987 		return MLX5_FLOW_TYPE_HW;
3988 	if (!attr)
3989 		return MLX5_FLOW_TYPE_MIN;
3990 	/* If no OS specific type - continue with DV/VERBS selection */
3991 	if (attr->transfer && priv->sh->config.dv_esw_en)
3992 		type = MLX5_FLOW_TYPE_DV;
3993 	if (!attr->transfer)
3994 		type = priv->sh->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
3995 						     MLX5_FLOW_TYPE_VERBS;
3996 	return type;
3997 }
3998 
3999 #define flow_get_drv_ops(type) flow_drv_ops[type]
4000 
4001 /**
4002  * Flow driver validation API. This abstracts calling driver specific functions.
4003  * The type of flow driver is determined according to flow attributes.
4004  *
4005  * @param[in] dev
4006  *   Pointer to the dev structure.
4007  * @param[in] attr
4008  *   Pointer to the flow attributes.
4009  * @param[in] items
4010  *   Pointer to the list of items.
4011  * @param[in] actions
4012  *   Pointer to the list of actions.
4013  * @param[in] external
4014  *   This flow rule is created by a request external to the PMD.
4015  * @param[in] hairpin
4016  *   Number of hairpin TX actions, 0 means classic flow.
4017  * @param[out] error
4018  *   Pointer to the error structure.
4019  *
4020  * @return
4021  *   0 on success, a negative errno value otherwise and rte_errno is set.
4022  */
4023 static inline int
4024 flow_drv_validate(struct rte_eth_dev *dev,
4025 		  const struct rte_flow_attr *attr,
4026 		  const struct rte_flow_item items[],
4027 		  const struct rte_flow_action actions[],
4028 		  bool external, int hairpin, struct rte_flow_error *error)
4029 {
4030 	const struct mlx5_flow_driver_ops *fops;
4031 	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
4032 
4033 	fops = flow_get_drv_ops(type);
4034 	return fops->validate(dev, attr, items, actions, external,
4035 			      hairpin, error);
4036 }
4037 
4038 /**
4039  * Flow driver preparation API. This abstracts calling driver specific
4040  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
4041  * calculates the size of memory required for device flow, allocates the memory,
4042  * initializes the device flow and returns the pointer.
4043  *
4044  * @note
4045  *   This function initializes device flow structure such as dv or verbs in
4046  *   struct mlx5_flow. However, it is the caller's responsibility to initialize
4047  *   the rest. For example, adding the returned device flow to the
4048  *   flow->dev_flow list and setting a backward reference to the flow should be
4049  *   done outside of this function. The layers field is not filled either.
4050  *
4051  * @param[in] dev
4052  *   Pointer to the dev structure.
4053  * @param[in] attr
4054  *   Pointer to the flow attributes.
4055  * @param[in] items
4056  *   Pointer to the list of items.
4057  * @param[in] actions
4058  *   Pointer to the list of actions.
4059  * @param[in] flow_idx
4060  *   The memory pool index of the flow.
4061  * @param[out] error
4062  *   Pointer to the error structure.
4063  *
4064  * @return
4065  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
4066  */
4067 static inline struct mlx5_flow *
4068 flow_drv_prepare(struct rte_eth_dev *dev,
4069 		 const struct rte_flow *flow,
4070 		 const struct rte_flow_attr *attr,
4071 		 const struct rte_flow_item items[],
4072 		 const struct rte_flow_action actions[],
4073 		 uint32_t flow_idx,
4074 		 struct rte_flow_error *error)
4075 {
4076 	const struct mlx5_flow_driver_ops *fops;
4077 	enum mlx5_flow_drv_type type = flow->drv_type;
4078 	struct mlx5_flow *mlx5_flow = NULL;
4079 
4080 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
4081 	fops = flow_get_drv_ops(type);
4082 	mlx5_flow = fops->prepare(dev, attr, items, actions, error);
4083 	if (mlx5_flow)
4084 		mlx5_flow->flow_idx = flow_idx;
4085 	return mlx5_flow;
4086 }
4087 
4088 /**
4089  * Flow driver translation API. This abstracts calling driver specific
4090  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
4091  * translates a generic flow into a driver flow. flow_drv_prepare() must
4092  * precede.
4093  *
4094  * @note
4095  *   dev_flow->layers could be filled as a result of parsing during translation
4096  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
4097  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
4098  *   flow->actions could be overwritten even though all the expanded dev_flows
4099  *   have the same actions.
4100  *
4101  * @param[in] dev
4102  *   Pointer to the rte dev structure.
4103  * @param[in, out] dev_flow
4104  *   Pointer to the mlx5 flow.
4105  * @param[in] attr
4106  *   Pointer to the flow attributes.
4107  * @param[in] items
4108  *   Pointer to the list of items.
4109  * @param[in] actions
4110  *   Pointer to the list of actions.
4111  * @param[out] error
4112  *   Pointer to the error structure.
4113  *
4114  * @return
4115  *   0 on success, a negative errno value otherwise and rte_errno is set.
4116  */
4117 static inline int
4118 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
4119 		   const struct rte_flow_attr *attr,
4120 		   const struct rte_flow_item items[],
4121 		   const struct rte_flow_action actions[],
4122 		   struct rte_flow_error *error)
4123 {
4124 	const struct mlx5_flow_driver_ops *fops;
4125 	enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
4126 
4127 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
4128 	fops = flow_get_drv_ops(type);
4129 	return fops->translate(dev, dev_flow, attr, items, actions, error);
4130 }
4131 
4132 /**
4133  * Flow driver apply API. This abstracts calling driver specific functions.
4134  * Parent flow (rte_flow) should have driver type (drv_type). It applies
4135  * translated driver flows on to device. flow_drv_translate() must precede.
4136  *
4137  * @param[in] dev
4138  *   Pointer to Ethernet device structure.
4139  * @param[in, out] flow
4140  *   Pointer to flow structure.
4141  * @param[out] error
4142  *   Pointer to error structure.
4143  *
4144  * @return
4145  *   0 on success, a negative errno value otherwise and rte_errno is set.
4146  */
4147 static inline int
4148 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
4149 	       struct rte_flow_error *error)
4150 {
4151 	const struct mlx5_flow_driver_ops *fops;
4152 	enum mlx5_flow_drv_type type = flow->drv_type;
4153 
4154 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
4155 	fops = flow_get_drv_ops(type);
4156 	return fops->apply(dev, flow, error);
4157 }
4158 
4159 /**
4160  * Flow driver destroy API. This abstracts calling driver specific functions.
4161  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
4162  * on device and releases resources of the flow.
4163  *
4164  * @param[in] dev
4165  *   Pointer to Ethernet device.
4166  * @param[in, out] flow
4167  *   Pointer to flow structure.
4168  */
4169 static inline void
4170 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
4171 {
4172 	const struct mlx5_flow_driver_ops *fops;
4173 	enum mlx5_flow_drv_type type = flow->drv_type;
4174 
4175 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
4176 	fops = flow_get_drv_ops(type);
4177 	fops->destroy(dev, flow);
4178 }
4179 
4180 /**
4181  * Flow driver find RSS policy tbl API. This abstracts calling driver
4182  * specific functions. Parent flow (rte_flow) should have driver
4183  * type (drv_type). It will find the RSS policy table that has the rss_desc.
4184  *
4185  * @param[in] dev
4186  *   Pointer to Ethernet device.
4187  * @param[in, out] flow
4188  *   Pointer to flow structure.
4189  * @param[in] policy
4190  *   Pointer to meter policy table.
4191  * @param[in] rss_desc
4192  *   Pointer to the rss_desc array (one entry per meter color).
4193  */
4194 static struct mlx5_flow_meter_sub_policy *
4195 flow_drv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
4196 		struct rte_flow *flow,
4197 		struct mlx5_flow_meter_policy *policy,
4198 		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS])
4199 {
4200 	const struct mlx5_flow_driver_ops *fops;
4201 	enum mlx5_flow_drv_type type = flow->drv_type;
4202 
4203 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
4204 	fops = flow_get_drv_ops(type);
4205 	return fops->meter_sub_policy_rss_prepare(dev, policy, rss_desc);
4206 }
4207 
4208 /**
4209  * Flow driver color tag rule API. This abstracts calling driver
4210  * specific functions. Parent flow (rte_flow) should have driver
4211  * type (drv_type). It will create the color tag rules in the meter hierarchy.
4212  *
4213  * @param[in] dev
4214  *   Pointer to Ethernet device.
4215  * @param[in, out] flow
4216  *   Pointer to flow structure.
4217  * @param[in] fm
4218  *   Pointer to flow meter structure.
4219  * @param[in] src_port
4220  *   The src port this extra rule should use.
4221  * @param[in] item
4222  *   The src port id match item.
4223  * @param[out] error
4224  *   Pointer to error structure.
4225  */
4226 static int
4227 flow_drv_mtr_hierarchy_rule_create(struct rte_eth_dev *dev,
4228 		struct rte_flow *flow,
4229 		struct mlx5_flow_meter_info *fm,
4230 		int32_t src_port,
4231 		const struct rte_flow_item *item,
4232 		struct rte_flow_error *error)
4233 {
4234 	const struct mlx5_flow_driver_ops *fops;
4235 	enum mlx5_flow_drv_type type = flow->drv_type;
4236 
4237 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
4238 	fops = flow_get_drv_ops(type);
4239 	return fops->meter_hierarchy_rule_create(dev, fm,
4240 						src_port, item, error);
4241 }
4242 
4243 /**
4244  * Get RSS action from the action list.
4245  *
4246  * @param[in] dev
4247  *   Pointer to Ethernet device.
4248  * @param[in] actions
4249  *   Pointer to the list of actions.
4250  * @param[in] flow
4251  *   Parent flow structure pointer.
4252  *
4253  * @return
4254  *   Pointer to the RSS action if it exists, otherwise NULL.
4255  */
4256 static const struct rte_flow_action_rss*
4257 flow_get_rss_action(struct rte_eth_dev *dev,
4258 		    const struct rte_flow_action actions[])
4259 {
4260 	struct mlx5_priv *priv = dev->data->dev_private;
4261 	const struct rte_flow_action_rss *rss = NULL;
4262 	struct mlx5_meter_policy_action_container *acg;
4263 	struct mlx5_meter_policy_action_container *acy;
4264 
4265 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4266 		switch (actions->type) {
4267 		case RTE_FLOW_ACTION_TYPE_RSS:
4268 			rss = actions->conf;
4269 			break;
4270 		case RTE_FLOW_ACTION_TYPE_SAMPLE:
4271 		{
4272 			const struct rte_flow_action_sample *sample =
4273 								actions->conf;
4274 			const struct rte_flow_action *act = sample->actions;
4275 			for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
4276 				if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
4277 					rss = act->conf;
4278 			break;
4279 		}
4280 		case RTE_FLOW_ACTION_TYPE_METER:
4281 		{
4282 			uint32_t mtr_idx;
4283 			struct mlx5_flow_meter_info *fm;
4284 			struct mlx5_flow_meter_policy *policy;
4285 			const struct rte_flow_action_meter *mtr = actions->conf;
4286 
4287 			fm = mlx5_flow_meter_find(priv, mtr->mtr_id, &mtr_idx);
4288 			if (fm && !fm->def_policy) {
4289 				policy = mlx5_flow_meter_policy_find(dev,
4290 						fm->policy_id, NULL);
4291 				MLX5_ASSERT(policy);
4292 				if (policy->is_hierarchy) {
4293 					policy =
4294 				mlx5_flow_meter_hierarchy_get_final_policy(dev,
4295 									policy);
4296 					if (!policy)
4297 						return NULL;
4298 				}
4299 				if (policy->is_rss) {
4300 					acg =
4301 					&policy->act_cnt[RTE_COLOR_GREEN];
4302 					acy =
4303 					&policy->act_cnt[RTE_COLOR_YELLOW];
4304 					if (acg->fate_action ==
4305 					    MLX5_FLOW_FATE_SHARED_RSS)
4306 						rss = acg->rss->conf;
4307 					else if (acy->fate_action ==
4308 						 MLX5_FLOW_FATE_SHARED_RSS)
4309 						rss = acy->rss->conf;
4310 				}
4311 			}
4312 			break;
4313 		}
4314 		default:
4315 			break;
4316 		}
4317 	}
4318 	return rss;
4319 }
4320 
4321 /**
4322  * Get ASO age action by index.
4323  *
4324  * @param[in] dev
4325  *   Pointer to the Ethernet device structure.
4326  * @param[in] age_idx
4327  *   Index to the ASO age action.
4328  *
4329  * @return
4330  *   The specified ASO age action.
4331  */
4332 struct mlx5_aso_age_action*
4333 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx)
4334 {
4335 	uint16_t pool_idx = age_idx & UINT16_MAX;
4336 	uint16_t offset = (age_idx >> 16) & UINT16_MAX;
4337 	struct mlx5_priv *priv = dev->data->dev_private;
4338 	struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
4339 	struct mlx5_aso_age_pool *pool;
4340 
4341 	rte_rwlock_read_lock(&mng->resize_rwl);
4342 	pool = mng->pools[pool_idx];
4343 	rte_rwlock_read_unlock(&mng->resize_rwl);
4344 	return &pool->actions[offset - 1];
4345 }
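
/*
 * The age_idx layout decoded above packs the pool index in the lower
 * 16 bits and a 1-based action offset in the upper 16 bits, so an
 * index is composed as:
 *
 *	age_idx = ((uint32_t)(offset + 1) << 16) | pool_idx;
 *
 * where offset is the zero-based index into pool->actions.
 */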
4346 
4347 /* Maps an indirect action to its translated direct action in an actions array. */
4348 struct mlx5_translated_action_handle {
4349 	struct rte_flow_action_handle *action; /**< Indirect action handle. */
4350 	int index; /**< Index in related array of rte_flow_action. */
4351 };
4352 
4353 /**
4354  * Translates actions of type RTE_FLOW_ACTION_TYPE_INDIRECT to related
4355  * direct action if translation is possible.
4356  * This functionality is used to run the same execution path for both direct
4357  * and indirect actions on flow create. All necessary preparations for indirect
4358  * action handling should be performed on *handle* actions list returned
4359  * from this call.
4360  *
4361  * @param[in] dev
4362  *   Pointer to Ethernet device.
4363  * @param[in] actions
4364  *   List of actions to translate.
4365  * @param[out] handle
4366  *   List to store translated indirect action object handles.
4367  * @param[in, out] indir_n
4368  *   Size of the *handle* array. On return it is updated with the number of
4369  *   indirect actions retrieved from the *actions* list.
4370  * @param[out] translated_actions
4371  *   List of actions where all indirect actions were translated to direct
4372  *   if possible. NULL if no translation took place.
4373  * @param[out] error
4374  *   Pointer to the error structure.
4375  *
4376  * @return
4377  *   0 on success, a negative errno value otherwise and rte_errno is set.
4378  */
4379 static int
4380 flow_action_handles_translate(struct rte_eth_dev *dev,
4381 			      const struct rte_flow_action actions[],
4382 			      struct mlx5_translated_action_handle *handle,
4383 			      int *indir_n,
4384 			      struct rte_flow_action **translated_actions,
4385 			      struct rte_flow_error *error)
4386 {
4387 	struct mlx5_priv *priv = dev->data->dev_private;
4388 	struct rte_flow_action *translated = NULL;
4389 	size_t actions_size;
4390 	int n;
4391 	int copied_n = 0;
4392 	struct mlx5_translated_action_handle *handle_end = NULL;
4393 
4394 	for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) {
4395 		if (actions[n].type != RTE_FLOW_ACTION_TYPE_INDIRECT)
4396 			continue;
4397 		if (copied_n == *indir_n) {
4398 			return rte_flow_error_set
4399 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM,
4400 				 NULL, "too many shared actions");
4401 		}
4402 		rte_memcpy(&handle[copied_n].action, &actions[n].conf,
4403 			   sizeof(actions[n].conf));
4404 		handle[copied_n].index = n;
4405 		copied_n++;
4406 	}
4407 	n++;
4408 	*indir_n = copied_n;
4409 	if (!copied_n)
4410 		return 0;
4411 	actions_size = sizeof(struct rte_flow_action) * n;
4412 	translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY);
4413 	if (!translated) {
4414 		rte_errno = ENOMEM;
4415 		return -ENOMEM;
4416 	}
4417 	memcpy(translated, actions, actions_size);
4418 	for (handle_end = handle + copied_n; handle < handle_end; handle++) {
4419 		struct mlx5_shared_action_rss *shared_rss;
4420 		uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
4421 		uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
4422 		uint32_t idx = act_idx &
4423 			       ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
4424 
4425 		switch (type) {
4426 		case MLX5_INDIRECT_ACTION_TYPE_RSS:
4427 			shared_rss = mlx5_ipool_get
4428 			  (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx);
4429 			translated[handle->index].type =
4430 				RTE_FLOW_ACTION_TYPE_RSS;
4431 			translated[handle->index].conf =
4432 				&shared_rss->origin;
4433 			break;
4434 		case MLX5_INDIRECT_ACTION_TYPE_COUNT:
4435 			translated[handle->index].type =
4436 						(enum rte_flow_action_type)
4437 						MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
4438 			translated[handle->index].conf = (void *)(uintptr_t)idx;
4439 			break;
4440 		case MLX5_INDIRECT_ACTION_TYPE_METER_MARK:
4441 			translated[handle->index].type =
4442 						(enum rte_flow_action_type)
4443 						MLX5_RTE_FLOW_ACTION_TYPE_METER_MARK;
4444 			translated[handle->index].conf = (void *)(uintptr_t)idx;
4445 			break;
4446 		case MLX5_INDIRECT_ACTION_TYPE_AGE:
4447 			if (priv->sh->flow_hit_aso_en) {
4448 				translated[handle->index].type =
4449 					(enum rte_flow_action_type)
4450 					MLX5_RTE_FLOW_ACTION_TYPE_AGE;
4451 				translated[handle->index].conf =
4452 							 (void *)(uintptr_t)idx;
4453 				break;
4454 			}
4455 			/* Fall-through */
4456 		case MLX5_INDIRECT_ACTION_TYPE_CT:
4457 			if (priv->sh->ct_aso_en) {
4458 				translated[handle->index].type =
4459 					RTE_FLOW_ACTION_TYPE_CONNTRACK;
4460 				translated[handle->index].conf =
4461 							 (void *)(uintptr_t)idx;
4462 				break;
4463 			}
4464 			/* Fall-through */
4465 		default:
4466 			mlx5_free(translated);
4467 			return rte_flow_error_set
4468 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
4469 				 NULL, "invalid indirect action type");
4470 		}
4471 	}
4472 	*translated_actions = translated;
4473 	return 0;
4474 }
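
/*
 * The indirect action handle decoded above is not a real pointer but an
 * encoded 32-bit value: the action type is kept in the bits at and
 * above MLX5_INDIRECT_ACTION_TYPE_OFFSET, the pool index in the bits
 * below it:
 *
 *	act_idx = (type << MLX5_INDIRECT_ACTION_TYPE_OFFSET) | idx;
 */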
4475 
4476 /**
4477  * Get Shared RSS action from the action list.
4478  *
4479  * @param[in] dev
4480  *   Pointer to Ethernet device.
4481  * @param[in] handle
4482  *   Pointer to the list of translated action handles.
4483  * @param[in] shared_n
4484  *   Actions list length.
4485  *
4486  * @return
4487  *   The MLX5 RSS action ID if it exists, otherwise 0.
4488  */
4489 static uint32_t
4490 flow_get_shared_rss_action(struct rte_eth_dev *dev,
4491 			   struct mlx5_translated_action_handle *handle,
4492 			   int shared_n)
4493 {
4494 	struct mlx5_translated_action_handle *handle_end;
4495 	struct mlx5_priv *priv = dev->data->dev_private;
4496 	struct mlx5_shared_action_rss *shared_rss;
4497 
4498 
4500 		uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
4501 		uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
4502 		uint32_t idx = act_idx &
4503 			       ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
4504 		switch (type) {
4505 		case MLX5_INDIRECT_ACTION_TYPE_RSS:
4506 			shared_rss = mlx5_ipool_get
4507 				(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
4508 									   idx);
4509 			__atomic_fetch_add(&shared_rss->refcnt, 1,
4510 					   __ATOMIC_RELAXED);
4511 			return idx;
4512 		default:
4513 			break;
4514 		}
4515 	}
4516 	return 0;
4517 }
4518 
4519 static unsigned int
4520 find_graph_root(uint32_t rss_level)
4521 {
4522 	return rss_level < 2 ? MLX5_EXPANSION_ROOT :
4523 			       MLX5_EXPANSION_ROOT_OUTER;
4524 }
4525 
4526 /**
4527  *  Get layer flags from the prefix flow.
4528  *
4529  *  Some flows may be split into several subflows: the prefix subflow gets the
4530  *  match items and the suffix subflow gets the actions.
4531  *  Some actions need the user-defined match item flags to get the details of
4532  *  the action.
4533  *  This function helps the suffix flow to get the item layer flags from the
4534  *  prefix subflow.
4535  *
4536  * @param[in] dev_flow
4537  *   Pointer to the created prefix subflow.
4538  *
4539  * @return
4540  *   The layers obtained from the prefix subflow.
4541  */
4542 static inline uint64_t
4543 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
4544 {
4545 	uint64_t layers = 0;
4546 
4547 	/*
4548 	 * The layers bits could be cached in a local variable, but usually
4549 	 * the compiler will do this optimization anyway.
4550 	 * If there is no decap action, use the layers directly.
4551 	 */
4552 	if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
4553 		return dev_flow->handle->layers;
4554 	/* Convert L3 layers with decap action. */
4555 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
4556 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
4557 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
4558 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
4559 	/* Convert L4 layers with decap action.  */
4560 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
4561 		layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
4562 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
4563 		layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
4564 	return layers;
4565 }
4566 
4567 /**
4568  * Get metadata split action information.
4569  *
4570  * @param[in] actions
4571  *   Pointer to the list of actions.
4572  * @param[out] qrss
4573  *   Pointer to the return pointer; set to the QUEUE/RSS action if one is
4574  *   found, left untouched otherwise.
4577  * @param[out] encap_idx
4578  *   Pointer to the index of the encap action if exists, otherwise the last
4579  *   action index.
4580  *
4581  * @return
4582  *   Total number of actions.
4583  */
4584 static int
4585 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
4586 				       const struct rte_flow_action **qrss,
4587 				       int *encap_idx)
4588 {
4589 	const struct rte_flow_action_raw_encap *raw_encap;
4590 	int actions_n = 0;
4591 	int raw_decap_idx = -1;
4592 
4593 	*encap_idx = -1;
4594 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4595 		switch (actions->type) {
4596 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4597 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4598 			*encap_idx = actions_n;
4599 			break;
4600 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4601 			raw_decap_idx = actions_n;
4602 			break;
4603 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4604 			raw_encap = actions->conf;
4605 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4606 				*encap_idx = raw_decap_idx != -1 ?
4607 						      raw_decap_idx : actions_n;
4608 			break;
4609 		case RTE_FLOW_ACTION_TYPE_QUEUE:
4610 		case RTE_FLOW_ACTION_TYPE_RSS:
4611 			*qrss = actions;
4612 			break;
4613 		default:
4614 			break;
4615 		}
4616 		actions_n++;
4617 	}
4618 	if (*encap_idx == -1)
4619 		*encap_idx = actions_n;
4620 	/* Count RTE_FLOW_ACTION_TYPE_END. */
4621 	return actions_n + 1;
4622 }
4623 
4624 /**
4625  * Check if the action will change the packet.
4626  *
4627  * @param dev
4628  *   Pointer to Ethernet device.
4629  * @param[in] type
4630  *   action type.
4631  *
4632  * @return
4633  *   true if the action will change the packet, false otherwise.
4634  */
4635 static bool flow_check_modify_action_type(struct rte_eth_dev *dev,
4636 					  enum rte_flow_action_type type)
4637 {
4638 	struct mlx5_priv *priv = dev->data->dev_private;
4639 
4640 	switch (type) {
4641 	case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
4642 	case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
4643 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
4644 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
4645 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
4646 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
4647 	case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
4648 	case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
4649 	case RTE_FLOW_ACTION_TYPE_DEC_TTL:
4650 	case RTE_FLOW_ACTION_TYPE_SET_TTL:
4651 	case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
4652 	case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
4653 	case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
4654 	case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
4655 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
4656 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
4657 	case RTE_FLOW_ACTION_TYPE_SET_META:
4658 	case RTE_FLOW_ACTION_TYPE_SET_TAG:
4659 	case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
4660 	case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4661 	case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4662 	case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4663 	case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4664 	case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4665 	case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4666 	case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4667 	case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4668 	case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4669 	case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
4670 		return true;
4671 	case RTE_FLOW_ACTION_TYPE_FLAG:
4672 	case RTE_FLOW_ACTION_TYPE_MARK:
4673 		if (priv->sh->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
4674 		    priv->sh->config.dv_xmeta_en != MLX5_XMETA_MODE_META32_HWS)
4675 			return true;
4676 		else
4677 			return false;
4678 	default:
4679 		return false;
4680 	}
4681 }
4682 
4683 /**
4684  * Check meter action from the action list.
4685  *
4686  * @param dev
4687  *   Pointer to Ethernet device.
4688  * @param[in] actions
4689  *   Pointer to the list of actions.
4690  * @param[out] has_mtr
4691  *   Pointer to the meter exist flag.
4692  * @param[out] has_modify
4693  *   Pointer to the flag indicating whether there is a packet-modifying action.
4694  * @param[out] meter_id
4695  *   Pointer to the meter id.
4696  *
4697  * @return
4698  *   Total number of actions.
4699  */
4700 static int
4701 flow_check_meter_action(struct rte_eth_dev *dev,
4702 			const struct rte_flow_action actions[],
4703 			bool *has_mtr, bool *has_modify, uint32_t *meter_id)
4704 {
4705 	const struct rte_flow_action_meter *mtr = NULL;
4706 	int actions_n = 0;
4707 
4708 	MLX5_ASSERT(has_mtr);
4709 	*has_mtr = false;
4710 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4711 		switch (actions->type) {
4712 		case RTE_FLOW_ACTION_TYPE_METER:
4713 			mtr = actions->conf;
4714 			*meter_id = mtr->mtr_id;
4715 			*has_mtr = true;
4716 			break;
4717 		default:
4718 			break;
4719 		}
4720 		if (!*has_mtr)
4721 			*has_modify |= flow_check_modify_action_type(dev,
4722 								actions->type);
4723 		actions_n++;
4724 	}
4725 	/* Count RTE_FLOW_ACTION_TYPE_END. */
4726 	return actions_n + 1;
4727 }
4728 
4729 /**
4730  * Check if the flow should be split due to hairpin.
4731  * The reason for the split is that in current HW we can't
4732  * support encap and push-vlan on Rx, so if a flow contains
4733  * these actions we move it to Tx.
4734  *
4735  * @param dev
4736  *   Pointer to Ethernet device.
4737  * @param[in] attr
4738  *   Flow rule attributes.
4739  * @param[in] actions
4740  *   Associated actions (list terminated by the END action).
4741  *
4742  * @return
4743  *   > 0 the number of actions and the flow should be split,
4744  *   0 when no split required.
4745  */
4746 static int
4747 flow_check_hairpin_split(struct rte_eth_dev *dev,
4748 			 const struct rte_flow_attr *attr,
4749 			 const struct rte_flow_action actions[])
4750 {
4751 	int queue_action = 0;
4752 	int action_n = 0;
4753 	int split = 0;
4754 	int push_vlan = 0;
4755 	const struct rte_flow_action_queue *queue;
4756 	const struct rte_flow_action_rss *rss;
4757 	const struct rte_flow_action_raw_encap *raw_encap;
4758 	const struct rte_eth_hairpin_conf *conf;
4759 
4760 	if (!attr->ingress)
4761 		return 0;
4762 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4763 		if (actions->type == RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN)
4764 			push_vlan = 1;
4765 		switch (actions->type) {
4766 		case RTE_FLOW_ACTION_TYPE_QUEUE:
4767 			queue = actions->conf;
4768 			if (queue == NULL)
4769 				return 0;
4770 			conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
4771 			if (conf == NULL || conf->tx_explicit != 0)
4772 				return 0;
4773 			queue_action = 1;
4774 			action_n++;
4775 			break;
4776 		case RTE_FLOW_ACTION_TYPE_RSS:
4777 			rss = actions->conf;
4778 			if (rss == NULL || rss->queue_num == 0)
4779 				return 0;
4780 			conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
4781 			if (conf == NULL || conf->tx_explicit != 0)
4782 				return 0;
4783 			queue_action = 1;
4784 			action_n++;
4785 			break;
4786 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4787 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4788 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4789 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4790 			split++;
4791 			action_n++;
4792 			break;
4793 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4794 			if (push_vlan)
4795 				split++;
4796 			action_n++;
4797 			break;
4798 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4799 			raw_encap = actions->conf;
4800 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4801 				split++;
4802 			action_n++;
4803 			break;
4804 		default:
4805 			action_n++;
4806 			break;
4807 		}
4808 	}
4809 	if (split && queue_action)
4810 		return action_n;
4811 	return 0;
4812 }
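
/*
 * Example (illustrative sketch, not part of the driver): an ingress
 * flow that the check above reports for splitting, combining VXLAN
 * encap with a hairpin queue. Queue index 4 is an assumed hairpin
 * queue bound with tx_explicit == 0, and "encap" is assumed to be a
 * filled struct rte_flow_action_vxlan_encap.
 *
 *	struct rte_flow_action_queue queue = { .index = 4 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, .conf = &encap },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 */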
4813 
4814 /* Declare flow create/destroy prototype in advance. */
4815 static uint32_t
4816 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4817 		 const struct rte_flow_attr *attr,
4818 		 const struct rte_flow_item items[],
4819 		 const struct rte_flow_action actions[],
4820 		 bool external, struct rte_flow_error *error);
4821 
4822 static void
4823 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4824 		  uint32_t flow_idx);
4825 
4826 int
4827 flow_dv_mreg_match_cb(void *tool_ctx __rte_unused,
4828 		      struct mlx5_list_entry *entry, void *cb_ctx)
4829 {
4830 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4831 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4832 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4833 
4834 	return mcp_res->mark_id != *(uint32_t *)(ctx->data);
4835 }
4836 
4837 struct mlx5_list_entry *
4838 flow_dv_mreg_create_cb(void *tool_ctx, void *cb_ctx)
4839 {
4840 	struct rte_eth_dev *dev = tool_ctx;
4841 	struct mlx5_priv *priv = dev->data->dev_private;
4842 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4843 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4844 	struct rte_flow_error *error = ctx->error;
4845 	uint32_t idx = 0;
4846 	int ret;
4847 	uint32_t mark_id = *(uint32_t *)(ctx->data);
4848 	struct rte_flow_attr attr = {
4849 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4850 		.ingress = 1,
4851 	};
4852 	struct mlx5_rte_flow_item_tag tag_spec = {
4853 		.data = mark_id,
4854 	};
4855 	struct rte_flow_item items[] = {
4856 		[1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
4857 	};
4858 	struct rte_flow_action_mark ftag = {
4859 		.id = mark_id,
4860 	};
4861 	struct mlx5_flow_action_copy_mreg cp_mreg = {
4862 		.dst = REG_B,
4863 		.src = REG_NON,
4864 	};
4865 	struct rte_flow_action_jump jump = {
4866 		.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4867 	};
4868 	struct rte_flow_action actions[] = {
4869 		[3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
4870 	};
4871 
4872 	/* Fill the register fields in the flow. */
4873 	ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
4874 	if (ret < 0)
4875 		return NULL;
4876 	tag_spec.id = ret;
4877 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4878 	if (ret < 0)
4879 		return NULL;
4880 	cp_mreg.src = ret;
4881 	/* Provide the full width of FLAG specific value. */
4882 	if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
4883 		tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
4884 	/* Build a new flow. */
4885 	if (mark_id != MLX5_DEFAULT_COPY_ID) {
4886 		items[0] = (struct rte_flow_item){
4887 			.type = (enum rte_flow_item_type)
4888 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4889 			.spec = &tag_spec,
4890 		};
4891 		items[1] = (struct rte_flow_item){
4892 			.type = RTE_FLOW_ITEM_TYPE_END,
4893 		};
4894 		actions[0] = (struct rte_flow_action){
4895 			.type = (enum rte_flow_action_type)
4896 				MLX5_RTE_FLOW_ACTION_TYPE_MARK,
4897 			.conf = &ftag,
4898 		};
4899 		actions[1] = (struct rte_flow_action){
4900 			.type = (enum rte_flow_action_type)
4901 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4902 			.conf = &cp_mreg,
4903 		};
4904 		actions[2] = (struct rte_flow_action){
4905 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
4906 			.conf = &jump,
4907 		};
4908 		actions[3] = (struct rte_flow_action){
4909 			.type = RTE_FLOW_ACTION_TYPE_END,
4910 		};
4911 	} else {
4912 		/* Default rule, wildcard match. */
4913 		attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR;
4914 		items[0] = (struct rte_flow_item){
4915 			.type = RTE_FLOW_ITEM_TYPE_END,
4916 		};
4917 		actions[0] = (struct rte_flow_action){
4918 			.type = (enum rte_flow_action_type)
4919 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4920 			.conf = &cp_mreg,
4921 		};
4922 		actions[1] = (struct rte_flow_action){
4923 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
4924 			.conf = &jump,
4925 		};
4926 		actions[2] = (struct rte_flow_action){
4927 			.type = RTE_FLOW_ACTION_TYPE_END,
4928 		};
4929 	}
4930 	/* Build a new entry. */
4931 	mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4932 	if (!mcp_res) {
4933 		rte_errno = ENOMEM;
4934 		return NULL;
4935 	}
4936 	mcp_res->idx = idx;
4937 	mcp_res->mark_id = mark_id;
4938 	/*
4939 	 * The copy flows are not included in any list. They
4940 	 * are referenced from other flows and cannot be
4941 	 * applied, removed or deleted in arbitrary order
4942 	 * by list traversal.
4943 	 */
4944 	mcp_res->rix_flow = flow_list_create(dev, MLX5_FLOW_TYPE_MCP,
4945 					&attr, items, actions, false, error);
4946 	if (!mcp_res->rix_flow) {
4947 		mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx);
4948 		return NULL;
4949 	}
4950 	return &mcp_res->hlist_ent;
4951 }
4952 
4953 struct mlx5_list_entry *
4954 flow_dv_mreg_clone_cb(void *tool_ctx, struct mlx5_list_entry *oentry,
4955 		      void *cb_ctx __rte_unused)
4956 {
4957 	struct rte_eth_dev *dev = tool_ctx;
4958 	struct mlx5_priv *priv = dev->data->dev_private;
4959 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4960 	uint32_t idx = 0;
4961 
4962 	mcp_res = mlx5_ipool_malloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4963 	if (!mcp_res) {
4964 		rte_errno = ENOMEM;
4965 		return NULL;
4966 	}
4967 	memcpy(mcp_res, oentry, sizeof(*mcp_res));
4968 	mcp_res->idx = idx;
4969 	return &mcp_res->hlist_ent;
4970 }
4971 
4972 void
4973 flow_dv_mreg_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4974 {
4975 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4976 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4977 	struct rte_eth_dev *dev = tool_ctx;
4978 	struct mlx5_priv *priv = dev->data->dev_private;
4979 
4980 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4981 }
4982 
4983 /**
4984  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4985  *
4986  * As mark_id is unique, if there's already a registered flow for the mark_id,
4987  * return by increasing the reference counter of the resource. Otherwise, create
4988  * the resource (mcp_res) and flow.
4989  *
4990  * Flow looks like,
4991  *   - If ingress port is ANY and reg_c[1] is mark_id,
4992  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4993  *
4994  * For default flow (zero mark_id), flow is like,
4995  *   - If ingress port is ANY,
4996  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
4997  *
4998  * @param dev
4999  *   Pointer to Ethernet device.
5000  * @param mark_id
5001  *   ID of MARK action, zero means default flow for META.
5002  * @param[out] error
5003  *   Perform verbose error reporting if not NULL.
5004  *
5005  * @return
5006  *   Associated resource on success, NULL otherwise and rte_errno is set.
5007  */
5008 static struct mlx5_flow_mreg_copy_resource *
5009 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
5010 			  struct rte_flow_error *error)
5011 {
5012 	struct mlx5_priv *priv = dev->data->dev_private;
5013 	struct mlx5_list_entry *entry;
5014 	struct mlx5_flow_cb_ctx ctx = {
5015 		.dev = dev,
5016 		.error = error,
5017 		.data = &mark_id,
5018 	};
5019 
5020 	/* Check if already registered. */
5021 	MLX5_ASSERT(priv->mreg_cp_tbl);
5022 	entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx);
5023 	if (!entry)
5024 		return NULL;
5025 	return container_of(entry, struct mlx5_flow_mreg_copy_resource,
5026 			    hlist_ent);
5027 }
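
/*
 * Illustrative usage sketch (not part of the original source): since
 * registration is keyed by mark_id, requesting the same ID twice only
 * increases the reference count of the shared resource. The "dev" and
 * "error" variables are hypothetical context.
 */
#if 0 /* documentation-only sketch */
	struct rte_flow_error error;
	struct mlx5_flow_mreg_copy_resource *a, *b;

	a = flow_mreg_add_copy_action(dev, 0x1234, &error);
	b = flow_mreg_add_copy_action(dev, 0x1234, &error);
	/* Here a == b and only one RX_CP_TBL copy flow exists. */
#endif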
5028 
5029 void
5030 flow_dv_mreg_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry)
5031 {
5032 	struct mlx5_flow_mreg_copy_resource *mcp_res =
5033 			       container_of(entry, typeof(*mcp_res), hlist_ent);
5034 	struct rte_eth_dev *dev = tool_ctx;
5035 	struct mlx5_priv *priv = dev->data->dev_private;
5036 
5037 	MLX5_ASSERT(mcp_res->rix_flow);
5038 	flow_list_destroy(dev, MLX5_FLOW_TYPE_MCP, mcp_res->rix_flow);
5039 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
5040 }
5041 
5042 /**
5043  * Release flow in RX_CP_TBL.
5044  *
5045  * @param dev
5046  *   Pointer to Ethernet device.
5047  * @param flow
5048  *   Parent flow for which copying is provided.
5049  */
5050 static void
5051 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
5052 			  struct rte_flow *flow)
5053 {
5054 	struct mlx5_flow_mreg_copy_resource *mcp_res;
5055 	struct mlx5_priv *priv = dev->data->dev_private;
5056 
5057 	if (!flow->rix_mreg_copy)
5058 		return;
5059 	mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
5060 				 flow->rix_mreg_copy);
5061 	if (!mcp_res || !priv->mreg_cp_tbl)
5062 		return;
5063 	MLX5_ASSERT(mcp_res->rix_flow);
5064 	mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
5065 	flow->rix_mreg_copy = 0;
5066 }
5067 
5068 /**
5069  * Remove the default copy action from RX_CP_TBL.
5070  *
5071  * This function is called in mlx5_dev_start(). Thread safety is not
5072  * guaranteed.
5073  *
5074  * @param dev
5075  *   Pointer to Ethernet device.
5076  */
5077 static void
5078 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
5079 {
5080 	struct mlx5_list_entry *entry;
5081 	struct mlx5_priv *priv = dev->data->dev_private;
5082 	struct mlx5_flow_cb_ctx ctx;
5083 	uint32_t mark_id;
5084 
5085 	/* Check if default flow is registered. */
5086 	if (!priv->mreg_cp_tbl)
5087 		return;
5088 	mark_id = MLX5_DEFAULT_COPY_ID;
5089 	ctx.data = &mark_id;
5090 	entry = mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx);
5091 	if (!entry)
5092 		return;
5093 	mlx5_hlist_unregister(priv->mreg_cp_tbl, entry);
5094 }
5095 
5096 /**
5097  * Add the default copy action in RX_CP_TBL.
5098  *
5099  * This function is called in mlx5_dev_start(). Thread safety is not
5100  * guaranteed.
5101  *
5102  * @param dev
5103  *   Pointer to Ethernet device.
5104  * @param[out] error
5105  *   Perform verbose error reporting if not NULL.
5106  *
5107  * @return
5108  *   0 for success, negative value otherwise and rte_errno is set.
5109  */
5110 static int
5111 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
5112 				  struct rte_flow_error *error)
5113 {
5114 	struct mlx5_priv *priv = dev->data->dev_private;
5115 	struct mlx5_flow_mreg_copy_resource *mcp_res;
5116 	struct mlx5_flow_cb_ctx ctx;
5117 	uint32_t mark_id;
5118 
5119 	/* Check whether extensive metadata feature is engaged. */
5120 	if (!priv->sh->config.dv_flow_en ||
5121 	    priv->sh->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
5122 	    !mlx5_flow_ext_mreg_supported(dev) ||
5123 	    !priv->sh->dv_regc0_mask)
5124 		return 0;
5125 	/*
5126 	 * Adding the default mreg copy flow may be requested multiple
5127 	 * times, but removal happens only once in stop; avoid registering it twice.
5128 	 */
5129 	mark_id = MLX5_DEFAULT_COPY_ID;
5130 	ctx.data = &mark_id;
5131 	if (mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx))
5132 		return 0;
5133 	mcp_res = flow_mreg_add_copy_action(dev, mark_id, error);
5134 	if (!mcp_res)
5135 		return -rte_errno;
5136 	return 0;
5137 }
5138 
5139 /**
5140  * Add a flow of copying flow metadata registers in RX_CP_TBL.
5141  *
5142  * All the flows having a Q/RSS action should be split by
5143  * flow_mreg_split_qrss_prep() to pass through RX_CP_TBL. A flow in RX_CP_TBL
5144  * performs the following,
5145  *   - CQE->flow_tag := reg_c[1] (MARK)
5146  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
5147  * As CQE's flow_tag is not a register, it can't simply be copied from reg_c[1];
5148  * there should be a flow for each MARK ID set by the MARK action.
5149  *
5150  * For the aforementioned reason, if there's a MARK action in flow's action
5151  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
5152  * the MARK ID to CQE's flow_tag like,
5153  *   - If reg_c[1] is mark_id,
5154  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
5155  *
5156  * For SET_META action which stores value in reg_c[0], as the destination is
5157  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
5158  * MARK ID means the default flow. The default flow looks like,
5159  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
5160  *
5161  * @param dev
5162  *   Pointer to Ethernet device.
5163  * @param flow
5164  *   Pointer to flow structure.
5165  * @param[in] actions
5166  *   Pointer to the list of actions.
5167  * @param[out] error
5168  *   Perform verbose error reporting if not NULL.
5169  *
5170  * @return
5171  *   0 on success, negative value otherwise and rte_errno is set.
5172  */
5173 static int
5174 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
5175 			    struct rte_flow *flow,
5176 			    const struct rte_flow_action *actions,
5177 			    struct rte_flow_error *error)
5178 {
5179 	struct mlx5_priv *priv = dev->data->dev_private;
5180 	struct mlx5_sh_config *config = &priv->sh->config;
5181 	struct mlx5_flow_mreg_copy_resource *mcp_res;
5182 	const struct rte_flow_action_mark *mark;
5183 
5184 	/* Check whether extensive metadata feature is engaged. */
5185 	if (!config->dv_flow_en ||
5186 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
5187 	    !mlx5_flow_ext_mreg_supported(dev) ||
5188 	    !priv->sh->dv_regc0_mask)
5189 		return 0;
5190 	/* Find MARK action. */
5191 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5192 		switch (actions->type) {
5193 		case RTE_FLOW_ACTION_TYPE_FLAG:
5194 			mcp_res = flow_mreg_add_copy_action
5195 				(dev, MLX5_FLOW_MARK_DEFAULT, error);
5196 			if (!mcp_res)
5197 				return -rte_errno;
5198 			flow->rix_mreg_copy = mcp_res->idx;
5199 			return 0;
5200 		case RTE_FLOW_ACTION_TYPE_MARK:
5201 			mark = (const struct rte_flow_action_mark *)
5202 				actions->conf;
5203 			mcp_res =
5204 				flow_mreg_add_copy_action(dev, mark->id, error);
5205 			if (!mcp_res)
5206 				return -rte_errno;
5207 			flow->rix_mreg_copy = mcp_res->idx;
5208 			return 0;
5209 		default:
5210 			break;
5211 		}
5212 	}
5213 	return 0;
5214 }
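
/*
 * Illustrative sketch (not part of the original source): a user rule
 * carrying a MARK action. With extensive metadata engaged, the routine
 * above installs a companion RX_CP_TBL flow for mark ID 0xbeef so the
 * CQE flow_tag can be restored from reg_c[1]. The "dev", "flow" and
 * "error" variables are hypothetical context.
 */
#if 0 /* documentation-only sketch */
	const struct rte_flow_action_mark mark = { .id = 0xbeef };
	const struct rte_flow_action_queue queue = { .index = 0 };
	const struct rte_flow_action user_actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END, },
	};
	int ret = flow_mreg_update_copy_table(dev, flow, user_actions, &error);

	/* On success (ret == 0), flow->rix_mreg_copy refers to the copy flow. */
#endif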
5215 
5216 #define MLX5_MAX_SPLIT_ACTIONS 24
5217 #define MLX5_MAX_SPLIT_ITEMS 24
5218 
5219 /**
5220  * Split the hairpin flow.
5221  * Since HW can't support encap and push-vlan on Rx, we move these
5222  * actions to Tx.
5223  * If the count action comes after the encap then we also
5224  * move the count action; in this case the count will also measure
5225  * the outer bytes.
5226  *
5227  * @param dev
5228  *   Pointer to Ethernet device.
5229  * @param[in] actions
5230  *   Associated actions (list terminated by the END action).
5231  * @param[out] actions_rx
5232  *   Rx flow actions.
5233  * @param[out] actions_tx
5234  *   Tx flow actions.
5235  * @param[out] pattern_tx
5236  *   The pattern items for the Tx flow.
5237  * @param[out] flow_id
5238  *   The flow ID connected to this flow.
5239  *
5240  * @return
5241  *   0 on success.
5242  */
5243 static int
5244 flow_hairpin_split(struct rte_eth_dev *dev,
5245 		   const struct rte_flow_action actions[],
5246 		   struct rte_flow_action actions_rx[],
5247 		   struct rte_flow_action actions_tx[],
5248 		   struct rte_flow_item pattern_tx[],
5249 		   uint32_t flow_id)
5250 {
5251 	const struct rte_flow_action_raw_encap *raw_encap;
5252 	const struct rte_flow_action_raw_decap *raw_decap;
5253 	struct mlx5_rte_flow_action_set_tag *set_tag;
5254 	struct rte_flow_action *tag_action;
5255 	struct mlx5_rte_flow_item_tag *tag_item;
5256 	struct rte_flow_item *item;
5257 	char *addr;
5258 	int push_vlan = 0;
5259 	int encap = 0;
5260 
5261 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5262 		if (actions->type == RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN)
5263 			push_vlan = 1;
5264 		switch (actions->type) {
5265 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
5266 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
5267 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5268 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
5269 			rte_memcpy(actions_tx, actions,
5270 			       sizeof(struct rte_flow_action));
5271 			actions_tx++;
5272 			break;
5273 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5274 			if (push_vlan) {
5275 				rte_memcpy(actions_tx, actions,
5276 					   sizeof(struct rte_flow_action));
5277 				actions_tx++;
5278 			} else {
5279 				rte_memcpy(actions_rx, actions,
5280 					   sizeof(struct rte_flow_action));
5281 				actions_rx++;
5282 			}
5283 			break;
5284 		case RTE_FLOW_ACTION_TYPE_COUNT:
5285 			if (encap) {
5286 				rte_memcpy(actions_tx, actions,
5287 					   sizeof(struct rte_flow_action));
5288 				actions_tx++;
5289 			} else {
5290 				rte_memcpy(actions_rx, actions,
5291 					   sizeof(struct rte_flow_action));
5292 				actions_rx++;
5293 			}
5294 			break;
5295 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5296 			raw_encap = actions->conf;
5297 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) {
5298 				memcpy(actions_tx, actions,
5299 				       sizeof(struct rte_flow_action));
5300 				actions_tx++;
5301 				encap = 1;
5302 			} else {
5303 				rte_memcpy(actions_rx, actions,
5304 					   sizeof(struct rte_flow_action));
5305 				actions_rx++;
5306 			}
5307 			break;
5308 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5309 			raw_decap = actions->conf;
5310 			if (raw_decap->size < MLX5_ENCAPSULATION_DECISION_SIZE) {
5311 				memcpy(actions_tx, actions,
5312 				       sizeof(struct rte_flow_action));
5313 				actions_tx++;
5314 			} else {
5315 				rte_memcpy(actions_rx, actions,
5316 					   sizeof(struct rte_flow_action));
5317 				actions_rx++;
5318 			}
5319 			break;
5320 		default:
5321 			rte_memcpy(actions_rx, actions,
5322 				   sizeof(struct rte_flow_action));
5323 			actions_rx++;
5324 			break;
5325 		}
5326 	}
5327 	/* Add set meta action and end action for the Rx flow. */
5328 	tag_action = actions_rx;
5329 	tag_action->type = (enum rte_flow_action_type)
5330 			   MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5331 	actions_rx++;
5332 	rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
5333 	actions_rx++;
5334 	set_tag = (void *)actions_rx;
5335 	*set_tag = (struct mlx5_rte_flow_action_set_tag) {
5336 		.id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL),
5337 		.data = flow_id,
5338 	};
5339 	MLX5_ASSERT(set_tag->id > REG_NON);
5340 	tag_action->conf = set_tag;
5341 	/* Create Tx item list. */
5342 	rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
5343 	addr = (void *)&pattern_tx[2];
5344 	item = pattern_tx;
5345 	item->type = (enum rte_flow_item_type)
5346 		     MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5347 	tag_item = (void *)addr;
5348 	tag_item->data = flow_id;
5349 	tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
5350 	MLX5_ASSERT(tag_item->id > REG_NON);
5351 	item->spec = tag_item;
5352 	addr += sizeof(struct mlx5_rte_flow_item_tag);
5353 	tag_item = (void *)addr;
5354 	tag_item->data = UINT32_MAX;
5355 	tag_item->id = UINT16_MAX;
5356 	item->mask = tag_item;
5357 	item->last = NULL;
5358 	item++;
5359 	item->type = RTE_FLOW_ITEM_TYPE_END;
5360 	return 0;
5361 }
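
/*
 * Illustrative sketch (not part of the original source): with user
 * actions
 *   RAW_ENCAP(size > MLX5_ENCAPSULATION_DECISION_SIZE) / COUNT / END
 * the split above yields
 *   Rx actions: TAG(hairpin Rx register := flow_id) / END
 *   Tx actions: RAW_ENCAP / COUNT / END
 * while the generated Tx pattern matches the internal TAG item that
 * carries the same flow_id.
 */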
5362 
5363 /**
5364  * The last stage of splitting chain, just creates the subflow
5365  * without any modification.
5366  *
5367  * @param[in] dev
5368  *   Pointer to Ethernet device.
5369  * @param[in] flow
5370  *   Parent flow structure pointer.
5371  * @param[in, out] sub_flow
5372  *   Pointer to return the created subflow, may be NULL.
5373  * @param[in] attr
5374  *   Flow rule attributes.
5375  * @param[in] items
5376  *   Pattern specification (list terminated by the END pattern item).
5377  * @param[in] actions
5378  *   Associated actions (list terminated by the END action).
5379  * @param[in] flow_split_info
5380  *   Pointer to flow split info structure.
5381  * @param[out] error
5382  *   Perform verbose error reporting if not NULL.
5383  * @return
5384  *   0 on success, negative value otherwise
5385  */
5386 static int
5387 flow_create_split_inner(struct rte_eth_dev *dev,
5388 			struct rte_flow *flow,
5389 			struct mlx5_flow **sub_flow,
5390 			const struct rte_flow_attr *attr,
5391 			const struct rte_flow_item items[],
5392 			const struct rte_flow_action actions[],
5393 			struct mlx5_flow_split_info *flow_split_info,
5394 			struct rte_flow_error *error)
5395 {
5396 	struct mlx5_flow *dev_flow;
5397 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
5398 
5399 	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
5400 				    flow_split_info->flow_idx, error);
5401 	if (!dev_flow)
5402 		return -rte_errno;
5403 	dev_flow->flow = flow;
5404 	dev_flow->external = flow_split_info->external;
5405 	dev_flow->skip_scale = flow_split_info->skip_scale;
5406 	/* Subflow object was created, we must include it in the list. */
5407 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
5408 		      dev_flow->handle, next);
5409 	/*
5410 	 * If dev_flow is one of the suffix flows, some actions in the suffix
5411 	 * flow may need the user defined item layer flags; also pass the
5412 	 * metadata rxq mark flag to the suffix flow.
5413 	 */
5414 	if (flow_split_info->prefix_layers)
5415 		dev_flow->handle->layers = flow_split_info->prefix_layers;
5416 	if (flow_split_info->prefix_mark) {
5417 		MLX5_ASSERT(wks);
5418 		wks->mark = 1;
5419 	}
5420 	if (sub_flow)
5421 		*sub_flow = dev_flow;
5422 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5423 	dev_flow->dv.table_id = flow_split_info->table_id;
5424 #endif
5425 	return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
5426 }
5427 
5428 /**
5429  * Get the sub policy of a meter.
5430  *
5431  * @param[in] dev
5432  *   Pointer to Ethernet device.
5433  * @param[in] flow
5434  *   Parent flow structure pointer.
5435  * @param wks
5436  *   Pointer to thread flow work space.
5437  * @param[in] attr
5438  *   Flow rule attributes.
5439  * @param[in] items
5440  *   Pattern specification (list terminated by the END pattern item).
5441  * @param[out] error
5442  *   Perform verbose error reporting if not NULL.
5443  *
5444  * @return
5445  *   Pointer to the meter sub policy, NULL otherwise and rte_errno is set.
5446  */
5447 static struct mlx5_flow_meter_sub_policy *
5448 get_meter_sub_policy(struct rte_eth_dev *dev,
5449 		     struct rte_flow *flow,
5450 		     struct mlx5_flow_workspace *wks,
5451 		     const struct rte_flow_attr *attr,
5452 		     const struct rte_flow_item items[],
5453 		     struct rte_flow_error *error)
5454 {
5455 	struct mlx5_flow_meter_policy *policy;
5456 	struct mlx5_flow_meter_policy *final_policy;
5457 	struct mlx5_flow_meter_sub_policy *sub_policy = NULL;
5458 
5459 	policy = wks->policy;
5460 	final_policy = policy->is_hierarchy ? wks->final_policy : policy;
5461 	if (final_policy->is_rss || final_policy->is_queue) {
5462 		struct mlx5_flow_rss_desc rss_desc_v[MLX5_MTR_RTE_COLORS];
5463 		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS] = {0};
5464 		uint32_t i;
5465 
5466 		/*
5467 		 * This is a tmp dev_flow,
5468 		 * no need to register any matcher for it in translate.
5469 		 */
5470 		wks->skip_matcher_reg = 1;
5471 		for (i = 0; i < MLX5_MTR_RTE_COLORS; i++) {
5472 			struct mlx5_flow dev_flow = {0};
5473 			struct mlx5_flow_handle dev_handle = { {0} };
5474 			uint8_t fate = final_policy->act_cnt[i].fate_action;
5475 
5476 			if (fate == MLX5_FLOW_FATE_SHARED_RSS) {
5477 				const struct rte_flow_action_rss *rss_act =
5478 					final_policy->act_cnt[i].rss->conf;
5479 				struct rte_flow_action rss_actions[2] = {
5480 					[0] = {
5481 					.type = RTE_FLOW_ACTION_TYPE_RSS,
5482 					.conf = rss_act,
5483 					},
5484 					[1] = {
5485 					.type = RTE_FLOW_ACTION_TYPE_END,
5486 					.conf = NULL,
5487 					}
5488 				};
5489 
5490 				dev_flow.handle = &dev_handle;
5491 				dev_flow.ingress = attr->ingress;
5492 				dev_flow.flow = flow;
5493 				dev_flow.external = 0;
5494 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5495 				dev_flow.dv.transfer = attr->transfer;
5496 #endif
5497 				/**
5498 				 * Translate RSS action to get rss hash fields.
5499 				 */
5500 				if (flow_drv_translate(dev, &dev_flow, attr,
5501 						items, rss_actions, error))
5502 					goto exit;
5503 				rss_desc_v[i] = wks->rss_desc;
5504 				rss_desc_v[i].key_len = MLX5_RSS_HASH_KEY_LEN;
5505 				rss_desc_v[i].hash_fields =
5506 						dev_flow.hash_fields;
5507 				rss_desc_v[i].queue_num =
5508 						rss_desc_v[i].hash_fields ?
5509 						rss_desc_v[i].queue_num : 1;
5510 				rss_desc_v[i].tunnel =
5511 						!!(dev_flow.handle->layers &
5512 						   MLX5_FLOW_LAYER_TUNNEL);
5513 				/* Use the RSS queues in the containers. */
5514 				rss_desc_v[i].queue =
5515 					(uint16_t *)(uintptr_t)rss_act->queue;
5516 				rss_desc[i] = &rss_desc_v[i];
5517 			} else if (fate == MLX5_FLOW_FATE_QUEUE) {
5518 				/* This is queue action. */
5519 				rss_desc_v[i] = wks->rss_desc;
5520 				rss_desc_v[i].key_len = 0;
5521 				rss_desc_v[i].hash_fields = 0;
5522 				rss_desc_v[i].queue =
5523 					&final_policy->act_cnt[i].queue;
5524 				rss_desc_v[i].queue_num = 1;
5525 				rss_desc[i] = &rss_desc_v[i];
5526 			} else {
5527 				rss_desc[i] = NULL;
5528 			}
5529 		}
5530 		sub_policy = flow_drv_meter_sub_policy_rss_prepare(dev,
5531 						flow, policy, rss_desc);
5532 	} else {
5533 		enum mlx5_meter_domain mtr_domain =
5534 			attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5535 				(attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5536 						MLX5_MTR_DOMAIN_INGRESS);
5537 		sub_policy = policy->sub_policys[mtr_domain][0];
5538 	}
5539 	if (!sub_policy)
5540 		rte_flow_error_set(error, EINVAL,
5541 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5542 				   "Failed to get meter sub-policy.");
5543 exit:
5544 	return sub_policy;
5545 }
5546 
5547 /**
5548  * Split the meter flow.
5549  *
5550  * As the meter flow will be split into three sub flows, the actions
5551  * other than the meter action only make sense when the meter accepts
5552  * the packet. If it has to be dropped, no additional
5553  * actions should be taken.
5554  *
5555  * One kind of special action, which decapsulates the L3 tunnel
5556  * header, will be put in the prefix sub flow, so as not to take the
5557  * L3 tunnel header into account.
5558  *
5559  * @param[in] dev
5560  *   Pointer to Ethernet device.
5561  * @param[in] flow
5562  *   Parent flow structure pointer.
5563  * @param wks
5564  *   Pointer to thread flow work space.
5565  * @param[in] attr
5566  *   Flow rule attributes.
5567  * @param[in] items
5568  *   Pattern specification (list terminated by the END pattern item).
5569  * @param[out] sfx_items
5570  *   Suffix flow match items (list terminated by the END pattern item).
5571  * @param[in] actions
5572  *   Associated actions (list terminated by the END action).
5573  * @param[out] actions_sfx
5574  *   Suffix flow actions.
5575  * @param[out] actions_pre
5576  *   Prefix flow actions.
5577  * @param[out] mtr_flow_id
5578  *   Pointer to meter flow id.
5579  * @param[out] error
5580  *   Perform verbose error reporting if not NULL.
5581  *
5582  * @return
5583  *   0 on success, a negative errno value otherwise and rte_errno is set.
5584  */
5585 static int
5586 flow_meter_split_prep(struct rte_eth_dev *dev,
5587 		      struct rte_flow *flow,
5588 		      struct mlx5_flow_workspace *wks,
5589 		      const struct rte_flow_attr *attr,
5590 		      const struct rte_flow_item items[],
5591 		      struct rte_flow_item sfx_items[],
5592 		      const struct rte_flow_action actions[],
5593 		      struct rte_flow_action actions_sfx[],
5594 		      struct rte_flow_action actions_pre[],
5595 		      uint32_t *mtr_flow_id,
5596 		      struct rte_flow_error *error)
5597 {
5598 	struct mlx5_priv *priv = dev->data->dev_private;
5599 	struct mlx5_flow_meter_info *fm = wks->fm;
5600 	struct rte_flow_action *tag_action = NULL;
5601 	struct rte_flow_item *tag_item;
5602 	struct mlx5_rte_flow_action_set_tag *set_tag;
5603 	const struct rte_flow_action_raw_encap *raw_encap;
5604 	const struct rte_flow_action_raw_decap *raw_decap;
5605 	struct mlx5_rte_flow_item_tag *tag_item_spec;
5606 	struct mlx5_rte_flow_item_tag *tag_item_mask;
5607 	uint32_t tag_id = 0;
5608 	struct rte_flow_item *vlan_item_dst = NULL;
5609 	const struct rte_flow_item *vlan_item_src = NULL;
5610 	const struct rte_flow_item *orig_items = items;
5611 	struct rte_flow_action *hw_mtr_action;
5612 	struct rte_flow_action *action_pre_head = NULL;
5613 	uint16_t flow_src_port = priv->representor_id;
5614 	bool mtr_first;
5615 	uint8_t mtr_id_offset = priv->mtr_reg_share ? MLX5_MTR_COLOR_BITS : 0;
5616 	uint8_t mtr_reg_bits = priv->mtr_reg_share ?
5617 				MLX5_MTR_IDLE_BITS_IN_COLOR_REG : MLX5_REG_BITS;
5618 	uint32_t flow_id = 0;
5619 	uint32_t flow_id_reversed = 0;
5620 	uint8_t flow_id_bits = 0;
5621 	bool after_meter = false;
5622 	int shift;
5623 
5624 	/* Prepare the suffix subflow items. */
5625 	tag_item = sfx_items++;
5626 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
5627 		int item_type = items->type;
5628 
5629 		switch (item_type) {
5630 		case RTE_FLOW_ITEM_TYPE_PORT_ID:
5631 		case RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT:
5632 		case RTE_FLOW_ITEM_TYPE_PORT_REPRESENTOR:
5633 			if (mlx5_flow_get_item_vport_id(dev, items, &flow_src_port, NULL, error))
5634 				return -rte_errno;
5635 			if (!fm->def_policy && wks->policy->hierarchy_match_port &&
5636 			    flow_src_port != priv->representor_id) {
5637 				if (flow_drv_mtr_hierarchy_rule_create(dev,
5638 								flow, fm,
5639 								flow_src_port,
5640 								items,
5641 								error))
5642 					return -rte_errno;
5643 			}
5644 			memcpy(sfx_items, items, sizeof(*sfx_items));
5645 			sfx_items++;
5646 			break;
5647 		case RTE_FLOW_ITEM_TYPE_VLAN:
5648 			/* Determine whether to copy the VLAN item below. */
5649 			vlan_item_src = items;
5650 			vlan_item_dst = sfx_items++;
5651 			vlan_item_dst->type = RTE_FLOW_ITEM_TYPE_VOID;
5652 			break;
5653 		default:
5654 			break;
5655 		}
5656 	}
5657 	sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
5658 	sfx_items++;
5659 	mtr_first = priv->sh->meter_aso_en &&
5660 		(attr->egress || (attr->transfer && flow_src_port != UINT16_MAX));
5661 	/* For ASO meter, meter must be before tag in TX direction. */
5662 	if (mtr_first) {
5663 		action_pre_head = actions_pre++;
5664 		/* Leave space for tag action. */
5665 		tag_action = actions_pre++;
5666 	}
5667 	/* Prepare the actions for prefix and suffix flow. */
5668 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5669 		struct rte_flow_action *action_cur = NULL;
5670 
5671 		switch (actions->type) {
5672 		case RTE_FLOW_ACTION_TYPE_METER:
5673 			if (mtr_first) {
5674 				action_cur = action_pre_head;
5675 			} else {
5676 				/* Leave space for tag action. */
5677 				tag_action = actions_pre++;
5678 				action_cur = actions_pre++;
5679 			}
5680 			after_meter = true;
5681 			break;
5682 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5683 		case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5684 			action_cur = actions_pre++;
5685 			break;
5686 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5687 			raw_encap = actions->conf;
5688 			if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
5689 				action_cur = actions_pre++;
5690 			break;
5691 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5692 			raw_decap = actions->conf;
5693 			if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
5694 				action_cur = actions_pre++;
5695 			break;
5696 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5697 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5698 			if (vlan_item_dst && vlan_item_src) {
5699 				memcpy(vlan_item_dst, vlan_item_src,
5700 					sizeof(*vlan_item_dst));
5701 				/*
5702 				 * Convert to an internal match item; it is
5703 				 * used for VLAN push and set VID.
5704 				 */
5705 				vlan_item_dst->type = (enum rte_flow_item_type)
5706 						MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
5707 			}
5708 			break;
5709 		case RTE_FLOW_ACTION_TYPE_COUNT:
5710 			if (fm->def_policy)
5711 				action_cur = after_meter ?
5712 						actions_sfx++ : actions_pre++;
5713 			break;
5714 		default:
5715 			break;
5716 		}
5717 		if (!action_cur)
5718 			action_cur = (fm->def_policy) ?
5719 					actions_sfx++ : actions_pre++;
5720 		memcpy(action_cur, actions, sizeof(struct rte_flow_action));
5721 	}
5722 	/* Add end action to the actions. */
5723 	actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
5724 	if (priv->sh->meter_aso_en) {
5725 		/**
5726 		 * For ASO meter, need to add an extra jump action explicitly,
5727 		 * to jump from meter to policer table.
5728 		 */
5729 		struct mlx5_flow_meter_sub_policy *sub_policy;
5730 		struct mlx5_flow_tbl_data_entry *tbl_data;
5731 
5732 		if (!fm->def_policy) {
5733 			sub_policy = get_meter_sub_policy(dev, flow, wks,
5734 							  attr, orig_items,
5735 							  error);
5736 			if (!sub_policy)
5737 				return -rte_errno;
5738 		} else {
5739 			enum mlx5_meter_domain mtr_domain =
5740 			attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5741 				(attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5742 						MLX5_MTR_DOMAIN_INGRESS);
5743 
5744 			sub_policy =
5745 			&priv->sh->mtrmng->def_policy[mtr_domain]->sub_policy;
5746 		}
5747 		tbl_data = container_of(sub_policy->tbl_rsc,
5748 					struct mlx5_flow_tbl_data_entry, tbl);
5749 		hw_mtr_action = actions_pre++;
5750 		hw_mtr_action->type = (enum rte_flow_action_type)
5751 				      MLX5_RTE_FLOW_ACTION_TYPE_JUMP;
5752 		hw_mtr_action->conf = tbl_data->jump.action;
5753 	}
5754 	actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
5755 	actions_pre++;
5756 	if (!tag_action)
5757 		return rte_flow_error_set(error, ENOMEM,
5758 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5759 					  NULL, "No tag action space.");
5760 	if (!mtr_flow_id) {
5761 		tag_action->type = RTE_FLOW_ACTION_TYPE_VOID;
5762 		goto exit;
5763 	}
5764 	/* Only default-policy Meter creates mtr flow id. */
5765 	if (fm->def_policy) {
5766 		mlx5_ipool_malloc(fm->flow_ipool, &tag_id);
5767 		if (!tag_id)
5768 			return rte_flow_error_set(error, ENOMEM,
5769 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5770 					"Failed to allocate meter flow id.");
5771 		flow_id = tag_id - 1;
5772 		flow_id_bits = (!flow_id) ? 1 :
5773 				(MLX5_REG_BITS - __builtin_clz(flow_id));
5774 		if ((flow_id_bits + priv->sh->mtrmng->max_mtr_bits) >
5775 		    mtr_reg_bits) {
5776 			mlx5_ipool_free(fm->flow_ipool, tag_id);
5777 			return rte_flow_error_set(error, EINVAL,
5778 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5779 					"Meter flow id exceeds max limit.");
5780 		}
5781 		if (flow_id_bits > priv->sh->mtrmng->max_mtr_flow_bits)
5782 			priv->sh->mtrmng->max_mtr_flow_bits = flow_id_bits;
5783 	}
5784 	/* Build tag actions and items for meter_id/meter flow_id. */
5785 	set_tag = (struct mlx5_rte_flow_action_set_tag *)actions_pre;
5786 	tag_item_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
5787 	tag_item_mask = tag_item_spec + 1;
5788 	/* Both flow_id and meter_id share the same register. */
5789 	*set_tag = (struct mlx5_rte_flow_action_set_tag) {
5790 		.id = (enum modify_reg)mlx5_flow_get_reg_id(dev, MLX5_MTR_ID,
5791 							    0, error),
5792 		.offset = mtr_id_offset,
5793 		.length = mtr_reg_bits,
5794 		.data = flow->meter,
5795 	};
5796 	/*
5797 	 * The color register bits used by flow_id grow from
5798 	 * msb to lsb, so the flow_id value must be bit-reversed in reg_c.
5799 	 */
5800 	for (shift = 0; shift < flow_id_bits; shift++)
5801 		flow_id_reversed = (flow_id_reversed << 1) |
5802 				((flow_id >> shift) & 0x1);
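	/*
	 * Illustrative example: with mtr_reg_bits == 8 and flow_id == 0x6
	 * (110b, flow_id_bits == 3), the loop above yields
	 * flow_id_reversed == 011b and the statement below ORs
	 * 011b << (8 - 3) == 0x60 into the register data.
	 */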
5803 	set_tag->data |=
5804 		flow_id_reversed << (mtr_reg_bits - flow_id_bits);
5805 	tag_item_spec->id = set_tag->id;
5806 	tag_item_spec->data = set_tag->data << mtr_id_offset;
5807 	tag_item_mask->data = UINT32_MAX << mtr_id_offset;
5808 	tag_action->type = (enum rte_flow_action_type)
5809 				MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5810 	tag_action->conf = set_tag;
5811 	tag_item->type = (enum rte_flow_item_type)
5812 				MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5813 	tag_item->spec = tag_item_spec;
5814 	tag_item->last = NULL;
5815 	tag_item->mask = tag_item_mask;
5816 exit:
5817 	if (mtr_flow_id)
5818 		*mtr_flow_id = tag_id;
5819 	return 0;
5820 }
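
/*
 * Illustrative sketch (not part of the original source): for an ASO
 * meter with the default policy and ingress user actions
 *   METER / QUEUE / END
 * the preparation above produces
 *   prefix: TAG(mtr register := meter id | reversed flow id) / METER /
 *           JUMP(policer table) / END
 *   suffix: QUEUE / END, matched on the internal TAG item.
 */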
5821 
5822 /**
5823  * Split action list having QUEUE/RSS for metadata register copy.
5824  *
5825  * Once Q/RSS action is detected in user's action list, the flow action
5826  * should be split in order to copy metadata registers, which will happen in
5827  * RX_CP_TBL like,
5828  *   - CQE->flow_tag := reg_c[1] (MARK)
5829  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
5830  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
5831  * This is because the last action of each flow must be a terminal action
5832  * (QUEUE, RSS or DROP).
5833  *
5834  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
5835  * stored and kept in the mlx5_flow structure per each sub_flow.
5836  *
5837  * The Q/RSS action is replaced with,
5838  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
5839  * And the following JUMP action is added at the end,
5840  *   - JUMP, to RX_CP_TBL.
5841  *
5842  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL by
5843  * flow_create_split_metadata() routine. The flow will look like,
5844  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
5845  *
5846  * @param dev
5847  *   Pointer to Ethernet device.
5848  * @param[out] split_actions
5849  *   Pointer to store split actions to jump to CP_TBL.
5850  * @param[in] actions
5851  *   Pointer to the list of original flow actions.
5852  * @param[in] qrss
5853  *   Pointer to the Q/RSS action.
5854  * @param[in] actions_n
5855  *   Number of original actions.
5856  * @param[in] mtr_sfx
5857  *   Check if it is in meter suffix table.
5858  * @param[out] error
5859  *   Perform verbose error reporting if not NULL.
5860  *
5861  * @return
5862  *   non-zero unique flow_id on success, otherwise 0 and
5863  *   error/rte_error are set.
5864  */
5865 static uint32_t
5866 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
5867 			  struct rte_flow_action *split_actions,
5868 			  const struct rte_flow_action *actions,
5869 			  const struct rte_flow_action *qrss,
5870 			  int actions_n, int mtr_sfx,
5871 			  struct rte_flow_error *error)
5872 {
5873 	struct mlx5_priv *priv = dev->data->dev_private;
5874 	struct mlx5_rte_flow_action_set_tag *set_tag;
5875 	struct rte_flow_action_jump *jump;
5876 	const int qrss_idx = qrss - actions;
5877 	uint32_t flow_id = 0;
5878 	int ret = 0;
5879 
5880 	/*
5881 	 * Given actions will be split
5882 	 * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
5883 	 * - Add jump to mreg CP_TBL.
5884 	 * As a result, there will be one more action.
5885 	 */
5886 	memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
5887 	/* Count MLX5_RTE_FLOW_ACTION_TYPE_TAG. */
5888 	++actions_n;
5889 	set_tag = (void *)(split_actions + actions_n);
5890 	/*
5891 	 * If we are not the meter suffix flow, add the tag action,
5892 	 * since the meter suffix flow already has the tag added.
5893 	 */
5894 	if (!mtr_sfx) {
5895 		/*
5896 		 * Allocate the new subflow ID. This one is unique within
5897 		 * device and not shared with representors. Otherwise,
5898 		 * we would have to resolve multi-thread access synch
5899 		 * issue. Each flow on the shared device is appended
5900 		 * with source vport identifier, so the resulting
5901 		 * flows will be unique in the shared (by master and
5902 		 * representors) domain even if they have coinciding
5903 		 * IDs.
5904 		 */
5905 		mlx5_ipool_malloc(priv->sh->ipool
5906 				  [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id);
5907 		if (!flow_id)
5908 			return rte_flow_error_set(error, ENOMEM,
5909 						  RTE_FLOW_ERROR_TYPE_ACTION,
5910 						  NULL, "can't allocate id "
5911 						  "for split Q/RSS subflow");
5912 		/* Internal SET_TAG action to set flow ID. */
5913 		*set_tag = (struct mlx5_rte_flow_action_set_tag){
5914 			.data = flow_id,
5915 		};
5916 		ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
5917 		if (ret < 0)
5918 			return ret;
5919 		set_tag->id = ret;
5920 		/* Construct new actions array. */
5921 		/* Replace QUEUE/RSS action. */
5922 		split_actions[qrss_idx] = (struct rte_flow_action){
5923 			.type = (enum rte_flow_action_type)
5924 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5925 			.conf = set_tag,
5926 		};
5927 	} else {
5928 		/*
5929 		 * If we are the meter suffix flow, the tag already exists.
5930 		 * Set the QUEUE/RSS action to void.
5931 		 */
5932 		split_actions[qrss_idx].type = RTE_FLOW_ACTION_TYPE_VOID;
5933 	}
5934 	/* JUMP action to jump to mreg copy table (CP_TBL). */
5935 	jump = (void *)(set_tag + 1);
5936 	*jump = (struct rte_flow_action_jump){
5937 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
5938 	};
5939 	split_actions[actions_n - 2] = (struct rte_flow_action){
5940 		.type = RTE_FLOW_ACTION_TYPE_JUMP,
5941 		.conf = jump,
5942 	};
5943 	split_actions[actions_n - 1] = (struct rte_flow_action){
5944 		.type = RTE_FLOW_ACTION_TYPE_END,
5945 	};
5946 	return flow_id;
5947 }
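
/*
 * Illustrative sketch (not part of the original source): for a flow
 * that is not a meter suffix, with user actions
 *   MARK / QUEUE / END   (actions_n == 3)
 * the preparation above emits
 *   MARK / TAG(reg_c[2] := flow_id) / JUMP(RX_CP_TBL) / END
 * where the internal TAG action replaces QUEUE at its original index.
 */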
5948 
5949 /**
5950  * Extend the given action list for Tx metadata copy.
5951  *
5952  * Copy the given action list to the ext_actions and add flow metadata register
5953  * copy action in order to copy reg_a set by WQE to reg_c[0].
5954  *
5955  * @param[out] ext_actions
5956  *   Pointer to the extended action list.
5957  * @param[in] actions
5958  *   Pointer to the list of actions.
5959  * @param[in] actions_n
5960  *   Number of actions in the list.
5961  * @param[out] error
5962  *   Perform verbose error reporting if not NULL.
5963  * @param[in] encap_idx
5964  *   The encap action index.
5965  *
5966  * @return
5967  *   0 on success, negative value otherwise
5968  */
5969 static int
5970 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
5971 		       struct rte_flow_action *ext_actions,
5972 		       const struct rte_flow_action *actions,
5973 		       int actions_n, struct rte_flow_error *error,
5974 		       int encap_idx)
5975 {
5976 	struct mlx5_flow_action_copy_mreg *cp_mreg =
5977 		(struct mlx5_flow_action_copy_mreg *)
5978 			(ext_actions + actions_n + 1);
5979 	int ret;
5980 
5981 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
5982 	if (ret < 0)
5983 		return ret;
5984 	cp_mreg->dst = ret;
5985 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
5986 	if (ret < 0)
5987 		return ret;
5988 	cp_mreg->src = ret;
5989 	if (encap_idx != 0)
5990 		memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
5991 	if (encap_idx == actions_n - 1) {
5992 		ext_actions[actions_n - 1] = (struct rte_flow_action){
5993 			.type = (enum rte_flow_action_type)
5994 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5995 			.conf = cp_mreg,
5996 		};
5997 		ext_actions[actions_n] = (struct rte_flow_action){
5998 			.type = RTE_FLOW_ACTION_TYPE_END,
5999 		};
6000 	} else {
6001 		ext_actions[encap_idx] = (struct rte_flow_action){
6002 			.type = (enum rte_flow_action_type)
6003 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
6004 			.conf = cp_mreg,
6005 		};
6006 		memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
6007 				sizeof(*ext_actions) * (actions_n - encap_idx));
6008 	}
6009 	return 0;
6010 }
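
/*
 * Illustrative sketch (not part of the original source): with
 *   actions = SET_META / VXLAN_ENCAP / END   (actions_n == 3,
 *   encap_idx == 1)
 * the extended list built above becomes
 *   SET_META / COPY_MREG(reg_c[0] := reg_a) / VXLAN_ENCAP / END
 * i.e. the register copy is inserted right before the encapsulation;
 * with no encap (encap_idx == actions_n - 1) it is appended at the end.
 */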
6011 
6012 /**
6013  * Check the match action from the action list.
6014  *
6015  * @param[in] actions
6016  *   Pointer to the list of actions.
6017  * @param[in] attr
6018  *   Flow rule attributes.
6019  * @param[in] action
6020  *   The action to be checked for existence.
6021  * @param[out] match_action_pos
6022  *   Pointer to the position of the matched action if exists, otherwise is -1.
6023  * @param[out] qrss_action_pos
6024  *   Pointer to the position of the Queue/RSS action if exists, otherwise is -1.
6025  * @param[out] modify_after_mirror
6026  *   Pointer to the flag of modify action after FDB mirroring.
6027  *
6028  * @return
6029  *   > 0 the total number of actions.
6030  *   0 if the match action is not found in the action list.
6031  */
6032 static int
6033 flow_check_match_action(const struct rte_flow_action actions[],
6034 			const struct rte_flow_attr *attr,
6035 			enum rte_flow_action_type action,
6036 			int *match_action_pos, int *qrss_action_pos,
6037 			int *modify_after_mirror)
6038 {
6039 	const struct rte_flow_action_sample *sample;
6040 	const struct rte_flow_action_raw_decap *decap;
6041 	int actions_n = 0;
6042 	uint32_t ratio = 0;
6043 	int sub_type = 0;
6044 	int flag = 0;
6045 	int fdb_mirror = 0;
6046 
6047 	*match_action_pos = -1;
6048 	*qrss_action_pos = -1;
6049 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
6050 		if (actions->type == action) {
6051 			flag = 1;
6052 			*match_action_pos = actions_n;
6053 		}
6054 		switch (actions->type) {
6055 		case RTE_FLOW_ACTION_TYPE_QUEUE:
6056 		case RTE_FLOW_ACTION_TYPE_RSS:
6057 			*qrss_action_pos = actions_n;
6058 			break;
6059 		case RTE_FLOW_ACTION_TYPE_SAMPLE:
6060 			sample = actions->conf;
6061 			ratio = sample->ratio;
6062 			sub_type = ((const struct rte_flow_action *)
6063 					(sample->actions))->type;
6064 			if (ratio == 1 && attr->transfer &&
6065 			    sub_type != RTE_FLOW_ACTION_TYPE_END)
6066 				fdb_mirror = 1;
6067 			break;
6068 		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
6069 		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
6070 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
6071 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
6072 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
6073 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
6074 		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
6075 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
6076 		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
6077 		case RTE_FLOW_ACTION_TYPE_SET_TTL:
6078 		case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
6079 		case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
6080 		case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
6081 		case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
6082 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
6083 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
6084 		case RTE_FLOW_ACTION_TYPE_FLAG:
6085 		case RTE_FLOW_ACTION_TYPE_MARK:
6086 		case RTE_FLOW_ACTION_TYPE_SET_META:
6087 		case RTE_FLOW_ACTION_TYPE_SET_TAG:
6088 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
6089 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
6090 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
6091 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
6092 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
6093 		case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
6094 		case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
6095 		case RTE_FLOW_ACTION_TYPE_METER:
6096 			if (fdb_mirror)
6097 				*modify_after_mirror = 1;
6098 			break;
6099 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
6100 			decap = actions->conf;
6101 			while ((++actions)->type == RTE_FLOW_ACTION_TYPE_VOID)
6102 				;
6103 			actions_n++;
6104 			if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
6105 				const struct rte_flow_action_raw_encap *encap =
6106 								actions->conf;
6107 				if (decap->size <=
6108 					MLX5_ENCAPSULATION_DECISION_SIZE &&
6109 				    encap->size >
6110 					MLX5_ENCAPSULATION_DECISION_SIZE)
6111 					/* L3 encap. */
6112 					break;
6113 			}
6114 			if (fdb_mirror)
6115 				*modify_after_mirror = 1;
6116 			break;
6117 		default:
6118 			break;
6119 		}
6120 		actions_n++;
6121 	}
6122 	if (flag && fdb_mirror && !*modify_after_mirror) {
6123 		/* FDB mirroring is implemented with the destination array
6124 		 * instead of the FLOW_SAMPLER object.
6125 		 */
6126 		if (sub_type != RTE_FLOW_ACTION_TYPE_END)
6127 			flag = 0;
6128 	}
6129 	/* Count RTE_FLOW_ACTION_TYPE_END. */
6130 	return flag ? actions_n + 1 : 0;
6131 }
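
/*
 * Illustrative usage sketch (not part of the original source): locate
 * a SAMPLE action and learn whether the flow needs a mirror-aware
 * split. The "actions" and "attr" variables are hypothetical context.
 */
#if 0 /* documentation-only sketch */
	int match_pos, qrss_pos, mod_after_mirror = 0;
	int n = flow_check_match_action(actions, attr,
					RTE_FLOW_ACTION_TYPE_SAMPLE,
					&match_pos, &qrss_pos,
					&mod_after_mirror);

	/* n > 0: total action count including END; n == 0: no match. */
#endif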
6132 
6133 #define SAMPLE_SUFFIX_ITEM 3
6134 
6135 /**
6136  * Split the sample flow.
6137  *
6138  * As the sample flow will be split into two sub flows, the sample flow
6139  * keeps the sample action while the other actions move to a new suffix flow.
6140  *
6141  * Also add a unique tag id with a tag action in the sample flow;
6142  * the same tag id will be used as a match in the suffix flow.
6143  *
6144  * @param dev
6145  *   Pointer to Ethernet device.
6146  * @param[in] add_tag
6147  *   Add extra tag action flag.
6148  * @param[out] sfx_items
6149  *   Suffix flow match items (list terminated by the END pattern item).
6150  * @param[in] actions
6151  *   Associated actions (list terminated by the END action).
6152  * @param[out] actions_sfx
6153  *   Suffix flow actions.
6154  * @param[out] actions_pre
6155  *   Prefix flow actions.
6156  * @param[in] actions_n
6157  *   The total number of actions.
6158  * @param[in] sample_action_pos
6159  *   The sample action position.
6160  * @param[in] qrss_action_pos
6161  *   The Queue/RSS action position.
6162  * @param[in] jump_table
6163  *   Add extra jump action flag.
6164  * @param[out] error
6165  *   Perform verbose error reporting if not NULL.
6166  *
6167  * @return
6168  *   0 on success without a tag, the unique non-zero tag id on success
6169  *   with a tag, a negative errno value otherwise and rte_errno is set.
6170  */
6171 static int
6172 flow_sample_split_prep(struct rte_eth_dev *dev,
6173 		       int add_tag,
6174 		       const struct rte_flow_item items[],
6175 		       struct rte_flow_item sfx_items[],
6176 		       const struct rte_flow_action actions[],
6177 		       struct rte_flow_action actions_sfx[],
6178 		       struct rte_flow_action actions_pre[],
6179 		       int actions_n,
6180 		       int sample_action_pos,
6181 		       int qrss_action_pos,
6182 		       int jump_table,
6183 		       struct rte_flow_error *error)
6184 {
6185 	struct mlx5_priv *priv = dev->data->dev_private;
6186 	struct mlx5_rte_flow_action_set_tag *set_tag;
6187 	struct mlx5_rte_flow_item_tag *tag_spec;
6188 	struct mlx5_rte_flow_item_tag *tag_mask;
6189 	struct rte_flow_action_jump *jump_action;
6190 	uint32_t tag_id = 0;
6191 	int append_index = 0;
6192 	int set_tag_idx = -1;
6193 	int index;
6194 	int ret;
6195 
6196 	if (sample_action_pos < 0)
6197 		return rte_flow_error_set(error, EINVAL,
6198 					  RTE_FLOW_ERROR_TYPE_ACTION,
6199 					  NULL, "invalid position of sample "
6200 					  "action in list");
6201 	/* Prepare the actions for prefix and suffix flow. */
6202 	if (add_tag) {
6203 		/* Update the newly added tag action index preceding
6204 		 * the PUSH_VLAN or ENCAP action.
6205 		 */
6206 		const struct rte_flow_action_raw_encap *raw_encap;
6207 		const struct rte_flow_action *action = actions;
6208 		int encap_idx;
6209 		int action_idx = 0;
6210 		int raw_decap_idx = -1;
6211 		int push_vlan_idx = -1;
6212 		for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
6213 			switch (action->type) {
6214 			case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
6215 				raw_decap_idx = action_idx;
6216 				break;
6217 			case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
6218 				raw_encap = action->conf;
6219 				if (raw_encap->size >
6220 					MLX5_ENCAPSULATION_DECISION_SIZE) {
6221 					encap_idx = raw_decap_idx != -1 ?
6222 						    raw_decap_idx : action_idx;
6223 					if (encap_idx < sample_action_pos &&
6224 					    push_vlan_idx == -1)
6225 						set_tag_idx = encap_idx;
6226 				}
6227 				break;
6228 			case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
6229 			case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
6230 				encap_idx = action_idx;
6231 				if (encap_idx < sample_action_pos &&
6232 				    push_vlan_idx == -1)
6233 					set_tag_idx = encap_idx;
6234 				break;
6235 			case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
6236 			case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
6237 				if (action_idx < sample_action_pos &&
6238 				    push_vlan_idx == -1) {
6239 					set_tag_idx = action_idx;
6240 					push_vlan_idx = action_idx;
6241 				}
6242 				break;
6243 			default:
6244 				break;
6245 			}
6246 			action_idx++;
6247 		}
6248 	}
6249 	/* Prepare the actions for prefix and suffix flow. */
6250 	if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
6251 		index = qrss_action_pos;
6252 		/* Put the actions preceding the Queue/RSS action into the prefix flow. */
6253 		if (index != 0)
6254 			memcpy(actions_pre, actions,
6255 			       sizeof(struct rte_flow_action) * index);
6256 		/* Put others preceding the sample action into prefix flow. */
6257 		if (sample_action_pos > index + 1)
6258 			memcpy(actions_pre + index, actions + index + 1,
6259 			       sizeof(struct rte_flow_action) *
6260 			       (sample_action_pos - index - 1));
6261 		index = sample_action_pos - 1;
6262 		/* Put Queue/RSS action into Suffix flow. */
6263 		memcpy(actions_sfx, actions + qrss_action_pos,
6264 		       sizeof(struct rte_flow_action));
6265 		actions_sfx++;
6266 	} else if (add_tag && set_tag_idx >= 0) {
6267 		if (set_tag_idx > 0)
6268 			memcpy(actions_pre, actions,
6269 			       sizeof(struct rte_flow_action) * set_tag_idx);
6270 		memcpy(actions_pre + set_tag_idx + 1, actions + set_tag_idx,
6271 		       sizeof(struct rte_flow_action) *
6272 		       (sample_action_pos - set_tag_idx));
6273 		index = sample_action_pos;
6274 	} else {
6275 		index = sample_action_pos;
6276 		if (index != 0)
6277 			memcpy(actions_pre, actions,
6278 			       sizeof(struct rte_flow_action) * index);
6279 	}
6280 	/* For CX5, add an extra tag action for NIC-RX and E-Switch ingress.
6281 	 * For CX6DX and above, metadata registers Cx preserve their value;
6282 	 * add an extra tag action for NIC-RX and the E-Switch domain.
6283 	 */
6284 	if (add_tag) {
6285 		/* Prepare the prefix tag action. */
6286 		append_index++;
6287 		set_tag = (void *)(actions_pre + actions_n + append_index);
6288 		/* A trusted VF/SF on CX5 does not support meter, so the reserved
6289 		 * metadata regC is REG_NON; fall back to using the application tag
6290 		 * index 0.
6291 		 */
6292 		if (unlikely(priv->mtr_color_reg == REG_NON))
6293 			ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
6294 		else
6295 			ret = mlx5_flow_get_reg_id(dev, MLX5_SAMPLE_ID, 0, error);
6296 		if (ret < 0)
6297 			return ret;
6298 		mlx5_ipool_malloc(priv->sh->ipool
6299 				  [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id);
6300 		*set_tag = (struct mlx5_rte_flow_action_set_tag) {
6301 			.id = ret,
6302 			.data = tag_id,
6303 		};
6304 		/* Prepare the suffix subflow items. */
6305 		tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
6306 		tag_spec->data = tag_id;
6307 		tag_spec->id = set_tag->id;
6308 		tag_mask = tag_spec + 1;
6309 		tag_mask->data = UINT32_MAX;
6310 		for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
6311 			if (items->type == RTE_FLOW_ITEM_TYPE_PORT_ID ||
6312 			    items->type == RTE_FLOW_ITEM_TYPE_PORT_REPRESENTOR ||
6313 			    items->type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT) {
6314 				memcpy(sfx_items, items, sizeof(*sfx_items));
6315 				sfx_items++;
6316 				break;
6317 			}
6318 		}
6319 		sfx_items[0] = (struct rte_flow_item){
6320 			.type = (enum rte_flow_item_type)
6321 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
6322 			.spec = tag_spec,
6323 			.last = NULL,
6324 			.mask = tag_mask,
6325 		};
6326 		sfx_items[1] = (struct rte_flow_item){
6327 			.type = (enum rte_flow_item_type)
6328 				RTE_FLOW_ITEM_TYPE_END,
6329 		};
6330 		/* Prepare the tag action in prefix subflow. */
6331 		set_tag_idx = (set_tag_idx == -1) ? index : set_tag_idx;
6332 		actions_pre[set_tag_idx] =
6333 			(struct rte_flow_action){
6334 			.type = (enum rte_flow_action_type)
6335 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
6336 			.conf = set_tag,
6337 		};
6338 		/* Update the next sample position due to adding one tag action. */
6339 		index += 1;
6340 	}
6341 	/* Copy the sample action into prefix flow. */
6342 	memcpy(actions_pre + index, actions + sample_action_pos,
6343 	       sizeof(struct rte_flow_action));
6344 	index += 1;
6345 	/* For a modify action after the sample action in E-Switch mirroring,
6346 	 * add the extra jump action in the prefix subflow to jump into the next
6347 	 * table, then do the modify action in the new table.
6348 	 */
6349 	if (jump_table) {
6350 		/* Prepare the prefix jump action. */
6351 		append_index++;
6352 		jump_action = (void *)(actions_pre + actions_n + append_index);
6353 		jump_action->group = jump_table;
6354 		actions_pre[index++] =
6355 			(struct rte_flow_action){
6356 			.type = (enum rte_flow_action_type)
6357 				RTE_FLOW_ACTION_TYPE_JUMP,
6358 			.conf = jump_action,
6359 		};
6360 	}
6361 	actions_pre[index] = (struct rte_flow_action){
6362 		.type = (enum rte_flow_action_type)
6363 			RTE_FLOW_ACTION_TYPE_END,
6364 	};
6365 	/* Put the actions after sample into Suffix flow. */
6366 	memcpy(actions_sfx, actions + sample_action_pos + 1,
6367 	       sizeof(struct rte_flow_action) *
6368 	       (actions_n - sample_action_pos - 1));
6369 	return tag_id;
6370 }
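
/*
 * Illustrative sketch (not part of the original source): with tagging
 * enabled (add_tag != 0) and user actions
 *   SAMPLE / QUEUE / END   (sample_action_pos == 0)
 * the preparation above produces
 *   prefix: TAG(register := tag_id) / SAMPLE / END
 *   suffix: QUEUE / END, matched on the internal TAG item carrying
 *   the same tag_id.
 */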
6371 
6372 /**
6373  * The splitting for metadata feature.
6374  *
6375  * - Q/RSS action on NIC Rx should be split in order to pass by
6376  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
6377  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
6378  *
6379  * - All the actions on NIC Tx should have a mreg copy action to
6380  *   copy reg_a from WQE to reg_c[0].
6381  *
6382  * @param dev
6383  *   Pointer to Ethernet device.
6384  * @param[in] flow
6385  *   Parent flow structure pointer.
6386  * @param[in] attr
6387  *   Flow rule attributes.
6388  * @param[in] items
6389  *   Pattern specification (list terminated by the END pattern item).
6390  * @param[in] actions
6391  *   Associated actions (list terminated by the END action).
6392  * @param[in] flow_split_info
6393  *   Pointer to flow split info structure.
6394  * @param[out] error
6395  *   Perform verbose error reporting if not NULL.
6396  * @return
6397  *   0 on success, negative value otherwise
6398  */
6399 static int
6400 flow_create_split_metadata(struct rte_eth_dev *dev,
6401 			   struct rte_flow *flow,
6402 			   const struct rte_flow_attr *attr,
6403 			   const struct rte_flow_item items[],
6404 			   const struct rte_flow_action actions[],
6405 			   struct mlx5_flow_split_info *flow_split_info,
6406 			   struct rte_flow_error *error)
6407 {
6408 	struct mlx5_priv *priv = dev->data->dev_private;
6409 	struct mlx5_sh_config *config = &priv->sh->config;
6410 	const struct rte_flow_action *qrss = NULL;
6411 	struct rte_flow_action *ext_actions = NULL;
6412 	struct mlx5_flow *dev_flow = NULL;
6413 	uint32_t qrss_id = 0;
6414 	int mtr_sfx = 0;
6415 	size_t act_size;
6416 	int actions_n;
6417 	int encap_idx;
6418 	int ret;
6419 
6420 	/* Check whether extensive metadata feature is engaged. */
6421 	if (!config->dv_flow_en ||
6422 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
6423 	    !mlx5_flow_ext_mreg_supported(dev))
6424 		return flow_create_split_inner(dev, flow, NULL, attr, items,
6425 					       actions, flow_split_info, error);
6426 	actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
6427 							   &encap_idx);
6428 	if (qrss) {
6429 		/* Exclude hairpin flows from splitting. */
6430 		if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
6431 			const struct rte_flow_action_queue *queue;
6432 
6433 			queue = qrss->conf;
6434 			if (mlx5_rxq_is_hairpin(dev, queue->index))
6435 				qrss = NULL;
6436 		} else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
6437 			const struct rte_flow_action_rss *rss;
6438 
6439 			rss = qrss->conf;
6440 			if (mlx5_rxq_is_hairpin(dev, rss->queue[0]))
6441 				qrss = NULL;
6442 		}
6443 	}
6444 	if (qrss) {
6445 		/* Check if it is in meter suffix table. */
6446 		mtr_sfx = attr->group ==
6447 			  ((attr->transfer && priv->fdb_def_rule) ?
6448 			  (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6449 			  MLX5_FLOW_TABLE_LEVEL_METER);
6450 		/*
6451 		 * Q/RSS action on NIC Rx should be split in order to pass by
6452 		 * the mreg copy table (RX_CP_TBL) and then it jumps to the
6453 		 * action table (RX_ACT_TBL) which has the split Q/RSS action.
6454 		 */
6455 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
6456 			   sizeof(struct rte_flow_action_set_tag) +
6457 			   sizeof(struct rte_flow_action_jump);
6458 		ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
6459 					  SOCKET_ID_ANY);
6460 		if (!ext_actions)
6461 			return rte_flow_error_set(error, ENOMEM,
6462 						  RTE_FLOW_ERROR_TYPE_ACTION,
6463 						  NULL, "no memory to split "
6464 						  "metadata flow");
		/*
		 * Create the new actions list with the Q/RSS action removed
		 * and set-tag plus jump-to-register-copy-table (RX_CP_TBL)
		 * actions appended. The unique tag ID must be preallocated
		 * here in advance, because the set-tag action needs it.
		 */
6471 		qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
6472 						    qrss, actions_n,
6473 						    mtr_sfx, error);
6474 		if (!mtr_sfx && !qrss_id) {
6475 			ret = -rte_errno;
6476 			goto exit;
6477 		}
6478 	} else if (attr->egress) {
6479 		/*
6480 		 * All the actions on NIC Tx should have a metadata register
6481 		 * copy action to copy reg_a from WQE to reg_c[meta]
6482 		 */
6483 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
6484 			   sizeof(struct mlx5_flow_action_copy_mreg);
6485 		ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
6486 					  SOCKET_ID_ANY);
6487 		if (!ext_actions)
6488 			return rte_flow_error_set(error, ENOMEM,
6489 						  RTE_FLOW_ERROR_TYPE_ACTION,
6490 						  NULL, "no memory to split "
6491 						  "metadata flow");
6492 		/* Create the action list appended with copy register. */
6493 		ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
6494 					     actions_n, error, encap_idx);
6495 		if (ret < 0)
6496 			goto exit;
6497 	}
6498 	/* Add the unmodified original or prefix subflow. */
6499 	ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6500 				      items, ext_actions ? ext_actions :
6501 				      actions, flow_split_info, error);
6502 	if (ret < 0)
6503 		goto exit;
6504 	MLX5_ASSERT(dev_flow);
6505 	if (qrss) {
6506 		const struct rte_flow_attr q_attr = {
6507 			.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
6508 			.ingress = 1,
6509 		};
6510 		/* Internal PMD action to set register. */
6511 		struct mlx5_rte_flow_item_tag q_tag_spec = {
6512 			.data = qrss_id,
6513 			.id = REG_NON,
6514 		};
6515 		struct rte_flow_item q_items[] = {
6516 			{
6517 				.type = (enum rte_flow_item_type)
6518 					MLX5_RTE_FLOW_ITEM_TYPE_TAG,
6519 				.spec = &q_tag_spec,
6520 				.last = NULL,
6521 				.mask = NULL,
6522 			},
6523 			{
6524 				.type = RTE_FLOW_ITEM_TYPE_END,
6525 			},
6526 		};
6527 		struct rte_flow_action q_actions[] = {
6528 			{
6529 				.type = qrss->type,
6530 				.conf = qrss->conf,
6531 			},
6532 			{
6533 				.type = RTE_FLOW_ACTION_TYPE_END,
6534 			},
6535 		};
6536 		uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
6537 
6538 		/*
6539 		 * Configure the tag item only if there is no meter subflow.
6540 		 * Since tag is already marked in the meter suffix subflow
6541 		 * we can just use the meter suffix items as is.
6542 		 */
6543 		if (qrss_id) {
6544 			/* Not meter subflow. */
6545 			MLX5_ASSERT(!mtr_sfx);
			/*
			 * Store the unique ID in the prefix flow because it is
			 * destroyed after the suffix flow. The ID is freed only
			 * once no actual flows reference it, after which
			 * identifier reallocation becomes possible (for
			 * example, for other flows in other threads).
			 */
6553 			dev_flow->handle->split_flow_id = qrss_id;
6554 			ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
6555 						   error);
6556 			if (ret < 0)
6557 				goto exit;
6558 			q_tag_spec.id = ret;
6559 		}
6560 		dev_flow = NULL;
6561 		/* Add suffix subflow to execute Q/RSS. */
6562 		flow_split_info->prefix_layers = layers;
6563 		flow_split_info->prefix_mark = 0;
6564 		flow_split_info->table_id = 0;
6565 		ret = flow_create_split_inner(dev, flow, &dev_flow,
6566 					      &q_attr, mtr_sfx ? items :
6567 					      q_items, q_actions,
6568 					      flow_split_info, error);
6569 		if (ret < 0)
6570 			goto exit;
		/* The qrss ID is now owned by the subflows; free it only on failure. */
6572 		qrss_id = 0;
6573 		MLX5_ASSERT(dev_flow);
6574 	}
6575 
6576 exit:
	/*
	 * We do not destroy the partially created sub_flows in case of error.
	 * They are included in the parent flow list and will be destroyed
	 * by flow_drv_destroy.
	 */
6582 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
6583 			qrss_id);
6584 	mlx5_free(ext_actions);
6585 	return ret;
6586 }
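
/*
 * Documentation-only sketch (not compiled): roughly what the suffix subflow
 * built above looks like in public rte_flow terms. The driver matches on an
 * internal MLX5_RTE_FLOW_ITEM_TYPE_TAG item; a public TAG item with a
 * hypothetical register index and qrss_id value is shown here instead, and
 * the queue index is a placeholder.
 */
#if 0
static const struct rte_flow_item_tag example_tag_spec = {
	.index = 0,	/* metadata register chosen by the PMD */
	.data = 0x2a,	/* unique qrss_id written by the prefix subflow */
};
static const struct rte_flow_item example_suffix_pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_TAG, .spec = &example_tag_spec },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};
static const struct rte_flow_action_queue example_queue = { .index = 3 };
static const struct rte_flow_action example_suffix_actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &example_queue },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};
#endif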
6587 
6588 /**
6589  * Create meter internal drop flow with the original pattern.
6590  *
6591  * @param dev
6592  *   Pointer to Ethernet device.
6593  * @param[in] flow
6594  *   Parent flow structure pointer.
6595  * @param[in] attr
6596  *   Flow rule attributes.
6597  * @param[in] items
6598  *   Pattern specification (list terminated by the END pattern item).
6599  * @param[in] flow_split_info
6600  *   Pointer to flow split info structure.
6601  * @param[in] fm
6602  *   Pointer to flow meter structure.
6603  * @param[out] error
6604  *   Perform verbose error reporting if not NULL.
6605  * @return
6606  *   0 on success, negative value otherwise
6607  */
static int
6609 flow_meter_create_drop_flow_with_org_pattern(struct rte_eth_dev *dev,
6610 			struct rte_flow *flow,
6611 			const struct rte_flow_attr *attr,
6612 			const struct rte_flow_item items[],
6613 			struct mlx5_flow_split_info *flow_split_info,
6614 			struct mlx5_flow_meter_info *fm,
6615 			struct rte_flow_error *error)
6616 {
6617 	struct mlx5_flow *dev_flow = NULL;
6618 	struct rte_flow_attr drop_attr = *attr;
6619 	struct rte_flow_action drop_actions[3];
6620 	struct mlx5_flow_split_info drop_split_info = *flow_split_info;
6621 
6622 	MLX5_ASSERT(fm->drop_cnt);
6623 	drop_actions[0].type =
6624 		(enum rte_flow_action_type)MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
6625 	drop_actions[0].conf = (void *)(uintptr_t)fm->drop_cnt;
6626 	drop_actions[1].type = RTE_FLOW_ACTION_TYPE_DROP;
6627 	drop_actions[1].conf = NULL;
6628 	drop_actions[2].type = RTE_FLOW_ACTION_TYPE_END;
6629 	drop_actions[2].conf = NULL;
6630 	drop_split_info.external = false;
6631 	drop_split_info.skip_scale |= 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6632 	drop_split_info.table_id = MLX5_MTR_TABLE_ID_DROP;
6633 	drop_attr.group = MLX5_FLOW_TABLE_LEVEL_METER;
6634 	return flow_create_split_inner(dev, flow, &dev_flow,
6635 				&drop_attr, items, drop_actions,
6636 				&drop_split_info, error);
6637 }
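
/*
 * Public-API analogue (documentation-only, not compiled) of the internal
 * drop flow built above: count the packet, then drop it. The counter ID is
 * a placeholder; the driver uses its internal per-meter drop counter.
 */
#if 0
static const struct rte_flow_action_count example_drop_cnt = { .id = 0 };
static const struct rte_flow_action example_drop_actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_COUNT, .conf = &example_drop_cnt },
	{ .type = RTE_FLOW_ACTION_TYPE_DROP },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};
#endif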
6638 
6639 /**
6640  * The splitting for meter feature.
6641  *
 * - The meter flow will be split into two flows, a prefix and a
 *   suffix flow. Packets are meaningful only if they pass the prefix
 *   meter action.
 *
 * - Reg_C_5 is used to match the packet between the prefix and
 *   suffix flows.
6648  *
6649  * @param dev
6650  *   Pointer to Ethernet device.
6651  * @param[in] flow
6652  *   Parent flow structure pointer.
6653  * @param[in] attr
6654  *   Flow rule attributes.
6655  * @param[in] items
6656  *   Pattern specification (list terminated by the END pattern item).
6657  * @param[in] actions
6658  *   Associated actions (list terminated by the END action).
6659  * @param[in] flow_split_info
6660  *   Pointer to flow split info structure.
6661  * @param[out] error
6662  *   Perform verbose error reporting if not NULL.
6663  * @return
6664  *   0 on success, negative value otherwise
6665  */
6666 static int
6667 flow_create_split_meter(struct rte_eth_dev *dev,
6668 			struct rte_flow *flow,
6669 			const struct rte_flow_attr *attr,
6670 			const struct rte_flow_item items[],
6671 			const struct rte_flow_action actions[],
6672 			struct mlx5_flow_split_info *flow_split_info,
6673 			struct rte_flow_error *error)
6674 {
6675 	struct mlx5_priv *priv = dev->data->dev_private;
6676 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6677 	struct rte_flow_action *sfx_actions = NULL;
6678 	struct rte_flow_action *pre_actions = NULL;
6679 	struct rte_flow_item *sfx_items = NULL;
6680 	struct mlx5_flow *dev_flow = NULL;
6681 	struct rte_flow_attr sfx_attr = *attr;
6682 	struct mlx5_flow_meter_info *fm = NULL;
6683 	uint8_t skip_scale_restore;
6684 	bool has_mtr = false;
6685 	bool has_modify = false;
6686 	bool set_mtr_reg = true;
6687 	bool is_mtr_hierarchy = false;
6688 	uint32_t meter_id = 0;
6689 	uint32_t mtr_idx = 0;
6690 	uint32_t mtr_flow_id = 0;
6691 	size_t act_size;
6692 	size_t item_size;
6693 	int actions_n = 0;
6694 	int ret = 0;
6695 
6696 	if (priv->mtr_en)
6697 		actions_n = flow_check_meter_action(dev, actions, &has_mtr,
6698 						    &has_modify, &meter_id);
6699 	if (has_mtr) {
6700 		if (flow->meter) {
6701 			fm = flow_dv_meter_find_by_idx(priv, flow->meter);
6702 			if (!fm)
6703 				return rte_flow_error_set(error, EINVAL,
6704 						RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6705 						NULL, "Meter not found.");
6706 		} else {
6707 			fm = mlx5_flow_meter_find(priv, meter_id, &mtr_idx);
6708 			if (!fm)
6709 				return rte_flow_error_set(error, EINVAL,
6710 						RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6711 						NULL, "Meter not found.");
6712 			ret = mlx5_flow_meter_attach(priv, fm,
6713 						     &sfx_attr, error);
6714 			if (ret)
6715 				return -rte_errno;
6716 			flow->meter = mtr_idx;
6717 		}
6718 		MLX5_ASSERT(wks);
6719 		wks->fm = fm;
6720 		if (!fm->def_policy) {
6721 			wks->policy = mlx5_flow_meter_policy_find(dev,
6722 								  fm->policy_id,
6723 								  NULL);
6724 			MLX5_ASSERT(wks->policy);
6725 			if (wks->policy->mark)
6726 				wks->mark = 1;
6727 			if (wks->policy->is_hierarchy) {
6728 				wks->final_policy =
6729 				mlx5_flow_meter_hierarchy_get_final_policy(dev,
6730 								wks->policy);
6731 				if (!wks->final_policy)
6732 					return rte_flow_error_set(error,
6733 					EINVAL,
6734 					RTE_FLOW_ERROR_TYPE_ACTION, NULL,
6735 				"Failed to find terminal policy of hierarchy.");
6736 				is_mtr_hierarchy = true;
6737 			}
6738 		}
		/*
		 * If the meter does not use the default policy, and either
		 * 1. it is not a meter hierarchy and no action in the flow
		 *    changes the packet (modify/encap/decap etc.), or
		 * 2. no drop count is needed for this meter,
		 * then there is no need to use reg_c to save the meter ID.
		 */
6746 		if (!fm->def_policy && ((!has_modify && !is_mtr_hierarchy) || !fm->drop_cnt))
6747 			set_mtr_reg = false;
6748 		/* Prefix actions: meter, decap, encap, tag, jump, end, cnt. */
6749 #define METER_PREFIX_ACTION 7
6750 		act_size = (sizeof(struct rte_flow_action) *
6751 			    (actions_n + METER_PREFIX_ACTION)) +
6752 			   sizeof(struct mlx5_rte_flow_action_set_tag);
6753 		/* Suffix items: tag, vlan, port id, end. */
6754 #define METER_SUFFIX_ITEM 4
6755 		item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
6756 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
6757 		sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
6758 					  0, SOCKET_ID_ANY);
6759 		if (!sfx_actions)
6760 			return rte_flow_error_set(error, ENOMEM,
6761 						  RTE_FLOW_ERROR_TYPE_ACTION,
6762 						  NULL, "no memory to split "
6763 						  "meter flow");
6764 		sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
6765 			     act_size);
		/* There is no suffix flow for a meter with a non-default policy. */
6767 		if (!fm->def_policy)
6768 			pre_actions = sfx_actions + 1;
6769 		else
6770 			pre_actions = sfx_actions + actions_n;
6771 		ret = flow_meter_split_prep(dev, flow, wks, &sfx_attr,
6772 					    items, sfx_items, actions,
6773 					    sfx_actions, pre_actions,
6774 					    (set_mtr_reg ? &mtr_flow_id : NULL),
6775 					    error);
6776 		if (ret) {
6777 			ret = -rte_errno;
6778 			goto exit;
6779 		}
6780 		/* Add the prefix subflow. */
6781 		skip_scale_restore = flow_split_info->skip_scale;
6782 		flow_split_info->skip_scale |=
6783 			1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6784 		ret = flow_create_split_inner(dev, flow, &dev_flow,
6785 					      attr, items, pre_actions,
6786 					      flow_split_info, error);
6787 		flow_split_info->skip_scale = skip_scale_restore;
6788 		if (ret) {
6789 			if (mtr_flow_id)
6790 				mlx5_ipool_free(fm->flow_ipool, mtr_flow_id);
6791 			ret = -rte_errno;
6792 			goto exit;
6793 		}
6794 		if (mtr_flow_id) {
6795 			dev_flow->handle->split_flow_id = mtr_flow_id;
6796 			dev_flow->handle->is_meter_flow_id = 1;
6797 		}
6798 		if (!fm->def_policy) {
6799 			if (!set_mtr_reg && fm->drop_cnt)
6800 				ret =
6801 			flow_meter_create_drop_flow_with_org_pattern(dev, flow,
6802 							&sfx_attr, items,
6803 							flow_split_info,
6804 							fm, error);
6805 			goto exit;
6806 		}
		/* Set the sfx group attr. */
6808 		sfx_attr.group = sfx_attr.transfer ?
6809 				(MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6810 				 MLX5_FLOW_TABLE_LEVEL_METER;
6811 		flow_split_info->prefix_layers =
6812 				flow_get_prefix_layer_flags(dev_flow);
6813 		flow_split_info->prefix_mark |= wks->mark;
6814 		flow_split_info->table_id = MLX5_MTR_TABLE_ID_SUFFIX;
6815 	}
	/* Add the suffix subflow (or the original flow if no meter was split). */
6817 	ret = flow_create_split_metadata(dev, flow,
6818 					 &sfx_attr, sfx_items ?
6819 					 sfx_items : items,
6820 					 sfx_actions ? sfx_actions : actions,
6821 					 flow_split_info, error);
6822 exit:
6823 	if (sfx_actions)
6824 		mlx5_free(sfx_actions);
6825 	return ret;
6826 }
6827 
6828 /**
6829  * The splitting for sample feature.
6830  *
 * Once a sample action is detected in the action list, the flow actions are
 * split into a prefix subflow and a suffix subflow.
 *
 * The original items remain in the prefix subflow. All actions preceding the
 * sample action, and the sample action itself, are copied to the prefix
 * subflow; the actions following the sample action are copied to the suffix
 * subflow. The queue action is always located in the suffix subflow.
 *
 * In order to match packets from the prefix subflow in the suffix subflow,
 * an extra tag action is added to the prefix subflow, and the suffix subflow
 * uses a tag item with the unique flow ID.
6842  *
6843  * @param dev
6844  *   Pointer to Ethernet device.
6845  * @param[in] flow
6846  *   Parent flow structure pointer.
6847  * @param[in] attr
6848  *   Flow rule attributes.
6849  * @param[in] items
6850  *   Pattern specification (list terminated by the END pattern item).
6851  * @param[in] actions
6852  *   Associated actions (list terminated by the END action).
6853  * @param[in] flow_split_info
6854  *   Pointer to flow split info structure.
6855  * @param[out] error
6856  *   Perform verbose error reporting if not NULL.
6857  * @return
6858  *   0 on success, negative value otherwise
6859  */
6860 static int
6861 flow_create_split_sample(struct rte_eth_dev *dev,
6862 			 struct rte_flow *flow,
6863 			 const struct rte_flow_attr *attr,
6864 			 const struct rte_flow_item items[],
6865 			 const struct rte_flow_action actions[],
6866 			 struct mlx5_flow_split_info *flow_split_info,
6867 			 struct rte_flow_error *error)
6868 {
6869 	struct mlx5_priv *priv = dev->data->dev_private;
6870 	struct rte_flow_action *sfx_actions = NULL;
6871 	struct rte_flow_action *pre_actions = NULL;
6872 	struct rte_flow_item *sfx_items = NULL;
6873 	struct mlx5_flow *dev_flow = NULL;
6874 	struct rte_flow_attr sfx_attr = *attr;
6875 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6876 	struct mlx5_flow_dv_sample_resource *sample_res;
6877 	struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
6878 	struct mlx5_flow_tbl_resource *sfx_tbl;
6879 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6880 #endif
6881 	size_t act_size;
6882 	size_t item_size;
6883 	uint32_t fdb_tx = 0;
6884 	int32_t tag_id = 0;
6885 	int actions_n = 0;
6886 	int sample_action_pos;
6887 	int qrss_action_pos;
6888 	int add_tag = 0;
6889 	int modify_after_mirror = 0;
6890 	uint16_t jump_table = 0;
6891 	const uint32_t next_ft_step = 1;
6892 	int ret = 0;
6893 	struct mlx5_priv *item_port_priv = NULL;
6894 	const struct rte_flow_item *item;
6895 
6896 	if (priv->sampler_en)
6897 		actions_n = flow_check_match_action(actions, attr,
6898 					RTE_FLOW_ACTION_TYPE_SAMPLE,
6899 					&sample_action_pos, &qrss_action_pos,
6900 					&modify_after_mirror);
6901 	if (actions_n) {
		/* The prefix actions must include sample, tag, end. */
6903 		act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
6904 			   + sizeof(struct mlx5_rte_flow_action_set_tag);
6905 		item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
6906 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
6907 		sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
6908 					  item_size), 0, SOCKET_ID_ANY);
6909 		if (!sfx_actions)
6910 			return rte_flow_error_set(error, ENOMEM,
6911 						  RTE_FLOW_ERROR_TYPE_ACTION,
6912 						  NULL, "no memory to split "
6913 						  "sample flow");
6914 		for (item = items; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
6915 			if (item->type == RTE_FLOW_ITEM_TYPE_PORT_ID) {
6916 				const struct rte_flow_item_port_id *spec;
6917 
6918 				spec = (const struct rte_flow_item_port_id *)item->spec;
6919 				if (spec)
6920 					item_port_priv =
6921 						mlx5_port_to_eswitch_info(spec->id, true);
6922 				break;
6923 			} else if (item->type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT) {
6924 				const struct rte_flow_item_ethdev *spec;
6925 
6926 				spec = (const struct rte_flow_item_ethdev *)item->spec;
6927 				if (spec)
6928 					item_port_priv =
6929 						mlx5_port_to_eswitch_info(spec->port_id, true);
6930 				break;
6931 			} else if (item->type == RTE_FLOW_ITEM_TYPE_PORT_REPRESENTOR) {
6932 				const struct rte_flow_item_ethdev *spec;
6933 
6934 				spec = (const struct rte_flow_item_ethdev *)item->spec;
6935 				if (spec)
6936 					item_port_priv =
6937 						mlx5_port_to_eswitch_info(spec->port_id, true);
6938 				break;
6939 			}
6940 		}
6941 		/* The representor_id is UINT16_MAX for uplink. */
6942 		fdb_tx = (attr->transfer &&
6943 			  flow_source_vport_representor(priv, item_port_priv));
6944 		/*
6945 		 * When reg_c_preserve is set, metadata registers Cx preserve
6946 		 * their value even through packet duplication.
6947 		 */
6948 		add_tag = (!fdb_tx ||
6949 			   priv->sh->cdev->config.hca_attr.reg_c_preserve);
6950 		if (add_tag)
6951 			sfx_items = (struct rte_flow_item *)((char *)sfx_actions
6952 					+ act_size);
6953 		if (modify_after_mirror)
6954 			jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
6955 				     next_ft_step;
6956 		pre_actions = sfx_actions + actions_n;
6957 		tag_id = flow_sample_split_prep(dev, add_tag, items, sfx_items,
6958 						actions, sfx_actions,
6959 						pre_actions, actions_n,
6960 						sample_action_pos,
6961 						qrss_action_pos, jump_table,
6962 						error);
6963 		if (tag_id < 0 || (add_tag && !tag_id)) {
6964 			ret = -rte_errno;
6965 			goto exit;
6966 		}
6967 		if (modify_after_mirror)
6968 			flow_split_info->skip_scale =
6969 					1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6970 		/* Add the prefix subflow. */
6971 		ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6972 					      items, pre_actions,
6973 					      flow_split_info, error);
6974 		if (ret) {
6975 			ret = -rte_errno;
6976 			goto exit;
6977 		}
6978 		dev_flow->handle->split_flow_id = tag_id;
6979 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6980 		if (!modify_after_mirror) {
6981 			/* Set the sfx group attr. */
6982 			sample_res = (struct mlx5_flow_dv_sample_resource *)
6983 						dev_flow->dv.sample_res;
6984 			sfx_tbl = (struct mlx5_flow_tbl_resource *)
6985 						sample_res->normal_path_tbl;
6986 			sfx_tbl_data = container_of(sfx_tbl,
6987 						struct mlx5_flow_tbl_data_entry,
6988 						tbl);
6989 			sfx_attr.group = sfx_attr.transfer ?
6990 			(sfx_tbl_data->level - 1) : sfx_tbl_data->level;
6991 		} else {
6992 			MLX5_ASSERT(attr->transfer);
6993 			sfx_attr.group = jump_table;
6994 		}
6995 		flow_split_info->prefix_layers =
6996 				flow_get_prefix_layer_flags(dev_flow);
6997 		MLX5_ASSERT(wks);
6998 		flow_split_info->prefix_mark |= wks->mark;
		/* The suffix group level has already been scaled with the
		 * factor; set MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1
		 * to avoid scaling again in translation.
		 */
7003 		flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
7004 #endif
7005 	}
7006 	/* Add the suffix subflow. */
7007 	ret = flow_create_split_meter(dev, flow, &sfx_attr,
7008 				      sfx_items ? sfx_items : items,
7009 				      sfx_actions ? sfx_actions : actions,
7010 				      flow_split_info, error);
7011 exit:
7012 	if (sfx_actions)
7013 		mlx5_free(sfx_actions);
7014 	return ret;
7015 }
7016 
7017 /**
 * Split the flow into a set of subflows. The splitters might be linked
7019  * in the chain, like this:
7020  * flow_create_split_outer() calls:
7021  *   flow_create_split_meter() calls:
7022  *     flow_create_split_metadata(meter_subflow_0) calls:
7023  *       flow_create_split_inner(metadata_subflow_0)
7024  *       flow_create_split_inner(metadata_subflow_1)
7025  *       flow_create_split_inner(metadata_subflow_2)
7026  *     flow_create_split_metadata(meter_subflow_1) calls:
7027  *       flow_create_split_inner(metadata_subflow_0)
7028  *       flow_create_split_inner(metadata_subflow_1)
7029  *       flow_create_split_inner(metadata_subflow_2)
7030  *
 * This provides a flexible way to add new levels of flow splitting.
 * All successfully created subflows are included in the parent flow
 * dev_flow list.
7034  *
7035  * @param dev
7036  *   Pointer to Ethernet device.
7037  * @param[in] flow
7038  *   Parent flow structure pointer.
7039  * @param[in] attr
7040  *   Flow rule attributes.
7041  * @param[in] items
7042  *   Pattern specification (list terminated by the END pattern item).
7043  * @param[in] actions
7044  *   Associated actions (list terminated by the END action).
7045  * @param[in] flow_split_info
7046  *   Pointer to flow split info structure.
7047  * @param[out] error
7048  *   Perform verbose error reporting if not NULL.
7049  * @return
7050  *   0 on success, negative value otherwise
7051  */
7052 static int
7053 flow_create_split_outer(struct rte_eth_dev *dev,
7054 			struct rte_flow *flow,
7055 			const struct rte_flow_attr *attr,
7056 			const struct rte_flow_item items[],
7057 			const struct rte_flow_action actions[],
7058 			struct mlx5_flow_split_info *flow_split_info,
7059 			struct rte_flow_error *error)
7060 {
7061 	int ret;
7062 
7063 	ret = flow_create_split_sample(dev, flow, attr, items,
7064 				       actions, flow_split_info, error);
7065 	MLX5_ASSERT(ret <= 0);
7066 	return ret;
7067 }
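
/*
 * Hypothetical sketch (not compiled) of how a new splitter level would be
 * chained: it prepares its own prefix/suffix lists, then delegates to the
 * next splitter, exactly like the levels above. The function name and the
 * elided preparation step are illustrative only.
 */
#if 0
static int
flow_create_split_example(struct rte_eth_dev *dev,
			  struct rte_flow *flow,
			  const struct rte_flow_attr *attr,
			  const struct rte_flow_item items[],
			  const struct rte_flow_action actions[],
			  struct mlx5_flow_split_info *flow_split_info,
			  struct rte_flow_error *error)
{
	/* ... split items/actions into prefix and suffix here ... */
	return flow_create_split_sample(dev, flow, attr, items, actions,
					flow_split_info, error);
}
#endif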
7068 
7069 static inline struct mlx5_flow_tunnel *
7070 flow_tunnel_from_rule(const struct mlx5_flow *flow)
7071 {
7072 	struct mlx5_flow_tunnel *tunnel;
7073 
7074 #pragma GCC diagnostic push
7075 #pragma GCC diagnostic ignored "-Wcast-qual"
7076 	tunnel = (typeof(tunnel))flow->tunnel;
7077 #pragma GCC diagnostic pop
7078 
7079 	return tunnel;
7080 }
7081 
7082 /**
7083  * Adjust flow RSS workspace if needed.
7084  *
7085  * @param wks
7086  *   Pointer to thread flow work space.
7087  * @param rss_desc
7088  *   Pointer to RSS descriptor.
7089  * @param[in] nrssq_num
7090  *   New RSS queue number.
7091  *
7092  * @return
7093  *   0 on success, -1 otherwise and rte_errno is set.
7094  */
7095 static int
7096 flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
7097 			  struct mlx5_flow_rss_desc *rss_desc,
7098 			  uint32_t nrssq_num)
{
	uint16_t *queue;

	if (likely(nrssq_num <= wks->rssq_num))
		return 0;
	queue = realloc(rss_desc->queue,
			sizeof(*rss_desc->queue) * RTE_ALIGN(nrssq_num, 2));
	if (!queue) {
		/* Keep the old buffer on failure so it is not leaked. */
		rte_errno = ENOMEM;
		return -1;
	}
	rss_desc->queue = queue;
7108 	wks->rssq_num = RTE_ALIGN(nrssq_num, 2);
7109 	return 0;
7110 }
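
/*
 * A minimal sketch of the growth policy above: RTE_ALIGN(n, 2) rounds the
 * requested queue count up to the next even value, so growing from 5 to 6
 * queues needs no reallocation.
 */
#if 0
static uint32_t
example_round_rssq_num(uint32_t nrssq_num)
{
	/* RTE_ALIGN(5, 2) == 6, RTE_ALIGN(6, 2) == 6, RTE_ALIGN(7, 2) == 8. */
	return RTE_ALIGN(nrssq_num, 2);
}
#endif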
7111 
7112 /**
 * Create a flow and add it to the flow list of the given type.
7114  *
7115  * @param dev
7116  *   Pointer to Ethernet device.
 * @param type
 *   Flow type (MLX5_FLOW_TYPE_*).
7122  * @param[in] attr
7123  *   Flow rule attributes.
7124  * @param[in] items
7125  *   Pattern specification (list terminated by the END pattern item).
7126  * @param[in] actions
7127  *   Associated actions (list terminated by the END action).
7128  * @param[in] external
 *   This flow rule is created by a request external to the PMD.
7130  * @param[out] error
7131  *   Perform verbose error reporting if not NULL.
7132  *
7133  * @return
7134  *   A flow index on success, 0 otherwise and rte_errno is set.
7135  */
7136 static uint32_t
7137 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7138 		 const struct rte_flow_attr *attr,
7139 		 const struct rte_flow_item items[],
7140 		 const struct rte_flow_action original_actions[],
7141 		 bool external, struct rte_flow_error *error)
7142 {
7143 	struct mlx5_priv *priv = dev->data->dev_private;
7144 	struct rte_flow *flow = NULL;
7145 	struct mlx5_flow *dev_flow;
7146 	const struct rte_flow_action_rss *rss = NULL;
7147 	struct mlx5_translated_action_handle
7148 		indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
7149 	int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
7150 	union {
7151 		struct mlx5_flow_expand_rss buf;
7152 		uint8_t buffer[8192];
7153 	} expand_buffer;
7154 	union {
7155 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
7156 		uint8_t buffer[2048];
7157 	} actions_rx;
7158 	union {
7159 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
7160 		uint8_t buffer[2048];
7161 	} actions_hairpin_tx;
7162 	union {
7163 		struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
7164 		uint8_t buffer[2048];
7165 	} items_tx;
7166 	struct mlx5_rte_flow_item_sq sq_specs[RTE_MAX_QUEUES_PER_PORT];
7167 	struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
7168 	struct mlx5_flow_rss_desc *rss_desc;
7169 	const struct rte_flow_action *p_actions_rx;
7170 	uint32_t i;
7171 	uint32_t idx = 0;
7172 	int hairpin_flow;
7173 	struct rte_flow_attr attr_tx = { .priority = 0 };
7174 	const struct rte_flow_action *actions;
7175 	struct rte_flow_action *translated_actions = NULL;
7176 	struct mlx5_flow_tunnel *tunnel;
7177 	struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
7178 	struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
7179 	struct mlx5_flow_split_info flow_split_info = {
7180 		.external = !!external,
7181 		.skip_scale = 0,
7182 		.flow_idx = 0,
7183 		.prefix_mark = 0,
7184 		.prefix_layers = 0,
7185 		.table_id = 0
7186 	};
7187 	int ret;
7188 
7189 	MLX5_ASSERT(wks);
7190 	rss_desc = &wks->rss_desc;
7191 	ret = flow_action_handles_translate(dev, original_actions,
7192 					    indir_actions,
7193 					    &indir_actions_n,
7194 					    &translated_actions, error);
7195 	if (ret < 0) {
7196 		MLX5_ASSERT(translated_actions == NULL);
7197 		return 0;
7198 	}
7199 	actions = translated_actions ? translated_actions : original_actions;
7200 	p_actions_rx = actions;
7201 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
7202 	ret = flow_drv_validate(dev, attr, items, p_actions_rx,
7203 				external, hairpin_flow, error);
7204 	if (ret < 0)
7205 		goto error_before_hairpin_split;
7206 	flow = mlx5_ipool_zmalloc(priv->flows[type], &idx);
7207 	if (!flow) {
7208 		rte_errno = ENOMEM;
7209 		goto error_before_hairpin_split;
7210 	}
7211 	if (hairpin_flow > 0) {
7212 		if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
7213 			rte_errno = EINVAL;
7214 			goto error_before_hairpin_split;
7215 		}
7216 		flow_hairpin_split(dev, actions, actions_rx.actions,
7217 				   actions_hairpin_tx.actions, items_tx.items,
7218 				   idx);
7219 		p_actions_rx = actions_rx.actions;
7220 	}
7221 	flow_split_info.flow_idx = idx;
7222 	flow->drv_type = flow_get_drv_type(dev, attr);
7223 	MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
7224 		    flow->drv_type < MLX5_FLOW_TYPE_MAX);
7225 	memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
7226 	/* RSS Action only works on NIC RX domain */
7227 	if (attr->ingress)
7228 		rss = flow_get_rss_action(dev, p_actions_rx);
7229 	if (rss) {
7230 		if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
7231 			return 0;
7232 		/*
7233 		 * The following information is required by
7234 		 * mlx5_flow_hashfields_adjust() in advance.
7235 		 */
7236 		rss_desc->level = rss->level;
7237 		/* RSS type 0 indicates default RSS type (RTE_ETH_RSS_IP). */
7238 		rss_desc->types = !rss->types ? RTE_ETH_RSS_IP : rss->types;
7239 	}
7240 	flow->dev_handles = 0;
7241 	if (rss && rss->types) {
7242 		unsigned int graph_root;
7243 
7244 		graph_root = find_graph_root(rss->level);
7245 		ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
7246 					   items, rss->types,
7247 					   mlx5_support_expansion, graph_root);
7248 		MLX5_ASSERT(ret > 0 &&
7249 		       (unsigned int)ret < sizeof(expand_buffer.buffer));
7250 		if (rte_log_can_log(mlx5_logtype, RTE_LOG_DEBUG)) {
7251 			for (i = 0; i < buf->entries; ++i)
7252 				mlx5_dbg__print_pattern(buf->entry[i].pattern);
7253 		}
7254 	} else {
7255 		ret = mlx5_flow_expand_sqn((struct mlx5_flow_expand_sqn *)buf,
7256 					   sizeof(expand_buffer.buffer),
7257 					   items, sq_specs);
7258 		if (ret) {
7259 			rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
7260 					   NULL, "not enough memory for rte_flow");
7261 			goto error;
7262 		}
7263 		if (buf->entries == 0) {
7264 			buf->entries = 1;
7265 			buf->entry[0].pattern = (void *)(uintptr_t)items;
7266 		}
7267 	}
7268 	rss_desc->shared_rss = flow_get_shared_rss_action(dev, indir_actions,
7269 						      indir_actions_n);
7270 	for (i = 0; i < buf->entries; ++i) {
7271 		/* Initialize flow split data. */
7272 		flow_split_info.prefix_layers = 0;
7273 		flow_split_info.prefix_mark = 0;
7274 		flow_split_info.skip_scale = 0;
7275 		/*
7276 		 * The splitter may create multiple dev_flows,
7277 		 * depending on configuration. In the simplest
7278 		 * case it just creates unmodified original flow.
7279 		 */
7280 		ret = flow_create_split_outer(dev, flow, attr,
7281 					      buf->entry[i].pattern,
7282 					      p_actions_rx, &flow_split_info,
7283 					      error);
7284 		if (ret < 0)
7285 			goto error;
7286 		if (is_flow_tunnel_steer_rule(wks->flows[0].tof_type)) {
7287 			ret = flow_tunnel_add_default_miss(dev, flow, attr,
7288 							   p_actions_rx,
7289 							   idx,
7290 							   wks->flows[0].tunnel,
7291 							   &default_miss_ctx,
7292 							   error);
7293 			if (ret < 0) {
7294 				mlx5_free(default_miss_ctx.queue);
7295 				goto error;
7296 			}
7297 		}
7298 	}
7299 	/* Create the tx flow. */
7300 	if (hairpin_flow) {
7301 		attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
7302 		attr_tx.ingress = 0;
7303 		attr_tx.egress = 1;
7304 		dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
7305 					 actions_hairpin_tx.actions,
7306 					 idx, error);
7307 		if (!dev_flow)
7308 			goto error;
7309 		dev_flow->flow = flow;
7310 		dev_flow->external = 0;
7311 		SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
7312 			      dev_flow->handle, next);
7313 		ret = flow_drv_translate(dev, dev_flow, &attr_tx,
7314 					 items_tx.items,
7315 					 actions_hairpin_tx.actions, error);
7316 		if (ret < 0)
7317 			goto error;
7318 	}
7319 	/*
7320 	 * Update the metadata register copy table. If extensive
7321 	 * metadata feature is enabled and registers are supported
7322 	 * we might create the extra rte_flow for each unique
7323 	 * MARK/FLAG action ID.
7324 	 *
7325 	 * The table is updated for ingress and transfer flows only, because
7326 	 * the egress Flows belong to the different device and
7327 	 * copy table should be updated in peer NIC Rx domain.
7328 	 */
7329 	if ((attr->ingress || attr->transfer) &&
7330 	    (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
7331 		ret = flow_mreg_update_copy_table(dev, flow, actions, error);
7332 		if (ret)
7333 			goto error;
7334 	}
7335 	/*
7336 	 * If the flow is external (from application) OR device is started,
7337 	 * OR mreg discover, then apply immediately.
7338 	 */
7339 	if (external || dev->data->dev_started ||
7340 	    (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
7341 	     attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
7342 		ret = flow_drv_apply(dev, flow, error);
7343 		if (ret < 0)
7344 			goto error;
7345 	}
7346 	flow->type = type;
7347 	flow_rxq_flags_set(dev, flow);
7348 	rte_free(translated_actions);
7349 	tunnel = flow_tunnel_from_rule(wks->flows);
7350 	if (tunnel) {
7351 		flow->tunnel = 1;
7352 		flow->tunnel_id = tunnel->tunnel_id;
7353 		__atomic_fetch_add(&tunnel->refctn, 1, __ATOMIC_RELAXED);
7354 		mlx5_free(default_miss_ctx.queue);
7355 	}
7356 	mlx5_flow_pop_thread_workspace();
7357 	return idx;
7358 error:
7359 	MLX5_ASSERT(flow);
7360 	ret = rte_errno; /* Save rte_errno before cleanup. */
7361 	flow_mreg_del_copy_action(dev, flow);
7362 	flow_drv_destroy(dev, flow);
7363 	if (rss_desc->shared_rss)
7364 		__atomic_fetch_sub(&((struct mlx5_shared_action_rss *)
7365 			mlx5_ipool_get
7366 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
7367 			rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
7368 	mlx5_ipool_free(priv->flows[type], idx);
7369 	rte_errno = ret; /* Restore rte_errno. */
7372 error_before_hairpin_split:
7373 	mlx5_flow_pop_thread_workspace();
7374 	rte_free(translated_actions);
7375 	return 0;
7376 }
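
/*
 * Documentation-only sketch: the RSS expansion in the creation path above is
 * driven by the RSS action's `types` field. An application only supplies
 * something like the action below (queue list and types are placeholders);
 * the PMD internally expands the pattern, e.g. eth/ipv4 into eth/ipv4/udp
 * and eth/ipv4/tcp variants, one subflow per expansion entry.
 */
#if 0
static const uint16_t example_rss_queues[] = { 0, 1, 2, 3 };
static const struct rte_flow_action_rss example_rss = {
	.types = RTE_ETH_RSS_IP | RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP,
	.queue_num = RTE_DIM(example_rss_queues),
	.queue = example_rss_queues,
};
#endif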
7377 
7378 /**
7379  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
7380  * incoming packets to table 1.
7381  *
7382  * Other flow rules, requested for group n, will be created in
7383  * e-switch table n+1.
7384  * Jump action to e-switch group n will be created to group n+1.
7385  *
7386  * Used when working in switchdev mode, to utilise advantages of table 1
7387  * and above.
7388  *
7389  * @param dev
7390  *   Pointer to Ethernet device.
7391  *
7392  * @return
7393  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
7394  */
7395 struct rte_flow *
7396 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
7397 {
7398 	const struct rte_flow_attr attr = {
7399 		.group = 0,
7400 		.priority = 0,
7401 		.ingress = 0,
7402 		.egress = 0,
7403 		.transfer = 1,
7404 	};
7405 	const struct rte_flow_item pattern = {
7406 		.type = RTE_FLOW_ITEM_TYPE_END,
7407 	};
7408 	struct rte_flow_action_jump jump = {
7409 		.group = 1,
7410 	};
7411 	const struct rte_flow_action actions[] = {
7412 		{
7413 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
7414 			.conf = &jump,
7415 		},
7416 		{
7417 			.type = RTE_FLOW_ACTION_TYPE_END,
7418 		},
7419 	};
7420 	struct rte_flow_error error;
7421 
7422 	return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7423 						   &attr, &pattern,
7424 						   actions, false, &error);
7425 }
7426 
7427 /**
7428  * Create a dedicated flow rule on e-switch table 1, matches ESW manager
7429  * and sq number, directs all packets to peer vport.
7430  *
7431  * @param dev
7432  *   Pointer to Ethernet device.
7433  * @param sq_num
7434  *   SQ number.
7435  *
7436  * @return
7437  *   Flow ID on success, 0 otherwise and rte_errno is set.
7438  */
7439 uint32_t
7440 mlx5_flow_create_devx_sq_miss_flow(struct rte_eth_dev *dev, uint32_t sq_num)
7441 {
7442 	struct rte_flow_attr attr = {
7443 		.group = 0,
7444 		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7445 		.ingress = 0,
7446 		.egress = 0,
7447 		.transfer = 1,
7448 	};
7449 	struct rte_flow_item_port_id port_spec = {
7450 		.id = MLX5_PORT_ESW_MGR,
7451 	};
7452 	struct mlx5_rte_flow_item_sq sq_spec = {
7453 		.queue = sq_num,
7454 	};
7455 	struct rte_flow_item pattern[] = {
7456 		{
7457 			.type = RTE_FLOW_ITEM_TYPE_PORT_ID,
7458 			.spec = &port_spec,
7459 		},
7460 		{
7461 			.type = (enum rte_flow_item_type)
7462 				MLX5_RTE_FLOW_ITEM_TYPE_SQ,
7463 			.spec = &sq_spec,
7464 		},
7465 		{
7466 			.type = RTE_FLOW_ITEM_TYPE_END,
7467 		},
7468 	};
7469 	struct rte_flow_action_jump jump = {
7470 		.group = 1,
7471 	};
7472 	struct rte_flow_action_port_id port = {
7473 		.id = dev->data->port_id,
7474 	};
7475 	struct rte_flow_action actions[] = {
7476 		{
7477 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
7478 			.conf = &jump,
7479 		},
7480 		{
7481 			.type = RTE_FLOW_ACTION_TYPE_END,
7482 		},
7483 	};
7484 	struct rte_flow_error error;
7485 
7486 	/*
7487 	 * Creates group 0, highest priority jump flow.
7488 	 * Matches txq to bypass kernel packets.
7489 	 */
7490 	if (flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern, actions,
7491 			     false, &error) == 0)
7492 		return 0;
7493 	/* Create group 1, lowest priority redirect flow for txq. */
7494 	attr.group = 1;
7495 	actions[0].conf = &port;
7496 	actions[0].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
7497 	return flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern,
7498 				actions, false, &error);
7499 }
7500 
7501 /**
7502  * Validate a flow supported by the NIC.
7503  *
7504  * @see rte_flow_validate()
7505  * @see rte_flow_ops
7506  */
7507 int
7508 mlx5_flow_validate(struct rte_eth_dev *dev,
7509 		   const struct rte_flow_attr *attr,
7510 		   const struct rte_flow_item items[],
7511 		   const struct rte_flow_action original_actions[],
7512 		   struct rte_flow_error *error)
7513 {
7514 	int hairpin_flow;
7515 	struct mlx5_translated_action_handle
7516 		indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
7517 	int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
7518 	const struct rte_flow_action *actions;
7519 	struct rte_flow_action *translated_actions = NULL;
7520 	int ret = flow_action_handles_translate(dev, original_actions,
7521 						indir_actions,
7522 						&indir_actions_n,
7523 						&translated_actions, error);
7524 
7525 	if (ret)
7526 		return ret;
7527 	actions = translated_actions ? translated_actions : original_actions;
7528 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
7529 	ret = flow_drv_validate(dev, attr, items, actions,
7530 				true, hairpin_flow, error);
7531 	rte_free(translated_actions);
7532 	return ret;
7533 }
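
/*
 * Typical usage sketch, assuming port_id, attr, pattern and actions are
 * prepared by the application: validate first, then create. Both public
 * entry points funnel into the same driver validation shown above.
 */
#if 0
	struct rte_flow_error err;
	struct rte_flow *f = NULL;

	if (rte_flow_validate(port_id, &attr, pattern, actions, &err) == 0)
		f = rte_flow_create(port_id, &attr, pattern, actions, &err);
	if (!f)
		printf("flow rejected: %s\n",
		       err.message ? err.message : "(no reason)");
#endif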
7534 
7535 static int
7536 mlx5_flow_cache_flow_info(struct rte_eth_dev *dev,
7537 			  const struct rte_flow_attr *attr,
7538 			  const uint32_t orig_prio,
7539 			  const struct rte_flow_item *items,
7540 			  const struct rte_flow_action *actions,
7541 			  uint32_t flow_idx)
7542 {
7543 	struct mlx5_priv *priv = dev->data->dev_private;
7544 	struct mlx5_flow_engine_mode_info *mode_info = &priv->mode_info;
7545 	struct mlx5_dv_flow_info *flow_info, *tmp_info;
7546 	struct rte_flow_error error;
7547 	int len, ret;
7548 
7549 	flow_info = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*flow_info), 0, SOCKET_ID_ANY);
7550 	if (!flow_info) {
		DRV_LOG(ERR, "Not enough memory for flow_info caching.");
7552 		return -1;
7553 	}
7554 	flow_info->orig_prio = orig_prio;
7555 	flow_info->attr = *attr;
	/* A standby-mode rule is always saved in the low-priority entry. */
7557 	flow_info->flow_idx_low_prio = flow_idx;
7558 
7559 	/* Store matching items. */
7560 	ret = rte_flow_conv(RTE_FLOW_CONV_OP_PATTERN, NULL, 0, items, &error);
7561 	if (ret <= 0) {
7562 		DRV_LOG(ERR, "Can't get items length.");
7563 		goto end;
7564 	}
7565 	len = RTE_ALIGN(ret, 16);
7566 	flow_info->items = mlx5_malloc(MLX5_MEM_ZERO, len, 0, SOCKET_ID_ANY);
7567 	if (!flow_info->items) {
		DRV_LOG(ERR, "Not enough memory for items caching.");
7569 		goto end;
7570 	}
7571 	ret = rte_flow_conv(RTE_FLOW_CONV_OP_PATTERN, flow_info->items, ret, items, &error);
7572 	if (ret <= 0) {
7573 		DRV_LOG(ERR, "Can't duplicate items.");
7574 		goto end;
7575 	}
7576 
7577 	/* Store flow actions. */
7578 	ret = rte_flow_conv(RTE_FLOW_CONV_OP_ACTIONS, NULL, 0, actions, &error);
7579 	if (ret <= 0) {
7580 		DRV_LOG(ERR, "Can't get actions length.");
7581 		goto end;
7582 	}
7583 	len = RTE_ALIGN(ret, 16);
7584 	flow_info->actions = mlx5_malloc(MLX5_MEM_ZERO, len, 0, SOCKET_ID_ANY);
7585 	if (!flow_info->actions) {
		DRV_LOG(ERR, "Not enough memory for actions caching.");
7587 		goto end;
7588 	}
7589 	ret = rte_flow_conv(RTE_FLOW_CONV_OP_ACTIONS, flow_info->actions, ret, actions, &error);
7590 	if (ret <= 0) {
7591 		DRV_LOG(ERR, "Can't duplicate actions.");
7592 		goto end;
7593 	}
7594 
	/* Insert at the list end. */
	if (LIST_EMPTY(&mode_info->hot_upgrade)) {
		LIST_INSERT_HEAD(&mode_info->hot_upgrade, flow_info, next);
7598 	} else {
7599 		tmp_info = LIST_FIRST(&mode_info->hot_upgrade);
7600 		while (LIST_NEXT(tmp_info, next))
7601 			tmp_info = LIST_NEXT(tmp_info, next);
7602 		LIST_INSERT_AFTER(tmp_info, flow_info, next);
7603 	}
7604 	return 0;
7605 end:
7606 	if (flow_info->items)
7607 		mlx5_free(flow_info->items);
7608 	if (flow_info->actions)
7609 		mlx5_free(flow_info->actions);
7610 	mlx5_free(flow_info);
7611 	return -1;
7612 }
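
/*
 * Note on the tail insertion above: BSD <sys/queue.h> LIST keeps no tail
 * pointer, so appending walks the whole list. A minimal sketch of the same
 * pattern with hypothetical `struct node` and `head`; a TAILQ would make
 * this O(1) at the cost of an extra pointer in the head.
 */
#if 0
	struct node *tail = LIST_FIRST(&head);

	if (!tail) {
		LIST_INSERT_HEAD(&head, elem, next);
	} else {
		while (LIST_NEXT(tail, next))
			tail = LIST_NEXT(tail, next);
		LIST_INSERT_AFTER(tail, elem, next);
	}
#endif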
7613 
7614 static int
7615 mlx5_flow_cache_flow_toggle(struct rte_eth_dev *dev, bool orig_prio)
7616 {
7617 	struct mlx5_priv *priv = dev->data->dev_private;
7618 	struct mlx5_flow_engine_mode_info *mode_info = &priv->mode_info;
7619 	struct mlx5_dv_flow_info *flow_info;
7620 	struct rte_flow_attr attr;
7621 	struct rte_flow_error error;
7622 	struct rte_flow *high, *low;
7623 
7624 	flow_info = LIST_FIRST(&mode_info->hot_upgrade);
7625 	while (flow_info) {
7626 		/* DUP flow may have the same priority. */
7627 		if (flow_info->orig_prio != flow_info->attr.priority) {
7628 			attr = flow_info->attr;
7629 			if (orig_prio)
7630 				attr.priority = flow_info->orig_prio;
7631 			flow_info->flow_idx_high_prio = flow_list_create(dev, MLX5_FLOW_TYPE_GEN,
7632 					&attr, flow_info->items, flow_info->actions,
7633 					true, &error);
7634 			if (!flow_info->flow_idx_high_prio) {
7635 				DRV_LOG(ERR, "Priority toggle failed internally.");
7636 				goto err;
7637 			}
7638 		}
7639 		flow_info = LIST_NEXT(flow_info, next);
7640 	}
7641 	/* Delete the low priority rules and swap the flow handle. */
7642 	flow_info = LIST_FIRST(&mode_info->hot_upgrade);
7643 	while (flow_info) {
7644 		MLX5_ASSERT(flow_info->flow_idx_low_prio);
7645 		if (flow_info->orig_prio != flow_info->attr.priority) {
7646 			high = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
7647 					flow_info->flow_idx_high_prio);
7648 			low = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
7649 					flow_info->flow_idx_low_prio);
7650 			if (high && low) {
7651 				RTE_SWAP(*low, *high);
7652 				flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN,
7653 						  flow_info->flow_idx_low_prio);
7654 				flow_info->flow_idx_high_prio = 0;
7655 			}
7656 		}
7657 		flow_info = LIST_NEXT(flow_info, next);
7658 	}
7659 	return 0;
7660 err:
7661 	/* Destroy preceding successful high priority rules. */
7662 	flow_info = LIST_FIRST(&mode_info->hot_upgrade);
7663 	while (flow_info) {
7664 		if (flow_info->orig_prio != flow_info->attr.priority) {
7665 			if (flow_info->flow_idx_high_prio)
7666 				flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN,
7667 						  flow_info->flow_idx_high_prio);
7668 			else
7669 				break;
7670 			flow_info->flow_idx_high_prio = 0;
7671 		}
7672 		flow_info = LIST_NEXT(flow_info, next);
7673 	}
7674 	return -1;
7675 }
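
/*
 * Design note with a minimal sketch: RTE_SWAP() above exchanges the flow
 * *contents*, not the ipool slots, so any handle the application already
 * holds keeps its index while the backing rule silently changes priority.
 */
#if 0
	struct rte_flow *high, *low;	/* two valid ipool entries */

	RTE_SWAP(*low, *high);	/* swap contents; both indexes stay valid */
#endif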
7676 
7677 /**
7678  * Set the mode of the flow engine of a process to active or standby during live migration.
7679  *
7680  * @param[in] mode
7681  *   MLX5 flow engine mode, @see `enum mlx5_flow_engine_mode`.
7682  * @param[in] flags
7683  *   Flow engine mode specific flags.
7684  *
7685  * @return
7686  *   Negative value on error, positive on success.
7687  */
7688 int
7689 rte_pmd_mlx5_flow_engine_set_mode(enum mlx5_flow_engine_mode mode, uint32_t flags)
7690 {
7691 	struct mlx5_priv *priv;
7692 	struct mlx5_flow_engine_mode_info *mode_info;
7693 	struct mlx5_dv_flow_info *flow_info, *tmp_info;
7694 	uint16_t port, port_id;
7695 	uint16_t toggle_num = 0;
7696 	struct rte_eth_dev *dev;
7697 	enum mlx5_flow_engine_mode orig_mode;
7698 	uint32_t orig_flags;
7699 	bool need_toggle = false;
7700 
7701 	/* Check if flags combinations are supported. */
7702 	if (flags && flags != MLX5_FLOW_ENGINE_FLAG_STANDBY_DUP_INGRESS) {
		DRV_LOG(ERR, "Unsupported flags %u", flags);
7704 		return -1;
7705 	}
7706 	MLX5_ETH_FOREACH_DEV(port, NULL) {
7707 		dev = &rte_eth_devices[port];
7708 		priv = dev->data->dev_private;
7709 		mode_info = &priv->mode_info;
7710 		/* No mode change. Assume all devices hold the same mode. */
7711 		if (mode_info->mode == mode) {
			DRV_LOG(INFO, "Process flow engine is already in mode %u", mode);
7713 			if (mode_info->mode_flag != flags && !LIST_EMPTY(&mode_info->hot_upgrade)) {
7714 				DRV_LOG(ERR, "Port %u has rule cache with different flag %u\n",
7715 						port, mode_info->mode_flag);
7716 				orig_mode = mode_info->mode;
7717 				orig_flags = mode_info->mode_flag;
7718 				goto err;
7719 			}
7720 			mode_info->mode_flag = flags;
7721 			toggle_num++;
7722 			continue;
7723 		}
7724 		/* Active -> standby. */
7725 		if (mode == MLX5_FLOW_ENGINE_MODE_STANDBY) {
7726 			if (!LIST_EMPTY(&mode_info->hot_upgrade)) {
				DRV_LOG(ERR, "Cached rules exist");
7728 				orig_mode = mode_info->mode;
7729 				orig_flags = mode_info->mode_flag;
7730 				goto err;
7731 			}
7732 			mode_info->mode_flag = flags;
7733 			mode_info->mode = mode;
7734 			toggle_num++;
7735 		/* Standby -> active. */
7736 		} else if (mode == MLX5_FLOW_ENGINE_MODE_ACTIVE) {
7737 			if (LIST_EMPTY(&mode_info->hot_upgrade)) {
				DRV_LOG(INFO, "No cached rules exist");
7739 			} else {
7740 				if (mlx5_flow_cache_flow_toggle(dev, true)) {
7741 					orig_mode = mode_info->mode;
7742 					orig_flags = mode_info->mode_flag;
7743 					need_toggle = true;
7744 					goto err;
7745 				}
7746 			}
7747 			toggle_num++;
7748 		}
7749 	}
7750 	if (mode == MLX5_FLOW_ENGINE_MODE_ACTIVE) {
7751 		/* Clear cache flow rules. */
7752 		MLX5_ETH_FOREACH_DEV(port, NULL) {
7753 			priv = rte_eth_devices[port].data->dev_private;
7754 			mode_info = &priv->mode_info;
7755 			flow_info = LIST_FIRST(&mode_info->hot_upgrade);
7756 			while (flow_info) {
7757 				tmp_info = LIST_NEXT(flow_info, next);
7758 				LIST_REMOVE(flow_info, next);
7759 				mlx5_free(flow_info->actions);
7760 				mlx5_free(flow_info->items);
7761 				mlx5_free(flow_info);
7762 				flow_info = tmp_info;
7763 			}
7764 			MLX5_ASSERT(LIST_EMPTY(&mode_info->hot_upgrade));
7765 		}
7766 	}
7767 	return toggle_num;
7768 err:
7769 	/* Rollback all preceding successful ports. */
7770 	MLX5_ETH_FOREACH_DEV(port_id, NULL) {
7771 		if (port_id == port)
7772 			break;
7773 		priv = rte_eth_devices[port_id].data->dev_private;
7774 		mode_info = &priv->mode_info;
7775 		if (need_toggle && !LIST_EMPTY(&mode_info->hot_upgrade) &&
7776 		    mlx5_flow_cache_flow_toggle(dev, false))
7777 			return -EPERM;
7778 		mode_info->mode = orig_mode;
7779 		mode_info->mode_flag = orig_flags;
7780 	}
7781 	return -EINVAL;
7782 }
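
/*
 * Usage sketch for live migration (application side, hedged): the standby
 * process enters standby mode before taking over, and switches to active
 * once the old process has quiesced. The mode and flag values are the ones
 * declared in rte_pmd_mlx5.h and used above.
 */
#if 0
	/* In the new (standby) process, before inserting duplicate rules: */
	rte_pmd_mlx5_flow_engine_set_mode(MLX5_FLOW_ENGINE_MODE_STANDBY,
			MLX5_FLOW_ENGINE_FLAG_STANDBY_DUP_INGRESS);
	/* ... create rules; they are cached with shifted priority ... */
	/* After the old process stops, promote the cached rules: */
	rte_pmd_mlx5_flow_engine_set_mode(MLX5_FLOW_ENGINE_MODE_ACTIVE, 0);
#endif
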
7783 /**
7784  * Create a flow.
7785  *
7786  * @see rte_flow_create()
7787  * @see rte_flow_ops
7788  */
7789 struct rte_flow *
7790 mlx5_flow_create(struct rte_eth_dev *dev,
7791 		 const struct rte_flow_attr *attr,
7792 		 const struct rte_flow_item items[],
7793 		 const struct rte_flow_action actions[],
7794 		 struct rte_flow_error *error)
7795 {
7796 	struct mlx5_priv *priv = dev->data->dev_private;
7797 	struct rte_flow_attr *new_attr = (void *)(uintptr_t)attr;
7798 	uint32_t prio = attr->priority;
7799 	uint32_t flow_idx;
7800 
7801 	if (priv->sh->config.dv_flow_en == 2) {
7802 		rte_flow_error_set(error, ENOTSUP,
7803 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7804 			  NULL,
7805 			  "Flow non-Q creation not supported");
7806 		return NULL;
7807 	}
	/*
	 * If the device is not started yet, the application is not allowed
	 * to create a flow. PMD default flows and traffic control flows
	 * are not affected.
	 */
7813 	if (unlikely(!dev->data->dev_started)) {
7814 		DRV_LOG(DEBUG, "port %u is not started when "
7815 			"inserting a flow", dev->data->port_id);
7816 		rte_flow_error_set(error, ENODEV,
7817 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7818 				   NULL,
7819 				   "port not started");
7820 		return NULL;
7821 	}
7822 	if (unlikely(mlx5_need_cache_flow(priv, attr))) {
7823 		if (attr->transfer ||
7824 		    (attr->ingress &&
7825 		    !(priv->mode_info.mode_flag & MLX5_FLOW_ENGINE_FLAG_STANDBY_DUP_INGRESS)))
7826 			new_attr->priority += 1;
7827 	}
7828 	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_GEN, attr, items, actions, true, error);
7829 	if (!flow_idx)
7830 		return NULL;
7831 	if (unlikely(mlx5_need_cache_flow(priv, attr))) {
7832 		if (mlx5_flow_cache_flow_info(dev, attr, prio, items, actions, flow_idx)) {
7833 			flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN, flow_idx);
7834 			flow_idx = 0;
7835 		}
7836 	}
7837 	return (void *)(uintptr_t)flow_idx;
7838 }
7839 
7840 /**
7841  * Destroy a flow in a list.
7842  *
7843  * @param dev
7844  *   Pointer to Ethernet device.
7845  * @param[in] flow_idx
7846  *   Index of flow to destroy.
7847  */
7848 static void
7849 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7850 		  uint32_t flow_idx)
7851 {
7852 	struct mlx5_priv *priv = dev->data->dev_private;
7853 	struct rte_flow *flow = mlx5_ipool_get(priv->flows[type], flow_idx);
7854 
7855 	if (!flow)
7856 		return;
7857 	MLX5_ASSERT(flow->type == type);
7858 	/*
7859 	 * Update RX queue flags only if port is started, otherwise it is
7860 	 * already clean.
7861 	 */
7862 	if (dev->data->dev_started)
7863 		flow_rxq_flags_trim(dev, flow);
7864 	flow_drv_destroy(dev, flow);
7865 	if (flow->tunnel) {
7866 		struct mlx5_flow_tunnel *tunnel;
7867 
7868 		tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
7869 		RTE_VERIFY(tunnel);
7870 		if (!(__atomic_fetch_sub(&tunnel->refctn, 1, __ATOMIC_RELAXED) - 1))
7871 			mlx5_flow_tunnel_free(dev, tunnel);
7872 	}
7873 	flow_mreg_del_copy_action(dev, flow);
7874 	mlx5_ipool_free(priv->flows[type], flow_idx);
7875 }
7876 
7877 /**
7878  * Destroy all flows.
7879  *
7880  * @param dev
7881  *   Pointer to Ethernet device.
7882  * @param type
7883  *   Flow type to be flushed.
7884  * @param active
7885  *   If flushing is called actively.
7886  */
7887 void
7888 mlx5_flow_list_flush(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7889 		     bool active)
7890 {
7891 	struct mlx5_priv *priv = dev->data->dev_private;
7892 	uint32_t num_flushed = 0, fidx = 1;
7893 	struct rte_flow *flow;
7894 	struct mlx5_flow_engine_mode_info *mode_info = &priv->mode_info;
7895 	struct mlx5_dv_flow_info *flow_info;
7896 
7897 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
7898 	if (priv->sh->config.dv_flow_en == 2 &&
7899 	    type == MLX5_FLOW_TYPE_GEN) {
7900 		flow_hw_q_flow_flush(dev, NULL);
7901 		return;
7902 	}
7903 #endif
7904 
7905 	MLX5_IPOOL_FOREACH(priv->flows[type], fidx, flow) {
7906 		flow_list_destroy(dev, type, fidx);
7907 		if (unlikely(mlx5_need_cache_flow(priv, NULL) && type == MLX5_FLOW_TYPE_GEN)) {
7908 			flow_info = LIST_FIRST(&mode_info->hot_upgrade);
7909 			while (flow_info) {
				/* Remove the cached flow info. */
7911 				if (flow_info->flow_idx_low_prio == (uint32_t)(uintptr_t)fidx) {
7912 					MLX5_ASSERT(!flow_info->flow_idx_high_prio);
7913 					LIST_REMOVE(flow_info, next);
7914 					mlx5_free(flow_info->items);
7915 					mlx5_free(flow_info->actions);
7916 					mlx5_free(flow_info);
7917 					break;
7918 				}
7919 				flow_info = LIST_NEXT(flow_info, next);
7920 			}
7921 		}
7922 		num_flushed++;
7923 	}
7924 	if (active) {
7925 		DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
7926 			dev->data->port_id, num_flushed);
7927 	}
7928 }
7929 
7930 /**
7931  * Stop all default actions for flows.
7932  *
7933  * @param dev
7934  *   Pointer to Ethernet device.
7935  */
7936 void
7937 mlx5_flow_stop_default(struct rte_eth_dev *dev)
7938 {
7939 	flow_mreg_del_default_copy_action(dev);
7940 	flow_rxq_flags_clear(dev);
7941 }
7942 
7943 /**
7944  * Set rxq flag.
7945  *
7946  * @param[in] dev
7947  *   Pointer to the rte_eth_dev structure.
7948  * @param[in] enable
7949  *   Flag to enable or not.
7950  */
7951 void
7952 flow_hw_rxq_flag_set(struct rte_eth_dev *dev, bool enable)
7953 {
7954 	struct mlx5_priv *priv = dev->data->dev_private;
7955 	unsigned int i;
7956 
7957 	if ((!priv->mark_enabled && !enable) ||
7958 	    (priv->mark_enabled && enable))
7959 		return;
7960 	for (i = 0; i < priv->rxqs_n; ++i) {
7961 		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, i);
7962 
7963 		/* With RXQ start/stop feature, RXQ might be stopped. */
7964 		if (!rxq_ctrl)
7965 			continue;
7966 		rxq_ctrl->rxq.mark = enable;
7967 	}
7968 	priv->mark_enabled = enable;
7969 }
7970 
7971 /**
7972  * Start all default actions for flows.
7973  *
7974  * @param dev
7975  *   Pointer to Ethernet device.
7976  * @return
7977  *   0 on success, a negative errno value otherwise and rte_errno is set.
7978  */
7979 int
7980 mlx5_flow_start_default(struct rte_eth_dev *dev)
7981 {
7982 	struct rte_flow_error error;
7983 
7984 	/* Make sure default copy action (reg_c[0] -> reg_b) is created. */
7985 	return flow_mreg_add_default_copy_action(dev, &error);
7986 }
7987 
7988 /**
7989  * Release key of thread specific flow workspace data.
7990  */
7991 void
7992 flow_release_workspace(void *data)
7993 {
7994 	struct mlx5_flow_workspace *wks = data;
7995 	struct mlx5_flow_workspace *next;
7996 
7997 	while (wks) {
7998 		next = wks->next;
7999 		free(wks->rss_desc.queue);
8000 		free(wks);
8001 		wks = next;
8002 	}
8003 }
8004 
8005 /**
8006  * Get thread specific current flow workspace.
8007  *
8008  * @return pointer to thread specific flow workspace data, NULL on error.
8009  */
8010 struct mlx5_flow_workspace*
8011 mlx5_flow_get_thread_workspace(void)
8012 {
8013 	struct mlx5_flow_workspace *data;
8014 
8015 	data = mlx5_flow_os_get_specific_workspace();
8016 	MLX5_ASSERT(data && data->inuse);
8017 	if (!data || !data->inuse)
8018 		DRV_LOG(ERR, "flow workspace not initialized.");
8019 	return data;
8020 }
8021 
8022 /**
8023  * Allocate and init new flow workspace.
8024  *
8025  * @return pointer to flow workspace data, NULL on error.
8026  */
8027 static struct mlx5_flow_workspace*
8028 flow_alloc_thread_workspace(void)
8029 {
8030 	struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
8031 
8032 	if (!data) {
8033 		DRV_LOG(ERR, "Failed to allocate flow workspace "
8034 			"memory.");
8035 		return NULL;
8036 	}
8037 	data->rss_desc.queue = calloc(1,
8038 			sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
8039 	if (!data->rss_desc.queue)
8040 		goto err;
8041 	data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
8042 	return data;
8043 err:
8044 	free(data->rss_desc.queue);
8045 	free(data);
8046 	return NULL;
8047 }
8048 
8049 /**
8050  * Get new thread specific flow workspace.
8051  *
 * If the current workspace is in use, create a new one and set it as current.
8053  *
8054  * @return pointer to thread specific flow workspace data, NULL on error.
8055  */
8056 struct mlx5_flow_workspace*
8057 mlx5_flow_push_thread_workspace(void)
8058 {
8059 	struct mlx5_flow_workspace *curr;
8060 	struct mlx5_flow_workspace *data;
8061 
8062 	curr = mlx5_flow_os_get_specific_workspace();
8063 	if (!curr) {
8064 		data = flow_alloc_thread_workspace();
8065 		if (!data)
8066 			return NULL;
8067 	} else if (!curr->inuse) {
8068 		data = curr;
8069 	} else if (curr->next) {
8070 		data = curr->next;
8071 	} else {
8072 		data = flow_alloc_thread_workspace();
8073 		if (!data)
8074 			return NULL;
8075 		curr->next = data;
8076 		data->prev = curr;
8077 	}
8078 	data->inuse = 1;
8079 	data->flow_idx = 0;
8080 	/* Set as current workspace */
8081 	if (mlx5_flow_os_set_specific_workspace(data))
8082 		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
8083 	return data;
8084 }
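
/*
 * Internal usage pattern, as a minimal sketch: every push must be paired
 * with a pop on all return paths, as flow_list_create() above does.
 */
#if 0
	struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();

	if (!wks)
		return 0;
	/* ... use wks->rss_desc, wks->flows, ... */
	mlx5_flow_pop_thread_workspace();
#endif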
8085 
8086 /**
8087  * Close current thread specific flow workspace.
8088  *
 * If a previous workspace is available, set it as current.
8090  *
8091  * @return pointer to thread specific flow workspace data, NULL on error.
8092  */
8093 void
8094 mlx5_flow_pop_thread_workspace(void)
8095 {
8096 	struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
8097 
8098 	if (!data)
8099 		return;
8100 	if (!data->inuse) {
8101 		DRV_LOG(ERR, "Failed to close unused flow workspace.");
8102 		return;
8103 	}
8104 	data->inuse = 0;
8105 	if (!data->prev)
8106 		return;
8107 	if (mlx5_flow_os_set_specific_workspace(data->prev))
8108 		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
8109 }
8110 
8111 /**
8112  * Verify that the flow list is empty.
8113  *
8114  * @param dev
8115  *  Pointer to Ethernet device.
8116  *
8117  * @return The number of flows not released.
8118  */
8119 int
8120 mlx5_flow_verify(struct rte_eth_dev *dev)
8121 {
8122 	struct mlx5_priv *priv = dev->data->dev_private;
8123 	struct rte_flow *flow;
8124 	uint32_t idx = 0;
8125 	int ret = 0, i;
8126 
8127 	for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
8128 		MLX5_IPOOL_FOREACH(priv->flows[i], idx, flow) {
8129 			DRV_LOG(DEBUG, "port %u flow %p still referenced",
8130 				dev->data->port_id, (void *)flow);
8131 			ret++;
8132 		}
8133 	}
8134 	return ret;
8135 }
8136 
8137 /**
8138  * Enable default hairpin egress flow.
8139  *
8140  * @param dev
8141  *   Pointer to Ethernet device.
8142  * @param sq_num
8143  *   The SQ hw number.
8144  *
8145  * @return
8146  *   0 on success, a negative errno value otherwise and rte_errno is set.
8147  */
8148 int
8149 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
8150 			    uint32_t sq_num)
8151 {
8152 	const struct rte_flow_attr attr = {
8153 		.egress = 1,
8154 		.priority = 0,
8155 	};
8156 	struct mlx5_rte_flow_item_sq queue_spec = {
8157 		.queue = sq_num,
8158 	};
8159 	struct mlx5_rte_flow_item_sq queue_mask = {
8160 		.queue = UINT32_MAX,
8161 	};
8162 	struct rte_flow_item items[] = {
8163 		{
8164 			.type = (enum rte_flow_item_type)
8165 				MLX5_RTE_FLOW_ITEM_TYPE_SQ,
8166 			.spec = &queue_spec,
8167 			.last = NULL,
8168 			.mask = &queue_mask,
8169 		},
8170 		{
8171 			.type = RTE_FLOW_ITEM_TYPE_END,
8172 		},
8173 	};
8174 	struct rte_flow_action_jump jump = {
8175 		.group = MLX5_HAIRPIN_TX_TABLE,
8176 	};
8177 	struct rte_flow_action actions[2];
8178 	uint32_t flow_idx;
8179 	struct rte_flow_error error;
8180 
8181 	actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
8182 	actions[0].conf = &jump;
8183 	actions[1].type = RTE_FLOW_ACTION_TYPE_END;
8184 	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
8185 				    &attr, items, actions, false, &error);
8186 	if (!flow_idx) {
8187 		DRV_LOG(DEBUG,
8188 			"Failed to create ctrl flow: rte_errno(%d),"
8189 			" type(%d), message(%s)",
8190 			rte_errno, error.type,
8191 			error.message ? error.message : " (no stated reason)");
8192 		return -rte_errno;
8193 	}
8194 	return 0;
8195 }
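
/*
 * Editor's note: the rule built above corresponds, in testpmd-like notation
 * (sketch only; the SQ item is PMD-internal and has no public syntax), to:
 *
 *   flow create <port> egress priority 0
 *       pattern <mlx5 internal SQ == sq_num> / end
 *       actions jump group MLX5_HAIRPIN_TX_TABLE / end
 *
 * i.e. all packets sent from the given hairpin SQ are redirected to the
 * hairpin TX table.
 */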
8196 
8197 /**
8198  * Enable a control flow configured from the control plane.
8199  *
8200  * @param dev
8201  *   Pointer to Ethernet device.
8202  * @param eth_spec
8203  *   An Ethernet flow spec to apply.
8204  * @param eth_mask
8205  *   An Ethernet flow mask to apply.
8206  * @param vlan_spec
8207  *   A VLAN flow spec to apply.
8208  * @param vlan_mask
8209  *   A VLAN flow mask to apply.
8210  *
8211  * @return
8212  *   0 on success, a negative errno value otherwise and rte_errno is set.
8213  */
8214 int
8215 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
8216 		    struct rte_flow_item_eth *eth_spec,
8217 		    struct rte_flow_item_eth *eth_mask,
8218 		    struct rte_flow_item_vlan *vlan_spec,
8219 		    struct rte_flow_item_vlan *vlan_mask)
8220 {
8221 	struct mlx5_priv *priv = dev->data->dev_private;
8222 	const struct rte_flow_attr attr = {
8223 		.ingress = 1,
8224 		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
8225 	};
8226 	struct rte_flow_item items[] = {
8227 		{
8228 			.type = RTE_FLOW_ITEM_TYPE_ETH,
8229 			.spec = eth_spec,
8230 			.last = NULL,
8231 			.mask = eth_mask,
8232 		},
8233 		{
8234 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
8235 					      RTE_FLOW_ITEM_TYPE_END,
8236 			.spec = vlan_spec,
8237 			.last = NULL,
8238 			.mask = vlan_mask,
8239 		},
8240 		{
8241 			.type = RTE_FLOW_ITEM_TYPE_END,
8242 		},
8243 	};
8244 	uint16_t queue[priv->reta_idx_n];
8245 	struct rte_flow_action_rss action_rss = {
8246 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
8247 		.level = 0,
8248 		.types = priv->rss_conf.rss_hf,
8249 		.key_len = priv->rss_conf.rss_key_len,
8250 		.queue_num = priv->reta_idx_n,
8251 		.key = priv->rss_conf.rss_key,
8252 		.queue = queue,
8253 	};
8254 	struct rte_flow_action actions[] = {
8255 		{
8256 			.type = RTE_FLOW_ACTION_TYPE_RSS,
8257 			.conf = &action_rss,
8258 		},
8259 		{
8260 			.type = RTE_FLOW_ACTION_TYPE_END,
8261 		},
8262 	};
8263 	uint32_t flow_idx;
8264 	struct rte_flow_error error;
8265 	unsigned int i;
8266 
8267 	if (!priv->reta_idx_n || !priv->rxqs_n)
8268 		return 0;
8270 	if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
8271 		action_rss.types = 0;
8272 	for (i = 0; i != priv->reta_idx_n; ++i)
8273 		queue[i] = (*priv->reta_idx)[i];
8274 	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
8275 				    &attr, items, actions, false, &error);
8276 	if (!flow_idx)
8277 		return -rte_errno;
8278 	return 0;
8279 }
8280 
8281 /**
8282  * Enable a control flow configured from the control plane.
8283  *
8284  * @param dev
8285  *   Pointer to Ethernet device.
8286  * @param eth_spec
8287  *   An Ethernet flow spec to apply.
8288  * @param eth_mask
8289  *   An Ethernet flow mask to apply.
8290  *
8291  * @return
8292  *   0 on success, a negative errno value otherwise and rte_errno is set.
8293  */
8294 int
8295 mlx5_ctrl_flow(struct rte_eth_dev *dev,
8296 	       struct rte_flow_item_eth *eth_spec,
8297 	       struct rte_flow_item_eth *eth_mask)
8298 {
8299 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
8300 }
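
/*
 * Editor's usage sketch (illustrative, values assumed): this is the kind of
 * call the PMD start-up path makes to install a broadcast control flow
 * through the wrapper above.
 *
 * @code
 * struct rte_flow_item_eth bcast = {
 *	.hdr.dst_addr.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 * };
 * struct rte_flow_item_eth bcast_mask = {
 *	.hdr.dst_addr.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 * };
 *
 * if (mlx5_ctrl_flow(dev, &bcast, &bcast_mask))
 *	DRV_LOG(WARNING, "port %u cannot install broadcast control flow",
 *		dev->data->port_id);
 * @endcode
 */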
8301 
8302 /**
8303  * Create a default miss flow rule matching LACP traffic.
8304  *
8305  * @param dev
8306  *   Pointer to Ethernet device.
8307  *
8310  * @return
8311  *   0 on success, a negative errno value otherwise and rte_errno is set.
8312  */
8313 int
8314 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
8315 {
8316 	/*
8317 	 * The LACP matching uses only the Ethernet type (0x8809) since a
8318 	 * multicast dst MAC causes the kernel to deprioritize this flow.
8319 	 */
8320 	static const struct rte_flow_item_eth lacp_spec = {
8321 		.hdr.ether_type = RTE_BE16(0x8809),
8322 	};
8323 	static const struct rte_flow_item_eth lacp_mask = {
8324 		.hdr.ether_type = 0xffff,
8325 	};
8326 	const struct rte_flow_attr attr = {
8327 		.ingress = 1,
8328 	};
8329 	struct rte_flow_item items[] = {
8330 		{
8331 			.type = RTE_FLOW_ITEM_TYPE_ETH,
8332 			.spec = &lacp_spec,
8333 			.mask = &lacp_mask,
8334 		},
8335 		{
8336 			.type = RTE_FLOW_ITEM_TYPE_END,
8337 		},
8338 	};
8339 	struct rte_flow_action actions[] = {
8340 		{
8341 			.type = (enum rte_flow_action_type)
8342 				MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
8343 		},
8344 		{
8345 			.type = RTE_FLOW_ACTION_TYPE_END,
8346 		},
8347 	};
8348 	struct rte_flow_error error;
8349 	uint32_t flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
8350 					&attr, items, actions,
8351 					false, &error);
8352 
8353 	if (!flow_idx)
8354 		return -rte_errno;
8355 	return 0;
8356 }
8357 
8358 /**
8359  * Destroy a flow.
8360  *
8361  * @see rte_flow_destroy()
8362  * @see rte_flow_ops
8363  */
8364 int
8365 mlx5_flow_destroy(struct rte_eth_dev *dev,
8366 		  struct rte_flow *flow,
8367 		  struct rte_flow_error *error)
8368 {
8369 	struct mlx5_priv *priv = dev->data->dev_private;
8370 	struct mlx5_flow_engine_mode_info *mode_info = &priv->mode_info;
8371 	struct mlx5_dv_flow_info *flow_info;
8372 
8373 	if (priv->sh->config.dv_flow_en == 2)
8374 		return rte_flow_error_set(error, ENOTSUP,
8375 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8376 			  NULL,
8377 			  "Flow non-Q destruction not supported");
8378 	flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN,
8379 				(uintptr_t)(void *)flow);
8380 	if (unlikely(mlx5_need_cache_flow(priv, NULL))) {
8381 		flow_info = LIST_FIRST(&mode_info->hot_upgrade);
8382 		while (flow_info) {
8383 			/* Remove the cached flow info. */
8384 			if (flow_info->flow_idx_low_prio == (uint32_t)(uintptr_t)flow) {
8385 				MLX5_ASSERT(!flow_info->flow_idx_high_prio);
8386 				LIST_REMOVE(flow_info, next);
8387 				mlx5_free(flow_info->items);
8388 				mlx5_free(flow_info->actions);
8389 				mlx5_free(flow_info);
8390 				break;
8391 			}
8392 			flow_info = LIST_NEXT(flow_info, next);
8393 		}
8394 	}
8395 	return 0;
8396 }
8397 
8398 /**
8399  * Destroy all flows.
8400  *
8401  * @see rte_flow_flush()
8402  * @see rte_flow_ops
8403  */
8404 int
8405 mlx5_flow_flush(struct rte_eth_dev *dev,
8406 		struct rte_flow_error *error __rte_unused)
8407 {
8408 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, false);
8409 	return 0;
8410 }
8411 
8412 /**
8413  * Isolated mode.
8414  *
8415  * @see rte_flow_isolate()
8416  * @see rte_flow_ops
8417  */
8418 int
8419 mlx5_flow_isolate(struct rte_eth_dev *dev,
8420 		  int enable,
8421 		  struct rte_flow_error *error)
8422 {
8423 	struct mlx5_priv *priv = dev->data->dev_private;
8424 
8425 	if (dev->data->dev_started) {
8426 		rte_flow_error_set(error, EBUSY,
8427 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8428 				   NULL,
8429 				   "port must be stopped first");
8430 		return -rte_errno;
8431 	}
8432 	if (!enable && !priv->sh->config.repr_matching)
8433 		return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
8434 					  "isolated mode cannot be disabled when "
8435 					  "representor matching is disabled");
8436 	priv->isolated = !!enable;
8437 	if (enable)
8438 		dev->dev_ops = &mlx5_dev_ops_isolate;
8439 	else
8440 		dev->dev_ops = &mlx5_dev_ops;
8441 
8442 	dev->rx_descriptor_status = mlx5_rx_descriptor_status;
8443 	dev->tx_descriptor_status = mlx5_tx_descriptor_status;
8444 
8445 	return 0;
8446 }
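
/*
 * Editor's application-side sketch (illustrative; port_id is assumed): the
 * EBUSY check above means isolated mode must be requested while the port is
 * stopped, typically right after rte_eth_dev_configure().
 *
 * @code
 * struct rte_flow_error err;
 *
 * if (rte_flow_isolate(port_id, 1, &err))
 *	printf("cannot enable isolated mode: %s\n",
 *	       err.message ? err.message : "(no stated reason)");
 * @endcode
 */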
8447 
8448 /**
8449  * Query a flow.
8450  *
8451  * @see rte_flow_query()
8452  * @see rte_flow_ops
8453  */
8454 static int
8455 flow_drv_query(struct rte_eth_dev *dev,
8456 	       struct rte_flow *eflow,
8457 	       const struct rte_flow_action *actions,
8458 	       void *data,
8459 	       struct rte_flow_error *error)
8460 {
8461 	struct mlx5_priv *priv = dev->data->dev_private;
8462 	const struct mlx5_flow_driver_ops *fops;
8463 	struct rte_flow *flow = NULL;
8464 	enum mlx5_flow_drv_type ftype = MLX5_FLOW_TYPE_MIN;
8465 
8466 	if (priv->sh->config.dv_flow_en == 2) {
8467 #ifdef HAVE_MLX5_HWS_SUPPORT
8468 		flow = eflow;
8469 		ftype = MLX5_FLOW_TYPE_HW;
8470 #endif
8471 	} else {
8472 		flow = (struct rte_flow *)mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
8473 				(uintptr_t)(void *)eflow);
8474 	}
8475 	if (!flow) {
8476 		return rte_flow_error_set(error, ENOENT,
8477 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8478 			  NULL,
8479 			  "invalid flow handle");
8480 	}
8481 	if (ftype == MLX5_FLOW_TYPE_MIN)
8482 		ftype = flow->drv_type;
8483 	MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
8484 	fops = flow_get_drv_ops(ftype);
8485 
8486 	return fops->query(dev, flow, actions, data, error);
8487 }
8488 
8489 /**
8490  * Query a flow.
8491  *
8492  * @see rte_flow_query()
8493  * @see rte_flow_ops
8494  */
8495 int
8496 mlx5_flow_query(struct rte_eth_dev *dev,
8497 		struct rte_flow *flow,
8498 		const struct rte_flow_action *actions,
8499 		void *data,
8500 		struct rte_flow_error *error)
8501 {
8502 	int ret;
8503 
8504 	ret = flow_drv_query(dev, flow, actions, data,
8505 			     error);
8506 	if (ret < 0)
8507 		return ret;
8508 	return 0;
8509 }
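
/*
 * Editor's usage sketch (illustrative; port_id and flow are assumed): a
 * COUNT action query through the public API lands in the dispatcher above.
 *
 * @code
 * struct rte_flow_query_count count = { .reset = 1 };
 * struct rte_flow_action action = { .type = RTE_FLOW_ACTION_TYPE_COUNT };
 * struct rte_flow_error err;
 *
 * if (!rte_flow_query(port_id, flow, &action, &count, &err))
 *	printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *	       count.hits, count.bytes);
 * @endcode
 */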
8510 
8511 /**
8512  * Get rte_flow callbacks.
8513  *
8514  * @param dev
8515  *   Pointer to Ethernet device structure.
8516  * @param ops
8517  *   Pointer to operation-specific structure.
8518  *
8519  * @return 0
8520  */
8521 int
8522 mlx5_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
8523 		  const struct rte_flow_ops **ops)
8524 {
8525 	*ops = &mlx5_flow_ops;
8526 	return 0;
8527 }
8528 
8529 /**
8530  * Validate meter policy actions.
8531  * Dispatcher for action type specific validation.
8532  *
8533  * @param[in] dev
8534  *   Pointer to the Ethernet device structure.
8535  * @param[in] action
8536  *   The meter policy action object to validate.
8537  * @param[in] attr
8538  *   Attributes of flow to determine steering domain.
8539  * @param[out] is_rss
8540  *   Is RSS or not.
8541  * @param[out] domain_bitmap
8542  *   Domain bitmap.
8543  * @param[out] policy_mode
8544  *   Meter policy mode.
8545  * @param[out] error
8546  *   Perform verbose error reporting if not NULL. Initialized in case of
8547  *   error only.
8548  *
8549  * @return
8550  *   0 on success, otherwise negative errno value.
8551  */
8552 int
8553 mlx5_flow_validate_mtr_acts(struct rte_eth_dev *dev,
8554 			const struct rte_flow_action *actions[RTE_COLORS],
8555 			struct rte_flow_attr *attr,
8556 			bool *is_rss,
8557 			uint8_t *domain_bitmap,
8558 			uint8_t *policy_mode,
8559 			struct rte_mtr_error *error)
8560 {
8561 	const struct mlx5_flow_driver_ops *fops;
8562 
8563 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8564 	return fops->validate_mtr_acts(dev, actions, attr, is_rss,
8565 				       domain_bitmap, policy_mode, error);
8566 }
8567 
8568 /**
8569  * Destroy the meter policy actions.
8570  *
8571  * @param[in] dev
8572  *   Pointer to Ethernet device.
8573  * @param[in] mtr_policy
8574  *   Meter policy struct.
8575  */
8576 void
8577 mlx5_flow_destroy_mtr_acts(struct rte_eth_dev *dev,
8578 		      struct mlx5_flow_meter_policy *mtr_policy)
8579 {
8580 	const struct mlx5_flow_driver_ops *fops;
8581 
8582 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8583 	fops->destroy_mtr_acts(dev, mtr_policy);
8584 }
8585 
8586 /**
8587  * Create policy action, lock free,
8588  * (mutex should be acquired by caller).
8589  * Dispatcher for action type specific call.
8590  *
8591  * @param[in] dev
8592  *   Pointer to the Ethernet device structure.
8593  * @param[in] mtr_policy
8594  *   Meter policy struct.
8595  * @param[in] action
8596  *   Action specification used to create meter actions.
8597  * @param[in] attr
8598  *   Flow rule attributes.
8599  * @param[out] error
8600  *   Perform verbose error reporting if not NULL. Initialized in case of
8601  *   error only.
8602  *
8603  * @return
8604  *   0 on success, otherwise negative errno value.
8605  */
8606 int
8607 mlx5_flow_create_mtr_acts(struct rte_eth_dev *dev,
8608 		      struct mlx5_flow_meter_policy *mtr_policy,
8609 		      const struct rte_flow_action *actions[RTE_COLORS],
8610 		      struct rte_flow_attr *attr,
8611 		      struct rte_mtr_error *error)
8612 {
8613 	const struct mlx5_flow_driver_ops *fops;
8614 
8615 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8616 	return fops->create_mtr_acts(dev, mtr_policy, actions, attr, error);
8617 }
8618 
8619 /**
8620  * Create policy rules, lock free,
8621  * (mutex should be acquired by caller).
8622  * Dispatcher for action type specific call.
8623  *
8624  * @param[in] dev
8625  *   Pointer to the Ethernet device structure.
8626  * @param[in] mtr_policy
8627  *   Meter policy struct.
8628  *
8629  * @return
8630  *   0 on success, -1 otherwise.
8631  */
8632 int
8633 mlx5_flow_create_policy_rules(struct rte_eth_dev *dev,
8634 			     struct mlx5_flow_meter_policy *mtr_policy)
8635 {
8636 	const struct mlx5_flow_driver_ops *fops;
8637 
8638 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8639 	return fops->create_policy_rules(dev, mtr_policy);
8640 }
8641 
8642 /**
8643  * Destroy policy rules, lock free,
8644  * (mutex should be acquired by caller).
8645  * Dispatcher for action type specific call.
8646  *
8647  * @param[in] dev
8648  *   Pointer to the Ethernet device structure.
8649  * @param[in] mtr_policy
8650  *   Meter policy struct.
8651  */
8652 void
8653 mlx5_flow_destroy_policy_rules(struct rte_eth_dev *dev,
8654 			     struct mlx5_flow_meter_policy *mtr_policy)
8655 {
8656 	const struct mlx5_flow_driver_ops *fops;
8657 
8658 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8659 	fops->destroy_policy_rules(dev, mtr_policy);
8660 }
8661 
8662 /**
8663  * Destroy the default policy table set.
8664  *
8665  * @param[in] dev
8666  *   Pointer to Ethernet device.
8667  */
8668 void
8669 mlx5_flow_destroy_def_policy(struct rte_eth_dev *dev)
8670 {
8671 	const struct mlx5_flow_driver_ops *fops;
8672 
8673 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8674 	fops->destroy_def_policy(dev);
8675 }
8676 
8677 /**
8678  * Create the default policy table set.
8679  *
8680  * @param[in] dev
8681  *   Pointer to Ethernet device.
8682  *
8683  * @return
8684  *   0 on success, -1 otherwise.
8685  */
8686 int
8687 mlx5_flow_create_def_policy(struct rte_eth_dev *dev)
8688 {
8689 	const struct mlx5_flow_driver_ops *fops;
8690 
8691 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8692 	return fops->create_def_policy(dev);
8693 }
8694 
8695 /**
8696  * Create the needed meter and suffix tables.
8697  *
8698  * @param[in] dev
8699  *   Pointer to Ethernet device.
8700  *
8701  * @return
8702  *   0 on success, -1 otherwise.
8703  */
8704 int
8705 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
8706 			struct mlx5_flow_meter_info *fm,
8707 			uint32_t mtr_idx,
8708 			uint8_t domain_bitmap)
8709 {
8710 	const struct mlx5_flow_driver_ops *fops;
8711 
8712 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8713 	return fops->create_mtr_tbls(dev, fm, mtr_idx, domain_bitmap);
8714 }
8715 
8716 /**
8717  * Destroy the meter table set.
8718  *
8719  * @param[in] dev
8720  *   Pointer to Ethernet device.
8721  * @param[in] fm
8722  *   Pointer to the flow meter info.
8723  */
8724 void
8725 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
8726 			   struct mlx5_flow_meter_info *fm)
8727 {
8728 	const struct mlx5_flow_driver_ops *fops;
8729 
8730 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8731 	fops->destroy_mtr_tbls(dev, fm);
8732 }
8733 
8734 /**
8735  * Destroy the global meter drop table.
8736  *
8737  * @param[in] dev
8738  *   Pointer to Ethernet device.
8739  */
8740 void
8741 mlx5_flow_destroy_mtr_drop_tbls(struct rte_eth_dev *dev)
8742 {
8743 	const struct mlx5_flow_driver_ops *fops;
8744 
8745 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8746 	fops->destroy_mtr_drop_tbls(dev);
8747 }
8748 
8749 /**
8750  * Destroy the sub policy table with RX queue.
8751  *
8752  * @param[in] dev
8753  *   Pointer to Ethernet device.
8754  * @param[in] mtr_policy
8755  *   Pointer to meter policy table.
8756  */
8757 void
8758 mlx5_flow_destroy_sub_policy_with_rxq(struct rte_eth_dev *dev,
8759 		struct mlx5_flow_meter_policy *mtr_policy)
8760 {
8761 	const struct mlx5_flow_driver_ops *fops;
8762 
8763 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8764 	fops->destroy_sub_policy_with_rxq(dev, mtr_policy);
8765 }
8766 
8767 /**
8768  * Allocate the needed ASO flow meter id.
8769  *
8770  * @param[in] dev
8771  *   Pointer to Ethernet device.
8772  *
8773  * @return
8774  *   Index to ASO flow meter on success, 0 otherwise.
8775  */
8776 uint32_t
8777 mlx5_flow_mtr_alloc(struct rte_eth_dev *dev)
8778 {
8779 	const struct mlx5_flow_driver_ops *fops;
8780 
8781 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8782 	return fops->create_meter(dev);
8783 }
8784 
8785 /**
8786  * Free the ASO flow meter id.
8787  *
8788  * @param[in] dev
8789  *   Pointer to Ethernet device.
8790  * @param[in] mtr_idx
8791  *  Index to the ASO flow meter to be freed.
8795  */
8796 void
8797 mlx5_flow_mtr_free(struct rte_eth_dev *dev, uint32_t mtr_idx)
8798 {
8799 	const struct mlx5_flow_driver_ops *fops;
8800 
8801 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8802 	fops->free_meter(dev, mtr_idx);
8803 }
8804 
8805 /**
8806  * Allocate a counter.
8807  *
8808  * @param[in] dev
8809  *   Pointer to Ethernet device structure.
8810  *
8811  * @return
8812  *   Index to the allocated counter on success, 0 otherwise.
8813  */
8814 uint32_t
8815 mlx5_counter_alloc(struct rte_eth_dev *dev)
8816 {
8817 	struct rte_flow_attr attr = { .transfer = 0 };
8818 
8819 	return flow_get_drv_ops(flow_get_drv_type(dev, &attr))->counter_alloc
8820 		(dev);
8821 }
8822 
8823 /**
8824  * Free a counter.
8825  *
8826  * @param[in] dev
8827  *   Pointer to Ethernet device structure.
8828  * @param[in] cnt
8829  *   Index to the counter to be freed.
8830  */
8831 void
8832 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
8833 {
8834 	struct rte_flow_attr attr = { .transfer = 0 };
8835 
8836 	flow_get_drv_ops(flow_get_drv_type(dev, &attr))->counter_free(dev, cnt);
8837 }
8838 
8839 /**
8840  * Query counter statistics.
8841  *
8842  * @param[in] dev
8843  *   Pointer to Ethernet device structure.
8844  * @param[in] cnt
8845  *   Index to counter to query.
8846  * @param[in] clear
8847  *   Set to clear counter statistics.
8848  * @param[out] pkts
8849  *   The counter hits packets number to save.
8850  * @param[out] bytes
8851  *   The counter hits bytes number to save.
8852  *
8853  * @return
8854  *   0 on success, a negative errno value otherwise.
8855  */
8856 int
8857 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
8858 		   bool clear, uint64_t *pkts, uint64_t *bytes, void **action)
8859 {
8860 	struct rte_flow_attr attr = { .transfer = 0 };
8861 
8862 	return flow_get_drv_ops(flow_get_drv_type(dev, &attr))->counter_query
8863 		(dev, cnt, clear, pkts, bytes, action);
8864 }
8865 
8866 /**
8867  * Get information about HWS pre-configurable resources.
8868  *
8869  * @param[in] dev
8870  *   Pointer to the rte_eth_dev structure.
8871  * @param[out] port_info
8872  *   Pointer to port information.
8873  * @param[out] queue_info
8874  *   Pointer to queue information.
8875  * @param[out] error
8876  *   Pointer to error structure.
8877  *
8878  * @return
8879  *   0 on success, a negative errno value otherwise and rte_errno is set.
8880  */
8881 static int
8882 mlx5_flow_info_get(struct rte_eth_dev *dev,
8883 		   struct rte_flow_port_info *port_info,
8884 		   struct rte_flow_queue_info *queue_info,
8885 		   struct rte_flow_error *error)
8886 {
8887 	const struct mlx5_flow_driver_ops *fops;
8888 	struct rte_flow_attr attr = {0};
8889 
8890 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
8891 		return rte_flow_error_set(error, ENOTSUP,
8892 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8893 				NULL,
8894 				"info get with incorrect steering mode");
8895 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8896 	return fops->info_get(dev, port_info, queue_info, error);
8897 }
8898 
8899 /**
8900  * Configure port HWS resources.
8901  *
8902  * @param[in] dev
8903  *   Pointer to the rte_eth_dev structure.
8904  * @param[in] port_attr
8905  *   Port configuration attributes.
8906  * @param[in] nb_queue
8907  *   Number of queue.
8908  * @param[in] queue_attr
8909  *   Array that holds attributes for each flow queue.
8910  * @param[out] error
8911  *   Pointer to error structure.
8912  *
8913  * @return
8914  *   0 on success, a negative errno value otherwise and rte_errno is set.
8915  */
8916 static int
8917 mlx5_flow_port_configure(struct rte_eth_dev *dev,
8918 			 const struct rte_flow_port_attr *port_attr,
8919 			 uint16_t nb_queue,
8920 			 const struct rte_flow_queue_attr *queue_attr[],
8921 			 struct rte_flow_error *error)
8922 {
8923 	const struct mlx5_flow_driver_ops *fops;
8924 	struct rte_flow_attr attr = {0};
8925 
8926 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
8927 		return rte_flow_error_set(error, ENOTSUP,
8928 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8929 				NULL,
8930 				"port configure with incorrect steering mode");
8931 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8932 	return fops->configure(dev, port_attr, nb_queue, queue_attr, error);
8933 }
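
/*
 * Editor's usage sketch (illustrative; all sizes are assumptions): the
 * public entry point that reaches this handler pre-allocates HWS resources
 * and flow queues before any template is created.
 *
 * @code
 * const struct rte_flow_port_attr port_attr = {
 *	.nb_counters = 1 << 16,
 * };
 * const struct rte_flow_queue_attr qattr = { .size = 1024 };
 * const struct rte_flow_queue_attr *queue_attr[] = { &qattr };
 * struct rte_flow_error err;
 *
 * if (rte_flow_configure(port_id, &port_attr, 1, queue_attr, &err))
 *	printf("HWS configure failed: %s\n",
 *	       err.message ? err.message : "(no stated reason)");
 * @endcode
 */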
8934 
8935 /**
8936  * Validate item template.
8937  *
8938  * @param[in] dev
8939  *   Pointer to the rte_eth_dev structure.
8940  * @param[in] attr
8941  *   Pointer to the item template attributes.
8942  * @param[in] items
8943  *   The template item pattern.
8944  * @param[out] error
8945  *   Pointer to error structure.
8946  *
8947  * @return
8948  *   0 on success, a negative errno value otherwise and rte_errno is set.
8949  */
8950 int
8951 mlx5_flow_pattern_validate(struct rte_eth_dev *dev,
8952 		const struct rte_flow_pattern_template_attr *attr,
8953 		const struct rte_flow_item items[],
8954 		struct rte_flow_error *error)
8955 {
8956 	const struct mlx5_flow_driver_ops *fops;
8957 	struct rte_flow_attr fattr = {0};
8958 
8959 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
8960 		rte_flow_error_set(error, ENOTSUP,
8961 			RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
8962 			"pattern validate with incorrect steering mode");
8963 		return -ENOTSUP;
8964 	}
8965 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8966 	return fops->pattern_validate(dev, attr, items, error);
8967 }
8968 
8969 /**
8970  * Create flow item template.
8971  *
8972  * @param[in] dev
8973  *   Pointer to the rte_eth_dev structure.
8974  * @param[in] attr
8975  *   Pointer to the item template attributes.
8976  * @param[in] items
8977  *   The template item pattern.
8978  * @param[out] error
8979  *   Pointer to error structure.
8980  *
8981  * @return
8982  *   0 on success, a negative errno value otherwise and rte_errno is set.
8983  */
8984 static struct rte_flow_pattern_template *
8985 mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
8986 		const struct rte_flow_pattern_template_attr *attr,
8987 		const struct rte_flow_item items[],
8988 		struct rte_flow_error *error)
8989 {
8990 	const struct mlx5_flow_driver_ops *fops;
8991 	struct rte_flow_attr fattr = {0};
8992 
8993 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
8994 		rte_flow_error_set(error, ENOTSUP,
8995 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8996 				NULL,
8997 				"pattern create with incorrect steering mode");
8998 		return NULL;
8999 	}
9000 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9001 	return fops->pattern_template_create(dev, attr, items, error);
9002 }
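
/*
 * Editor's usage sketch (illustrative): creating an ingress pattern
 * template matching any IPv4-over-Ethernet packet; the item masks (all
 * empty here) select which header fields rules of this template match on.
 *
 * @code
 * const struct rte_flow_pattern_template_attr pt_attr = { .ingress = 1 };
 * const struct rte_flow_item pattern[] = {
 *	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *	{ .type = RTE_FLOW_ITEM_TYPE_END },
 * };
 * struct rte_flow_error err;
 * struct rte_flow_pattern_template *pt =
 *	rte_flow_pattern_template_create(port_id, &pt_attr, pattern, &err);
 * @endcode
 */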
9003 
9004 /**
9005  * Destroy flow item template.
9006  *
9007  * @param[in] dev
9008  *   Pointer to the rte_eth_dev structure.
9009  * @param[in] template
9010  *   Pointer to the item template to be destroyed.
9011  * @param[out] error
9012  *   Pointer to error structure.
9013  *
9014  * @return
9015  *   0 on success, a negative errno value otherwise and rte_errno is set.
9016  */
9017 static int
9018 mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
9019 				   struct rte_flow_pattern_template *template,
9020 				   struct rte_flow_error *error)
9021 {
9022 	const struct mlx5_flow_driver_ops *fops;
9023 	struct rte_flow_attr attr = {0};
9024 
9025 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
9026 		return rte_flow_error_set(error, ENOTSUP,
9027 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9028 				NULL,
9029 				"pattern destroy with incorrect steering mode");
9030 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9031 	return fops->pattern_template_destroy(dev, template, error);
9032 }
9033 
9034 /**
9035  * Validate flow actions template.
9036  *
9037  * @param[in] dev
9038  *   Pointer to the rte_eth_dev structure.
9039  * @param[in] attr
9040  *   Pointer to the action template attributes.
9041  * @param[in] actions
9042  *   Associated actions (list terminated by the END action).
9043  * @param[in] masks
9044  *   List of actions that marks which of the action's member is constant.
9045  * @param[out] error
9046  *   Pointer to error structure.
9047  *
9048  * @return
9049  *   0 on success, a negative errno value otherwise and rte_errno is set.
9050  */
9051 int
9052 mlx5_flow_actions_validate(struct rte_eth_dev *dev,
9053 			const struct rte_flow_actions_template_attr *attr,
9054 			const struct rte_flow_action actions[],
9055 			const struct rte_flow_action masks[],
9056 			struct rte_flow_error *error)
9057 {
9058 	const struct mlx5_flow_driver_ops *fops;
9059 	struct rte_flow_attr fattr = {0};
9060 
9061 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
9062 		rte_flow_error_set(error, ENOTSUP,
9063 			RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
9064 			"actions validate with incorrect steering mode");
9065 		return -ENOTSUP;
9066 	}
9067 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9068 	return fops->actions_validate(dev, attr, actions, masks, error);
9069 }
9070 
9071 /**
9072  * Create flow actions template.
9073  *
9074  * @param[in] dev
9075  *   Pointer to the rte_eth_dev structure.
9076  * @param[in] attr
9077  *   Pointer to the action template attributes.
9078  * @param[in] actions
9079  *   Associated actions (list terminated by the END action).
9080  * @param[in] masks
9081  *   List of actions that marks which of the action's member is constant.
9082  * @param[out] error
9083  *   Pointer to error structure.
9084  *
9085  * @return
9086  *   0 on success, a negative errno value otherwise and rte_errno is set.
9087  */
9088 static struct rte_flow_actions_template *
9089 mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
9090 			const struct rte_flow_actions_template_attr *attr,
9091 			const struct rte_flow_action actions[],
9092 			const struct rte_flow_action masks[],
9093 			struct rte_flow_error *error)
9094 {
9095 	const struct mlx5_flow_driver_ops *fops;
9096 	struct rte_flow_attr fattr = {0};
9097 
9098 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
9099 		rte_flow_error_set(error, ENOTSUP,
9100 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9101 				NULL,
9102 				"action create with incorrect steering mode");
9103 		return NULL;
9104 	}
9105 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9106 	return fops->actions_template_create(dev, attr, actions, masks, error);
9107 }
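
/*
 * Editor's usage sketch (illustrative): an actions template where the
 * QUEUE mask has a NULL conf, so the queue index is taken from each rule
 * at insertion time rather than fixed in the template.
 *
 * @code
 * const struct rte_flow_actions_template_attr at_attr = { .ingress = 1 };
 * const struct rte_flow_action_queue q = { .index = 0 };
 * const struct rte_flow_action actions[] = {
 *	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &q },
 *	{ .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * const struct rte_flow_action masks[] = {
 *	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE },
 *	{ .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * struct rte_flow_error err;
 * struct rte_flow_actions_template *at =
 *	rte_flow_actions_template_create(port_id, &at_attr, actions,
 *					 masks, &err);
 * @endcode
 */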
9108 
9109 /**
9110  * Destroy flow action template.
9111  *
9112  * @param[in] dev
9113  *   Pointer to the rte_eth_dev structure.
9114  * @param[in] template
9115  *   Pointer to the action template to be destroyed.
9116  * @param[out] error
9117  *   Pointer to error structure.
9118  *
9119  * @return
9120  *   0 on success, a negative errno value otherwise and rte_errno is set.
9121  */
9122 static int
9123 mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
9124 				   struct rte_flow_actions_template *template,
9125 				   struct rte_flow_error *error)
9126 {
9127 	const struct mlx5_flow_driver_ops *fops;
9128 	struct rte_flow_attr attr = {0};
9129 
9130 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
9131 		return rte_flow_error_set(error, ENOTSUP,
9132 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9133 				NULL,
9134 				"action destroy with incorrect steering mode");
9135 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9136 	return fops->actions_template_destroy(dev, template, error);
9137 }
9138 
9139 /**
9140  * Create flow table.
9141  *
9142  * @param[in] dev
9143  *   Pointer to the rte_eth_dev structure.
9144  * @param[in] attr
9145  *   Pointer to the table attributes.
9146  * @param[in] item_templates
9147  *   Item template array to be bound to the table.
9148  * @param[in] nb_item_templates
9149  *   Number of item templates.
9150  * @param[in] action_templates
9151  *   Action template array to be bound to the table.
9152  * @param[in] nb_action_templates
9153  *   Number of action templates.
9154  * @param[out] error
9155  *   Pointer to error structure.
9156  *
9157  * @return
9158  *    Table on success, NULL otherwise and rte_errno is set.
9159  */
9160 static struct rte_flow_template_table *
9161 mlx5_flow_table_create(struct rte_eth_dev *dev,
9162 		       const struct rte_flow_template_table_attr *attr,
9163 		       struct rte_flow_pattern_template *item_templates[],
9164 		       uint8_t nb_item_templates,
9165 		       struct rte_flow_actions_template *action_templates[],
9166 		       uint8_t nb_action_templates,
9167 		       struct rte_flow_error *error)
9168 {
9169 	const struct mlx5_flow_driver_ops *fops;
9170 	struct rte_flow_attr fattr = {0};
9171 
9172 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
9173 		rte_flow_error_set(error, ENOTSUP,
9174 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9175 				NULL,
9176 				"table create with incorrect steering mode");
9177 		return NULL;
9178 	}
9179 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9180 	return fops->template_table_create(dev,
9181 					   attr,
9182 					   item_templates,
9183 					   nb_item_templates,
9184 					   action_templates,
9185 					   nb_action_templates,
9186 					   error);
9187 }
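
/*
 * Editor's usage sketch (illustrative, reusing `pt` and `at` from the
 * sketches above): a template table fixes the flow attributes and the rule
 * capacity, and binds the pattern and actions templates together.
 *
 * @code
 * const struct rte_flow_template_table_attr tbl_attr = {
 *	.flow_attr = { .group = 1, .ingress = 1 },
 *	.nb_flows = 1 << 16,
 * };
 * struct rte_flow_error err;
 * struct rte_flow_template_table *tbl =
 *	rte_flow_template_table_create(port_id, &tbl_attr,
 *				       &pt, 1, &at, 1, &err);
 * @endcode
 */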
9188 
9189 /**
9190  * PMD destroy flow table.
9191  *
9192  * @param[in] dev
9193  *   Pointer to the rte_eth_dev structure.
9194  * @param[in] table
9195  *   Pointer to the table to be destroyed.
9196  * @param[out] error
9197  *   Pointer to error structure.
9198  *
9199  * @return
9200  *   0 on success, a negative errno value otherwise and rte_errno is set.
9201  */
9202 static int
9203 mlx5_flow_table_destroy(struct rte_eth_dev *dev,
9204 			struct rte_flow_template_table *table,
9205 			struct rte_flow_error *error)
9206 {
9207 	const struct mlx5_flow_driver_ops *fops;
9208 	struct rte_flow_attr attr = {0};
9209 
9210 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
9211 		return rte_flow_error_set(error, ENOTSUP,
9212 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9213 				NULL,
9214 				"table destroy with incorrect steering mode");
9215 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9216 	return fops->template_table_destroy(dev, table, error);
9217 }
9218 
9219 /**
9220  * Enqueue flow creation.
9221  *
9222  * @param[in] dev
9223  *   Pointer to the rte_eth_dev structure.
9224  * @param[in] queue_id
9225  *   The queue to create the flow.
9226  * @param[in] attr
9227  *   Pointer to the flow operation attributes.
9228  * @param[in] items
9229  *   Items with flow spec value.
9230  * @param[in] pattern_template_index
9231  *   Index of the pattern template the flow follows from the table.
9232  * @param[in] actions
9233  *   Action with flow spec value.
9234  * @param[in] action_template_index
9235  *   Index of the actions template the flow follows from the table.
9236  * @param[in] user_data
9237  *   Pointer to the user_data.
9238  * @param[out] error
9239  *   Pointer to error structure.
9240  *
9241  * @return
9242  *    Flow pointer on success, NULL otherwise and rte_errno is set.
9243  */
9244 static struct rte_flow *
9245 mlx5_flow_async_flow_create(struct rte_eth_dev *dev,
9246 			    uint32_t queue_id,
9247 			    const struct rte_flow_op_attr *attr,
9248 			    struct rte_flow_template_table *table,
9249 			    const struct rte_flow_item items[],
9250 			    uint8_t pattern_template_index,
9251 			    const struct rte_flow_action actions[],
9252 			    uint8_t action_template_index,
9253 			    void *user_data,
9254 			    struct rte_flow_error *error)
9255 {
9256 	const struct mlx5_flow_driver_ops *fops;
9257 	struct rte_flow_attr fattr = {0};
9258 
9259 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
9260 		rte_flow_error_set(error, ENOTSUP,
9261 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9262 				NULL,
9263 				"flow_q create with incorrect steering mode");
9264 		return NULL;
9265 	}
9266 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9267 	return fops->async_flow_create(dev, queue_id, attr, table,
9268 				       items, pattern_template_index,
9269 				       actions, action_template_index,
9270 				       user_data, error);
9271 }
9272 
9273 /**
9274  * Enqueue flow creation by index.
9275  *
9276  * @param[in] dev
9277  *   Pointer to the rte_eth_dev structure.
9278  * @param[in] queue_id
9279  *   The queue to create the flow.
9280  * @param[in] attr
9281  *   Pointer to the flow operation attributes.
9282  * @param[in] rule_index
9283  *   The rule insertion index in the table.
9284  * @param[in] actions
9285  *   Action with flow spec value.
9286  * @param[in] action_template_index
9287  *   Index of the actions template the flow follows from the table.
9288  * @param[in] user_data
9289  *   Pointer to the user_data.
9290  * @param[out] error
9291  *   Pointer to error structure.
9292  *
9293  * @return
9294  *    Flow pointer on success, NULL otherwise and rte_errno is set.
9295  */
9296 static struct rte_flow *
9297 mlx5_flow_async_flow_create_by_index(struct rte_eth_dev *dev,
9298 			    uint32_t queue_id,
9299 			    const struct rte_flow_op_attr *attr,
9300 			    struct rte_flow_template_table *table,
9301 			    uint32_t rule_index,
9302 			    const struct rte_flow_action actions[],
9303 			    uint8_t action_template_index,
9304 			    void *user_data,
9305 			    struct rte_flow_error *error)
9306 {
9307 	const struct mlx5_flow_driver_ops *fops;
9308 	struct rte_flow_attr fattr = {0};
9309 
9310 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
9311 		rte_flow_error_set(error, ENOTSUP,
9312 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9313 				NULL,
9314 				"flow_q create with incorrect steering mode");
9315 		return NULL;
9316 	}
9317 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9318 	return fops->async_flow_create_by_index(dev, queue_id, attr, table,
9319 				       rule_index, actions, action_template_index,
9320 				       user_data, error);
9321 }
9322 
9323 /**
9324  * Enqueue flow destruction.
9325  *
9326  * @param[in] dev
9327  *   Pointer to the rte_eth_dev structure.
9328  * @param[in] queue
9329  *   The queue to destroy the flow.
9330  * @param[in] attr
9331  *   Pointer to the flow operation attributes.
9332  * @param[in] flow
9333  *   Pointer to the flow to be destroyed.
9334  * @param[in] user_data
9335  *   Pointer to the user_data.
9336  * @param[out] error
9337  *   Pointer to error structure.
9338  *
9339  * @return
9340  *    0 on success, negative value otherwise and rte_errno is set.
9341  */
9342 static int
9343 mlx5_flow_async_flow_destroy(struct rte_eth_dev *dev,
9344 			     uint32_t queue,
9345 			     const struct rte_flow_op_attr *attr,
9346 			     struct rte_flow *flow,
9347 			     void *user_data,
9348 			     struct rte_flow_error *error)
9349 {
9350 	const struct mlx5_flow_driver_ops *fops;
9351 	struct rte_flow_attr fattr = {0};
9352 
9353 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW)
9354 		return rte_flow_error_set(error, ENOTSUP,
9355 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9356 				NULL,
9357 				"flow_q destroy with incorrect steering mode");
9358 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9359 	return fops->async_flow_destroy(dev, queue, attr, flow,
9360 					user_data, error);
9361 }
9362 
9363 /**
9364  * Pull the enqueued flows.
9365  *
9366  * @param[in] dev
9367  *   Pointer to the rte_eth_dev structure.
9368  * @param[in] queue
9369  *   The queue to pull the result.
9370  * @param[in/out] res
9371  *   Array to save the results.
9372  * @param[in] n_res
9373  *   Number of results the array can hold.
9374  * @param[out] error
9375  *   Pointer to error structure.
9376  *
9377  * @return
9378  *    Result number on success, negative value otherwise and rte_errno is set.
9379  */
9380 static int
9381 mlx5_flow_pull(struct rte_eth_dev *dev,
9382 	       uint32_t queue,
9383 	       struct rte_flow_op_result res[],
9384 	       uint16_t n_res,
9385 	       struct rte_flow_error *error)
9386 {
9387 	const struct mlx5_flow_driver_ops *fops;
9388 	struct rte_flow_attr attr = {0};
9389 
9390 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
9391 		return rte_flow_error_set(error, ENOTSUP,
9392 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9393 				NULL,
9394 				"flow_q pull with incorrect steering mode");
9395 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9396 	return fops->pull(dev, queue, res, n_res, error);
9397 }
9398 
9399 /**
9400  * Push the enqueued flows.
9401  *
9402  * @param[in] dev
9403  *   Pointer to the rte_eth_dev structure.
9404  * @param[in] queue
9405  *   The queue to push the flows.
9406  * @param[out] error
9407  *   Pointer to error structure.
9408  *
9409  * @return
9410  *    0 on success, negative value otherwise and rte_errno is set.
9411  */
9412 static int
9413 mlx5_flow_push(struct rte_eth_dev *dev,
9414 	       uint32_t queue,
9415 	       struct rte_flow_error *error)
9416 {
9417 	const struct mlx5_flow_driver_ops *fops;
9418 	struct rte_flow_attr attr = {0};
9419 
9420 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
9421 		return rte_flow_error_set(error, ENOTSUP,
9422 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9423 				NULL,
9424 				"flow_q push with incorrect steering mode");
9425 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9426 	return fops->push(dev, queue, error);
9427 }
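
/*
 * Editor's usage sketch (illustrative; `tbl`, `spec_items` and
 * `spec_actions` are assumed): the async model enqueues the rule, pushes
 * the queue to the HW, then polls completions with pull.
 *
 * @code
 * const struct rte_flow_op_attr op = { .postpone = 1 };
 * struct rte_flow_op_result res[32];
 * struct rte_flow_error err;
 * struct rte_flow *f;
 * int n;
 *
 * f = rte_flow_async_create(port_id, 0, &op, tbl, spec_items, 0,
 *			     spec_actions, 0, NULL, &err);
 * rte_flow_push(port_id, 0, &err);
 * do {
 *	n = rte_flow_pull(port_id, 0, res, RTE_DIM(res), &err);
 * } while (n == 0);
 * @endcode
 */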
9428 
9429 /**
9430  * Create shared action.
9431  *
9432  * @param[in] dev
9433  *   Pointer to the rte_eth_dev structure.
9434  * @param[in] queue
9435  *   The queue to be used.
9436  * @param[in] attr
9437  *   Operation attribute.
9438  * @param[in] conf
9439  *   Indirect action configuration.
9440  * @param[in] action
9441  *   rte_flow action detail.
9442  * @param[in] user_data
9443  *   Pointer to the user_data.
9444  * @param[out] error
9445  *   Pointer to error structure.
9446  *
9447  * @return
9448  *   Action handle on success, NULL otherwise and rte_errno is set.
9449  */
9450 static struct rte_flow_action_handle *
9451 mlx5_flow_async_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
9452 				 const struct rte_flow_op_attr *attr,
9453 				 const struct rte_flow_indir_action_conf *conf,
9454 				 const struct rte_flow_action *action,
9455 				 void *user_data,
9456 				 struct rte_flow_error *error)
9457 {
9458 	const struct mlx5_flow_driver_ops *fops =
9459 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9460 
9461 	return fops->async_action_create(dev, queue, attr, conf, action,
9462 					 user_data, error);
9463 }
9464 
9465 /**
9466  * Update shared action.
9467  *
9468  * @param[in] dev
9469  *   Pointer to the rte_eth_dev structure.
9470  * @param[in] queue
9471  *   The queue to be used.
9472  * @param[in] attr
9473  *   Operation attribute.
9474  * @param[in] handle
9475  *   Action handle to be updated.
9476  * @param[in] update
9477  *   Update value.
9478  * @param[in] user_data
9479  *   Pointer to the user_data.
9480  * @param[out] error
9481  *   Pointer to error structure.
9482  *
9483  * @return
9484  *   0 on success, negative value otherwise and rte_errno is set.
9485  */
9486 static int
9487 mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
9488 				     const struct rte_flow_op_attr *attr,
9489 				     struct rte_flow_action_handle *handle,
9490 				     const void *update,
9491 				     void *user_data,
9492 				     struct rte_flow_error *error)
9493 {
9494 	const struct mlx5_flow_driver_ops *fops =
9495 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9496 
9497 	return fops->async_action_update(dev, queue, attr, handle,
9498 					 update, user_data, error);
9499 }
9500 
9501 /**
9502  * Query shared action.
9503  *
9504  * @param[in] dev
9505  *   Pointer to the rte_eth_dev structure.
9506  * @param[in] queue
9507  *   The queue to be used.
9508  * @param[in] attr
9509  *   Operation attribute.
9510  * @param[in] handle
9511  *   Action handle to be queried.
9512  * @param[in] data
9513  *   Pointer to the query result data.
9514  * @param[in] user_data
9515  *   Pointer to the user_data.
9516  * @param[out] error
9517  *   Pointer to error structure.
9518  *
9519  * @return
9520  *   0 on success, negative value otherwise and rte_errno is set.
9521  */
9522 static int
9523 mlx5_flow_async_action_handle_query(struct rte_eth_dev *dev, uint32_t queue,
9524 				    const struct rte_flow_op_attr *attr,
9525 				    const struct rte_flow_action_handle *handle,
9526 				    void *data,
9527 				    void *user_data,
9528 				    struct rte_flow_error *error)
9529 {
9530 	const struct mlx5_flow_driver_ops *fops =
9531 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9532 
9533 	return fops->async_action_query(dev, queue, attr, handle,
9534 					data, user_data, error);
9535 }
9536 
9537 /**
9538  * Destroy shared action.
9539  *
9540  * @param[in] dev
9541  *   Pointer to the rte_eth_dev structure.
9542  * @param[in] queue
9543  *   The queue to be used.
9544  * @param[in] attr
9545  *   Operation attribute.
9546  * @param[in] handle
9547  *   Action handle to be destroyed.
9548  * @param[in] user_data
9549  *   Pointer to the user_data.
9550  * @param[out] error
9551  *   Pointer to error structure.
9552  *
9553  * @return
9554  *   0 on success, negative value otherwise and rte_errno is set.
9555  */
9556 static int
9557 mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
9558 				      const struct rte_flow_op_attr *attr,
9559 				      struct rte_flow_action_handle *handle,
9560 				      void *user_data,
9561 				      struct rte_flow_error *error)
9562 {
9563 	const struct mlx5_flow_driver_ops *fops =
9564 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9565 
9566 	return fops->async_action_destroy(dev, queue, attr, handle,
9567 					  user_data, error);
9568 }
9569 
9570 /**
9571  * Allocate new memory for the counter values, wrapped by all the needed
9572  * management structures.
9573  *
9574  * @param[in] sh
9575  *   Pointer to mlx5_dev_ctx_shared object.
9576  *
9577  * @return
9578  *   0 on success, a negative errno value otherwise.
9579  */
9580 static int
9581 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
9582 {
9583 	struct mlx5_counter_stats_mem_mng *mem_mng;
9584 	volatile struct flow_counter_stats *raw_data;
9585 	int raws_n = MLX5_CNT_MR_ALLOC_BULK + MLX5_MAX_PENDING_QUERIES;
9586 	int size = (sizeof(struct flow_counter_stats) *
9587 			MLX5_COUNTERS_PER_POOL +
9588 			sizeof(struct mlx5_counter_stats_raw)) * raws_n +
9589 			sizeof(struct mlx5_counter_stats_mem_mng);
9590 	size_t pgsize = rte_mem_page_size();
9591 	uint8_t *mem;
9592 	int ret;
9593 	int i;
9594 
9595 	if (pgsize == (size_t)-1) {
9596 		DRV_LOG(ERR, "Failed to get mem page size");
9597 		rte_errno = ENOMEM;
9598 		return -ENOMEM;
9599 	}
9600 	mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
9601 	if (!mem) {
9602 		rte_errno = ENOMEM;
9603 		return -ENOMEM;
9604 	}
9605 	mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
9606 	size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
9607 	ret = mlx5_os_wrapped_mkey_create(sh->cdev->ctx, sh->cdev->pd,
9608 					  sh->cdev->pdn, mem, size,
9609 					  &mem_mng->wm);
9610 	if (ret) {
9611 		rte_errno = errno;
9612 		mlx5_free(mem);
9613 		return -rte_errno;
9614 	}
9615 	mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
9616 	raw_data = (volatile struct flow_counter_stats *)mem;
9617 	for (i = 0; i < raws_n; ++i) {
9618 		mem_mng->raws[i].mem_mng = mem_mng;
9619 		mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
9620 	}
9621 	for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
9622 		LIST_INSERT_HEAD(&sh->sws_cmng.free_stat_raws,
9623 				 mem_mng->raws + MLX5_CNT_MR_ALLOC_BULK + i,
9624 				 next);
9625 	LIST_INSERT_HEAD(&sh->sws_cmng.mem_mngs, mem_mng, next);
9626 	sh->sws_cmng.mem_mng = mem_mng;
9627 	return 0;
9628 }
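
/*
 * Editor's sketch of the single allocation laid out above:
 *
 *   mem
 *   +----------------------------------------------------------------+
 *   | raws_n * MLX5_COUNTERS_PER_POOL * struct flow_counter_stats    |
 *   | (raw counter values; this range is registered via the wrapped  |
 *   |  mkey so the HW can write query results into it)               |
 *   +----------------------------------------------------------------+ <- mem + size
 *   | raws_n * struct mlx5_counter_stats_raw   (mem_mng->raws)       |
 *   +----------------------------------------------------------------+
 *   | struct mlx5_counter_stats_mem_mng        (mem_mng)             |
 *   +----------------------------------------------------------------+
 *
 * The last MLX5_MAX_PENDING_QUERIES raws are parked on the free list for
 * in-flight queries; the first MLX5_CNT_MR_ALLOC_BULK back the pools.
 */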
9629 
9630 /**
9631  * Set the statistic memory to the new counter pool.
9632  *
9633  * @param[in] sh
9634  *   Pointer to mlx5_dev_ctx_shared object.
9635  * @param[in] pool
9636  *   Pointer to the pool to set the statistic memory.
9637  *
9638  * @return
9639  *   0 on success, a negative errno value otherwise.
9640  */
9641 static int
9642 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
9643 			       struct mlx5_flow_counter_pool *pool)
9644 {
9645 	struct mlx5_flow_counter_mng *cmng = &sh->sws_cmng;
9646 	/* Resize the statistic memory once it is used up. */
9647 	if (!(pool->index % MLX5_CNT_MR_ALLOC_BULK) &&
9648 	    mlx5_flow_create_counter_stat_mem_mng(sh)) {
9649 		DRV_LOG(ERR, "Cannot resize counter stat mem.");
9650 		return -1;
9651 	}
9652 	rte_spinlock_lock(&pool->sl);
9653 	pool->raw = cmng->mem_mng->raws + pool->index % MLX5_CNT_MR_ALLOC_BULK;
9654 	rte_spinlock_unlock(&pool->sl);
9655 	pool->raw_hw = NULL;
9656 	return 0;
9657 }
9658 
9659 #define MLX5_POOL_QUERY_FREQ_US 1000000
9660 
9661 /**
9662  * Set the periodic procedure for triggering asynchronous batch queries for all
9663  * the counter pools.
9664  *
9665  * @param[in] sh
9666  *   Pointer to mlx5_dev_ctx_shared object.
9667  */
9668 void
9669 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
9670 {
9671 	uint32_t pools_n, us;
9672 
9673 	pools_n = __atomic_load_n(&sh->sws_cmng.n_valid, __ATOMIC_RELAXED);
9674 	us = MLX5_POOL_QUERY_FREQ_US / pools_n;
9675 	DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
9676 	if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
9677 		sh->sws_cmng.query_thread_on = 0;
9678 		DRV_LOG(ERR, "Cannot reinitialize query alarm");
9679 	} else {
9680 		sh->sws_cmng.query_thread_on = 1;
9681 	}
9682 }
9683 
9684 /**
9685  * The periodic procedure for triggering asynchronous batch queries for all the
9686  * counter pools. This function is probably called by the host thread.
9687  *
9688  * @param[in] arg
9689  *   The parameter for the alarm process.
9690  */
9691 void
9692 mlx5_flow_query_alarm(void *arg)
9693 {
9694 	struct mlx5_dev_ctx_shared *sh = arg;
9695 	struct mlx5_flow_counter_mng *cmng = &sh->sws_cmng;
9696 	uint16_t pool_index = cmng->pool_index;
9697 	struct mlx5_flow_counter_pool *pool;
9698 	uint16_t n_valid;
9699 	int ret;
9700 
9701 	if (cmng->pending_queries >= MLX5_MAX_PENDING_QUERIES)
9702 		goto set_alarm;
9703 	rte_spinlock_lock(&cmng->pool_update_sl);
9704 	pool = cmng->pools[pool_index];
9705 	n_valid = cmng->n_valid;
9706 	rte_spinlock_unlock(&cmng->pool_update_sl);
9707 	/* Set the statistic memory to the newly created pool. */
9708 	if (!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool))
9709 		goto set_alarm;
9710 	if (pool->raw_hw)
9711 		/* There is a pool query in progress. */
9712 		goto set_alarm;
9713 	pool->raw_hw = LIST_FIRST(&cmng->free_stat_raws);
9714 	if (!pool->raw_hw)
9715 		/* No free counter statistics raw memory. */
9716 		goto set_alarm;
9717 	/*
9718 	 * Identify the counters released between query trigger and query
9719 	 * handle more efficiently. Counters released in this gap period
9720 	 * should wait for a new round of query as the newly arrived packets
9721 	 * will not be taken into account.
9722 	 */
9723 	pool->query_gen++;
9724 	ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
9725 					       MLX5_COUNTERS_PER_POOL,
9726 					       NULL, NULL,
9727 					       pool->raw_hw->mem_mng->wm.lkey,
9728 					       (void *)(uintptr_t)
9729 					       pool->raw_hw->data,
9730 					       sh->devx_comp,
9731 					       (uint64_t)(uintptr_t)pool);
9732 	if (ret) {
9733 		DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
9734 			" %d", pool->min_dcs->id);
9735 		pool->raw_hw = NULL;
9736 		goto set_alarm;
9737 	}
9738 	LIST_REMOVE(pool->raw_hw, next);
9739 	cmng->pending_queries++;
9740 	pool_index++;
9741 	if (pool_index >= n_valid)
9742 		pool_index = 0;
9743 set_alarm:
9744 	cmng->pool_index = pool_index;
9745 	mlx5_set_query_alarm(sh);
9746 }
9747 
9748 /**
9749  * Check for newly aged flows in the counter pool and raise the aging event.
9750  *
9751  * @param[in] sh
9752  *   Pointer to mlx5_dev_ctx_shared object.
9753  * @param[in] pool
9754  *   Pointer to Current counter pool.
9755  */
9756 static void
9757 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
9758 		   struct mlx5_flow_counter_pool *pool)
9759 {
9760 	struct mlx5_priv *priv;
9761 	struct mlx5_flow_counter *cnt;
9762 	struct mlx5_age_info *age_info;
9763 	struct mlx5_age_param *age_param;
9764 	struct mlx5_counter_stats_raw *cur = pool->raw_hw;
9765 	struct mlx5_counter_stats_raw *prev = pool->raw;
9766 	const uint64_t curr_time = MLX5_CURR_TIME_SEC;
9767 	const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
9768 	uint16_t expected = AGE_CANDIDATE;
9769 	uint32_t i;
9770 
9771 	pool->time_of_last_age_check = curr_time;
9772 	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
9773 		cnt = MLX5_POOL_GET_CNT(pool, i);
9774 		age_param = MLX5_CNT_TO_AGE(cnt);
9775 		if (__atomic_load_n(&age_param->state,
9776 				    __ATOMIC_RELAXED) != AGE_CANDIDATE)
9777 			continue;
9778 		if (cur->data[i].hits != prev->data[i].hits) {
9779 			__atomic_store_n(&age_param->sec_since_last_hit, 0,
9780 					 __ATOMIC_RELAXED);
9781 			continue;
9782 		}
9783 		if (__atomic_fetch_add(&age_param->sec_since_last_hit,
9784 				       time_delta,
9785 				       __ATOMIC_RELAXED) + time_delta <= age_param->timeout)
9786 			continue;
9787 		/*
9788 		 * Hold the lock first; otherwise, if the release happens
9789 		 * between setting the state to AGE_TMOUT and the tailq
9790 		 * operation, the release procedure may delete a
9791 		 * non-existent tailq node.
9792 		 */
9793 		priv = rte_eth_devices[age_param->port_id].data->dev_private;
9794 		age_info = GET_PORT_AGE_INFO(priv);
9795 		rte_spinlock_lock(&age_info->aged_sl);
9796 		if (__atomic_compare_exchange_n(&age_param->state, &expected,
9797 						AGE_TMOUT, false,
9798 						__ATOMIC_RELAXED,
9799 						__ATOMIC_RELAXED)) {
9800 			TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
9801 			MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
9802 		}
9803 		rte_spinlock_unlock(&age_info->aged_sl);
9804 	}
9805 	mlx5_age_event_prepare(sh);
9806 }
9807 
9808 /**
9809  * Handler for the HW response with ready values from an asynchronous batch
9810  * query. This function is expected to run in the host thread.
9811  *
9812  * @param[in] sh
9813  *   The pointer to the shared device context.
9814  * @param[in] async_id
9815  *   The Devx async ID.
9816  * @param[in] status
9817  *   The status of the completion.
9818  */
9819 void
9820 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
9821 				  uint64_t async_id, int status)
9822 {
9823 	struct mlx5_flow_counter_pool *pool =
9824 		(struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
9825 	struct mlx5_counter_stats_raw *raw_to_free;
9826 	uint8_t query_gen = pool->query_gen ^ 1;
9827 	struct mlx5_flow_counter_mng *cmng = &sh->sws_cmng;
9828 	enum mlx5_counter_type cnt_type =
9829 		pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
9830 				MLX5_COUNTER_TYPE_ORIGIN;
9831 
9832 	if (unlikely(status)) {
9833 		raw_to_free = pool->raw_hw;
9834 	} else {
9835 		raw_to_free = pool->raw;
9836 		if (pool->is_aged)
9837 			mlx5_flow_aging_check(sh, pool);
9838 		rte_spinlock_lock(&pool->sl);
9839 		pool->raw = pool->raw_hw;
9840 		rte_spinlock_unlock(&pool->sl);
9841 		/* Be sure the new raw counters data is updated in memory. */
9842 		rte_io_wmb();
9843 		if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
9844 			rte_spinlock_lock(&cmng->csl[cnt_type]);
9845 			TAILQ_CONCAT(&cmng->counters[cnt_type],
9846 				     &pool->counters[query_gen], next);
9847 			rte_spinlock_unlock(&cmng->csl[cnt_type]);
9848 		}
9849 	}
9850 	LIST_INSERT_HEAD(&sh->sws_cmng.free_stat_raws, raw_to_free, next);
9851 	pool->raw_hw = NULL;
9852 	sh->sws_cmng.pending_queries--;
9853 }
9854 
9855 static int
9856 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
9857 		    const struct flow_grp_info *grp_info,
9858 		    struct rte_flow_error *error)
9859 {
9860 	if (grp_info->transfer && grp_info->external &&
9861 	    grp_info->fdb_def_rule) {
9862 		if (group == UINT32_MAX)
9863 			return rte_flow_error_set
9864 						(error, EINVAL,
9865 						 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
9866 						 NULL,
9867 						 "group index not supported");
9868 		*table = group + 1;
9869 	} else {
9870 		*table = group;
9871 	}
9872 	DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
9873 	return 0;
9874 }
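
/*
 * Editor's worked example: with transfer, external and fdb_def_rule all
 * set, an application's group N maps to table N + 1 (group 0 -> table 1,
 * group 1 -> table 2, ...), reserving table 0 for the FDB default rule;
 * group UINT32_MAX is rejected since the shift would overflow. In every
 * other case the mapping is the identity (group N -> table N).
 */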
9875 
9876 /**
9877  * Translate the rte_flow group index to HW table value.
9878  *
9879  * If tunnel offload is disabled, all group ids are converted to flow
9880  * table ids using the standard method.
9881  * If tunnel offload is enabled, group id can be converted using the
9882  * standard or tunnel conversion method. Group conversion method
9883  * selection depends on flags in `grp_info` parameter:
9884  * - Internal (grp_info.external == 0) groups conversion uses the
9885  *   standard method.
9886  * - Group ids in JUMP action converted with the tunnel conversion.
9887  * - Group id in rule attribute conversion depends on a rule type and
9888  *   group id value:
9889  *   ** non zero group attributes converted with the tunnel method
9890  *   ** zero group attribute in non-tunnel rule is converted using the
9891  *      standard method - there's only one root table
9892  *   ** zero group attribute in steer tunnel rule is converted with the
9893  *      standard method - single root table
9894  *   ** zero group attribute in match tunnel rule is a special OvS
9895  *      case: that value is used for portability reasons. That group
9896  *      id is converted with the tunnel conversion method.
9897  *
9898  * @param[in] dev
9899  *   Port device
9900  * @param[in] tunnel
9901  *   PMD tunnel offload object
9902  * @param[in] group
9903  *   rte_flow group index value.
9904  * @param[out] table
9905  *   HW table value.
9906  * @param[in] grp_info
9907  *   flags used for conversion
9908  * @param[out] error
9909  *   Pointer to error structure.
9910  *
9911  * @return
9912  *   0 on success, a negative errno value otherwise and rte_errno is set.
9913  */
9914 int
9915 mlx5_flow_group_to_table(struct rte_eth_dev *dev,
9916 			 const struct mlx5_flow_tunnel *tunnel,
9917 			 uint32_t group, uint32_t *table,
9918 			 const struct flow_grp_info *grp_info,
9919 			 struct rte_flow_error *error)
9920 {
9921 	int ret;
9922 	bool standard_translation;
9923 
9924 	if (!grp_info->skip_scale && grp_info->external &&
9925 	    group < MLX5_MAX_TABLES_EXTERNAL)
9926 		group *= MLX5_FLOW_TABLE_FACTOR;
9927 	if (is_tunnel_offload_active(dev)) {
9928 		standard_translation = !grp_info->external ||
9929 					grp_info->std_tbl_fix;
9930 	} else {
9931 		standard_translation = true;
9932 	}
9933 	DRV_LOG(DEBUG,
9934 		"port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
9935 		dev->data->port_id, group, grp_info->transfer,
9936 		grp_info->external, grp_info->fdb_def_rule,
9937 		standard_translation ? "STANDARD" : "TUNNEL");
9938 	if (standard_translation)
9939 		ret = flow_group_to_table(dev->data->port_id, group, table,
9940 					  grp_info, error);
9941 	else
9942 		ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
9943 						      table, error);
9944 
9945 	return ret;
9946 }
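
/*
 * Editor's sketch: a typical in-driver translation of a JUMP target
 * group. The flag values below are illustrative assumptions, not a
 * real call site.
 */
static int
example_translate_jump_group(struct rte_eth_dev *dev,
			     const struct mlx5_flow_tunnel *tunnel,
			     uint32_t group, uint32_t *table,
			     struct rte_flow_error *error)
{
	struct flow_grp_info grp_info = {
		.external = 1,	  /* Rule comes from the application. */
		.transfer = 0,	  /* NIC domain, not FDB. */
		.fdb_def_rule = 0,
		.std_tbl_fix = 0, /* Allow the tunnel translation path. */
		.skip_scale = 0,  /* Scale low external group ids. */
	};

	/*
	 * With tunnel offload inactive, e.g. external group 1 maps to
	 * table 1 * MLX5_FLOW_TABLE_FACTOR via the standard method.
	 */
	return mlx5_flow_group_to_table(dev, tunnel, group, table,
					&grp_info, error);
}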
9947 
9948 /**
9949  * Discover availability of metadata reg_c's.
9950  *
9951  * Iteratively use test flows to check availability.
9952  *
9953  * @param[in] dev
9954  *   Pointer to the Ethernet device structure.
9955  *
9956  * @return
9957  *   0 on success, a negative errno value otherwise and rte_errno is set.
9958  */
9959 int
9960 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
9961 {
9962 	struct mlx5_priv *priv = dev->data->dev_private;
9963 	enum modify_reg idx;
9964 	int n = 0;
9965 
9966 	/* reg_c[0] and reg_c[1] are reserved. */
9967 	priv->sh->flow_mreg_c[n++] = REG_C_0;
9968 	priv->sh->flow_mreg_c[n++] = REG_C_1;
9969 	/* Discover availability of other reg_c's. */
9970 	for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
9971 		struct rte_flow_attr attr = {
9972 			.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
9973 			.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
9974 			.ingress = 1,
9975 		};
9976 		struct rte_flow_item items[] = {
9977 			[0] = {
9978 				.type = RTE_FLOW_ITEM_TYPE_END,
9979 			},
9980 		};
9981 		struct rte_flow_action actions[] = {
9982 			[0] = {
9983 				.type = (enum rte_flow_action_type)
9984 					MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
9985 				.conf = &(struct mlx5_flow_action_copy_mreg){
9986 					.src = REG_C_1,
9987 					.dst = idx,
9988 				},
9989 			},
9990 			[1] = {
9991 				.type = RTE_FLOW_ACTION_TYPE_JUMP,
9992 				.conf = &(struct rte_flow_action_jump){
9993 					.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
9994 				},
9995 			},
9996 			[2] = {
9997 				.type = RTE_FLOW_ACTION_TYPE_END,
9998 			},
9999 		};
10000 		uint32_t flow_idx;
10001 		struct rte_flow *flow;
10002 		struct rte_flow_error error;
10003 
10004 		if (!priv->sh->config.dv_flow_en)
10005 			break;
10006 		/* Create internal flow, validation skips copy action. */
10007 		flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_GEN, &attr,
10008 					items, actions, false, &error);
10009 		flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
10010 				      flow_idx);
10011 		if (!flow)
10012 			continue;
10013 		priv->sh->flow_mreg_c[n++] = idx;
10014 		flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN, flow_idx);
10015 	}
10016 	for (; n < MLX5_MREG_C_NUM; ++n)
10017 		priv->sh->flow_mreg_c[n] = REG_NON;
10018 	priv->sh->metadata_regc_check_flag = 1;
10019 	return 0;
10020 }
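
/*
 * Editor's sketch: consumers scan flow_mreg_c[] up to the first REG_NON
 * to learn how many metadata registers survived the probing above.
 * Illustrative helper under that assumption:
 */
static unsigned int
example_count_mreg_c(const struct mlx5_dev_ctx_shared *sh)
{
	unsigned int n;

	for (n = 0; n < MLX5_MREG_C_NUM; ++n)
		if (sh->flow_mreg_c[n] == REG_NON)
			break;
	return n; /* Number of usable reg_c registers. */
}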
10021 
10022 int
10023 save_dump_file(const uint8_t *data, uint32_t size,
10024 	uint32_t type, uint64_t id, void *arg, FILE *file)
10025 {
10026 	char line[BUF_SIZE];
10027 	uint32_t out = 0;
10028 	uint32_t k;
10029 	uint32_t actions_num;
10030 	struct rte_flow_query_count *count;
10031 
10032 	memset(line, 0, BUF_SIZE);
10033 	switch (type) {
10034 	case DR_DUMP_REC_TYPE_PMD_MODIFY_HDR:
10035 		actions_num = *(uint32_t *)(arg);
10036 		out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",%d,",
10037 				type, id, actions_num);
10038 		break;
10039 	case DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT:
10040 		out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",",
10041 				type, id);
10042 		break;
10043 	case DR_DUMP_REC_TYPE_PMD_COUNTER:
10044 		count = (struct rte_flow_query_count *)arg;
10045 		fprintf(file,
10046 			"%d,0x%" PRIx64 ",%" PRIu64 ",%" PRIu64 "\n",
10047 			type, id, count->hits, count->bytes);
10048 		return 0;
10049 	default:
10050 		return -1;
10051 	}
10052 
10053 	for (k = 0; k < size; k++) {
10054 		/* Make sure we do not overrun the line buffer length. */
10055 		if (out >= BUF_SIZE - 4) {
10056 			line[out] = '\0';
10057 			break;
10058 		}
10059 		out += snprintf(line + out, BUF_SIZE - out, "%02x",
10060 				(data[k]) & 0xff);
10061 	}
10062 	fprintf(file, "%s\n", line);
10063 	return 0;
10064 }
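
/*
 * Editor's illustration: save_dump_file() emits one CSV record per
 * object, "<type>,<id>,..." followed by an optional hex payload;
 * counter records are "<type>,<id>,<hits>,<bytes>". A hypothetical
 * call with made-up statistics:
 */
static void
example_dump_counter_record(FILE *file)
{
	struct rte_flow_query_count count = {
		.hits = 1024,
		.bytes = 65536,
	};

	/* Counter records carry no raw data buffer (data/size unused). */
	(void)save_dump_file(NULL, 0, DR_DUMP_REC_TYPE_PMD_COUNTER,
			     0xdeadbeefULL, &count, file);
}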
10065 
10066 int
10067 mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
10068 	struct rte_flow_query_count *count, struct rte_flow_error *error)
10069 {
10070 	struct rte_flow_action action[2];
10071 	enum mlx5_flow_drv_type ftype;
10072 	const struct mlx5_flow_driver_ops *fops;
10073 
10074 	if (!flow) {
10075 		return rte_flow_error_set(error, ENOENT,
10076 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
10077 				NULL,
10078 				"invalid flow handle");
10079 	}
10080 	action[0].type = RTE_FLOW_ACTION_TYPE_COUNT;
10081 	action[1].type = RTE_FLOW_ACTION_TYPE_END;
10082 	if (flow->counter) {
10083 		memset(count, 0, sizeof(struct rte_flow_query_count));
10084 		ftype = (enum mlx5_flow_drv_type)(flow->drv_type);
10085 		MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN &&
10086 						ftype < MLX5_FLOW_TYPE_MAX);
10087 		fops = flow_get_drv_ops(ftype);
10088 		return fops->query(dev, flow, action, count, error);
10089 	}
10090 	return -1;
10091 }
10092 
10093 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
10094 /**
10095  * Dump flow ipool data to file
10096  *
10097  * @param[in] dev
10098  *   The pointer to Ethernet device.
10099  * @param[in] file
10100  *   A pointer to a file for output.
10101  * @param[out] error
10102  *   Perform verbose error reporting if not NULL. PMDs initialize this
10103  *   structure in case of error only.
10104  * @return
10105  *   0 on success, a negative value otherwise.
10106  */
10107 int
10108 mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev,
10109 	struct rte_flow *flow, FILE *file,
10110 	struct rte_flow_error *error)
10111 {
10112 	struct mlx5_priv *priv = dev->data->dev_private;
10113 	struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
10114 	struct mlx5_flow_dv_encap_decap_resource *encap_decap;
10115 	uint32_t handle_idx;
10116 	struct mlx5_flow_handle *dh;
10117 	struct rte_flow_query_count count;
10118 	uint32_t actions_num;
10119 	const uint8_t *data;
10120 	size_t size;
10121 	uint64_t id;
10122 	uint32_t type;
10123 	void *action = NULL;
10124 
10125 	if (!flow) {
10126 		return rte_flow_error_set(error, ENOENT,
10127 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
10128 				NULL,
10129 				"invalid flow handle");
10130 	}
10131 	handle_idx = flow->dev_handles;
10132 	/* query counter */
10133 	if (flow->counter &&
10134 	    !mlx5_counter_query(dev, flow->counter, false,
10135 				&count.hits, &count.bytes, &action) && action) {
10136 		id = (uint64_t)(uintptr_t)action;
10137 		type = DR_DUMP_REC_TYPE_PMD_COUNTER;
10138 		save_dump_file(NULL, 0, type,
10139 			id, (void *)&count, file);
10140 	}
10141 
10142 	while (handle_idx) {
10143 		dh = mlx5_ipool_get(priv->sh->ipool
10144 				[MLX5_IPOOL_MLX5_FLOW], handle_idx);
10145 		if (!dh)
10146 			break; /* Missing handle, stop to avoid an endless loop. */
10147 		handle_idx = dh->next.next;
10148 
10149 		/* Get modify_hdr and encap_decap buf from ipools. */
10150 		encap_decap = NULL;
10151 		modify_hdr = dh->dvh.modify_hdr;
10152 
10153 		if (dh->dvh.rix_encap_decap) {
10154 			encap_decap = mlx5_ipool_get(priv->sh->ipool
10155 						[MLX5_IPOOL_DECAP_ENCAP],
10156 						dh->dvh.rix_encap_decap);
10157 		}
10158 		if (modify_hdr) {
10159 			data = (const uint8_t *)modify_hdr->actions;
10160 			size = (size_t)(modify_hdr->actions_num) * 8;
10161 			id = (uint64_t)(uintptr_t)modify_hdr->action;
10162 			actions_num = modify_hdr->actions_num;
10163 			type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
10164 			save_dump_file(data, size, type, id,
10165 						(void *)(&actions_num), file);
10166 		}
10167 		if (encap_decap) {
10168 			data = encap_decap->buf;
10169 			size = encap_decap->size;
10170 			id = (uint64_t)(uintptr_t)encap_decap->action;
10171 			type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
10172 			save_dump_file(data, size, type,
10173 						id, NULL, file);
10174 		}
10175 	}
10176 	return 0;
10177 }
10178 
10179 /**
10180  * Dump all flow's encap_decap/modify_hdr/counter data to file
10181  *
10182  * @param[in] dev
10183  *   The pointer to Ethernet device.
10184  * @param[in] file
10185  *   A pointer to a file for output.
10186  * @param[out] error
10187  *   Perform verbose error reporting if not NULL. PMDs initialize this
10188  *   structure in case of error only.
10189  * @return
10190  *   0 on success, a negative value otherwise.
10191  */
10192 static int
10193 mlx5_flow_dev_dump_sh_all(struct rte_eth_dev *dev,
10194 	FILE *file, struct rte_flow_error *error __rte_unused)
10195 {
10196 	struct mlx5_priv *priv = dev->data->dev_private;
10197 	struct mlx5_dev_ctx_shared *sh = priv->sh;
10198 	struct mlx5_hlist *h;
10199 	struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
10200 	struct mlx5_flow_dv_encap_decap_resource *encap_decap;
10201 	struct rte_flow_query_count count;
10202 	uint32_t actions_num;
10203 	const uint8_t *data;
10204 	size_t size;
10205 	uint64_t id;
10206 	uint32_t type;
10207 	uint32_t i;
10208 	uint32_t j;
10209 	struct mlx5_list_inconst *l_inconst;
10210 	struct mlx5_list_entry *e;
10211 	int lcore_index;
10212 	struct mlx5_flow_counter_mng *cmng = &priv->sh->sws_cmng;
10213 	uint32_t max;
10214 	void *action;
10215 
10216 	/* encap_decap hlist is lcore_share, get global core cache. */
10217 	i = MLX5_LIST_GLOBAL;
10218 	h = sh->encaps_decaps;
10219 	if (h) {
10220 		for (j = 0; j <= h->mask; j++) {
10221 			l_inconst = &h->buckets[j].l;
10222 			if (!l_inconst || !l_inconst->cache[i])
10223 				continue;
10224 
10225 			e = LIST_FIRST(&l_inconst->cache[i]->h);
10226 			while (e) {
10227 				encap_decap =
10228 				(struct mlx5_flow_dv_encap_decap_resource *)e;
10229 				data = encap_decap->buf;
10230 				size = encap_decap->size;
10231 				id = (uint64_t)(uintptr_t)encap_decap->action;
10232 				type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
10233 				save_dump_file(data, size, type,
10234 					id, NULL, file);
10235 				e = LIST_NEXT(e, next);
10236 			}
10237 		}
10238 	}
10239 
10240 	/* get modify_hdr */
10241 	h = sh->modify_cmds;
10242 	if (h) {
10243 		lcore_index = rte_lcore_index(rte_lcore_id());
10244 		if (unlikely(lcore_index == -1)) {
10245 			lcore_index = MLX5_LIST_NLCORE;
10246 			rte_spinlock_lock(&h->l_const.lcore_lock);
10247 		}
10248 		i = lcore_index;
10249 
10250 		for (j = 0; j <= h->mask; j++) {
10251 			l_inconst = &h->buckets[j].l;
10252 			if (!l_inconst || !l_inconst->cache[i])
10253 				continue;
10254 
10255 			e = LIST_FIRST(&l_inconst->cache[i]->h);
10256 			while (e) {
10257 				modify_hdr =
10258 				(struct mlx5_flow_dv_modify_hdr_resource *)e;
10259 				data = (const uint8_t *)modify_hdr->actions;
10260 				size = (size_t)(modify_hdr->actions_num) * 8;
10261 				actions_num = modify_hdr->actions_num;
10262 				id = (uint64_t)(uintptr_t)modify_hdr->action;
10263 				type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
10264 				save_dump_file(data, size, type, id,
10265 						(void *)(&actions_num), file);
10266 				e = LIST_NEXT(e, next);
10267 			}
10268 		}
10269 
10270 		if (unlikely(lcore_index == MLX5_LIST_NLCORE))
10271 			rte_spinlock_unlock(&h->l_const.lcore_lock);
10272 	}
10273 
10274 	/* get counter */
10275 	MLX5_ASSERT(cmng->n_valid <= MLX5_COUNTER_POOLS_MAX_NUM);
10276 	max = MLX5_COUNTERS_PER_POOL * cmng->n_valid;
10277 	for (j = 1; j <= max; j++) {
10278 		action = NULL;
10279 		if ((!mlx5_counter_query(dev, j, false, &count.hits,
10280 					 &count.bytes, &action)) && action) {
10281 			id = (uint64_t)(uintptr_t)action;
10282 			type = DR_DUMP_REC_TYPE_PMD_COUNTER;
10283 			save_dump_file(NULL, 0, type,
10284 					id, (void *)&count, file);
10285 		}
10286 	}
10287 	return 0;
10288 }
10289 #endif
10290 
10291 /**
10292  * Dump flow raw hw data to file
10293  *
10294  * @param[in] dev
10295  *   The pointer to Ethernet device.
10296  * @param[in] file
10297  *   A pointer to a file for output.
10298  * @param[out] error
10299  *   Perform verbose error reporting if not NULL. PMDs initialize this
10300  *   structure in case of error only.
10301  * @return
10302  *   0 on success, a negative value otherwise.
10303  */
10304 int
10305 mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow_idx,
10306 		   FILE *file,
10307 		   struct rte_flow_error *error __rte_unused)
10308 {
10309 	struct mlx5_priv *priv = dev->data->dev_private;
10310 	struct mlx5_dev_ctx_shared *sh = priv->sh;
10311 	uint32_t handle_idx;
10312 	int ret;
10313 	struct mlx5_flow_handle *dh;
10314 	struct rte_flow *flow;
10315 
10316 	if (!sh->config.dv_flow_en) {
10317 		if (fputs("device dv flow disabled\n", file) <= 0)
10318 			return -errno;
10319 		return -ENOTSUP;
10320 	}
10321 
10322 	/* dump all */
10323 	if (!flow_idx) {
10324 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
10325 		if (mlx5_flow_dev_dump_sh_all(dev, file, error))
10326 			return -EINVAL;
10327 
10328 		if (sh->config.dv_flow_en == 2)
10329 			return mlx5dr_debug_dump(priv->dr_ctx, file);
10330 #endif
10331 		return mlx5_devx_cmd_flow_dump(sh->fdb_domain,
10332 					       sh->rx_domain,
10333 					       sh->tx_domain, file);
10334 	}
10335 	/* dump one */
10336 	flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
10337 			(uintptr_t)(void *)flow_idx);
10338 	if (!flow)
10339 		return -EINVAL;
10340 
10341 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
10342 	mlx5_flow_dev_dump_ipool(dev, flow, file, error);
10343 #endif
10344 	handle_idx = flow->dev_handles;
10345 	while (handle_idx) {
10346 		dh = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
10347 				handle_idx);
10348 		if (!dh)
10349 			return -ENOENT;
10350 		if (dh->drv_flow) {
10351 			if (sh->config.dv_flow_en == 2)
10352 				return -ENOTSUP;
10353 
10354 			ret = mlx5_devx_cmd_flow_single_dump(dh->drv_flow,
10355 							     file);
10356 			if (ret)
10357 				return -ENOENT;
10358 		}
10359 		handle_idx = dh->next.next;
10360 	}
10361 	return 0;
10362 }
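
/*
 * Editor's sketch: applications reach the dump entry point above through
 * the generic ethdev API. Assumed application-side usage:
 */
static int
example_dump_all_flows(uint16_t port_id, const char *path)
{
	struct rte_flow_error error;
	FILE *f = fopen(path, "w");
	int ret;

	if (f == NULL)
		return -errno;
	/* A NULL flow handle requests a dump of all flows on the port. */
	ret = rte_flow_dev_dump(port_id, NULL, f, &error);
	fclose(f);
	return ret;
}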
10363 
10364 /**
10365  * Get aged-out flows.
10366  *
10367  * @param[in] dev
10368  *   Pointer to the Ethernet device structure.
10369  * @param[in] context
10370  *   The address of an array of pointers to the aged-out flows contexts.
10371  * @param[in] nb_countexts
10372  *   The length of context array pointers.
10373  * @param[out] error
10374  *   Perform verbose error reporting if not NULL. Initialized in case of
10375  *   error only.
10376  *
10377  * @return
10378  *   The number of contexts returned on success, a negative errno value
10379  *   otherwise. If nb_contexts is 0, the total number of aged contexts is
10380  *   returned. If nb_contexts is not 0, the number of aged flows reported
10381  *   in the context array is returned.
10382  */
10383 int
10384 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
10385 			uint32_t nb_contexts, struct rte_flow_error *error)
10386 {
10387 	struct rte_flow_attr attr = { .transfer = 0 };
10388 
10389 	return flow_get_drv_ops(flow_get_drv_type(dev, &attr))->get_aged_flows
10390 		(dev, contexts, nb_contexts, error);
10391 }
10392 
10393 /**
10394  * Get aged-out flows per HWS queue.
10395  *
10396  * @param[in] dev
10397  *   Pointer to the Ethernet device structure.
10398  * @param[in] queue_id
10399  *   Flow queue to query.
10400  * @param[in] contexts
10401  *   The address of an array of pointers to the aged-out flow contexts.
10402  * @param[in] nb_contexts
10403  *   The length of the context array.
10404  * @param[out] error
10405  *   Perform verbose error reporting if not NULL. Initialized in case of
10406  *   error only.
10407  *
10408  * @return
10409  *   The number of contexts returned on success, a negative errno value
10410  *   otherwise. If nb_contexts is 0, the total number of aged contexts is
10411  *   returned. If nb_contexts is not 0, the number of aged flows reported
10412  *   in the context array is returned.
10413  */
10414 int
10415 mlx5_flow_get_q_aged_flows(struct rte_eth_dev *dev, uint32_t queue_id,
10416 			   void **contexts, uint32_t nb_contexts,
10417 			   struct rte_flow_error *error)
10418 {
10419 	const struct mlx5_flow_driver_ops *fops;
10420 	struct rte_flow_attr attr = { 0 };
10421 
10422 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_HW) {
10423 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
10424 		return fops->get_q_aged_flows(dev, queue_id, contexts,
10425 					      nb_contexts, error);
10426 	}
10427 	DRV_LOG(ERR, "port %u queue %u get aged flows is not supported.",
10428 		dev->data->port_id, queue_id);
10429 	return rte_flow_error_set(error, ENOTSUP,
10430 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10431 				  "get Q aged flows with incorrect steering mode");
10432 }
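
/*
 * Editor's sketch of the documented two-step pattern: query the number
 * of aged-out flows first (nb_contexts == 0), then fetch the contexts.
 * Assumed application-side usage of the generic API:
 */
static int
example_drain_aged_flows(uint16_t port_id)
{
	struct rte_flow_error error;
	void **contexts;
	int total, fetched;

	total = rte_flow_get_aged_flows(port_id, NULL, 0, &error);
	if (total <= 0)
		return total;
	contexts = mlx5_malloc(MLX5_MEM_ZERO, total * sizeof(*contexts),
			       0, SOCKET_ID_ANY);
	if (contexts == NULL)
		return -ENOMEM;
	fetched = rte_flow_get_aged_flows(port_id, contexts, total, &error);
	/* ... destroy or recycle the flow behind each context ... */
	mlx5_free(contexts);
	return fetched;
}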
10433 
10434 /* Wrapper for driver action_validate op callback */
10435 static int
10436 flow_drv_action_validate(struct rte_eth_dev *dev,
10437 			 const struct rte_flow_indir_action_conf *conf,
10438 			 const struct rte_flow_action *action,
10439 			 const struct mlx5_flow_driver_ops *fops,
10440 			 struct rte_flow_error *error)
10441 {
10442 	static const char err_msg[] = "indirect action validation unsupported";
10443 
10444 	if (!fops->action_validate) {
10445 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10446 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10447 				   NULL, err_msg);
10448 		return -rte_errno;
10449 	}
10450 	return fops->action_validate(dev, conf, action, error);
10451 }
10452 
10453 /**
10454  * Destroys the shared action by handle.
10455  *
10456  * @param dev
10457  *   Pointer to Ethernet device structure.
10458  * @param[in] handle
10459  *   Handle for the indirect action object to be destroyed.
10460  * @param[out] error
10461  *   Perform verbose error reporting if not NULL. PMDs initialize this
10462  *   structure in case of error only.
10463  *
10464  * @return
10465  *   0 on success, a negative errno value otherwise and rte_errno is set.
10466  *
10467  * @note: wrapper for driver action_destroy op callback.
10468  */
10469 static int
10470 mlx5_action_handle_destroy(struct rte_eth_dev *dev,
10471 			   struct rte_flow_action_handle *handle,
10472 			   struct rte_flow_error *error)
10473 {
10474 	static const char err_msg[] = "indirect action destruction unsupported";
10475 	struct rte_flow_attr attr = { .transfer = 0 };
10476 	const struct mlx5_flow_driver_ops *fops =
10477 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10478 
10479 	if (!fops->action_destroy) {
10480 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10481 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10482 				   NULL, err_msg);
10483 		return -rte_errno;
10484 	}
10485 	return fops->action_destroy(dev, handle, error);
10486 }
10487 
10488 /* Wrapper for driver action_update op callback */
10489 static int
10490 flow_drv_action_update(struct rte_eth_dev *dev,
10491 		       struct rte_flow_action_handle *handle,
10492 		       const void *update,
10493 		       const struct mlx5_flow_driver_ops *fops,
10494 		       struct rte_flow_error *error)
10495 {
10496 	static const char err_msg[] = "indirect action update unsupported";
10497 
10498 	if (!fops->action_update) {
10499 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10500 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10501 				   NULL, err_msg);
10502 		return -rte_errno;
10503 	}
10504 	return fops->action_update(dev, handle, update, error);
10505 }
10506 
10507 /* Wrapper for driver action_query op callback */
10508 static int
10509 flow_drv_action_query(struct rte_eth_dev *dev,
10510 		      const struct rte_flow_action_handle *handle,
10511 		      void *data,
10512 		      const struct mlx5_flow_driver_ops *fops,
10513 		      struct rte_flow_error *error)
10514 {
10515 	static const char err_msg[] = "indirect action query unsupported";
10516 
10517 	if (!fops->action_query) {
10518 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10519 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10520 				   NULL, err_msg);
10521 		return -rte_errno;
10522 	}
10523 	return fops->action_query(dev, handle, data, error);
10524 }
10525 
10526 /**
10527  * Create indirect action for reuse in multiple flow rules.
10528  *
10529  * @param dev
10530  *   Pointer to Ethernet device structure.
10531  * @param conf
10532  *   Pointer to indirect action object configuration.
10533  * @param[in] action
10534  *   Action configuration for indirect action object creation.
10535  * @param[out] error
10536  *   Perform verbose error reporting if not NULL. PMDs initialize this
10537  *   structure in case of error only.
10538  * @return
10539  *   A valid handle in case of success, NULL otherwise and rte_errno is set.
10540  */
10541 static struct rte_flow_action_handle *
10542 mlx5_action_handle_create(struct rte_eth_dev *dev,
10543 			  const struct rte_flow_indir_action_conf *conf,
10544 			  const struct rte_flow_action *action,
10545 			  struct rte_flow_error *error)
10546 {
10547 	static const char err_msg[] = "indirect action creation unsupported";
10548 	struct rte_flow_attr attr = { .transfer = 0 };
10549 	const struct mlx5_flow_driver_ops *fops =
10550 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10551 
10552 	if (flow_drv_action_validate(dev, conf, action, fops, error))
10553 		return NULL;
10554 	if (!fops->action_create) {
10555 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10556 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10557 				   NULL, err_msg);
10558 		return NULL;
10559 	}
10560 	return fops->action_create(dev, conf, action, error);
10561 }
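
/*
 * Editor's sketch: creating a shared counter once and reusing it in many
 * rules. Assumed application-side usage of the generic API:
 */
static struct rte_flow_action_handle *
example_create_shared_counter(uint16_t port_id, struct rte_flow_error *error)
{
	static const struct rte_flow_action_count count_conf = { .id = 0 };
	const struct rte_flow_indir_action_conf conf = {
		.ingress = 1, /* Valid for ingress rules only. */
	};
	const struct rte_flow_action action = {
		.type = RTE_FLOW_ACTION_TYPE_COUNT,
		.conf = &count_conf,
	};

	return rte_flow_action_handle_create(port_id, &conf, &action, error);
}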
10562 
10563 /**
10564  * Updates in place the indirect action configuration pointed to by
10565  * *handle* with the configuration provided as the *update* argument.
10566  * The update of the indirect action configuration affects all flow rules
10567  * reusing the action via the handle.
10568  *
10569  * @param dev
10570  *   Pointer to Ethernet device structure.
10571  * @param[in] handle
10572  *   Handle for the indirect action to be updated.
10573  * @param[in] update
10574  *   Action specification used to modify the action pointed by handle.
10575  *   *update* could be of the same type as the action pointed to by the
10576  *   *handle* argument, or some other structure like a wrapper, depending on
10577  *   the indirect action type.
10578  * @param[out] error
10579  *   Perform verbose error reporting if not NULL. PMDs initialize this
10580  *   structure in case of error only.
10581  *
10582  * @return
10583  *   0 on success, a negative errno value otherwise and rte_errno is set.
10584  */
10585 static int
10586 mlx5_action_handle_update(struct rte_eth_dev *dev,
10587 		struct rte_flow_action_handle *handle,
10588 		const void *update,
10589 		struct rte_flow_error *error)
10590 {
10591 	struct rte_flow_attr attr = { .transfer = 0 };
10592 	const struct mlx5_flow_driver_ops *fops =
10593 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10594 	int ret;
10595 
10596 	ret = flow_drv_action_validate(dev, NULL,
10597 			(const struct rte_flow_action *)update, fops, error);
10598 	if (ret)
10599 		return ret;
10600 	return flow_drv_action_update(dev, handle, update, fops,
10601 				      error);
10602 }
10603 
10604 /**
10605  * Query the indirect action by handle.
10606  *
10607  * This function allows retrieving action-specific data such as counters.
10608  * Data is gathered by a special action which may be present/referenced in
10609  * more than one flow rule definition.
10610  *
10611  * see @RTE_FLOW_ACTION_TYPE_COUNT
10612  *
10613  * @param dev
10614  *   Pointer to Ethernet device structure.
10615  * @param[in] handle
10616  *   Handle for the indirect action to query.
10617  * @param[in, out] data
10618  *   Pointer to storage for the associated query data type.
10619  * @param[out] error
10620  *   Perform verbose error reporting if not NULL. PMDs initialize this
10621  *   structure in case of error only.
10622  *
10623  * @return
10624  *   0 on success, a negative errno value otherwise and rte_errno is set.
10625  */
10626 static int
10627 mlx5_action_handle_query(struct rte_eth_dev *dev,
10628 			 const struct rte_flow_action_handle *handle,
10629 			 void *data,
10630 			 struct rte_flow_error *error)
10631 {
10632 	struct rte_flow_attr attr = { .transfer = 0 };
10633 	const struct mlx5_flow_driver_ops *fops =
10634 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10635 
10636 	return flow_drv_action_query(dev, handle, data, fops, error);
10637 }
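
/*
 * Editor's sketch: reading the shared counter back through the query
 * path above. The data type must match the action type; COUNT uses
 * struct rte_flow_query_count. Assumed usage:
 */
static int
example_read_shared_counter(uint16_t port_id,
			    const struct rte_flow_action_handle *handle,
			    uint64_t *hits, uint64_t *bytes,
			    struct rte_flow_error *error)
{
	struct rte_flow_query_count qc = { .reset = 0 };
	int ret;

	ret = rte_flow_action_handle_query(port_id, handle, &qc, error);
	if (ret == 0) {
		*hits = qc.hits;
		*bytes = qc.bytes;
	}
	return ret;
}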
10638 
10639 /**
10640  * Destroy all indirect actions (shared RSS).
10641  *
10642  * @param dev
10643  *   Pointer to Ethernet device.
10644  *
10645  * @return
10646  *   0 on success, a negative errno value otherwise and rte_errno is set.
10647  */
10648 int
10649 mlx5_action_handle_flush(struct rte_eth_dev *dev)
10650 {
10651 	struct rte_flow_error error;
10652 	struct mlx5_priv *priv = dev->data->dev_private;
10653 	struct mlx5_shared_action_rss *shared_rss;
10654 	int ret = 0;
10655 	uint32_t idx;
10656 
10657 	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
10658 		      priv->rss_shared_actions, idx, shared_rss, next) {
10659 		ret |= mlx5_action_handle_destroy(dev,
10660 		       (struct rte_flow_action_handle *)(uintptr_t)idx, &error);
10661 	}
10662 	return ret;
10663 }
10664 
10665 /**
10666  * Validate existing indirect actions against current device configuration
10667  * and attach them to device resources.
10668  *
10669  * @param dev
10670  *   Pointer to Ethernet device.
10671  *
10672  * @return
10673  *   0 on success, a negative errno value otherwise and rte_errno is set.
10674  */
10675 int
10676 mlx5_action_handle_attach(struct rte_eth_dev *dev)
10677 {
10678 	struct mlx5_priv *priv = dev->data->dev_private;
10679 	int ret = 0;
10680 	struct mlx5_ind_table_obj *ind_tbl, *ind_tbl_last;
10681 
10682 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
10683 		const char *message;
10684 		uint32_t queue_idx;
10685 
10686 		ret = mlx5_validate_rss_queues(dev, ind_tbl->queues,
10687 					       ind_tbl->queues_n,
10688 					       &message, &queue_idx);
10689 		if (ret != 0) {
10690 			DRV_LOG(ERR, "Port %u cannot use queue %u in RSS: %s",
10691 				dev->data->port_id, ind_tbl->queues[queue_idx],
10692 				message);
10693 			break;
10694 		}
10695 	}
10696 	if (ret != 0)
10697 		return ret;
10698 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
10699 		ret = mlx5_ind_table_obj_attach(dev, ind_tbl);
10700 		if (ret != 0) {
10701 			DRV_LOG(ERR, "Port %u could not attach "
10702 				"indirection table obj %p",
10703 				dev->data->port_id, (void *)ind_tbl);
10704 			goto error;
10705 		}
10706 	}
10707 
10708 	return 0;
10709 error:
10710 	ind_tbl_last = ind_tbl;
10711 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
10712 		if (ind_tbl == ind_tbl_last)
10713 			break;
10714 		if (mlx5_ind_table_obj_detach(dev, ind_tbl) != 0)
10715 			DRV_LOG(CRIT, "Port %u could not detach "
10716 				"indirection table obj %p on rollback",
10717 				dev->data->port_id, (void *)ind_tbl);
10718 	}
10719 	return ret;
10720 }
10721 
10722 /**
10723  * Detach indirect actions of the device from its resources.
10724  *
10725  * @param dev
10726  *   Pointer to Ethernet device.
10727  *
10728  * @return
10729  *   0 on success, a negative errno value otherwise and rte_errno is set.
10730  */
10731 int
10732 mlx5_action_handle_detach(struct rte_eth_dev *dev)
10733 {
10734 	struct mlx5_priv *priv = dev->data->dev_private;
10735 	int ret = 0;
10736 	struct mlx5_ind_table_obj *ind_tbl, *ind_tbl_last;
10737 
10738 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
10739 		ret = mlx5_ind_table_obj_detach(dev, ind_tbl);
10740 		if (ret != 0) {
10741 			DRV_LOG(ERR, "Port %u could not detach "
10742 				"indirection table obj %p",
10743 				dev->data->port_id, (void *)ind_tbl);
10744 			goto error;
10745 		}
10746 	}
10747 	return 0;
10748 error:
10749 	ind_tbl_last = ind_tbl;
10750 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
10751 		if (ind_tbl == ind_tbl_last)
10752 			break;
10753 		if (mlx5_ind_table_obj_attach(dev, ind_tbl) != 0)
10754 			DRV_LOG(CRIT, "Port %u could not attach "
10755 				"indirection table obj %p on rollback",
10756 				dev->data->port_id, (void *)ind_tbl);
10757 	}
10758 	return ret;
10759 }
10760 
10761 #ifndef HAVE_MLX5DV_DR
10762 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
10763 #else
10764 #define MLX5_DOMAIN_SYNC_FLOW \
10765 	(MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
10766 #endif
10767 
10768 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
10769 {
10770 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
10771 	const struct mlx5_flow_driver_ops *fops;
10772 	int ret;
10773 	struct rte_flow_attr attr = { .transfer = 0 };
10774 
10775 	fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10776 	ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
10777 	if (ret > 0)
10778 		ret = -ret;
10779 	return ret;
10780 }
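
/*
 * Editor's sketch: flushing software/hardware steering caches through
 * the PMD-private API above, declared in rte_pmd_mlx5.h. Assumed usage:
 */
static int
example_sync_steering(uint16_t port_id)
{
	/* Synchronize both the NIC RX and FDB domains. */
	return rte_pmd_mlx5_sync_flow(port_id,
				      MLX5_DOMAIN_BIT_NIC_RX |
				      MLX5_DOMAIN_BIT_FDB);
}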
10781 
10782 const struct mlx5_flow_tunnel *
10783 mlx5_get_tof(const struct rte_flow_item *item,
10784 	     const struct rte_flow_action *action,
10785 	     enum mlx5_tof_rule_type *rule_type)
10786 {
10787 	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
10788 		if (item->type == (typeof(item->type))
10789 				  MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL) {
10790 			*rule_type = MLX5_TUNNEL_OFFLOAD_MATCH_RULE;
10791 			return flow_items_to_tunnel(item);
10792 		}
10793 	}
10794 	for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
10795 		if (action->type == (typeof(action->type))
10796 				    MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET) {
10797 			*rule_type = MLX5_TUNNEL_OFFLOAD_SET_RULE;
10798 			return flow_actions_to_tunnel(action);
10799 		}
10800 	}
10801 	return NULL;
10802 }
10803 
10804 /**
10805  * Tunnel offload functionality is defined for the DV environment only.
10806  */
10807 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
10808 __extension__
10809 union tunnel_offload_mark {
10810 	uint32_t val;
10811 	struct {
10812 		uint32_t app_reserve:8;
10813 		uint32_t table_id:15;
10814 		uint32_t transfer:1;
10815 		uint32_t _unused_:8;
10816 	};
10817 };
10818 
10819 static bool
10820 mlx5_access_tunnel_offload_db
10821 	(struct rte_eth_dev *dev,
10822 	 bool (*match)(struct rte_eth_dev *,
10823 		       struct mlx5_flow_tunnel *, const void *),
10824 	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
10825 	 void (*miss)(struct rte_eth_dev *, void *),
10826 	 void *ctx, bool lock_op);
10827 
10828 static int
10829 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
10830 			     struct rte_flow *flow,
10831 			     const struct rte_flow_attr *attr,
10832 			     const struct rte_flow_action *app_actions,
10833 			     uint32_t flow_idx,
10834 			     const struct mlx5_flow_tunnel *tunnel,
10835 			     struct tunnel_default_miss_ctx *ctx,
10836 			     struct rte_flow_error *error)
10837 {
10838 	struct mlx5_priv *priv = dev->data->dev_private;
10839 	struct mlx5_flow *dev_flow;
10840 	struct rte_flow_attr miss_attr = *attr;
10841 	const struct rte_flow_item miss_items[2] = {
10842 		{
10843 			.type = RTE_FLOW_ITEM_TYPE_ETH,
10844 			.spec = NULL,
10845 			.last = NULL,
10846 			.mask = NULL
10847 		},
10848 		{
10849 			.type = RTE_FLOW_ITEM_TYPE_END,
10850 			.spec = NULL,
10851 			.last = NULL,
10852 			.mask = NULL
10853 		}
10854 	};
10855 	union tunnel_offload_mark mark_id;
10856 	struct rte_flow_action_mark miss_mark;
10857 	struct rte_flow_action miss_actions[3] = {
10858 		[0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
10859 		[2] = { .type = RTE_FLOW_ACTION_TYPE_END,  .conf = NULL }
10860 	};
10861 	const struct rte_flow_action_jump *jump_data;
10862 	uint32_t i, flow_table = 0; /* prevent compilation warning */
10863 	struct flow_grp_info grp_info = {
10864 		.external = 1,
10865 		.transfer = attr->transfer,
10866 		.fdb_def_rule = !!priv->fdb_def_rule,
10867 		.std_tbl_fix = 0,
10868 	};
10869 	int ret;
10870 
10871 	if (!attr->transfer) {
10872 		uint32_t q_size;
10873 
10874 		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
10875 		q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
10876 		ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
10877 					 0, SOCKET_ID_ANY);
10878 		if (!ctx->queue)
10879 			return rte_flow_error_set
10880 				(error, ENOMEM,
10881 				RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10882 				NULL, "invalid default miss RSS");
10883 		ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT;
10884 		ctx->action_rss.level = 0;
10885 		ctx->action_rss.types = priv->rss_conf.rss_hf;
10886 		ctx->action_rss.key_len = priv->rss_conf.rss_key_len;
10887 		ctx->action_rss.queue_num = priv->reta_idx_n;
10888 		ctx->action_rss.key = priv->rss_conf.rss_key;
10889 		ctx->action_rss.queue = ctx->queue;
10890 		if (!priv->reta_idx_n || !priv->rxqs_n)
10891 			return rte_flow_error_set
10892 				(error, EINVAL,
10893 				RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10894 				NULL, "invalid port configuration");
10895 		if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
10896 			ctx->action_rss.types = 0;
10897 		for (i = 0; i != priv->reta_idx_n; ++i)
10898 			ctx->queue[i] = (*priv->reta_idx)[i];
10899 	} else {
10900 		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
10901 		ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
10902 	}
10903 	miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
10904 	for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
10905 	jump_data = app_actions->conf;
10906 	miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
10907 	miss_attr.group = jump_data->group;
10908 	ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
10909 				       &flow_table, &grp_info, error);
10910 	if (ret)
10911 		return rte_flow_error_set(error, EINVAL,
10912 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10913 					  NULL, "invalid tunnel id");
10914 	mark_id.app_reserve = 0;
10915 	mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
10916 	mark_id.transfer = !!attr->transfer;
10917 	mark_id._unused_ = 0;
10918 	miss_mark.id = mark_id.val;
10919 	dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
10920 				    miss_items, miss_actions, flow_idx, error);
10921 	if (!dev_flow)
10922 		return -rte_errno;
10923 	dev_flow->flow = flow;
10924 	dev_flow->external = true;
10925 	dev_flow->tunnel = tunnel;
10926 	dev_flow->tof_type = MLX5_TUNNEL_OFFLOAD_MISS_RULE;
10927 	/* Subflow object was created, we must include one in the list. */
10928 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
10929 		      dev_flow->handle, next);
10930 	DRV_LOG(DEBUG,
10931 		"port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
10932 		dev->data->port_id, tunnel->app_tunnel.type,
10933 		tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
10934 	ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
10935 				  miss_actions, error);
10936 	if (!ret)
10937 		ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
10938 						  error);
10939 
10940 	return ret;
10941 }
10942 
10943 static const struct mlx5_flow_tbl_data_entry  *
10944 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
10945 {
10946 	struct mlx5_priv *priv = dev->data->dev_private;
10947 	struct mlx5_dev_ctx_shared *sh = priv->sh;
10948 	struct mlx5_list_entry *he;
10949 	union tunnel_offload_mark mbits = { .val = mark };
10950 	union mlx5_flow_tbl_key table_key = {
10951 		{
10952 			.level = tunnel_id_to_flow_tbl(mbits.table_id),
10953 			.id = 0,
10954 			.reserved = 0,
10955 			.dummy = 0,
10956 			.is_fdb = !!mbits.transfer,
10957 			.is_egress = 0,
10958 		}
10959 	};
10960 	struct mlx5_flow_cb_ctx ctx = {
10961 		.data = &table_key.v64,
10962 	};
10963 
10964 	he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, &ctx);
10965 	return he ?
10966 	       container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL;
10967 }
10968 
10969 static void
10970 mlx5_flow_tunnel_grp2tbl_remove_cb(void *tool_ctx,
10971 				   struct mlx5_list_entry *entry)
10972 {
10973 	struct mlx5_dev_ctx_shared *sh = tool_ctx;
10974 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10975 
10976 	mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10977 			tunnel_flow_tbl_to_id(tte->flow_table));
10978 	mlx5_free(tte);
10979 }
10980 
10981 static int
10982 mlx5_flow_tunnel_grp2tbl_match_cb(void *tool_ctx __rte_unused,
10983 				  struct mlx5_list_entry *entry, void *cb_ctx)
10984 {
10985 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
10986 	union tunnel_tbl_key tbl = {
10987 		.val = *(uint64_t *)(ctx->data),
10988 	};
10989 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10990 
10991 	return tbl.tunnel_id != tte->tunnel_id || tbl.group != tte->group;
10992 }
10993 
10994 static struct mlx5_list_entry *
10995 mlx5_flow_tunnel_grp2tbl_create_cb(void *tool_ctx, void *cb_ctx)
10996 {
10997 	struct mlx5_dev_ctx_shared *sh = tool_ctx;
10998 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
10999 	struct tunnel_tbl_entry *tte;
11000 	union tunnel_tbl_key tbl = {
11001 		.val = *(uint64_t *)(ctx->data),
11002 	};
11003 
11004 	tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
11005 			  sizeof(*tte), 0,
11006 			  SOCKET_ID_ANY);
11007 	if (!tte)
11008 		goto err;
11009 	mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
11010 			  &tte->flow_table);
11011 	if (tte->flow_table >= MLX5_MAX_TABLES) {
11012 		DRV_LOG(ERR, "Tunnel TBL ID %d exceed max limit.",
11013 			tte->flow_table);
11014 		mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
11015 				tte->flow_table);
11016 		goto err;
11017 	} else if (!tte->flow_table) {
11018 		goto err;
11019 	}
11020 	tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
11021 	tte->tunnel_id = tbl.tunnel_id;
11022 	tte->group = tbl.group;
11023 	return &tte->hash;
11024 err:
11025 	if (tte)
11026 		mlx5_free(tte);
11027 	return NULL;
11028 }
11029 
11030 static struct mlx5_list_entry *
11031 mlx5_flow_tunnel_grp2tbl_clone_cb(void *tool_ctx __rte_unused,
11032 				  struct mlx5_list_entry *oentry,
11033 				  void *cb_ctx __rte_unused)
11034 {
11035 	struct tunnel_tbl_entry *tte = mlx5_malloc(MLX5_MEM_SYS, sizeof(*tte),
11036 						   0, SOCKET_ID_ANY);
11037 
11038 	if (!tte)
11039 		return NULL;
11040 	memcpy(tte, oentry, sizeof(*tte));
11041 	return &tte->hash;
11042 }
11043 
11044 static void
11045 mlx5_flow_tunnel_grp2tbl_clone_free_cb(void *tool_ctx __rte_unused,
11046 				       struct mlx5_list_entry *entry)
11047 {
11048 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
11049 
11050 	mlx5_free(tte);
11051 }
11052 
11053 static uint32_t
11054 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
11055 				const struct mlx5_flow_tunnel *tunnel,
11056 				uint32_t group, uint32_t *table,
11057 				struct rte_flow_error *error)
11058 {
11059 	struct mlx5_list_entry *he;
11060 	struct tunnel_tbl_entry *tte;
11061 	union tunnel_tbl_key key = {
11062 		.tunnel_id = tunnel ? tunnel->tunnel_id : 0,
11063 		.group = group
11064 	};
11065 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
11066 	struct mlx5_hlist *group_hash;
11067 	struct mlx5_flow_cb_ctx ctx = {
11068 		.data = &key.val,
11069 	};
11070 
11071 	group_hash = tunnel ? tunnel->groups : thub->groups;
11072 	he = mlx5_hlist_register(group_hash, key.val, &ctx);
11073 	if (!he)
11074 		return rte_flow_error_set(error, EINVAL,
11075 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
11076 					  NULL,
11077 					  "tunnel group index not supported");
11078 	tte = container_of(he, typeof(*tte), hash);
11079 	*table = tte->flow_table;
11080 	DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
11081 		dev->data->port_id, key.tunnel_id, group, *table);
11082 	return 0;
11083 }
11084 
11085 static void
11086 mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
11087 		      struct mlx5_flow_tunnel *tunnel)
11088 {
11089 	struct mlx5_priv *priv = dev->data->dev_private;
11090 	struct mlx5_indexed_pool *ipool;
11091 
11092 	DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
11093 		dev->data->port_id, tunnel->tunnel_id);
11094 	LIST_REMOVE(tunnel, chain);
11095 	mlx5_hlist_destroy(tunnel->groups);
11096 	ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
11097 	mlx5_ipool_free(ipool, tunnel->tunnel_id);
11098 }
11099 
11100 static bool
11101 mlx5_access_tunnel_offload_db
11102 	(struct rte_eth_dev *dev,
11103 	 bool (*match)(struct rte_eth_dev *,
11104 		       struct mlx5_flow_tunnel *, const void *),
11105 	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
11106 	 void (*miss)(struct rte_eth_dev *, void *),
11107 	 void *ctx, bool lock_op)
11108 {
11109 	bool verdict = false;
11110 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
11111 	struct mlx5_flow_tunnel *tunnel;
11112 
11113 	rte_spinlock_lock(&thub->sl);
11114 	LIST_FOREACH(tunnel, &thub->tunnels, chain) {
11115 		verdict = match(dev, tunnel, (const void *)ctx);
11116 		if (verdict)
11117 			break;
11118 	}
11119 	if (!lock_op)
11120 		rte_spinlock_unlock(&thub->sl);
11121 	if (verdict && hit)
11122 		hit(dev, tunnel, ctx);
11123 	if (!verdict && miss)
11124 		miss(dev, ctx);
11125 	if (lock_op)
11126 		rte_spinlock_unlock(&thub->sl);
11127 
11128 	return verdict;
11129 }
11130 
11131 struct tunnel_db_find_tunnel_id_ctx {
11132 	uint32_t tunnel_id;
11133 	struct mlx5_flow_tunnel *tunnel;
11134 };
11135 
11136 static bool
11137 find_tunnel_id_match(struct rte_eth_dev *dev,
11138 		     struct mlx5_flow_tunnel *tunnel, const void *x)
11139 {
11140 	const struct tunnel_db_find_tunnel_id_ctx *ctx = x;
11141 
11142 	RTE_SET_USED(dev);
11143 	return tunnel->tunnel_id == ctx->tunnel_id;
11144 }
11145 
11146 static void
11147 find_tunnel_id_hit(struct rte_eth_dev *dev,
11148 		   struct mlx5_flow_tunnel *tunnel, void *x)
11149 {
11150 	struct tunnel_db_find_tunnel_id_ctx *ctx = x;
11151 	RTE_SET_USED(dev);
11152 	ctx->tunnel = tunnel;
11153 }
11154 
11155 static struct mlx5_flow_tunnel *
11156 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
11157 {
11158 	struct tunnel_db_find_tunnel_id_ctx ctx = {
11159 		.tunnel_id = id,
11160 	};
11161 
11162 	mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
11163 				      find_tunnel_id_hit, NULL, &ctx, true);
11164 
11165 	return ctx.tunnel;
11166 }
11167 
11168 static struct mlx5_flow_tunnel *
11169 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
11170 			  const struct rte_flow_tunnel *app_tunnel)
11171 {
11172 	struct mlx5_priv *priv = dev->data->dev_private;
11173 	struct mlx5_indexed_pool *ipool;
11174 	struct mlx5_flow_tunnel *tunnel;
11175 	uint32_t id;
11176 
11177 	ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
11178 	tunnel = mlx5_ipool_zmalloc(ipool, &id);
11179 	if (!tunnel)
11180 		return NULL;
11181 	if (id >= MLX5_MAX_TUNNELS) {
11182 		mlx5_ipool_free(ipool, id);
11183 		DRV_LOG(ERR, "Tunnel ID %d exceed max limit.", id);
11184 		return NULL;
11185 	}
11186 	tunnel->groups = mlx5_hlist_create("tunnel groups", 64, false, true,
11187 					   priv->sh,
11188 					   mlx5_flow_tunnel_grp2tbl_create_cb,
11189 					   mlx5_flow_tunnel_grp2tbl_match_cb,
11190 					   mlx5_flow_tunnel_grp2tbl_remove_cb,
11191 					   mlx5_flow_tunnel_grp2tbl_clone_cb,
11192 					mlx5_flow_tunnel_grp2tbl_clone_free_cb);
11193 	if (!tunnel->groups) {
11194 		mlx5_ipool_free(ipool, id);
11195 		return NULL;
11196 	}
11197 	/* initiate new PMD tunnel */
11198 	memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
11199 	tunnel->tunnel_id = id;
11200 	tunnel->action.type = (typeof(tunnel->action.type))
11201 			      MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
11202 	tunnel->action.conf = tunnel;
11203 	tunnel->item.type = (typeof(tunnel->item.type))
11204 			    MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
11205 	tunnel->item.spec = tunnel;
11206 	tunnel->item.last = NULL;
11207 	tunnel->item.mask = NULL;
11208 
11209 	DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
11210 		dev->data->port_id, tunnel->tunnel_id);
11211 
11212 	return tunnel;
11213 }
11214 
11215 struct tunnel_db_get_tunnel_ctx {
11216 	const struct rte_flow_tunnel *app_tunnel;
11217 	struct mlx5_flow_tunnel *tunnel;
11218 };
11219 
11220 static bool get_tunnel_match(struct rte_eth_dev *dev,
11221 			     struct mlx5_flow_tunnel *tunnel, const void *x)
11222 {
11223 	const struct tunnel_db_get_tunnel_ctx *ctx = x;
11224 
11225 	RTE_SET_USED(dev);
11226 	return !memcmp(ctx->app_tunnel, &tunnel->app_tunnel,
11227 		       sizeof(*ctx->app_tunnel));
11228 }
11229 
11230 static void get_tunnel_hit(struct rte_eth_dev *dev,
11231 			   struct mlx5_flow_tunnel *tunnel, void *x)
11232 {
11233 	/* called under tunnel spinlock protection */
11234 	struct tunnel_db_get_tunnel_ctx *ctx = x;
11235 
11236 	RTE_SET_USED(dev);
11237 	tunnel->refctn++;
11238 	ctx->tunnel = tunnel;
11239 }
11240 
11241 static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
11242 {
11243 	/* called under tunnel spinlock protection */
11244 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
11245 	struct tunnel_db_get_tunnel_ctx *ctx = x;
11246 
11247 	rte_spinlock_unlock(&thub->sl);
11248 	ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
11249 	rte_spinlock_lock(&thub->sl);
11250 	if (ctx->tunnel) {
11251 		ctx->tunnel->refctn = 1;
11252 		LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
11253 	}
11254 }
11255 
11256 
11257 static int
11258 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
11259 		     const struct rte_flow_tunnel *app_tunnel,
11260 		     struct mlx5_flow_tunnel **tunnel)
11261 {
11262 	struct tunnel_db_get_tunnel_ctx ctx = {
11263 		.app_tunnel = app_tunnel,
11264 	};
11265 
11266 	mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
11267 				      get_tunnel_miss, &ctx, true);
11268 	*tunnel = ctx.tunnel;
11269 	return ctx.tunnel ? 0 : -ENOMEM;
11270 }
11271 
11272 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
11273 {
11274 	struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
11275 
11276 	if (!thub)
11277 		return;
11278 	if (!LIST_EMPTY(&thub->tunnels))
11279 		DRV_LOG(WARNING, "port %u tunnels present", port_id);
11280 	mlx5_hlist_destroy(thub->groups);
11281 	mlx5_free(thub);
11282 }
11283 
11284 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
11285 {
11286 	int err;
11287 	struct mlx5_flow_tunnel_hub *thub;
11288 
11289 	thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
11290 			   0, SOCKET_ID_ANY);
11291 	if (!thub)
11292 		return -ENOMEM;
11293 	LIST_INIT(&thub->tunnels);
11294 	rte_spinlock_init(&thub->sl);
11295 	thub->groups = mlx5_hlist_create("flow groups", 64,
11296 					 false, true, sh,
11297 					 mlx5_flow_tunnel_grp2tbl_create_cb,
11298 					 mlx5_flow_tunnel_grp2tbl_match_cb,
11299 					 mlx5_flow_tunnel_grp2tbl_remove_cb,
11300 					 mlx5_flow_tunnel_grp2tbl_clone_cb,
11301 					mlx5_flow_tunnel_grp2tbl_clone_free_cb);
11302 	if (!thub->groups) {
11303 		err = -rte_errno;
11304 		goto err;
11305 	}
11306 	sh->tunnel_hub = thub;
11307 
11308 	return 0;
11309 
11310 err:
11311 	if (thub->groups)
11312 		mlx5_hlist_destroy(thub->groups);
11313 	if (thub)
11314 		mlx5_free(thub);
11315 	return err;
11316 }
11317 
11318 static inline int
11319 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
11320 			  struct rte_flow_tunnel *tunnel,
11321 			  struct rte_flow_error *error)
11322 {
11323 	struct mlx5_priv *priv = dev->data->dev_private;
11324 
11325 	if (!priv->sh->config.dv_flow_en)
11326 		return rte_flow_error_set(error, ENOTSUP,
11327 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
11328 					  "flow DV interface is off");
11329 	if (!is_tunnel_offload_active(dev))
11330 		return rte_flow_error_set(error, ENOTSUP,
11331 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
11332 					  "tunnel offload was not activated");
11333 	if (!tunnel)
11334 		return rte_flow_error_set(error, EINVAL,
11335 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
11336 					  "no application tunnel");
11337 	switch (tunnel->type) {
11338 	default:
11339 		return rte_flow_error_set(error, EINVAL,
11340 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
11341 					  "unsupported tunnel type");
11342 	case RTE_FLOW_ITEM_TYPE_VXLAN:
11343 	case RTE_FLOW_ITEM_TYPE_GRE:
11344 	case RTE_FLOW_ITEM_TYPE_NVGRE:
11345 	case RTE_FLOW_ITEM_TYPE_GENEVE:
11346 		break;
11347 	}
11348 	return 0;
11349 }
11350 
11351 static int
11352 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
11353 		    struct rte_flow_tunnel *app_tunnel,
11354 		    struct rte_flow_action **actions,
11355 		    uint32_t *num_of_actions,
11356 		    struct rte_flow_error *error)
11357 {
11358 	struct mlx5_flow_tunnel *tunnel;
11359 	int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
11360 
11361 	if (ret)
11362 		return ret;
11363 	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
11364 	if (ret < 0) {
11365 		return rte_flow_error_set(error, ret,
11366 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
11367 					  "failed to initialize pmd tunnel");
11368 	}
11369 	*actions = &tunnel->action;
11370 	*num_of_actions = 1;
11371 	return 0;
11372 }
11373 
11374 static int
11375 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
11376 		       struct rte_flow_tunnel *app_tunnel,
11377 		       struct rte_flow_item **items,
11378 		       uint32_t *num_of_items,
11379 		       struct rte_flow_error *error)
11380 {
11381 	struct mlx5_flow_tunnel *tunnel;
11382 	int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
11383 
11384 	if (ret)
11385 		return ret;
11386 	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
11387 	if (ret < 0) {
11388 		return rte_flow_error_set(error, ret,
11389 					  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
11390 					  "failed to initialize pmd tunnel");
11391 	}
11392 	*items = &tunnel->item;
11393 	*num_of_items = 1;
11394 	return 0;
11395 }
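
/*
 * Editor's sketch of the tunnel offload handshake implemented above: the
 * application asks the PMD for its private decap-set action and tunnel
 * match item, then embeds them in its own rules. Assumed usage:
 */
static int
example_tunnel_offload_setup(uint16_t port_id, struct rte_flow_error *error)
{
	struct rte_flow_tunnel tunnel = {
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
		.tun_id = 42, /* Illustrative VNI. */
	};
	struct rte_flow_action *pmd_actions;
	struct rte_flow_item *pmd_items;
	uint32_t n_actions, n_items;
	int ret;

	ret = rte_flow_tunnel_decap_set(port_id, &tunnel, &pmd_actions,
					&n_actions, error);
	if (ret)
		return ret;
	ret = rte_flow_tunnel_match(port_id, &tunnel, &pmd_items,
				    &n_items, error);
	/* ... prepend pmd_items/pmd_actions to the application rule ... */
	return ret;
}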
11396 
11397 struct tunnel_db_element_release_ctx {
11398 	struct rte_flow_item *items;
11399 	struct rte_flow_action *actions;
11400 	uint32_t num_elements;
11401 	struct rte_flow_error *error;
11402 	int ret;
11403 };
11404 
11405 static bool
11406 tunnel_element_release_match(struct rte_eth_dev *dev,
11407 			     struct mlx5_flow_tunnel *tunnel, const void *x)
11408 {
11409 	const struct tunnel_db_element_release_ctx *ctx = x;
11410 
11411 	RTE_SET_USED(dev);
11412 	if (ctx->num_elements != 1)
11413 		return false;
11414 	else if (ctx->items)
11415 		return ctx->items == &tunnel->item;
11416 	else if (ctx->actions)
11417 		return ctx->actions == &tunnel->action;
11418 
11419 	return false;
11420 }
11421 
11422 static void
11423 tunnel_element_release_hit(struct rte_eth_dev *dev,
11424 			   struct mlx5_flow_tunnel *tunnel, void *x)
11425 {
11426 	struct tunnel_db_element_release_ctx *ctx = x;
11427 	ctx->ret = 0;
11428 	if (!(__atomic_fetch_sub(&tunnel->refctn, 1, __ATOMIC_RELAXED) - 1))
11429 		mlx5_flow_tunnel_free(dev, tunnel);
11430 }
11431 
11432 static void
11433 tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
11434 {
11435 	struct tunnel_db_element_release_ctx *ctx = x;
11436 	RTE_SET_USED(dev);
11437 	ctx->ret = rte_flow_error_set(ctx->error, EINVAL,
11438 				      RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
11439 				      "invalid argument");
11440 }
11441 
11442 static int
11443 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
11444 		       struct rte_flow_item *pmd_items,
11445 		       uint32_t num_items, struct rte_flow_error *err)
11446 {
11447 	struct tunnel_db_element_release_ctx ctx = {
11448 		.items = pmd_items,
11449 		.actions = NULL,
11450 		.num_elements = num_items,
11451 		.error = err,
11452 	};
11453 
11454 	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
11455 				      tunnel_element_release_hit,
11456 				      tunnel_element_release_miss, &ctx, false);
11457 
11458 	return ctx.ret;
11459 }
11460 
11461 static int
11462 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
11463 			 struct rte_flow_action *pmd_actions,
11464 			 uint32_t num_actions, struct rte_flow_error *err)
11465 {
11466 	struct tunnel_db_element_release_ctx ctx = {
11467 		.items = NULL,
11468 		.actions = pmd_actions,
11469 		.num_elements = num_actions,
11470 		.error = err,
11471 	};
11472 
11473 	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
11474 				      tunnel_element_release_hit,
11475 				      tunnel_element_release_miss, &ctx, false);
11476 
11477 	return ctx.ret;
11478 }
11479 
11480 static int
11481 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
11482 				  struct rte_mbuf *m,
11483 				  struct rte_flow_restore_info *info,
11484 				  struct rte_flow_error *err)
11485 {
11486 	uint64_t ol_flags = m->ol_flags;
11487 	const struct mlx5_flow_tbl_data_entry *tble;
11488 	const uint64_t mask = RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
11489 
11490 	if (!is_tunnel_offload_active(dev)) {
11491 		info->flags = 0;
11492 		return 0;
11493 	}
11494 
11495 	if ((ol_flags & mask) != mask)
11496 		goto err;
11497 	tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
11498 	if (!tble) {
11499 		DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
11500 			dev->data->port_id, m->hash.fdir.hi);
11501 		goto err;
11502 	}
11503 	MLX5_ASSERT(tble->tunnel);
11504 	memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
11505 	info->group_id = tble->group_id;
11506 	info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
11507 		      RTE_FLOW_RESTORE_INFO_GROUP_ID |
11508 		      RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
11509 
11510 	return 0;
11511 
11512 err:
11513 	return rte_flow_error_set(err, EINVAL,
11514 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
11515 				  "failed to get restore info");
11516 }
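
/*
 * Editor's sketch: on the RX path the application asks whether a packet
 * missed in the tunnel offload tables and must be restored. Assumed
 * usage of the generic API:
 */
static int
example_restore_tunnel_info(uint16_t port_id, struct rte_mbuf *m)
{
	struct rte_flow_restore_info info;
	struct rte_flow_error error;

	if (rte_flow_get_restore_info(port_id, m, &info, &error))
		return 0; /* No tunnel context attached to this packet. */
	if (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL) {
		/*
		 * Re-parse the packet according to info.tunnel and
		 * resubmit it to group info.group_id.
		 */
	}
	return 1;
}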
11517 
11518 #else /* HAVE_IBV_FLOW_DV_SUPPORT */
11519 static int
11520 mlx5_flow_tunnel_decap_set(__rte_unused struct rte_eth_dev *dev,
11521 			   __rte_unused struct rte_flow_tunnel *app_tunnel,
11522 			   __rte_unused struct rte_flow_action **actions,
11523 			   __rte_unused uint32_t *num_of_actions,
11524 			   __rte_unused struct rte_flow_error *error)
11525 {
11526 	return -ENOTSUP;
11527 }
11528 
11529 static int
11530 mlx5_flow_tunnel_match(__rte_unused struct rte_eth_dev *dev,
11531 		       __rte_unused struct rte_flow_tunnel *app_tunnel,
11532 		       __rte_unused struct rte_flow_item **items,
11533 		       __rte_unused uint32_t *num_of_items,
11534 		       __rte_unused struct rte_flow_error *error)
11535 {
11536 	return -ENOTSUP;
11537 }
11538 
11539 static int
11540 mlx5_flow_tunnel_item_release(__rte_unused struct rte_eth_dev *dev,
11541 			      __rte_unused struct rte_flow_item *pmd_items,
11542 			      __rte_unused uint32_t num_items,
11543 			      __rte_unused struct rte_flow_error *err)
11544 {
11545 	return -ENOTSUP;
11546 }
11547 
11548 static int
11549 mlx5_flow_tunnel_action_release(__rte_unused struct rte_eth_dev *dev,
11550 				__rte_unused struct rte_flow_action *pmd_action,
11551 				__rte_unused uint32_t num_actions,
11552 				__rte_unused struct rte_flow_error *err)
11553 {
11554 	return -ENOTSUP;
11555 }
11556 
11557 static int
11558 mlx5_flow_tunnel_get_restore_info(__rte_unused struct rte_eth_dev *dev,
11559 				  __rte_unused struct rte_mbuf *m,
11560 				  __rte_unused struct rte_flow_restore_info *i,
11561 				  __rte_unused struct rte_flow_error *err)
11562 {
11563 	return -ENOTSUP;
11564 }
11565 
11566 static int
11567 flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev,
11568 			     __rte_unused struct rte_flow *flow,
11569 			     __rte_unused const struct rte_flow_attr *attr,
11570 			     __rte_unused const struct rte_flow_action *actions,
11571 			     __rte_unused uint32_t flow_idx,
11572 			     __rte_unused const struct mlx5_flow_tunnel *tunnel,
11573 			     __rte_unused struct tunnel_default_miss_ctx *ctx,
11574 			     __rte_unused struct rte_flow_error *error)
11575 {
11576 	return -ENOTSUP;
11577 }
11578 
11579 static struct mlx5_flow_tunnel *
11580 mlx5_find_tunnel_id(__rte_unused struct rte_eth_dev *dev,
11581 		    __rte_unused uint32_t id)
11582 {
11583 	return NULL;
11584 }
11585 
11586 static void
11587 mlx5_flow_tunnel_free(__rte_unused struct rte_eth_dev *dev,
11588 		      __rte_unused struct mlx5_flow_tunnel *tunnel)
11589 {
11590 }
11591 
11592 static uint32_t
11593 tunnel_flow_group_to_flow_table(__rte_unused struct rte_eth_dev *dev,
11594 				__rte_unused const struct mlx5_flow_tunnel *t,
11595 				__rte_unused uint32_t group,
11596 				__rte_unused uint32_t *table,
11597 				struct rte_flow_error *error)
11598 {
11599 	return rte_flow_error_set(error, ENOTSUP,
11600 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
11601 				  "tunnel offload requires DV support");
11602 }
11603 
11604 void
11605 mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh,
11606 			__rte_unused uint16_t port_id)
11607 {
11608 }
11609 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
11610 
11611 /* Flex flow item API */
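/**
 * Create a flex item on the port via the driver item_create callback.
 *
 * Flex items are only allowed on PFs (priv->pci_dev must be set) of
 * BlueField-2 and BlueField-3 devices, and only when the active flow
 * driver implements the item_create callback.
 */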
11612 static struct rte_flow_item_flex_handle *
11613 mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
11614 			   const struct rte_flow_item_flex_conf *conf,
11615 			   struct rte_flow_error *error)
11616 {
11617 	static const char err_msg[] = "flex item creation unsupported";
11618 	struct mlx5_priv *priv = dev->data->dev_private;
11619 	struct rte_flow_attr attr = { .transfer = 0 };
11620 	const struct mlx5_flow_driver_ops *fops =
11621 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
11622 
11623 	if (!priv->pci_dev) {
11624 		rte_flow_error_set(error, ENOTSUP,
11625 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
11626 				   "create flex item on PF only");
11627 		return NULL;
11628 	}
11629 	switch (priv->pci_dev->id.device_id) {
11630 	case PCI_DEVICE_ID_MELLANOX_BLUEFIELD2:
11631 	case PCI_DEVICE_ID_MELLANOX_BLUEFIELD3:
11632 		break;
11633 	default:
11634 		rte_flow_error_set(error, ENOTSUP,
11635 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
11636 				   "flex item available on BlueField ports only");
11637 		return NULL;
11638 	}
11639 	if (!fops->item_create) {
11640 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
11641 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
11642 				   NULL, err_msg);
11643 		return NULL;
11644 	}
11645 	return fops->item_create(dev, conf, error);
11646 }
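
/*
 * Illustrative sketch (not part of the driver): applications reach the
 * callbacks above and below through the generic rte_flow_flex_item_create()
 * and rte_flow_flex_item_release() APIs. The configuration contents are
 * protocol-specific and elided here.
 *
 *	struct rte_flow_item_flex_conf conf = { 0 };
 *	struct rte_flow_error error;
 *	struct rte_flow_item_flex_handle *handle;
 *
 *	handle = rte_flow_flex_item_create(port_id, &conf, &error);
 *	if (handle == NULL)
 *		printf("flex item: %s\n", error.message);
 *	else
 *		rte_flow_flex_item_release(port_id, handle, &error);
 */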
11647 
11648 static int
11649 mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
11650 			    const struct rte_flow_item_flex_handle *handle,
11651 			    struct rte_flow_error *error)
11652 {
11653 	static const char err_msg[] = "flex item release unsupported";
11654 	struct rte_flow_attr attr = { .transfer = 0 };
11655 	const struct mlx5_flow_driver_ops *fops =
11656 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
11657 
11658 	if (!fops->item_release) {
11659 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
11660 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
11661 				   NULL, err_msg);
11662 		return -rte_errno;
11663 	}
11664 	return fops->item_release(dev, handle, error);
11665 }
11666 
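/**
 * Debug helper: print a flow pattern to stdout, one item per entry,
 * terminated by "END". Item types without a known name are printed as
 * raw numeric values. Note that RTE_FLOW_CONV_OP_ITEM_NAME_PTR writes a
 * pointer to the name into the destination, so sizeof(item_name) (the
 * pointer size) is the correct destination size here.
 */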
11667 static void
11668 mlx5_dbg__print_pattern(const struct rte_flow_item *item)
11669 {
11670 	int ret;
11671 	struct rte_flow_error error;
11672 
11673 	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
11674 		char *item_name;
11675 		ret = rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR, &item_name,
11676 				    sizeof(item_name),
11677 				    (void *)(uintptr_t)item->type, &error);
11678 		if (ret > 0)
11679 			printf("%s ", item_name);
11680 		else
11681 			printf("%d\n", (int)item->type);
11682 	}
11683 	printf("END\n");
11684 }
11685 
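/**
 * Check whether a UDP item can only match the standard VXLAN port.
 *
 * @return
 *   Non-zero if the destination port is not constrained by the item
 *   (wildcard) or equals MLX5_UDP_PORT_VXLAN (4789), zero otherwise.
 */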
11686 static int
11687 mlx5_flow_is_std_vxlan_port(const struct rte_flow_item *udp_item)
11688 {
11689 	const struct rte_flow_item_udp *spec = udp_item->spec;
11690 	const struct rte_flow_item_udp *mask = udp_item->mask;
11691 	uint16_t udp_dport = 0;
11692 
11693 	if (spec != NULL) {
11694 		if (!mask)
11695 			mask = &rte_flow_item_udp_mask;
11696 		udp_dport = rte_be_to_cpu_16(spec->hdr.dst_port &
11697 				mask->hdr.dst_port);
11698 	}
11699 	return (!udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN);
11700 }
11701 
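/**
 * Adjust the RSS expansion node for VXLAN patterns.
 *
 * When the expansion node is VXLAN and it is also the last expandable
 * item in the pattern, pick the expansion branch according to the
 * preceding UDP item: the standard VXLAN branch if the rule can only
 * match the standard port, the L3 VXLAN branch otherwise.
 */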
11702 static const struct mlx5_flow_expand_node *
11703 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
11704 		unsigned int item_idx,
11705 		const struct mlx5_flow_expand_node graph[],
11706 		const struct mlx5_flow_expand_node *node)
11707 {
11708 	const struct rte_flow_item *item = pattern + item_idx, *prev_item;
11709 
11710 	if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN &&
11711 			node != NULL &&
11712 			node->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
11713 		/*
11714 		 * The expansion node is VXLAN and it is also the last
11715 		 * expandable item in the pattern, so we need to continue
11716 		 * expanding the inner tunnel.
11717 		 */
11718 		MLX5_ASSERT(item_idx > 0);
11719 		prev_item = pattern + item_idx - 1;
11720 		MLX5_ASSERT(prev_item->type == RTE_FLOW_ITEM_TYPE_UDP);
11721 		if (mlx5_flow_is_std_vxlan_port(prev_item))
11722 			return &graph[MLX5_EXPANSION_STD_VXLAN];
11723 		return &graph[MLX5_EXPANSION_L3_VXLAN];
11724 	}
11725 	return node;
11726 }
11727 
11728 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
11729 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
11730 	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
11731 };
11732 
11733 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
11734 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
11735 	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
11736 	{ 9, 10, 11 }, { 12, 13, 14 },
11737 };
11738 
11739 /**
11740  * Discover the number of available flow priorities.
11741  *
11742  * @param dev
11743  *   Ethernet device.
11744  *
11745  * @return
11746  *   On success, number of available flow priorities.
11747  *   On failure, a negative errno-style code and rte_errno is set.
11748  */
11749 int
11750 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
11751 {
11752 	static const uint16_t vprio[] = {8, 16};
11753 	const struct mlx5_priv *priv = dev->data->dev_private;
11754 	const struct mlx5_flow_driver_ops *fops;
11755 	enum mlx5_flow_drv_type type;
11756 	int ret;
11757 
11758 	type = mlx5_flow_os_get_type();
11759 	if (type == MLX5_FLOW_TYPE_MAX) {
11760 		type = MLX5_FLOW_TYPE_VERBS;
11761 		if (priv->sh->cdev->config.devx && priv->sh->config.dv_flow_en)
11762 			type = MLX5_FLOW_TYPE_DV;
11763 	}
11764 	fops = flow_get_drv_ops(type);
11765 	if (fops->discover_priorities == NULL) {
11766 		DRV_LOG(ERR, "Priority discovery not supported");
11767 		rte_errno = ENOTSUP;
11768 		return -rte_errno;
11769 	}
11770 	ret = fops->discover_priorities(dev, vprio, RTE_DIM(vprio));
11771 	if (ret < 0)
11772 		return ret;
11773 	switch (ret) {
11774 	case 8:
11775 		ret = RTE_DIM(priority_map_3);
11776 		break;
11777 	case 16:
11778 		ret = RTE_DIM(priority_map_5);
11779 		break;
11780 	default:
11781 		rte_errno = ENOTSUP;
11782 		DRV_LOG(ERR,
11783 			"port %u maximum priority: %d expected 8/16",
11784 			dev->data->port_id, ret);
11785 		return -rte_errno;
11786 	}
11787 	DRV_LOG(INFO, "port %u supported flow priorities:"
11788 		" 0-%d for ingress or egress root table,"
11789 		" 0-%d for non-root table or transfer root table.",
11790 		dev->data->port_id, ret - 2,
11791 		MLX5_NON_ROOT_FLOW_MAX_PRIO - 1);
11792 	return ret;
11793 }
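
/*
 * Note: the value returned above is the number of base priority levels
 * (RTE_DIM() of the selected map, i.e. 3 or 5), not the raw Verbs
 * priority count (8 or 16) reported by the probe; it is the value that
 * mlx5_flow_adjust_priority() below matches against.
 */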
11794 
11795 /**
11796  * Adjust flow priority based on the highest layer and the requested priority.
11797  *
11798  * @param[in] dev
11799  *   Pointer to the Ethernet device structure.
11800  * @param[in] priority
11801  *   The rule base priority.
11802  * @param[in] subpriority
11803  *   The priority based on the items.
11804  *
11805  * @return
11806  *   The new priority.
11807  */
11808 uint32_t
11809 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
11810 			  uint32_t subpriority)
11811 {
11812 	uint32_t res = 0;
11813 	struct mlx5_priv *priv = dev->data->dev_private;
11814 
11815 	switch (priv->sh->flow_max_priority) {
11816 	case RTE_DIM(priority_map_3):
11817 		res = priority_map_3[priority][subpriority];
11818 		break;
11819 	case RTE_DIM(priority_map_5):
11820 		res = priority_map_5[priority][subpriority];
11821 		break;
11822 	}
11823 	return res;
11824 }
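
/*
 * Worked example: with 16 Verbs priorities (priority_map_5), a rule with
 * base priority 1 and item subpriority 2 maps to priority_map_5[1][2] == 5;
 * with 8 Verbs priorities (priority_map_3) the same rule maps to
 * priority_map_3[1][2] == 4.
 */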
11825 
11826 /**
11827  * Get the priority for sending traffic to the kernel table.
11828  *
11829  * @param[in] dev
11830  *   Pointer to the Ethernet device structure.
11831  *
11832  * @return
11833  *   On success: the priority value for sending traffic to the kernel table.
11834  *   On failure: -1
11835  */
11836 uint32_t
11837 mlx5_get_send_to_kernel_priority(struct rte_eth_dev *dev)
11838 {
11839 	struct mlx5_priv *priv = dev->data->dev_private;
11840 	uint32_t res;
11841 
11842 	switch (priv->sh->flow_max_priority) {
11843 	case RTE_DIM(priority_map_5):
11844 		res = 15;
11845 		break;
11846 	case RTE_DIM(priority_map_3):
11847 		res = 7;
11848 		break;
11849 	default:
11850 		DRV_LOG(ERR,
11851 			"port %u maximum priority: %d expected 8/16",
11852 			dev->data->port_id, priv->sh->flow_max_priority);
11853 		res = (uint32_t)-1;
11854 	}
11855 	return res;
11856 }
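
/*
 * Note: the priority selected above is the last (lowest-precedence) Verbs
 * priority, 15 when 16 Verbs priorities are available (priority_map_5) and
 * 7 when only 8 are (priority_map_3), so traffic falls through to the
 * kernel table only when no higher-priority rule matches.
 */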
11857 
11858 /**
11859  * Get the E-Switch Manager vport id.
11860  *
11861  * @param[in] dev
11862  *   Pointer to the Ethernet device structure.
11863  *
11864  * @return
11865  *   The vport id.
11866  */
11867 int16_t mlx5_flow_get_esw_manager_vport_id(struct rte_eth_dev *dev)
11868 {
11869 	struct mlx5_priv *priv = dev->data->dev_private;
11870 	struct mlx5_common_device *cdev = priv->sh->cdev;
11871 
11872 	/* New FW exposes the E-Switch Manager vport ID; use it directly. */
11873 	if (cdev->config.hca_attr.esw_mgr_vport_id_valid)
11874 		return (int16_t)cdev->config.hca_attr.esw_mgr_vport_id;
11875 
11876 	if (priv->pci_dev == NULL)
11877 		return 0;
11878 	switch (priv->pci_dev->id.device_id) {
11879 	case PCI_DEVICE_ID_MELLANOX_BLUEFIELD:
11880 	case PCI_DEVICE_ID_MELLANOX_BLUEFIELD2:
11881 	case PCI_DEVICE_ID_MELLANOX_BLUEFIELD3:
11882 	/*
11883 	 * Old FW does not expose the E-Switch Manager vport ID; there, only the
11884 	 * BlueField embedded CPUs control the E-Switch Manager port, so the ECPF
11885 	 * vport ID is selected, not the host port (0), in any BlueField case.
11886 	 */
11887 		return (int16_t)MLX5_ECPF_VPORT_ID;
11888 	default:
11889 		return MLX5_PF_VPORT_ID;
11890 	}
11891 }
11892 
11893 /**
11894  * Parse item to get the vport id.
11895  *
11896  * @param[in] dev
11897  *   Pointer to the Ethernet device structure.
11898  * @param[in] item
11899  *   The src port id match item.
11900  * @param[out] vport_id
11901  *   Pointer to put the vport id.
11902  * @param[out] all_ports
11903  *   Indicate if the item matches all ports.
11904  * @param[out] error
11905  *   Pointer to error structure.
11906  *
11907  * @return
11908  *   0 on success, a negative errno value otherwise and rte_errno is set.
11909  */
11910 int mlx5_flow_get_item_vport_id(struct rte_eth_dev *dev,
11911 				const struct rte_flow_item *item,
11912 				uint16_t *vport_id,
11913 				bool *all_ports,
11914 				struct rte_flow_error *error)
11915 {
11916 	struct mlx5_priv *port_priv;
11917 	const struct rte_flow_item_port_id *pid_v = NULL;
11918 	const struct rte_flow_item_ethdev *dev_v = NULL;
11919 	uint32_t esw_mgr_port;
11920 	uint32_t src_port;
11921 
11922 	if (all_ports)
11923 		*all_ports = false;
11924 	switch (item->type) {
11925 	case RTE_FLOW_ITEM_TYPE_PORT_ID:
11926 		pid_v = item->spec;
11927 		if (!pid_v)
11928 			return 0;
11929 		src_port = pid_v->id;
11930 		esw_mgr_port = MLX5_PORT_ESW_MGR;
11931 		break;
11932 	case RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT:
11933 		dev_v = item->spec;
11934 		if (!dev_v) {
11935 			if (all_ports)
11936 				*all_ports = true;
11937 			return 0;
11938 		}
11939 		src_port = dev_v->port_id;
11940 		esw_mgr_port = MLX5_REPRESENTED_PORT_ESW_MGR;
11941 		break;
11942 	case RTE_FLOW_ITEM_TYPE_PORT_REPRESENTOR:
11943 		src_port = MLX5_REPRESENTED_PORT_ESW_MGR;
11944 		esw_mgr_port = MLX5_REPRESENTED_PORT_ESW_MGR;
11945 		break;
11946 	default:
11947 		return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
11948 					  NULL, "Incorrect item type.");
11949 	}
11950 	if (src_port == esw_mgr_port) {
11951 		*vport_id = mlx5_flow_get_esw_manager_vport_id(dev);
11952 	} else {
11953 		port_priv = mlx5_port_to_eswitch_info(src_port, false);
11954 		if (!port_priv)
11955 			return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
11956 						  NULL, "Failed to get port info.");
11957 		*vport_id = port_priv->representor_id;
11958 	}
11959 
11960 	return 0;
11961 }
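
/*
 * Illustrative sketch (not part of the driver): for a transfer rule
 * matching traffic coming from a representor, the caller passes the source
 * port item and receives the underlying E-Switch vport id; repr_port is a
 * hypothetical ethdev port number here.
 *
 *	const struct rte_flow_item_ethdev spec = { .port_id = repr_port };
 *	const struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT,
 *		.spec = &spec,
 *	};
 *	uint16_t vport;
 *	bool all;
 *	int ret;
 *
 *	ret = mlx5_flow_get_item_vport_id(dev, &item, &vport, &all, &error);
 *	On success (ret == 0), vport holds the E-Switch vport id.
 */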
11962 
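/**
 * Pick the E-Switch transfer proxy port for the given port.
 *
 * Requires E-Switch (dv_esw_en) and a master or representor port. The
 * proxy is the port itself when it is a master, otherwise the master
 * port that shares the same E-Switch domain.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] proxy_port_id
 *   Port identifier of the proxy port.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */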
11963 int
11964 mlx5_flow_pick_transfer_proxy(struct rte_eth_dev *dev,
11965 			      uint16_t *proxy_port_id,
11966 			      struct rte_flow_error *error)
11967 {
11968 	const struct mlx5_priv *priv = dev->data->dev_private;
11969 	uint16_t port_id;
11970 
11971 	if (!priv->sh->config.dv_esw_en)
11972 		return rte_flow_error_set(error, EINVAL,
11973 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
11974 					  NULL,
11975 					  "unable to provide a proxy port"
11976 					  " without E-Switch configured");
11977 	if (!priv->master && !priv->representor)
11978 		return rte_flow_error_set(error, EINVAL,
11979 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
11980 					  NULL,
11981 					  "unable to provide a proxy port"
11982 					  " for port which is not a master"
11983 					  " or a representor port");
11984 	if (priv->master) {
11985 		*proxy_port_id = dev->data->port_id;
11986 		return 0;
11987 	}
11988 	MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
11989 		const struct rte_eth_dev *port_dev = &rte_eth_devices[port_id];
11990 		const struct mlx5_priv *port_priv = port_dev->data->dev_private;
11991 
11992 		if (port_priv->master &&
11993 		    port_priv->domain_id == priv->domain_id) {
11994 			*proxy_port_id = port_id;
11995 			return 0;
11996 		}
11997 	}
11998 	return rte_flow_error_set(error, ENODEV,
11999 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
12000 				  NULL, "unable to find a proxy port");
12001 }
12002