xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision 592ab76f9f0f41993bebb44da85c37750a93ece9)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <stdalign.h>
7 #include <stdint.h>
8 #include <string.h>
9 #include <stdbool.h>
10 #include <sys/queue.h>
11 
12 #include <rte_common.h>
13 #include <rte_ether.h>
14 #include <ethdev_driver.h>
15 #include <rte_eal_paging.h>
16 #include <rte_flow.h>
17 #include <rte_cycles.h>
18 #include <rte_flow_driver.h>
19 #include <rte_malloc.h>
20 #include <rte_ip.h>
21 
22 #include <mlx5_glue.h>
23 #include <mlx5_devx_cmds.h>
24 #include <mlx5_prm.h>
25 #include <mlx5_malloc.h>
26 
27 #include "mlx5_defs.h"
28 #include "mlx5.h"
29 #include "mlx5_flow.h"
30 #include "mlx5_flow_os.h"
31 #include "mlx5_rx.h"
32 #include "mlx5_tx.h"
33 #include "mlx5_common_os.h"
34 #include "rte_pmd_mlx5.h"
35 
36 struct tunnel_default_miss_ctx {
37 	uint16_t *queue;
38 	__extension__
39 	union {
40 		struct rte_flow_action_rss action_rss;
41 		struct rte_flow_action_queue miss_queue;
42 		struct rte_flow_action_jump miss_jump;
43 		uint8_t raw[0];
44 	};
45 };
46 
47 static int
48 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
49 			     struct rte_flow *flow,
50 			     const struct rte_flow_attr *attr,
51 			     const struct rte_flow_action *app_actions,
52 			     uint32_t flow_idx,
53 			     const struct mlx5_flow_tunnel *tunnel,
54 			     struct tunnel_default_miss_ctx *ctx,
55 			     struct rte_flow_error *error);
56 static struct mlx5_flow_tunnel *
57 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id);
58 static void
59 mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel);
60 static uint32_t
61 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
62 				const struct mlx5_flow_tunnel *tunnel,
63 				uint32_t group, uint32_t *table,
64 				struct rte_flow_error *error);
65 
66 static struct mlx5_flow_workspace *mlx5_flow_push_thread_workspace(void);
67 static void mlx5_flow_pop_thread_workspace(void);
68 
69 
70 /** Device flow drivers. */
71 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
72 
73 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
74 
75 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
76 	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
77 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
78 	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
79 	[MLX5_FLOW_TYPE_HW] = &mlx5_flow_hw_drv_ops,
80 #endif
81 	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
82 	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
83 };
84 
85 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */
86 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
87 	(const int []){ \
88 		__VA_ARGS__, 0, \
89 	}
90 
91 /** Node object of input graph for mlx5_flow_expand_rss(). */
92 struct mlx5_flow_expand_node {
93 	const int *const next;
94 	/**<
95 	 * List of next node indexes. Index 0 is interpreted as a terminator.
96 	 */
97 	const enum rte_flow_item_type type;
98 	/**< Pattern item type of current node. */
99 	uint64_t rss_types;
100 	/**<
101 	 * RSS types bit-field associated with this node
102 	 * (see RTE_ETH_RSS_* definitions).
103 	 */
104 	uint64_t node_flags;
105 	/**<
106 	 *  Bit-fields that define how the node is used in the expansion.
107 	 * (see MLX5_EXPANSION_NODE_* definitions).
108 	 */
109 };
110 
111 /* Optional expand field. The expansion alg will not go deeper. */
112 #define MLX5_EXPANSION_NODE_OPTIONAL (UINT64_C(1) << 0)
113 
114 /* The node is not added implicitly as expansion to the flow pattern.
115  * If the node type does not match the flow pattern item type, the
116  * expansion alg will go deeper to its next items.
117  * In the current implementation, the list of next nodes indexes can
118  * have up to one node with this flag set and it has to be the last
119  * node index (before the list terminator).
120  */
121 #define MLX5_EXPANSION_NODE_EXPLICIT (UINT64_C(1) << 1)
122 
123 /** Object returned by mlx5_flow_expand_rss(). */
124 struct mlx5_flow_expand_rss {
125 	uint32_t entries;
126 	/**< Number of entries @p patterns and @p priorities. */
127 	struct {
128 		struct rte_flow_item *pattern; /**< Expanded pattern array. */
129 		uint32_t priority; /**< Priority offset for each expansion. */
130 	} entry[];
131 };
132 
133 static void
134 mlx5_dbg__print_pattern(const struct rte_flow_item *item);
135 
136 static const struct mlx5_flow_expand_node *
137 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
138 		unsigned int item_idx,
139 		const struct mlx5_flow_expand_node graph[],
140 		const struct mlx5_flow_expand_node *node);
141 
142 static bool
143 mlx5_flow_is_rss_expandable_item(const struct rte_flow_item *item)
144 {
145 	switch (item->type) {
146 	case RTE_FLOW_ITEM_TYPE_ETH:
147 	case RTE_FLOW_ITEM_TYPE_VLAN:
148 	case RTE_FLOW_ITEM_TYPE_IPV4:
149 	case RTE_FLOW_ITEM_TYPE_IPV6:
150 	case RTE_FLOW_ITEM_TYPE_UDP:
151 	case RTE_FLOW_ITEM_TYPE_TCP:
152 	case RTE_FLOW_ITEM_TYPE_ESP:
153 	case RTE_FLOW_ITEM_TYPE_VXLAN:
154 	case RTE_FLOW_ITEM_TYPE_NVGRE:
155 	case RTE_FLOW_ITEM_TYPE_GRE:
156 	case RTE_FLOW_ITEM_TYPE_GENEVE:
157 	case RTE_FLOW_ITEM_TYPE_MPLS:
158 	case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
159 	case RTE_FLOW_ITEM_TYPE_GRE_KEY:
160 	case RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT:
161 	case RTE_FLOW_ITEM_TYPE_GTP:
162 		return true;
163 	default:
164 		break;
165 	}
166 	return false;
167 }
168 
169 /**
170  * Network Service Header (NSH) and its next protocol values
171  * are described in RFC-8393.
172  */
173 static enum rte_flow_item_type
174 mlx5_nsh_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
175 {
176 	enum rte_flow_item_type type;
177 
178 	switch (proto_mask & proto_spec) {
179 	case 0:
180 		type = RTE_FLOW_ITEM_TYPE_VOID;
181 		break;
182 	case RTE_VXLAN_GPE_TYPE_IPV4:
183 		type = RTE_FLOW_ITEM_TYPE_IPV4;
184 		break;
185 	case RTE_VXLAN_GPE_TYPE_IPV6:
186 		type = RTE_VXLAN_GPE_TYPE_IPV6;
187 		break;
188 	case RTE_VXLAN_GPE_TYPE_ETH:
189 		type = RTE_FLOW_ITEM_TYPE_ETH;
190 		break;
191 	default:
192 		type = RTE_FLOW_ITEM_TYPE_END;
193 	}
194 	return type;
195 }
196 
197 static enum rte_flow_item_type
198 mlx5_inet_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
199 {
200 	enum rte_flow_item_type type;
201 
202 	switch (proto_mask & proto_spec) {
203 	case 0:
204 		type = RTE_FLOW_ITEM_TYPE_VOID;
205 		break;
206 	case IPPROTO_UDP:
207 		type = RTE_FLOW_ITEM_TYPE_UDP;
208 		break;
209 	case IPPROTO_TCP:
210 		type = RTE_FLOW_ITEM_TYPE_TCP;
211 		break;
212 	case IPPROTO_IPIP:
213 		type = RTE_FLOW_ITEM_TYPE_IPV4;
214 		break;
215 	case IPPROTO_IPV6:
216 		type = RTE_FLOW_ITEM_TYPE_IPV6;
217 		break;
218 	case IPPROTO_ESP:
219 		type = RTE_FLOW_ITEM_TYPE_ESP;
220 		break;
221 	default:
222 		type = RTE_FLOW_ITEM_TYPE_END;
223 	}
224 	return type;
225 }
226 
227 static enum rte_flow_item_type
228 mlx5_ethertype_to_item_type(rte_be16_t type_spec,
229 			    rte_be16_t type_mask, bool is_tunnel)
230 {
231 	enum rte_flow_item_type type;
232 
233 	switch (rte_be_to_cpu_16(type_spec & type_mask)) {
234 	case 0:
235 		type = RTE_FLOW_ITEM_TYPE_VOID;
236 		break;
237 	case RTE_ETHER_TYPE_TEB:
238 		type = is_tunnel ?
239 		       RTE_FLOW_ITEM_TYPE_ETH : RTE_FLOW_ITEM_TYPE_END;
240 		break;
241 	case RTE_ETHER_TYPE_VLAN:
242 		type = !is_tunnel ?
243 		       RTE_FLOW_ITEM_TYPE_VLAN : RTE_FLOW_ITEM_TYPE_END;
244 		break;
245 	case RTE_ETHER_TYPE_IPV4:
246 		type = RTE_FLOW_ITEM_TYPE_IPV4;
247 		break;
248 	case RTE_ETHER_TYPE_IPV6:
249 		type = RTE_FLOW_ITEM_TYPE_IPV6;
250 		break;
251 	default:
252 		type = RTE_FLOW_ITEM_TYPE_END;
253 	}
254 	return type;
255 }
256 
257 static enum rte_flow_item_type
258 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
259 {
260 #define MLX5_XSET_ITEM_MASK_SPEC(type, fld)                              \
261 	do {                                                             \
262 		const void *m = item->mask;                              \
263 		const void *s = item->spec;                              \
264 		mask = m ?                                               \
265 			((const struct rte_flow_item_##type *)m)->fld :  \
266 			rte_flow_item_##type##_mask.fld;                 \
267 		spec = ((const struct rte_flow_item_##type *)s)->fld;    \
268 	} while (0)
269 
270 	enum rte_flow_item_type ret;
271 	uint16_t spec, mask;
272 
273 	if (item == NULL || item->spec == NULL)
274 		return RTE_FLOW_ITEM_TYPE_VOID;
275 	switch (item->type) {
276 	case RTE_FLOW_ITEM_TYPE_ETH:
277 		MLX5_XSET_ITEM_MASK_SPEC(eth, type);
278 		if (!mask)
279 			return RTE_FLOW_ITEM_TYPE_VOID;
280 		ret = mlx5_ethertype_to_item_type(spec, mask, false);
281 		break;
282 	case RTE_FLOW_ITEM_TYPE_VLAN:
283 		MLX5_XSET_ITEM_MASK_SPEC(vlan, inner_type);
284 		if (!mask)
285 			return RTE_FLOW_ITEM_TYPE_VOID;
286 		ret = mlx5_ethertype_to_item_type(spec, mask, false);
287 		break;
288 	case RTE_FLOW_ITEM_TYPE_IPV4:
289 		MLX5_XSET_ITEM_MASK_SPEC(ipv4, hdr.next_proto_id);
290 		if (!mask)
291 			return RTE_FLOW_ITEM_TYPE_VOID;
292 		ret = mlx5_inet_proto_to_item_type(spec, mask);
293 		break;
294 	case RTE_FLOW_ITEM_TYPE_IPV6:
295 		MLX5_XSET_ITEM_MASK_SPEC(ipv6, hdr.proto);
296 		if (!mask)
297 			return RTE_FLOW_ITEM_TYPE_VOID;
298 		ret = mlx5_inet_proto_to_item_type(spec, mask);
299 		break;
300 	case RTE_FLOW_ITEM_TYPE_GENEVE:
301 		MLX5_XSET_ITEM_MASK_SPEC(geneve, protocol);
302 		ret = mlx5_ethertype_to_item_type(spec, mask, true);
303 		break;
304 	case RTE_FLOW_ITEM_TYPE_GRE:
305 		MLX5_XSET_ITEM_MASK_SPEC(gre, protocol);
306 		ret = mlx5_ethertype_to_item_type(spec, mask, true);
307 		break;
308 	case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
309 		MLX5_XSET_ITEM_MASK_SPEC(vxlan_gpe, protocol);
310 		ret = mlx5_nsh_proto_to_item_type(spec, mask);
311 		break;
312 	default:
313 		ret = RTE_FLOW_ITEM_TYPE_VOID;
314 		break;
315 	}
316 	return ret;
317 #undef MLX5_XSET_ITEM_MASK_SPEC
318 }
319 
320 static const int *
321 mlx5_flow_expand_rss_skip_explicit(const struct mlx5_flow_expand_node graph[],
322 		const int *next_node)
323 {
324 	const struct mlx5_flow_expand_node *node = NULL;
325 	const int *next = next_node;
326 
327 	while (next && *next) {
328 		/*
329 		 * Skip the nodes with the MLX5_EXPANSION_NODE_EXPLICIT
330 		 * flag set, because they were not found in the flow pattern.
331 		 */
332 		node = &graph[*next];
333 		if (!(node->node_flags & MLX5_EXPANSION_NODE_EXPLICIT))
334 			break;
335 		next = node->next;
336 	}
337 	return next;
338 }
339 
340 #define MLX5_RSS_EXP_ELT_N 16
341 
342 /**
343  * Expand RSS flows into several possible flows according to the RSS hash
344  * fields requested and the driver capabilities.
345  *
346  * @param[out] buf
347  *   Buffer to store the result expansion.
348  * @param[in] size
349  *   Buffer size in bytes. If 0, @p buf can be NULL.
350  * @param[in] pattern
351  *   User flow pattern.
352  * @param[in] types
353  *   RSS types to expand (see RTE_ETH_RSS_* definitions).
354  * @param[in] graph
355  *   Input graph to expand @p pattern according to @p types.
356  * @param[in] graph_root_index
357  *   Index of root node in @p graph, typically 0.
358  *
359  * @return
360  *   A positive value representing the size of @p buf in bytes regardless of
361  *   @p size on success, a negative errno value otherwise and rte_errno is
362  *   set, the following errors are defined:
363  *
364  *   -E2BIG: graph-depth @p graph is too deep.
365  *   -EINVAL: @p size has not enough space for expanded pattern.
366  */
367 static int
368 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
369 		     const struct rte_flow_item *pattern, uint64_t types,
370 		     const struct mlx5_flow_expand_node graph[],
371 		     int graph_root_index)
372 {
373 	const struct rte_flow_item *item;
374 	const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
375 	const int *next_node;
376 	const int *stack[MLX5_RSS_EXP_ELT_N];
377 	int stack_pos = 0;
378 	struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N];
379 	unsigned int i, item_idx, last_expand_item_idx = 0;
380 	size_t lsize;
381 	size_t user_pattern_size = 0;
382 	void *addr = NULL;
383 	const struct mlx5_flow_expand_node *next = NULL;
384 	struct rte_flow_item missed_item;
385 	int missed = 0;
386 	int elt = 0;
387 	const struct rte_flow_item *last_expand_item = NULL;
388 
389 	memset(&missed_item, 0, sizeof(missed_item));
390 	lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
391 		MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]);
392 	if (lsize > size)
393 		return -EINVAL;
394 	buf->entry[0].priority = 0;
395 	buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N];
396 	buf->entries = 0;
397 	addr = buf->entry[0].pattern;
398 	for (item = pattern, item_idx = 0;
399 			item->type != RTE_FLOW_ITEM_TYPE_END;
400 			item++, item_idx++) {
401 		if (!mlx5_flow_is_rss_expandable_item(item)) {
402 			user_pattern_size += sizeof(*item);
403 			continue;
404 		}
405 		last_expand_item = item;
406 		last_expand_item_idx = item_idx;
407 		i = 0;
408 		while (node->next && node->next[i]) {
409 			next = &graph[node->next[i]];
410 			if (next->type == item->type)
411 				break;
412 			if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
413 				node = next;
414 				i = 0;
415 			} else {
416 				++i;
417 			}
418 		}
419 		if (next)
420 			node = next;
421 		user_pattern_size += sizeof(*item);
422 	}
423 	user_pattern_size += sizeof(*item); /* Handle END item. */
424 	lsize += user_pattern_size;
425 	if (lsize > size)
426 		return -EINVAL;
427 	/* Copy the user pattern in the first entry of the buffer. */
428 	rte_memcpy(addr, pattern, user_pattern_size);
429 	addr = (void *)(((uintptr_t)addr) + user_pattern_size);
430 	buf->entries = 1;
431 	/* Start expanding. */
432 	memset(flow_items, 0, sizeof(flow_items));
433 	user_pattern_size -= sizeof(*item);
434 	/*
435 	 * Check if the last valid item has spec set, need complete pattern,
436 	 * and the pattern can be used for expansion.
437 	 */
438 	missed_item.type = mlx5_flow_expand_rss_item_complete(last_expand_item);
439 	if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
440 		/* Item type END indicates expansion is not required. */
441 		return lsize;
442 	}
443 	if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
444 		next = NULL;
445 		missed = 1;
446 		i = 0;
447 		while (node->next && node->next[i]) {
448 			next = &graph[node->next[i]];
449 			if (next->type == missed_item.type) {
450 				flow_items[0].type = missed_item.type;
451 				flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
452 				break;
453 			}
454 			if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
455 				node = next;
456 				i = 0;
457 			} else {
458 				++i;
459 			}
460 			next = NULL;
461 		}
462 	}
463 	if (next && missed) {
464 		elt = 2; /* missed item + item end. */
465 		node = next;
466 		lsize += elt * sizeof(*item) + user_pattern_size;
467 		if (lsize > size)
468 			return -EINVAL;
469 		if (node->rss_types & types) {
470 			buf->entry[buf->entries].priority = 1;
471 			buf->entry[buf->entries].pattern = addr;
472 			buf->entries++;
473 			rte_memcpy(addr, buf->entry[0].pattern,
474 				   user_pattern_size);
475 			addr = (void *)(((uintptr_t)addr) + user_pattern_size);
476 			rte_memcpy(addr, flow_items, elt * sizeof(*item));
477 			addr = (void *)(((uintptr_t)addr) +
478 					elt * sizeof(*item));
479 		}
480 	} else if (last_expand_item != NULL) {
481 		node = mlx5_flow_expand_rss_adjust_node(pattern,
482 				last_expand_item_idx, graph, node);
483 	}
484 	memset(flow_items, 0, sizeof(flow_items));
485 	next_node = mlx5_flow_expand_rss_skip_explicit(graph,
486 			node->next);
487 	stack[stack_pos] = next_node;
488 	node = next_node ? &graph[*next_node] : NULL;
489 	while (node) {
490 		flow_items[stack_pos].type = node->type;
491 		if (node->rss_types & types) {
492 			size_t n;
493 			/*
494 			 * compute the number of items to copy from the
495 			 * expansion and copy it.
496 			 * When the stack_pos is 0, there are 1 element in it,
497 			 * plus the addition END item.
498 			 */
499 			elt = stack_pos + 2;
500 			flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
501 			lsize += elt * sizeof(*item) + user_pattern_size;
502 			if (lsize > size)
503 				return -EINVAL;
504 			n = elt * sizeof(*item);
505 			buf->entry[buf->entries].priority =
506 				stack_pos + 1 + missed;
507 			buf->entry[buf->entries].pattern = addr;
508 			buf->entries++;
509 			rte_memcpy(addr, buf->entry[0].pattern,
510 				   user_pattern_size);
511 			addr = (void *)(((uintptr_t)addr) +
512 					user_pattern_size);
513 			rte_memcpy(addr, &missed_item,
514 				   missed * sizeof(*item));
515 			addr = (void *)(((uintptr_t)addr) +
516 				missed * sizeof(*item));
517 			rte_memcpy(addr, flow_items, n);
518 			addr = (void *)(((uintptr_t)addr) + n);
519 		}
520 		/* Go deeper. */
521 		if (!(node->node_flags & MLX5_EXPANSION_NODE_OPTIONAL) &&
522 				node->next) {
523 			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
524 					node->next);
525 			if (stack_pos++ == MLX5_RSS_EXP_ELT_N) {
526 				rte_errno = E2BIG;
527 				return -rte_errno;
528 			}
529 			stack[stack_pos] = next_node;
530 		} else if (*(next_node + 1)) {
531 			/* Follow up with the next possibility. */
532 			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
533 					++next_node);
534 		} else if (!stack_pos) {
535 			/*
536 			 * Completing the traverse over the different paths.
537 			 * The next_node is advanced to the terminator.
538 			 */
539 			++next_node;
540 		} else {
541 			/* Move to the next path. */
542 			while (stack_pos) {
543 				next_node = stack[--stack_pos];
544 				next_node++;
545 				if (*next_node)
546 					break;
547 			}
548 			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
549 					next_node);
550 			stack[stack_pos] = next_node;
551 		}
552 		node = next_node && *next_node ? &graph[*next_node] : NULL;
553 	};
554 	return lsize;
555 }
556 
557 enum mlx5_expansion {
558 	MLX5_EXPANSION_ROOT,
559 	MLX5_EXPANSION_ROOT_OUTER,
560 	MLX5_EXPANSION_OUTER_ETH,
561 	MLX5_EXPANSION_OUTER_VLAN,
562 	MLX5_EXPANSION_OUTER_IPV4,
563 	MLX5_EXPANSION_OUTER_IPV4_UDP,
564 	MLX5_EXPANSION_OUTER_IPV4_TCP,
565 	MLX5_EXPANSION_OUTER_IPV4_ESP,
566 	MLX5_EXPANSION_OUTER_IPV6,
567 	MLX5_EXPANSION_OUTER_IPV6_UDP,
568 	MLX5_EXPANSION_OUTER_IPV6_TCP,
569 	MLX5_EXPANSION_OUTER_IPV6_ESP,
570 	MLX5_EXPANSION_VXLAN,
571 	MLX5_EXPANSION_STD_VXLAN,
572 	MLX5_EXPANSION_L3_VXLAN,
573 	MLX5_EXPANSION_VXLAN_GPE,
574 	MLX5_EXPANSION_GRE,
575 	MLX5_EXPANSION_NVGRE,
576 	MLX5_EXPANSION_GRE_KEY,
577 	MLX5_EXPANSION_MPLS,
578 	MLX5_EXPANSION_ETH,
579 	MLX5_EXPANSION_VLAN,
580 	MLX5_EXPANSION_IPV4,
581 	MLX5_EXPANSION_IPV4_UDP,
582 	MLX5_EXPANSION_IPV4_TCP,
583 	MLX5_EXPANSION_IPV4_ESP,
584 	MLX5_EXPANSION_IPV6,
585 	MLX5_EXPANSION_IPV6_UDP,
586 	MLX5_EXPANSION_IPV6_TCP,
587 	MLX5_EXPANSION_IPV6_ESP,
588 	MLX5_EXPANSION_IPV6_FRAG_EXT,
589 	MLX5_EXPANSION_GTP,
590 	MLX5_EXPANSION_GENEVE,
591 };
592 
593 /** Supported expansion of items. */
594 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
595 	[MLX5_EXPANSION_ROOT] = {
596 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
597 						  MLX5_EXPANSION_IPV4,
598 						  MLX5_EXPANSION_IPV6),
599 		.type = RTE_FLOW_ITEM_TYPE_END,
600 	},
601 	[MLX5_EXPANSION_ROOT_OUTER] = {
602 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
603 						  MLX5_EXPANSION_OUTER_IPV4,
604 						  MLX5_EXPANSION_OUTER_IPV6),
605 		.type = RTE_FLOW_ITEM_TYPE_END,
606 	},
607 	[MLX5_EXPANSION_OUTER_ETH] = {
608 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
609 		.type = RTE_FLOW_ITEM_TYPE_ETH,
610 		.rss_types = 0,
611 	},
612 	[MLX5_EXPANSION_OUTER_VLAN] = {
613 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
614 						  MLX5_EXPANSION_OUTER_IPV6),
615 		.type = RTE_FLOW_ITEM_TYPE_VLAN,
616 		.node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
617 	},
618 	[MLX5_EXPANSION_OUTER_IPV4] = {
619 		.next = MLX5_FLOW_EXPAND_RSS_NEXT
620 			(MLX5_EXPANSION_OUTER_IPV4_UDP,
621 			 MLX5_EXPANSION_OUTER_IPV4_TCP,
622 			 MLX5_EXPANSION_OUTER_IPV4_ESP,
623 			 MLX5_EXPANSION_GRE,
624 			 MLX5_EXPANSION_NVGRE,
625 			 MLX5_EXPANSION_IPV4,
626 			 MLX5_EXPANSION_IPV6),
627 		.type = RTE_FLOW_ITEM_TYPE_IPV4,
628 		.rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
629 			RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
630 	},
631 	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
632 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
633 						  MLX5_EXPANSION_VXLAN_GPE,
634 						  MLX5_EXPANSION_MPLS,
635 						  MLX5_EXPANSION_GENEVE,
636 						  MLX5_EXPANSION_GTP),
637 		.type = RTE_FLOW_ITEM_TYPE_UDP,
638 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
639 	},
640 	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
641 		.type = RTE_FLOW_ITEM_TYPE_TCP,
642 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
643 	},
644 	[MLX5_EXPANSION_OUTER_IPV4_ESP] = {
645 		.type = RTE_FLOW_ITEM_TYPE_ESP,
646 		.rss_types = RTE_ETH_RSS_ESP,
647 	},
648 	[MLX5_EXPANSION_OUTER_IPV6] = {
649 		.next = MLX5_FLOW_EXPAND_RSS_NEXT
650 			(MLX5_EXPANSION_OUTER_IPV6_UDP,
651 			 MLX5_EXPANSION_OUTER_IPV6_TCP,
652 			 MLX5_EXPANSION_OUTER_IPV6_ESP,
653 			 MLX5_EXPANSION_IPV4,
654 			 MLX5_EXPANSION_IPV6,
655 			 MLX5_EXPANSION_GRE,
656 			 MLX5_EXPANSION_NVGRE),
657 		.type = RTE_FLOW_ITEM_TYPE_IPV6,
658 		.rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
659 			RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
660 	},
661 	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
662 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
663 						  MLX5_EXPANSION_VXLAN_GPE,
664 						  MLX5_EXPANSION_MPLS,
665 						  MLX5_EXPANSION_GENEVE,
666 						  MLX5_EXPANSION_GTP),
667 		.type = RTE_FLOW_ITEM_TYPE_UDP,
668 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
669 	},
670 	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
671 		.type = RTE_FLOW_ITEM_TYPE_TCP,
672 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
673 	},
674 	[MLX5_EXPANSION_OUTER_IPV6_ESP] = {
675 		.type = RTE_FLOW_ITEM_TYPE_ESP,
676 		.rss_types = RTE_ETH_RSS_ESP,
677 	},
678 	[MLX5_EXPANSION_VXLAN] = {
679 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
680 						  MLX5_EXPANSION_IPV4,
681 						  MLX5_EXPANSION_IPV6),
682 		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
683 	},
684 	[MLX5_EXPANSION_STD_VXLAN] = {
685 			.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
686 					.type = RTE_FLOW_ITEM_TYPE_VXLAN,
687 	},
688 	[MLX5_EXPANSION_L3_VXLAN] = {
689 			.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
690 					MLX5_EXPANSION_IPV6),
691 					.type = RTE_FLOW_ITEM_TYPE_VXLAN,
692 	},
693 	[MLX5_EXPANSION_VXLAN_GPE] = {
694 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
695 						  MLX5_EXPANSION_IPV4,
696 						  MLX5_EXPANSION_IPV6),
697 		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
698 	},
699 	[MLX5_EXPANSION_GRE] = {
700 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
701 						  MLX5_EXPANSION_IPV4,
702 						  MLX5_EXPANSION_IPV6,
703 						  MLX5_EXPANSION_GRE_KEY,
704 						  MLX5_EXPANSION_MPLS),
705 		.type = RTE_FLOW_ITEM_TYPE_GRE,
706 	},
707 	[MLX5_EXPANSION_GRE_KEY] = {
708 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
709 						  MLX5_EXPANSION_IPV6,
710 						  MLX5_EXPANSION_MPLS),
711 		.type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
712 		.node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
713 	},
714 	[MLX5_EXPANSION_NVGRE] = {
715 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
716 		.type = RTE_FLOW_ITEM_TYPE_NVGRE,
717 	},
718 	[MLX5_EXPANSION_MPLS] = {
719 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
720 						  MLX5_EXPANSION_IPV6,
721 						  MLX5_EXPANSION_ETH),
722 		.type = RTE_FLOW_ITEM_TYPE_MPLS,
723 		.node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
724 	},
725 	[MLX5_EXPANSION_ETH] = {
726 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
727 		.type = RTE_FLOW_ITEM_TYPE_ETH,
728 	},
729 	[MLX5_EXPANSION_VLAN] = {
730 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
731 						  MLX5_EXPANSION_IPV6),
732 		.type = RTE_FLOW_ITEM_TYPE_VLAN,
733 		.node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
734 	},
735 	[MLX5_EXPANSION_IPV4] = {
736 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
737 						  MLX5_EXPANSION_IPV4_TCP,
738 						  MLX5_EXPANSION_IPV4_ESP),
739 		.type = RTE_FLOW_ITEM_TYPE_IPV4,
740 		.rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
741 			RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
742 	},
743 	[MLX5_EXPANSION_IPV4_UDP] = {
744 		.type = RTE_FLOW_ITEM_TYPE_UDP,
745 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
746 	},
747 	[MLX5_EXPANSION_IPV4_TCP] = {
748 		.type = RTE_FLOW_ITEM_TYPE_TCP,
749 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
750 	},
751 	[MLX5_EXPANSION_IPV4_ESP] = {
752 		.type = RTE_FLOW_ITEM_TYPE_ESP,
753 		.rss_types = RTE_ETH_RSS_ESP,
754 	},
755 	[MLX5_EXPANSION_IPV6] = {
756 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
757 						  MLX5_EXPANSION_IPV6_TCP,
758 						  MLX5_EXPANSION_IPV6_ESP,
759 						  MLX5_EXPANSION_IPV6_FRAG_EXT),
760 		.type = RTE_FLOW_ITEM_TYPE_IPV6,
761 		.rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
762 			RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
763 	},
764 	[MLX5_EXPANSION_IPV6_UDP] = {
765 		.type = RTE_FLOW_ITEM_TYPE_UDP,
766 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
767 	},
768 	[MLX5_EXPANSION_IPV6_TCP] = {
769 		.type = RTE_FLOW_ITEM_TYPE_TCP,
770 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
771 	},
772 	[MLX5_EXPANSION_IPV6_ESP] = {
773 		.type = RTE_FLOW_ITEM_TYPE_ESP,
774 		.rss_types = RTE_ETH_RSS_ESP,
775 	},
776 	[MLX5_EXPANSION_IPV6_FRAG_EXT] = {
777 		.type = RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
778 	},
779 	[MLX5_EXPANSION_GTP] = {
780 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
781 						  MLX5_EXPANSION_IPV6),
782 		.type = RTE_FLOW_ITEM_TYPE_GTP,
783 	},
784 	[MLX5_EXPANSION_GENEVE] = {
785 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
786 						  MLX5_EXPANSION_IPV4,
787 						  MLX5_EXPANSION_IPV6),
788 		.type = RTE_FLOW_ITEM_TYPE_GENEVE,
789 	},
790 };
791 
792 static struct rte_flow_action_handle *
793 mlx5_action_handle_create(struct rte_eth_dev *dev,
794 			  const struct rte_flow_indir_action_conf *conf,
795 			  const struct rte_flow_action *action,
796 			  struct rte_flow_error *error);
797 static int mlx5_action_handle_destroy
798 				(struct rte_eth_dev *dev,
799 				 struct rte_flow_action_handle *handle,
800 				 struct rte_flow_error *error);
801 static int mlx5_action_handle_update
802 				(struct rte_eth_dev *dev,
803 				 struct rte_flow_action_handle *handle,
804 				 const void *update,
805 				 struct rte_flow_error *error);
806 static int mlx5_action_handle_query
807 				(struct rte_eth_dev *dev,
808 				 const struct rte_flow_action_handle *handle,
809 				 void *data,
810 				 struct rte_flow_error *error);
811 static int
812 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
813 		    struct rte_flow_tunnel *app_tunnel,
814 		    struct rte_flow_action **actions,
815 		    uint32_t *num_of_actions,
816 		    struct rte_flow_error *error);
817 static int
818 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
819 		       struct rte_flow_tunnel *app_tunnel,
820 		       struct rte_flow_item **items,
821 		       uint32_t *num_of_items,
822 		       struct rte_flow_error *error);
823 static int
824 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
825 			      struct rte_flow_item *pmd_items,
826 			      uint32_t num_items, struct rte_flow_error *err);
827 static int
828 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
829 				struct rte_flow_action *pmd_actions,
830 				uint32_t num_actions,
831 				struct rte_flow_error *err);
832 static int
833 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
834 				  struct rte_mbuf *m,
835 				  struct rte_flow_restore_info *info,
836 				  struct rte_flow_error *err);
837 static struct rte_flow_item_flex_handle *
838 mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
839 			   const struct rte_flow_item_flex_conf *conf,
840 			   struct rte_flow_error *error);
841 static int
842 mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
843 			    const struct rte_flow_item_flex_handle *handle,
844 			    struct rte_flow_error *error);
845 static int
846 mlx5_flow_info_get(struct rte_eth_dev *dev,
847 		   struct rte_flow_port_info *port_info,
848 		   struct rte_flow_queue_info *queue_info,
849 		   struct rte_flow_error *error);
850 static int
851 mlx5_flow_port_configure(struct rte_eth_dev *dev,
852 			 const struct rte_flow_port_attr *port_attr,
853 			 uint16_t nb_queue,
854 			 const struct rte_flow_queue_attr *queue_attr[],
855 			 struct rte_flow_error *err);
856 
857 static struct rte_flow_pattern_template *
858 mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
859 		const struct rte_flow_pattern_template_attr *attr,
860 		const struct rte_flow_item items[],
861 		struct rte_flow_error *error);
862 
863 static int
864 mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
865 				   struct rte_flow_pattern_template *template,
866 				   struct rte_flow_error *error);
867 static struct rte_flow_actions_template *
868 mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
869 			const struct rte_flow_actions_template_attr *attr,
870 			const struct rte_flow_action actions[],
871 			const struct rte_flow_action masks[],
872 			struct rte_flow_error *error);
873 static int
874 mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
875 				   struct rte_flow_actions_template *template,
876 				   struct rte_flow_error *error);
877 
878 static struct rte_flow_template_table *
879 mlx5_flow_table_create(struct rte_eth_dev *dev,
880 		       const struct rte_flow_template_table_attr *attr,
881 		       struct rte_flow_pattern_template *item_templates[],
882 		       uint8_t nb_item_templates,
883 		       struct rte_flow_actions_template *action_templates[],
884 		       uint8_t nb_action_templates,
885 		       struct rte_flow_error *error);
886 static int
887 mlx5_flow_table_destroy(struct rte_eth_dev *dev,
888 			struct rte_flow_template_table *table,
889 			struct rte_flow_error *error);
890 static struct rte_flow *
891 mlx5_flow_async_flow_create(struct rte_eth_dev *dev,
892 			    uint32_t queue,
893 			    const struct rte_flow_op_attr *attr,
894 			    struct rte_flow_template_table *table,
895 			    const struct rte_flow_item items[],
896 			    uint8_t pattern_template_index,
897 			    const struct rte_flow_action actions[],
898 			    uint8_t action_template_index,
899 			    void *user_data,
900 			    struct rte_flow_error *error);
901 static int
902 mlx5_flow_async_flow_destroy(struct rte_eth_dev *dev,
903 			     uint32_t queue,
904 			     const struct rte_flow_op_attr *attr,
905 			     struct rte_flow *flow,
906 			     void *user_data,
907 			     struct rte_flow_error *error);
908 static int
909 mlx5_flow_pull(struct rte_eth_dev *dev,
910 	       uint32_t queue,
911 	       struct rte_flow_op_result res[],
912 	       uint16_t n_res,
913 	       struct rte_flow_error *error);
914 static int
915 mlx5_flow_push(struct rte_eth_dev *dev,
916 	       uint32_t queue,
917 	       struct rte_flow_error *error);
918 
919 static struct rte_flow_action_handle *
920 mlx5_flow_async_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
921 				 const struct rte_flow_op_attr *attr,
922 				 const struct rte_flow_indir_action_conf *conf,
923 				 const struct rte_flow_action *action,
924 				 void *user_data,
925 				 struct rte_flow_error *error);
926 
927 static int
928 mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
929 				 const struct rte_flow_op_attr *attr,
930 				 struct rte_flow_action_handle *handle,
931 				 const void *update,
932 				 void *user_data,
933 				 struct rte_flow_error *error);
934 
935 static int
936 mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
937 				  const struct rte_flow_op_attr *attr,
938 				  struct rte_flow_action_handle *handle,
939 				  void *user_data,
940 				  struct rte_flow_error *error);
941 
942 static const struct rte_flow_ops mlx5_flow_ops = {
943 	.validate = mlx5_flow_validate,
944 	.create = mlx5_flow_create,
945 	.destroy = mlx5_flow_destroy,
946 	.flush = mlx5_flow_flush,
947 	.isolate = mlx5_flow_isolate,
948 	.query = mlx5_flow_query,
949 	.dev_dump = mlx5_flow_dev_dump,
950 	.get_aged_flows = mlx5_flow_get_aged_flows,
951 	.action_handle_create = mlx5_action_handle_create,
952 	.action_handle_destroy = mlx5_action_handle_destroy,
953 	.action_handle_update = mlx5_action_handle_update,
954 	.action_handle_query = mlx5_action_handle_query,
955 	.tunnel_decap_set = mlx5_flow_tunnel_decap_set,
956 	.tunnel_match = mlx5_flow_tunnel_match,
957 	.tunnel_action_decap_release = mlx5_flow_tunnel_action_release,
958 	.tunnel_item_release = mlx5_flow_tunnel_item_release,
959 	.get_restore_info = mlx5_flow_tunnel_get_restore_info,
960 	.flex_item_create = mlx5_flow_flex_item_create,
961 	.flex_item_release = mlx5_flow_flex_item_release,
962 	.info_get = mlx5_flow_info_get,
963 	.configure = mlx5_flow_port_configure,
964 	.pattern_template_create = mlx5_flow_pattern_template_create,
965 	.pattern_template_destroy = mlx5_flow_pattern_template_destroy,
966 	.actions_template_create = mlx5_flow_actions_template_create,
967 	.actions_template_destroy = mlx5_flow_actions_template_destroy,
968 	.template_table_create = mlx5_flow_table_create,
969 	.template_table_destroy = mlx5_flow_table_destroy,
970 	.async_create = mlx5_flow_async_flow_create,
971 	.async_destroy = mlx5_flow_async_flow_destroy,
972 	.pull = mlx5_flow_pull,
973 	.push = mlx5_flow_push,
974 	.async_action_handle_create = mlx5_flow_async_action_handle_create,
975 	.async_action_handle_update = mlx5_flow_async_action_handle_update,
976 	.async_action_handle_destroy = mlx5_flow_async_action_handle_destroy,
977 };
978 
979 /* Tunnel information. */
980 struct mlx5_flow_tunnel_info {
981 	uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
982 	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
983 };
984 
985 static struct mlx5_flow_tunnel_info tunnels_info[] = {
986 	{
987 		.tunnel = MLX5_FLOW_LAYER_VXLAN,
988 		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
989 	},
990 	{
991 		.tunnel = MLX5_FLOW_LAYER_GENEVE,
992 		.ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
993 	},
994 	{
995 		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
996 		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
997 	},
998 	{
999 		.tunnel = MLX5_FLOW_LAYER_GRE,
1000 		.ptype = RTE_PTYPE_TUNNEL_GRE,
1001 	},
1002 	{
1003 		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
1004 		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
1005 	},
1006 	{
1007 		.tunnel = MLX5_FLOW_LAYER_MPLS,
1008 		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
1009 	},
1010 	{
1011 		.tunnel = MLX5_FLOW_LAYER_NVGRE,
1012 		.ptype = RTE_PTYPE_TUNNEL_NVGRE,
1013 	},
1014 	{
1015 		.tunnel = MLX5_FLOW_LAYER_IPIP,
1016 		.ptype = RTE_PTYPE_TUNNEL_IP,
1017 	},
1018 	{
1019 		.tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
1020 		.ptype = RTE_PTYPE_TUNNEL_IP,
1021 	},
1022 	{
1023 		.tunnel = MLX5_FLOW_LAYER_GTP,
1024 		.ptype = RTE_PTYPE_TUNNEL_GTPU,
1025 	},
1026 };
1027 
1028 
1029 
1030 /**
1031  * Translate tag ID to register.
1032  *
1033  * @param[in] dev
1034  *   Pointer to the Ethernet device structure.
1035  * @param[in] feature
1036  *   The feature that request the register.
1037  * @param[in] id
1038  *   The request register ID.
1039  * @param[out] error
1040  *   Error description in case of any.
1041  *
1042  * @return
1043  *   The request register on success, a negative errno
1044  *   value otherwise and rte_errno is set.
1045  */
1046 int
1047 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
1048 		     enum mlx5_feature_name feature,
1049 		     uint32_t id,
1050 		     struct rte_flow_error *error)
1051 {
1052 	struct mlx5_priv *priv = dev->data->dev_private;
1053 	struct mlx5_sh_config *config = &priv->sh->config;
1054 	enum modify_reg start_reg;
1055 	bool skip_mtr_reg = false;
1056 
1057 	switch (feature) {
1058 	case MLX5_HAIRPIN_RX:
1059 		return REG_B;
1060 	case MLX5_HAIRPIN_TX:
1061 		return REG_A;
1062 	case MLX5_METADATA_RX:
1063 		switch (config->dv_xmeta_en) {
1064 		case MLX5_XMETA_MODE_LEGACY:
1065 			return REG_B;
1066 		case MLX5_XMETA_MODE_META16:
1067 			return REG_C_0;
1068 		case MLX5_XMETA_MODE_META32:
1069 			return REG_C_1;
1070 		}
1071 		break;
1072 	case MLX5_METADATA_TX:
1073 		return REG_A;
1074 	case MLX5_METADATA_FDB:
1075 		switch (config->dv_xmeta_en) {
1076 		case MLX5_XMETA_MODE_LEGACY:
1077 			return REG_NON;
1078 		case MLX5_XMETA_MODE_META16:
1079 			return REG_C_0;
1080 		case MLX5_XMETA_MODE_META32:
1081 			return REG_C_1;
1082 		}
1083 		break;
1084 	case MLX5_FLOW_MARK:
1085 		switch (config->dv_xmeta_en) {
1086 		case MLX5_XMETA_MODE_LEGACY:
1087 			return REG_NON;
1088 		case MLX5_XMETA_MODE_META16:
1089 			return REG_C_1;
1090 		case MLX5_XMETA_MODE_META32:
1091 			return REG_C_0;
1092 		}
1093 		break;
1094 	case MLX5_MTR_ID:
1095 		/*
1096 		 * If meter color and meter id share one register, flow match
1097 		 * should use the meter color register for match.
1098 		 */
1099 		if (priv->mtr_reg_share)
1100 			return priv->mtr_color_reg;
1101 		else
1102 			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
1103 			       REG_C_3;
1104 	case MLX5_MTR_COLOR:
1105 	case MLX5_ASO_FLOW_HIT:
1106 	case MLX5_ASO_CONNTRACK:
1107 	case MLX5_SAMPLE_ID:
1108 		/* All features use the same REG_C. */
1109 		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
1110 		return priv->mtr_color_reg;
1111 	case MLX5_COPY_MARK:
1112 		/*
1113 		 * Metadata COPY_MARK register using is in meter suffix sub
1114 		 * flow while with meter. It's safe to share the same register.
1115 		 */
1116 		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
1117 	case MLX5_APP_TAG:
1118 		/*
1119 		 * If meter is enable, it will engage the register for color
1120 		 * match and flow match. If meter color match is not using the
1121 		 * REG_C_2, need to skip the REG_C_x be used by meter color
1122 		 * match.
1123 		 * If meter is disable, free to use all available registers.
1124 		 */
1125 		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
1126 			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
1127 		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
1128 		if (id > (uint32_t)(REG_C_7 - start_reg))
1129 			return rte_flow_error_set(error, EINVAL,
1130 						  RTE_FLOW_ERROR_TYPE_ITEM,
1131 						  NULL, "invalid tag id");
1132 		if (priv->sh->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
1133 			return rte_flow_error_set(error, ENOTSUP,
1134 						  RTE_FLOW_ERROR_TYPE_ITEM,
1135 						  NULL, "unsupported tag id");
1136 		/*
1137 		 * This case means meter is using the REG_C_x great than 2.
1138 		 * Take care not to conflict with meter color REG_C_x.
1139 		 * If the available index REG_C_y >= REG_C_x, skip the
1140 		 * color register.
1141 		 */
1142 		if (skip_mtr_reg && priv->sh->flow_mreg_c
1143 		    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
1144 			if (id >= (uint32_t)(REG_C_7 - start_reg))
1145 				return rte_flow_error_set(error, EINVAL,
1146 						       RTE_FLOW_ERROR_TYPE_ITEM,
1147 							NULL, "invalid tag id");
1148 			if (priv->sh->flow_mreg_c
1149 			    [id + 1 + start_reg - REG_C_0] != REG_NON)
1150 				return priv->sh->flow_mreg_c
1151 					       [id + 1 + start_reg - REG_C_0];
1152 			return rte_flow_error_set(error, ENOTSUP,
1153 						  RTE_FLOW_ERROR_TYPE_ITEM,
1154 						  NULL, "unsupported tag id");
1155 		}
1156 		return priv->sh->flow_mreg_c[id + start_reg - REG_C_0];
1157 	}
1158 	MLX5_ASSERT(false);
1159 	return rte_flow_error_set(error, EINVAL,
1160 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1161 				  NULL, "invalid feature name");
1162 }
1163 
1164 /**
1165  * Check extensive flow metadata register support.
1166  *
1167  * @param dev
1168  *   Pointer to rte_eth_dev structure.
1169  *
1170  * @return
1171  *   True if device supports extensive flow metadata register, otherwise false.
1172  */
1173 bool
1174 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
1175 {
1176 	struct mlx5_priv *priv = dev->data->dev_private;
1177 
1178 	/*
1179 	 * Having available reg_c can be regarded inclusively as supporting
1180 	 * extensive flow metadata register, which could mean,
1181 	 * - metadata register copy action by modify header.
1182 	 * - 16 modify header actions is supported.
1183 	 * - reg_c's are preserved across different domain (FDB and NIC) on
1184 	 *   packet loopback by flow lookup miss.
1185 	 */
1186 	return priv->sh->flow_mreg_c[2] != REG_NON;
1187 }
1188 
1189 /**
1190  * Get the lowest priority.
1191  *
1192  * @param[in] dev
1193  *   Pointer to the Ethernet device structure.
1194  * @param[in] attributes
1195  *   Pointer to device flow rule attributes.
1196  *
1197  * @return
1198  *   The value of lowest priority of flow.
1199  */
1200 uint32_t
1201 mlx5_get_lowest_priority(struct rte_eth_dev *dev,
1202 			  const struct rte_flow_attr *attr)
1203 {
1204 	struct mlx5_priv *priv = dev->data->dev_private;
1205 
1206 	if (!attr->group && !attr->transfer)
1207 		return priv->sh->flow_max_priority - 2;
1208 	return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1;
1209 }
1210 
1211 /**
1212  * Calculate matcher priority of the flow.
1213  *
1214  * @param[in] dev
1215  *   Pointer to the Ethernet device structure.
1216  * @param[in] attr
1217  *   Pointer to device flow rule attributes.
1218  * @param[in] subpriority
1219  *   The priority based on the items.
1220  * @param[in] external
1221  *   Flow is user flow.
1222  * @return
1223  *   The matcher priority of the flow.
1224  */
1225 uint16_t
1226 mlx5_get_matcher_priority(struct rte_eth_dev *dev,
1227 			  const struct rte_flow_attr *attr,
1228 			  uint32_t subpriority, bool external)
1229 {
1230 	uint16_t priority = (uint16_t)attr->priority;
1231 	struct mlx5_priv *priv = dev->data->dev_private;
1232 
1233 	if (!attr->group && !attr->transfer) {
1234 		if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1235 			priority = priv->sh->flow_max_priority - 1;
1236 		return mlx5_os_flow_adjust_priority(dev, priority, subpriority);
1237 	} else if (!external && attr->transfer && attr->group == 0 &&
1238 		   attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR) {
1239 		return (priv->sh->flow_max_priority - 1) * 3;
1240 	}
1241 	if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1242 		priority = MLX5_NON_ROOT_FLOW_MAX_PRIO;
1243 	return priority * 3 + subpriority;
1244 }
1245 
1246 /**
1247  * Verify the @p item specifications (spec, last, mask) are compatible with the
1248  * NIC capabilities.
1249  *
1250  * @param[in] item
1251  *   Item specification.
1252  * @param[in] mask
1253  *   @p item->mask or flow default bit-masks.
1254  * @param[in] nic_mask
1255  *   Bit-masks covering supported fields by the NIC to compare with user mask.
1256  * @param[in] size
1257  *   Bit-masks size in bytes.
1258  * @param[in] range_accepted
1259  *   True if range of values is accepted for specific fields, false otherwise.
1260  * @param[out] error
1261  *   Pointer to error structure.
1262  *
1263  * @return
1264  *   0 on success, a negative errno value otherwise and rte_errno is set.
1265  */
1266 int
1267 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
1268 			  const uint8_t *mask,
1269 			  const uint8_t *nic_mask,
1270 			  unsigned int size,
1271 			  bool range_accepted,
1272 			  struct rte_flow_error *error)
1273 {
1274 	unsigned int i;
1275 
1276 	MLX5_ASSERT(nic_mask);
1277 	for (i = 0; i < size; ++i)
1278 		if ((nic_mask[i] | mask[i]) != nic_mask[i])
1279 			return rte_flow_error_set(error, ENOTSUP,
1280 						  RTE_FLOW_ERROR_TYPE_ITEM,
1281 						  item,
1282 						  "mask enables non supported"
1283 						  " bits");
1284 	if (!item->spec && (item->mask || item->last))
1285 		return rte_flow_error_set(error, EINVAL,
1286 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1287 					  "mask/last without a spec is not"
1288 					  " supported");
1289 	if (item->spec && item->last && !range_accepted) {
1290 		uint8_t spec[size];
1291 		uint8_t last[size];
1292 		unsigned int i;
1293 		int ret;
1294 
1295 		for (i = 0; i < size; ++i) {
1296 			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
1297 			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
1298 		}
1299 		ret = memcmp(spec, last, size);
1300 		if (ret != 0)
1301 			return rte_flow_error_set(error, EINVAL,
1302 						  RTE_FLOW_ERROR_TYPE_ITEM,
1303 						  item,
1304 						  "range is not valid");
1305 	}
1306 	return 0;
1307 }
1308 
1309 /**
1310  * Adjust the hash fields according to the @p flow information.
1311  *
1312  * @param[in] dev_flow.
1313  *   Pointer to the mlx5_flow.
1314  * @param[in] tunnel
1315  *   1 when the hash field is for a tunnel item.
1316  * @param[in] layer_types
1317  *   RTE_ETH_RSS_* types.
1318  * @param[in] hash_fields
1319  *   Item hash fields.
1320  *
1321  * @return
1322  *   The hash fields that should be used.
1323  */
1324 uint64_t
1325 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
1326 			    int tunnel __rte_unused, uint64_t layer_types,
1327 			    uint64_t hash_fields)
1328 {
1329 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1330 	int rss_request_inner = rss_desc->level >= 2;
1331 
1332 	/* Check RSS hash level for tunnel. */
1333 	if (tunnel && rss_request_inner)
1334 		hash_fields |= IBV_RX_HASH_INNER;
1335 	else if (tunnel || rss_request_inner)
1336 		return 0;
1337 #endif
1338 	/* Check if requested layer matches RSS hash fields. */
1339 	if (!(rss_desc->types & layer_types))
1340 		return 0;
1341 	return hash_fields;
1342 }
1343 
1344 /**
1345  * Lookup and set the ptype in the data Rx part.  A single Ptype can be used,
1346  * if several tunnel rules are used on this queue, the tunnel ptype will be
1347  * cleared.
1348  *
1349  * @param rxq_ctrl
1350  *   Rx queue to update.
1351  */
1352 static void
1353 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
1354 {
1355 	unsigned int i;
1356 	uint32_t tunnel_ptype = 0;
1357 
1358 	/* Look up for the ptype to use. */
1359 	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
1360 		if (!rxq_ctrl->flow_tunnels_n[i])
1361 			continue;
1362 		if (!tunnel_ptype) {
1363 			tunnel_ptype = tunnels_info[i].ptype;
1364 		} else {
1365 			tunnel_ptype = 0;
1366 			break;
1367 		}
1368 	}
1369 	rxq_ctrl->rxq.tunnel = tunnel_ptype;
1370 }
1371 
1372 /**
1373  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
1374  * flow.
1375  *
1376  * @param[in] dev
1377  *   Pointer to the Ethernet device structure.
1378  * @param[in] dev_handle
1379  *   Pointer to device flow handle structure.
1380  */
1381 void
1382 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
1383 		       struct mlx5_flow_handle *dev_handle)
1384 {
1385 	struct mlx5_priv *priv = dev->data->dev_private;
1386 	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1387 	struct mlx5_ind_table_obj *ind_tbl = NULL;
1388 	unsigned int i;
1389 
1390 	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1391 		struct mlx5_hrxq *hrxq;
1392 
1393 		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1394 			      dev_handle->rix_hrxq);
1395 		if (hrxq)
1396 			ind_tbl = hrxq->ind_table;
1397 	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1398 		struct mlx5_shared_action_rss *shared_rss;
1399 
1400 		shared_rss = mlx5_ipool_get
1401 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1402 			 dev_handle->rix_srss);
1403 		if (shared_rss)
1404 			ind_tbl = shared_rss->ind_tbl;
1405 	}
1406 	if (!ind_tbl)
1407 		return;
1408 	for (i = 0; i != ind_tbl->queues_n; ++i) {
1409 		int idx = ind_tbl->queues[i];
1410 		struct mlx5_rxq_ctrl *rxq_ctrl;
1411 
1412 		if (mlx5_is_external_rxq(dev, idx))
1413 			continue;
1414 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
1415 		MLX5_ASSERT(rxq_ctrl != NULL);
1416 		if (rxq_ctrl == NULL)
1417 			continue;
1418 		/*
1419 		 * To support metadata register copy on Tx loopback,
1420 		 * this must be always enabled (metadata may arive
1421 		 * from other port - not from local flows only.
1422 		 */
1423 		if (tunnel) {
1424 			unsigned int j;
1425 
1426 			/* Increase the counter matching the flow. */
1427 			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1428 				if ((tunnels_info[j].tunnel &
1429 				     dev_handle->layers) ==
1430 				    tunnels_info[j].tunnel) {
1431 					rxq_ctrl->flow_tunnels_n[j]++;
1432 					break;
1433 				}
1434 			}
1435 			flow_rxq_tunnel_ptype_update(rxq_ctrl);
1436 		}
1437 	}
1438 }
1439 
1440 static void
1441 flow_rxq_mark_flag_set(struct rte_eth_dev *dev)
1442 {
1443 	struct mlx5_priv *priv = dev->data->dev_private;
1444 	struct mlx5_rxq_ctrl *rxq_ctrl;
1445 
1446 	if (priv->mark_enabled)
1447 		return;
1448 	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
1449 		rxq_ctrl->rxq.mark = 1;
1450 	}
1451 	priv->mark_enabled = 1;
1452 }
1453 
1454 /**
1455  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
1456  *
1457  * @param[in] dev
1458  *   Pointer to the Ethernet device structure.
1459  * @param[in] flow
1460  *   Pointer to flow structure.
1461  */
1462 static void
1463 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1464 {
1465 	struct mlx5_priv *priv = dev->data->dev_private;
1466 	uint32_t handle_idx;
1467 	struct mlx5_flow_handle *dev_handle;
1468 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
1469 
1470 	MLX5_ASSERT(wks);
1471 	if (wks->mark)
1472 		flow_rxq_mark_flag_set(dev);
1473 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1474 		       handle_idx, dev_handle, next)
1475 		flow_drv_rxq_flags_set(dev, dev_handle);
1476 }
1477 
1478 /**
1479  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1480  * device flow if no other flow uses it with the same kind of request.
1481  *
1482  * @param dev
1483  *   Pointer to Ethernet device.
1484  * @param[in] dev_handle
1485  *   Pointer to the device flow handle structure.
1486  */
1487 static void
1488 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
1489 			struct mlx5_flow_handle *dev_handle)
1490 {
1491 	struct mlx5_priv *priv = dev->data->dev_private;
1492 	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1493 	struct mlx5_ind_table_obj *ind_tbl = NULL;
1494 	unsigned int i;
1495 
1496 	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1497 		struct mlx5_hrxq *hrxq;
1498 
1499 		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1500 			      dev_handle->rix_hrxq);
1501 		if (hrxq)
1502 			ind_tbl = hrxq->ind_table;
1503 	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1504 		struct mlx5_shared_action_rss *shared_rss;
1505 
1506 		shared_rss = mlx5_ipool_get
1507 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1508 			 dev_handle->rix_srss);
1509 		if (shared_rss)
1510 			ind_tbl = shared_rss->ind_tbl;
1511 	}
1512 	if (!ind_tbl)
1513 		return;
1514 	MLX5_ASSERT(dev->data->dev_started);
1515 	for (i = 0; i != ind_tbl->queues_n; ++i) {
1516 		int idx = ind_tbl->queues[i];
1517 		struct mlx5_rxq_ctrl *rxq_ctrl;
1518 
1519 		if (mlx5_is_external_rxq(dev, idx))
1520 			continue;
1521 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
1522 		MLX5_ASSERT(rxq_ctrl != NULL);
1523 		if (rxq_ctrl == NULL)
1524 			continue;
1525 		if (tunnel) {
1526 			unsigned int j;
1527 
1528 			/* Decrease the counter matching the flow. */
1529 			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1530 				if ((tunnels_info[j].tunnel &
1531 				     dev_handle->layers) ==
1532 				    tunnels_info[j].tunnel) {
1533 					rxq_ctrl->flow_tunnels_n[j]--;
1534 					break;
1535 				}
1536 			}
1537 			flow_rxq_tunnel_ptype_update(rxq_ctrl);
1538 		}
1539 	}
1540 }
1541 
1542 /**
1543  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1544  * @p flow if no other flow uses it with the same kind of request.
1545  *
1546  * @param dev
1547  *   Pointer to Ethernet device.
1548  * @param[in] flow
1549  *   Pointer to the flow.
1550  */
1551 static void
1552 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1553 {
1554 	struct mlx5_priv *priv = dev->data->dev_private;
1555 	uint32_t handle_idx;
1556 	struct mlx5_flow_handle *dev_handle;
1557 
1558 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1559 		       handle_idx, dev_handle, next)
1560 		flow_drv_rxq_flags_trim(dev, dev_handle);
1561 }
1562 
1563 /**
1564  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
1565  *
1566  * @param dev
1567  *   Pointer to Ethernet device.
1568  */
1569 static void
1570 flow_rxq_flags_clear(struct rte_eth_dev *dev)
1571 {
1572 	struct mlx5_priv *priv = dev->data->dev_private;
1573 	unsigned int i;
1574 
1575 	for (i = 0; i != priv->rxqs_n; ++i) {
1576 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1577 		unsigned int j;
1578 
1579 		if (rxq == NULL || rxq->ctrl == NULL)
1580 			continue;
1581 		rxq->ctrl->rxq.mark = 0;
1582 		for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
1583 			rxq->ctrl->flow_tunnels_n[j] = 0;
1584 		rxq->ctrl->rxq.tunnel = 0;
1585 	}
1586 	priv->mark_enabled = 0;
1587 }
1588 
1589 /**
1590  * Set the Rx queue dynamic metadata (mask and offset) for a flow
1591  *
1592  * @param[in] dev
1593  *   Pointer to the Ethernet device structure.
1594  */
1595 void
1596 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
1597 {
1598 	struct mlx5_priv *priv = dev->data->dev_private;
1599 	unsigned int i;
1600 
1601 	for (i = 0; i != priv->rxqs_n; ++i) {
1602 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1603 		struct mlx5_rxq_data *data;
1604 
1605 		if (rxq == NULL || rxq->ctrl == NULL)
1606 			continue;
1607 		data = &rxq->ctrl->rxq;
1608 		if (!rte_flow_dynf_metadata_avail()) {
1609 			data->dynf_meta = 0;
1610 			data->flow_meta_mask = 0;
1611 			data->flow_meta_offset = -1;
1612 			data->flow_meta_port_mask = 0;
1613 		} else {
1614 			data->dynf_meta = 1;
1615 			data->flow_meta_mask = rte_flow_dynf_metadata_mask;
1616 			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
1617 			data->flow_meta_port_mask = priv->sh->dv_meta_mask;
1618 		}
1619 	}
1620 }
1621 
1622 /*
1623  * return a pointer to the desired action in the list of actions.
1624  *
1625  * @param[in] actions
1626  *   The list of actions to search the action in.
1627  * @param[in] action
1628  *   The action to find.
1629  *
1630  * @return
1631  *   Pointer to the action in the list, if found. NULL otherwise.
1632  */
1633 const struct rte_flow_action *
1634 mlx5_flow_find_action(const struct rte_flow_action *actions,
1635 		      enum rte_flow_action_type action)
1636 {
1637 	if (actions == NULL)
1638 		return NULL;
1639 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
1640 		if (actions->type == action)
1641 			return actions;
1642 	return NULL;
1643 }
1644 
1645 /*
1646  * Validate the flag action.
1647  *
1648  * @param[in] action_flags
1649  *   Bit-fields that holds the actions detected until now.
1650  * @param[in] attr
1651  *   Attributes of flow that includes this action.
1652  * @param[out] error
1653  *   Pointer to error structure.
1654  *
1655  * @return
1656  *   0 on success, a negative errno value otherwise and rte_errno is set.
1657  */
1658 int
1659 mlx5_flow_validate_action_flag(uint64_t action_flags,
1660 			       const struct rte_flow_attr *attr,
1661 			       struct rte_flow_error *error)
1662 {
1663 	if (action_flags & MLX5_FLOW_ACTION_MARK)
1664 		return rte_flow_error_set(error, EINVAL,
1665 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1666 					  "can't mark and flag in same flow");
1667 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
1668 		return rte_flow_error_set(error, EINVAL,
1669 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1670 					  "can't have 2 flag"
1671 					  " actions in same flow");
1672 	if (attr->egress)
1673 		return rte_flow_error_set(error, ENOTSUP,
1674 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1675 					  "flag action not supported for "
1676 					  "egress");
1677 	return 0;
1678 }
1679 
1680 /*
1681  * Validate the mark action.
1682  *
1683  * @param[in] action
1684  *   Pointer to the queue action.
1685  * @param[in] action_flags
1686  *   Bit-fields that holds the actions detected until now.
1687  * @param[in] attr
1688  *   Attributes of flow that includes this action.
1689  * @param[out] error
1690  *   Pointer to error structure.
1691  *
1692  * @return
1693  *   0 on success, a negative errno value otherwise and rte_errno is set.
1694  */
1695 int
1696 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1697 			       uint64_t action_flags,
1698 			       const struct rte_flow_attr *attr,
1699 			       struct rte_flow_error *error)
1700 {
1701 	const struct rte_flow_action_mark *mark = action->conf;
1702 
1703 	if (!mark)
1704 		return rte_flow_error_set(error, EINVAL,
1705 					  RTE_FLOW_ERROR_TYPE_ACTION,
1706 					  action,
1707 					  "configuration cannot be null");
1708 	if (mark->id >= MLX5_FLOW_MARK_MAX)
1709 		return rte_flow_error_set(error, EINVAL,
1710 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1711 					  &mark->id,
1712 					  "mark id must in 0 <= id < "
1713 					  RTE_STR(MLX5_FLOW_MARK_MAX));
1714 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
1715 		return rte_flow_error_set(error, EINVAL,
1716 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1717 					  "can't flag and mark in same flow");
1718 	if (action_flags & MLX5_FLOW_ACTION_MARK)
1719 		return rte_flow_error_set(error, EINVAL,
1720 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1721 					  "can't have 2 mark actions in same"
1722 					  " flow");
1723 	if (attr->egress)
1724 		return rte_flow_error_set(error, ENOTSUP,
1725 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1726 					  "mark action not supported for "
1727 					  "egress");
1728 	return 0;
1729 }
1730 
1731 /*
1732  * Validate the drop action.
1733  *
1734  * @param[in] action_flags
1735  *   Bit-fields that holds the actions detected until now.
1736  * @param[in] attr
1737  *   Attributes of flow that includes this action.
1738  * @param[out] error
1739  *   Pointer to error structure.
1740  *
1741  * @return
1742  *   0 on success, a negative errno value otherwise and rte_errno is set.
1743  */
1744 int
1745 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1746 			       const struct rte_flow_attr *attr,
1747 			       struct rte_flow_error *error)
1748 {
1749 	if (attr->egress)
1750 		return rte_flow_error_set(error, ENOTSUP,
1751 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1752 					  "drop action not supported for "
1753 					  "egress");
1754 	return 0;
1755 }
1756 
1757 /*
1758  * Validate the queue action.
1759  *
1760  * @param[in] action
1761  *   Pointer to the queue action.
1762  * @param[in] action_flags
1763  *   Bit-fields that holds the actions detected until now.
1764  * @param[in] dev
1765  *   Pointer to the Ethernet device structure.
1766  * @param[in] attr
1767  *   Attributes of flow that includes this action.
1768  * @param[out] error
1769  *   Pointer to error structure.
1770  *
1771  * @return
1772  *   0 on success, a negative errno value otherwise and rte_errno is set.
1773  */
1774 int
1775 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1776 				uint64_t action_flags,
1777 				struct rte_eth_dev *dev,
1778 				const struct rte_flow_attr *attr,
1779 				struct rte_flow_error *error)
1780 {
1781 	struct mlx5_priv *priv = dev->data->dev_private;
1782 	const struct rte_flow_action_queue *queue = action->conf;
1783 
1784 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1785 		return rte_flow_error_set(error, EINVAL,
1786 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1787 					  "can't have 2 fate actions in"
1788 					  " same flow");
1789 	if (attr->egress)
1790 		return rte_flow_error_set(error, ENOTSUP,
1791 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1792 					  "queue action not supported for egress.");
1793 	if (mlx5_is_external_rxq(dev, queue->index))
1794 		return 0;
1795 	if (!priv->rxqs_n)
1796 		return rte_flow_error_set(error, EINVAL,
1797 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1798 					  NULL, "No Rx queues configured");
1799 	if (queue->index >= priv->rxqs_n)
1800 		return rte_flow_error_set(error, EINVAL,
1801 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1802 					  &queue->index,
1803 					  "queue index out of range");
1804 	if (mlx5_rxq_get(dev, queue->index) == NULL)
1805 		return rte_flow_error_set(error, EINVAL,
1806 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1807 					  &queue->index,
1808 					  "queue is not configured");
1809 	return 0;
1810 }
1811 
1812 /**
1813  * Validate queue numbers for device RSS.
1814  *
1815  * @param[in] dev
1816  *   Configured device.
1817  * @param[in] queues
1818  *   Array of queue numbers.
1819  * @param[in] queues_n
1820  *   Size of the @p queues array.
1821  * @param[out] error
1822  *   On error, filled with a textual error description.
1823  * @param[out] queue_idx
1824  *   On error, filled with an offending queue index in @p queues array.
1825  *
1826  * @return
1827  *   0 on success, a negative errno code on error.
1828  */
1829 static int
1830 mlx5_validate_rss_queues(struct rte_eth_dev *dev,
1831 			 const uint16_t *queues, uint32_t queues_n,
1832 			 const char **error, uint32_t *queue_idx)
1833 {
1834 	const struct mlx5_priv *priv = dev->data->dev_private;
1835 	bool is_hairpin = false;
1836 	bool is_ext_rss = false;
1837 	uint32_t i;
1838 
1839 	for (i = 0; i != queues_n; ++i) {
1840 		struct mlx5_rxq_ctrl *rxq_ctrl;
1841 
1842 		if (mlx5_is_external_rxq(dev, queues[0])) {
1843 			is_ext_rss = true;
1844 			continue;
1845 		}
1846 		if (is_ext_rss) {
1847 			*error = "Combining external and regular RSS queues is not supported";
1848 			*queue_idx = i;
1849 			return -ENOTSUP;
1850 		}
1851 		if (queues[i] >= priv->rxqs_n) {
1852 			*error = "queue index out of range";
1853 			*queue_idx = i;
1854 			return -EINVAL;
1855 		}
1856 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, queues[i]);
1857 		if (rxq_ctrl == NULL) {
1858 			*error =  "queue is not configured";
1859 			*queue_idx = i;
1860 			return -EINVAL;
1861 		}
1862 		if (i == 0 && rxq_ctrl->is_hairpin)
1863 			is_hairpin = true;
1864 		if (is_hairpin != rxq_ctrl->is_hairpin) {
1865 			*error = "combining hairpin and regular RSS queues is not supported";
1866 			*queue_idx = i;
1867 			return -ENOTSUP;
1868 		}
1869 	}
1870 	return 0;
1871 }
1872 
1873 /*
1874  * Validate the rss action.
1875  *
1876  * @param[in] dev
1877  *   Pointer to the Ethernet device structure.
1878  * @param[in] action
1879  *   Pointer to the queue action.
1880  * @param[out] error
1881  *   Pointer to error structure.
1882  *
1883  * @return
1884  *   0 on success, a negative errno value otherwise and rte_errno is set.
1885  */
1886 int
1887 mlx5_validate_action_rss(struct rte_eth_dev *dev,
1888 			 const struct rte_flow_action *action,
1889 			 struct rte_flow_error *error)
1890 {
1891 	struct mlx5_priv *priv = dev->data->dev_private;
1892 	const struct rte_flow_action_rss *rss = action->conf;
1893 	int ret;
1894 	const char *message;
1895 	uint32_t queue_idx;
1896 
1897 	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1898 	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1899 		return rte_flow_error_set(error, ENOTSUP,
1900 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1901 					  &rss->func,
1902 					  "RSS hash function not supported");
1903 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1904 	if (rss->level > 2)
1905 #else
1906 	if (rss->level > 1)
1907 #endif
1908 		return rte_flow_error_set(error, ENOTSUP,
1909 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1910 					  &rss->level,
1911 					  "tunnel RSS is not supported");
1912 	/* allow RSS key_len 0 in case of NULL (default) RSS key. */
1913 	if (rss->key_len == 0 && rss->key != NULL)
1914 		return rte_flow_error_set(error, ENOTSUP,
1915 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1916 					  &rss->key_len,
1917 					  "RSS hash key length 0");
1918 	if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1919 		return rte_flow_error_set(error, ENOTSUP,
1920 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1921 					  &rss->key_len,
1922 					  "RSS hash key too small");
1923 	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1924 		return rte_flow_error_set(error, ENOTSUP,
1925 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1926 					  &rss->key_len,
1927 					  "RSS hash key too large");
1928 	if (rss->queue_num > priv->sh->dev_cap.ind_table_max_size)
1929 		return rte_flow_error_set(error, ENOTSUP,
1930 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1931 					  &rss->queue_num,
1932 					  "number of queues too large");
1933 	if (rss->types & MLX5_RSS_HF_MASK)
1934 		return rte_flow_error_set(error, ENOTSUP,
1935 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1936 					  &rss->types,
1937 					  "some RSS protocols are not"
1938 					  " supported");
1939 	if ((rss->types & (RTE_ETH_RSS_L3_SRC_ONLY | RTE_ETH_RSS_L3_DST_ONLY)) &&
1940 	    !(rss->types & RTE_ETH_RSS_IP))
1941 		return rte_flow_error_set(error, EINVAL,
1942 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1943 					  "L3 partial RSS requested but L3 RSS"
1944 					  " type not specified");
1945 	if ((rss->types & (RTE_ETH_RSS_L4_SRC_ONLY | RTE_ETH_RSS_L4_DST_ONLY)) &&
1946 	    !(rss->types & (RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP)))
1947 		return rte_flow_error_set(error, EINVAL,
1948 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1949 					  "L4 partial RSS requested but L4 RSS"
1950 					  " type not specified");
1951 	if (!priv->rxqs_n && priv->ext_rxqs == NULL)
1952 		return rte_flow_error_set(error, EINVAL,
1953 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1954 					  NULL, "No Rx queues configured");
1955 	if (!rss->queue_num)
1956 		return rte_flow_error_set(error, EINVAL,
1957 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1958 					  NULL, "No queues configured");
1959 	ret = mlx5_validate_rss_queues(dev, rss->queue, rss->queue_num,
1960 				       &message, &queue_idx);
1961 	if (ret != 0) {
1962 		return rte_flow_error_set(error, -ret,
1963 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1964 					  &rss->queue[queue_idx], message);
1965 	}
1966 	return 0;
1967 }
1968 
1969 /*
1970  * Validate the rss action.
1971  *
1972  * @param[in] action
1973  *   Pointer to the queue action.
1974  * @param[in] action_flags
1975  *   Bit-fields that holds the actions detected until now.
1976  * @param[in] dev
1977  *   Pointer to the Ethernet device structure.
1978  * @param[in] attr
1979  *   Attributes of flow that includes this action.
1980  * @param[in] item_flags
1981  *   Items that were detected.
1982  * @param[out] error
1983  *   Pointer to error structure.
1984  *
1985  * @return
1986  *   0 on success, a negative errno value otherwise and rte_errno is set.
1987  */
1988 int
1989 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1990 			      uint64_t action_flags,
1991 			      struct rte_eth_dev *dev,
1992 			      const struct rte_flow_attr *attr,
1993 			      uint64_t item_flags,
1994 			      struct rte_flow_error *error)
1995 {
1996 	const struct rte_flow_action_rss *rss = action->conf;
1997 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1998 	int ret;
1999 
2000 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2001 		return rte_flow_error_set(error, EINVAL,
2002 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2003 					  "can't have 2 fate actions"
2004 					  " in same flow");
2005 	ret = mlx5_validate_action_rss(dev, action, error);
2006 	if (ret)
2007 		return ret;
2008 	if (attr->egress)
2009 		return rte_flow_error_set(error, ENOTSUP,
2010 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2011 					  "rss action not supported for "
2012 					  "egress");
2013 	if (rss->level > 1 && !tunnel)
2014 		return rte_flow_error_set(error, EINVAL,
2015 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2016 					  "inner RSS is not supported for "
2017 					  "non-tunnel flows");
2018 	if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
2019 	    !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
2020 		return rte_flow_error_set(error, EINVAL,
2021 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2022 					  "RSS on eCPRI is not supported now");
2023 	}
2024 	if ((item_flags & MLX5_FLOW_LAYER_MPLS) &&
2025 	    !(item_flags &
2026 	      (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3)) &&
2027 	    rss->level > 1)
2028 		return rte_flow_error_set(error, EINVAL,
2029 					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
2030 					  "MPLS inner RSS needs to specify inner L2/L3 items after MPLS in pattern");
2031 	return 0;
2032 }
2033 
2034 /*
2035  * Validate the default miss action.
2036  *
2037  * @param[in] action_flags
2038  *   Bit-fields that holds the actions detected until now.
2039  * @param[out] error
2040  *   Pointer to error structure.
2041  *
2042  * @return
2043  *   0 on success, a negative errno value otherwise and rte_errno is set.
2044  */
2045 int
2046 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
2047 				const struct rte_flow_attr *attr,
2048 				struct rte_flow_error *error)
2049 {
2050 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2051 		return rte_flow_error_set(error, EINVAL,
2052 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2053 					  "can't have 2 fate actions in"
2054 					  " same flow");
2055 	if (attr->egress)
2056 		return rte_flow_error_set(error, ENOTSUP,
2057 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2058 					  "default miss action not supported "
2059 					  "for egress");
2060 	if (attr->group)
2061 		return rte_flow_error_set(error, ENOTSUP,
2062 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
2063 					  "only group 0 is supported");
2064 	if (attr->transfer)
2065 		return rte_flow_error_set(error, ENOTSUP,
2066 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2067 					  NULL, "transfer is not supported");
2068 	return 0;
2069 }
2070 
2071 /*
2072  * Validate the count action.
2073  *
2074  * @param[in] dev
2075  *   Pointer to the Ethernet device structure.
2076  * @param[in] attr
2077  *   Attributes of flow that includes this action.
2078  * @param[out] error
2079  *   Pointer to error structure.
2080  *
2081  * @return
2082  *   0 on success, a negative errno value otherwise and rte_errno is set.
2083  */
2084 int
2085 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
2086 				const struct rte_flow_attr *attr,
2087 				struct rte_flow_error *error)
2088 {
2089 	if (attr->egress)
2090 		return rte_flow_error_set(error, ENOTSUP,
2091 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2092 					  "count action not supported for "
2093 					  "egress");
2094 	return 0;
2095 }
2096 
2097 /*
2098  * Validate the ASO CT action.
2099  *
2100  * @param[in] dev
2101  *   Pointer to the Ethernet device structure.
2102  * @param[in] conntrack
2103  *   Pointer to the CT action profile.
2104  * @param[out] error
2105  *   Pointer to error structure.
2106  *
2107  * @return
2108  *   0 on success, a negative errno value otherwise and rte_errno is set.
2109  */
2110 int
2111 mlx5_validate_action_ct(struct rte_eth_dev *dev,
2112 			const struct rte_flow_action_conntrack *conntrack,
2113 			struct rte_flow_error *error)
2114 {
2115 	RTE_SET_USED(dev);
2116 
2117 	if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
2118 		return rte_flow_error_set(error, EINVAL,
2119 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2120 					  "Invalid CT state");
2121 	if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
2122 		return rte_flow_error_set(error, EINVAL,
2123 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2124 					  "Invalid last TCP packet flag");
2125 	return 0;
2126 }
2127 
2128 /**
2129  * Verify the @p attributes will be correctly understood by the NIC and store
2130  * them in the @p flow if everything is correct.
2131  *
2132  * @param[in] dev
2133  *   Pointer to the Ethernet device structure.
2134  * @param[in] attributes
2135  *   Pointer to flow attributes
2136  * @param[out] error
2137  *   Pointer to error structure.
2138  *
2139  * @return
2140  *   0 on success, a negative errno value otherwise and rte_errno is set.
2141  */
2142 int
2143 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
2144 			      const struct rte_flow_attr *attributes,
2145 			      struct rte_flow_error *error)
2146 {
2147 	struct mlx5_priv *priv = dev->data->dev_private;
2148 	uint32_t priority_max = priv->sh->flow_max_priority - 1;
2149 
2150 	if (attributes->group)
2151 		return rte_flow_error_set(error, ENOTSUP,
2152 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
2153 					  NULL, "groups is not supported");
2154 	if (attributes->priority != MLX5_FLOW_LOWEST_PRIO_INDICATOR &&
2155 	    attributes->priority >= priority_max)
2156 		return rte_flow_error_set(error, ENOTSUP,
2157 					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
2158 					  NULL, "priority out of range");
2159 	if (attributes->egress)
2160 		return rte_flow_error_set(error, ENOTSUP,
2161 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2162 					  "egress is not supported");
2163 	if (attributes->transfer && !priv->sh->config.dv_esw_en)
2164 		return rte_flow_error_set(error, ENOTSUP,
2165 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2166 					  NULL, "transfer is not supported");
2167 	if (!attributes->ingress)
2168 		return rte_flow_error_set(error, EINVAL,
2169 					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
2170 					  NULL,
2171 					  "ingress attribute is mandatory");
2172 	return 0;
2173 }
2174 
2175 /**
2176  * Validate ICMP6 item.
2177  *
2178  * @param[in] item
2179  *   Item specification.
2180  * @param[in] item_flags
2181  *   Bit-fields that holds the items detected until now.
2182  * @param[in] ext_vlan_sup
2183  *   Whether extended VLAN features are supported or not.
2184  * @param[out] error
2185  *   Pointer to error structure.
2186  *
2187  * @return
2188  *   0 on success, a negative errno value otherwise and rte_errno is set.
2189  */
2190 int
2191 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
2192 			       uint64_t item_flags,
2193 			       uint8_t target_protocol,
2194 			       struct rte_flow_error *error)
2195 {
2196 	const struct rte_flow_item_icmp6 *mask = item->mask;
2197 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2198 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
2199 				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2200 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2201 				      MLX5_FLOW_LAYER_OUTER_L4;
2202 	int ret;
2203 
2204 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
2205 		return rte_flow_error_set(error, EINVAL,
2206 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2207 					  "protocol filtering not compatible"
2208 					  " with ICMP6 layer");
2209 	if (!(item_flags & l3m))
2210 		return rte_flow_error_set(error, EINVAL,
2211 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2212 					  "IPv6 is mandatory to filter on"
2213 					  " ICMP6");
2214 	if (item_flags & l4m)
2215 		return rte_flow_error_set(error, EINVAL,
2216 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2217 					  "multiple L4 layers not supported");
2218 	if (!mask)
2219 		mask = &rte_flow_item_icmp6_mask;
2220 	ret = mlx5_flow_item_acceptable
2221 		(item, (const uint8_t *)mask,
2222 		 (const uint8_t *)&rte_flow_item_icmp6_mask,
2223 		 sizeof(struct rte_flow_item_icmp6),
2224 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2225 	if (ret < 0)
2226 		return ret;
2227 	return 0;
2228 }
2229 
2230 /**
2231  * Validate ICMP item.
2232  *
2233  * @param[in] item
2234  *   Item specification.
2235  * @param[in] item_flags
2236  *   Bit-fields that holds the items detected until now.
2237  * @param[out] error
2238  *   Pointer to error structure.
2239  *
2240  * @return
2241  *   0 on success, a negative errno value otherwise and rte_errno is set.
2242  */
2243 int
2244 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
2245 			     uint64_t item_flags,
2246 			     uint8_t target_protocol,
2247 			     struct rte_flow_error *error)
2248 {
2249 	const struct rte_flow_item_icmp *mask = item->mask;
2250 	const struct rte_flow_item_icmp nic_mask = {
2251 		.hdr.icmp_type = 0xff,
2252 		.hdr.icmp_code = 0xff,
2253 		.hdr.icmp_ident = RTE_BE16(0xffff),
2254 		.hdr.icmp_seq_nb = RTE_BE16(0xffff),
2255 	};
2256 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2257 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
2258 				      MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2259 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2260 				      MLX5_FLOW_LAYER_OUTER_L4;
2261 	int ret;
2262 
2263 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
2264 		return rte_flow_error_set(error, EINVAL,
2265 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2266 					  "protocol filtering not compatible"
2267 					  " with ICMP layer");
2268 	if (!(item_flags & l3m))
2269 		return rte_flow_error_set(error, EINVAL,
2270 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2271 					  "IPv4 is mandatory to filter"
2272 					  " on ICMP");
2273 	if (item_flags & l4m)
2274 		return rte_flow_error_set(error, EINVAL,
2275 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2276 					  "multiple L4 layers not supported");
2277 	if (!mask)
2278 		mask = &nic_mask;
2279 	ret = mlx5_flow_item_acceptable
2280 		(item, (const uint8_t *)mask,
2281 		 (const uint8_t *)&nic_mask,
2282 		 sizeof(struct rte_flow_item_icmp),
2283 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2284 	if (ret < 0)
2285 		return ret;
2286 	return 0;
2287 }
2288 
2289 /**
2290  * Validate Ethernet item.
2291  *
2292  * @param[in] item
2293  *   Item specification.
2294  * @param[in] item_flags
2295  *   Bit-fields that holds the items detected until now.
2296  * @param[out] error
2297  *   Pointer to error structure.
2298  *
2299  * @return
2300  *   0 on success, a negative errno value otherwise and rte_errno is set.
2301  */
2302 int
2303 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
2304 			    uint64_t item_flags, bool ext_vlan_sup,
2305 			    struct rte_flow_error *error)
2306 {
2307 	const struct rte_flow_item_eth *mask = item->mask;
2308 	const struct rte_flow_item_eth nic_mask = {
2309 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2310 		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2311 		.type = RTE_BE16(0xffff),
2312 		.has_vlan = ext_vlan_sup ? 1 : 0,
2313 	};
2314 	int ret;
2315 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2316 	const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2	:
2317 				       MLX5_FLOW_LAYER_OUTER_L2;
2318 
2319 	if (item_flags & ethm)
2320 		return rte_flow_error_set(error, ENOTSUP,
2321 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2322 					  "multiple L2 layers not supported");
2323 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
2324 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
2325 		return rte_flow_error_set(error, EINVAL,
2326 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2327 					  "L2 layer should not follow "
2328 					  "L3 layers");
2329 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
2330 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
2331 		return rte_flow_error_set(error, EINVAL,
2332 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2333 					  "L2 layer should not follow VLAN");
2334 	if (item_flags & MLX5_FLOW_LAYER_GTP)
2335 		return rte_flow_error_set(error, EINVAL,
2336 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2337 					  "L2 layer should not follow GTP");
2338 	if (!mask)
2339 		mask = &rte_flow_item_eth_mask;
2340 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2341 					(const uint8_t *)&nic_mask,
2342 					sizeof(struct rte_flow_item_eth),
2343 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2344 	return ret;
2345 }
2346 
2347 /**
2348  * Validate VLAN item.
2349  *
2350  * @param[in] item
2351  *   Item specification.
2352  * @param[in] item_flags
2353  *   Bit-fields that holds the items detected until now.
2354  * @param[in] dev
2355  *   Ethernet device flow is being created on.
2356  * @param[out] error
2357  *   Pointer to error structure.
2358  *
2359  * @return
2360  *   0 on success, a negative errno value otherwise and rte_errno is set.
2361  */
2362 int
2363 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
2364 			     uint64_t item_flags,
2365 			     struct rte_eth_dev *dev,
2366 			     struct rte_flow_error *error)
2367 {
2368 	const struct rte_flow_item_vlan *spec = item->spec;
2369 	const struct rte_flow_item_vlan *mask = item->mask;
2370 	const struct rte_flow_item_vlan nic_mask = {
2371 		.tci = RTE_BE16(UINT16_MAX),
2372 		.inner_type = RTE_BE16(UINT16_MAX),
2373 	};
2374 	uint16_t vlan_tag = 0;
2375 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2376 	int ret;
2377 	const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
2378 					MLX5_FLOW_LAYER_INNER_L4) :
2379 				       (MLX5_FLOW_LAYER_OUTER_L3 |
2380 					MLX5_FLOW_LAYER_OUTER_L4);
2381 	const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
2382 					MLX5_FLOW_LAYER_OUTER_VLAN;
2383 
2384 	if (item_flags & vlanm)
2385 		return rte_flow_error_set(error, EINVAL,
2386 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2387 					  "multiple VLAN layers not supported");
2388 	else if ((item_flags & l34m) != 0)
2389 		return rte_flow_error_set(error, EINVAL,
2390 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2391 					  "VLAN cannot follow L3/L4 layer");
2392 	if (!mask)
2393 		mask = &rte_flow_item_vlan_mask;
2394 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2395 					(const uint8_t *)&nic_mask,
2396 					sizeof(struct rte_flow_item_vlan),
2397 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2398 	if (ret)
2399 		return ret;
2400 	if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
2401 		struct mlx5_priv *priv = dev->data->dev_private;
2402 
2403 		if (priv->vmwa_context) {
2404 			/*
2405 			 * Non-NULL context means we have a virtual machine
2406 			 * and SR-IOV enabled, we have to create VLAN interface
2407 			 * to make hypervisor to setup E-Switch vport
2408 			 * context correctly. We avoid creating the multiple
2409 			 * VLAN interfaces, so we cannot support VLAN tag mask.
2410 			 */
2411 			return rte_flow_error_set(error, EINVAL,
2412 						  RTE_FLOW_ERROR_TYPE_ITEM,
2413 						  item,
2414 						  "VLAN tag mask is not"
2415 						  " supported in virtual"
2416 						  " environment");
2417 		}
2418 	}
2419 	if (spec) {
2420 		vlan_tag = spec->tci;
2421 		vlan_tag &= mask->tci;
2422 	}
2423 	/*
2424 	 * From verbs perspective an empty VLAN is equivalent
2425 	 * to a packet without VLAN layer.
2426 	 */
2427 	if (!vlan_tag)
2428 		return rte_flow_error_set(error, EINVAL,
2429 					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2430 					  item->spec,
2431 					  "VLAN cannot be empty");
2432 	return 0;
2433 }
2434 
2435 /**
2436  * Validate IPV4 item.
2437  *
2438  * @param[in] item
2439  *   Item specification.
2440  * @param[in] item_flags
2441  *   Bit-fields that holds the items detected until now.
2442  * @param[in] last_item
2443  *   Previous validated item in the pattern items.
2444  * @param[in] ether_type
2445  *   Type in the ethernet layer header (including dot1q).
2446  * @param[in] acc_mask
2447  *   Acceptable mask, if NULL default internal default mask
2448  *   will be used to check whether item fields are supported.
2449  * @param[in] range_accepted
2450  *   True if range of values is accepted for specific fields, false otherwise.
2451  * @param[out] error
2452  *   Pointer to error structure.
2453  *
2454  * @return
2455  *   0 on success, a negative errno value otherwise and rte_errno is set.
2456  */
2457 int
2458 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2459 			     uint64_t item_flags,
2460 			     uint64_t last_item,
2461 			     uint16_t ether_type,
2462 			     const struct rte_flow_item_ipv4 *acc_mask,
2463 			     bool range_accepted,
2464 			     struct rte_flow_error *error)
2465 {
2466 	const struct rte_flow_item_ipv4 *mask = item->mask;
2467 	const struct rte_flow_item_ipv4 *spec = item->spec;
2468 	const struct rte_flow_item_ipv4 nic_mask = {
2469 		.hdr = {
2470 			.src_addr = RTE_BE32(0xffffffff),
2471 			.dst_addr = RTE_BE32(0xffffffff),
2472 			.type_of_service = 0xff,
2473 			.next_proto_id = 0xff,
2474 		},
2475 	};
2476 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2477 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2478 				      MLX5_FLOW_LAYER_OUTER_L3;
2479 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2480 				      MLX5_FLOW_LAYER_OUTER_L4;
2481 	int ret;
2482 	uint8_t next_proto = 0xFF;
2483 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2484 				  MLX5_FLOW_LAYER_OUTER_VLAN |
2485 				  MLX5_FLOW_LAYER_INNER_VLAN);
2486 
2487 	if ((last_item & l2_vlan) && ether_type &&
2488 	    ether_type != RTE_ETHER_TYPE_IPV4)
2489 		return rte_flow_error_set(error, EINVAL,
2490 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2491 					  "IPv4 cannot follow L2/VLAN layer "
2492 					  "which ether type is not IPv4");
2493 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2494 		if (mask && spec)
2495 			next_proto = mask->hdr.next_proto_id &
2496 				     spec->hdr.next_proto_id;
2497 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2498 			return rte_flow_error_set(error, EINVAL,
2499 						  RTE_FLOW_ERROR_TYPE_ITEM,
2500 						  item,
2501 						  "multiple tunnel "
2502 						  "not supported");
2503 	}
2504 	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
2505 		return rte_flow_error_set(error, EINVAL,
2506 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2507 					  "wrong tunnel type - IPv6 specified "
2508 					  "but IPv4 item provided");
2509 	if (item_flags & l3m)
2510 		return rte_flow_error_set(error, ENOTSUP,
2511 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2512 					  "multiple L3 layers not supported");
2513 	else if (item_flags & l4m)
2514 		return rte_flow_error_set(error, EINVAL,
2515 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2516 					  "L3 cannot follow an L4 layer.");
2517 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2518 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2519 		return rte_flow_error_set(error, EINVAL,
2520 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2521 					  "L3 cannot follow an NVGRE layer.");
2522 	if (!mask)
2523 		mask = &rte_flow_item_ipv4_mask;
2524 	else if (mask->hdr.next_proto_id != 0 &&
2525 		 mask->hdr.next_proto_id != 0xff)
2526 		return rte_flow_error_set(error, EINVAL,
2527 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2528 					  "partial mask is not supported"
2529 					  " for protocol");
2530 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2531 					acc_mask ? (const uint8_t *)acc_mask
2532 						 : (const uint8_t *)&nic_mask,
2533 					sizeof(struct rte_flow_item_ipv4),
2534 					range_accepted, error);
2535 	if (ret < 0)
2536 		return ret;
2537 	return 0;
2538 }
2539 
2540 /**
2541  * Validate IPV6 item.
2542  *
2543  * @param[in] item
2544  *   Item specification.
2545  * @param[in] item_flags
2546  *   Bit-fields that holds the items detected until now.
2547  * @param[in] last_item
2548  *   Previous validated item in the pattern items.
2549  * @param[in] ether_type
2550  *   Type in the ethernet layer header (including dot1q).
2551  * @param[in] acc_mask
2552  *   Acceptable mask, if NULL default internal default mask
2553  *   will be used to check whether item fields are supported.
2554  * @param[out] error
2555  *   Pointer to error structure.
2556  *
2557  * @return
2558  *   0 on success, a negative errno value otherwise and rte_errno is set.
2559  */
2560 int
2561 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2562 			     uint64_t item_flags,
2563 			     uint64_t last_item,
2564 			     uint16_t ether_type,
2565 			     const struct rte_flow_item_ipv6 *acc_mask,
2566 			     struct rte_flow_error *error)
2567 {
2568 	const struct rte_flow_item_ipv6 *mask = item->mask;
2569 	const struct rte_flow_item_ipv6 *spec = item->spec;
2570 	const struct rte_flow_item_ipv6 nic_mask = {
2571 		.hdr = {
2572 			.src_addr =
2573 				"\xff\xff\xff\xff\xff\xff\xff\xff"
2574 				"\xff\xff\xff\xff\xff\xff\xff\xff",
2575 			.dst_addr =
2576 				"\xff\xff\xff\xff\xff\xff\xff\xff"
2577 				"\xff\xff\xff\xff\xff\xff\xff\xff",
2578 			.vtc_flow = RTE_BE32(0xffffffff),
2579 			.proto = 0xff,
2580 		},
2581 	};
2582 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2583 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2584 				      MLX5_FLOW_LAYER_OUTER_L3;
2585 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2586 				      MLX5_FLOW_LAYER_OUTER_L4;
2587 	int ret;
2588 	uint8_t next_proto = 0xFF;
2589 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2590 				  MLX5_FLOW_LAYER_OUTER_VLAN |
2591 				  MLX5_FLOW_LAYER_INNER_VLAN);
2592 
2593 	if ((last_item & l2_vlan) && ether_type &&
2594 	    ether_type != RTE_ETHER_TYPE_IPV6)
2595 		return rte_flow_error_set(error, EINVAL,
2596 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2597 					  "IPv6 cannot follow L2/VLAN layer "
2598 					  "which ether type is not IPv6");
2599 	if (mask && mask->hdr.proto == UINT8_MAX && spec)
2600 		next_proto = spec->hdr.proto;
2601 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2602 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2603 			return rte_flow_error_set(error, EINVAL,
2604 						  RTE_FLOW_ERROR_TYPE_ITEM,
2605 						  item,
2606 						  "multiple tunnel "
2607 						  "not supported");
2608 	}
2609 	if (next_proto == IPPROTO_HOPOPTS  ||
2610 	    next_proto == IPPROTO_ROUTING  ||
2611 	    next_proto == IPPROTO_FRAGMENT ||
2612 	    next_proto == IPPROTO_ESP	   ||
2613 	    next_proto == IPPROTO_AH	   ||
2614 	    next_proto == IPPROTO_DSTOPTS)
2615 		return rte_flow_error_set(error, EINVAL,
2616 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2617 					  "IPv6 proto (next header) should "
2618 					  "not be set as extension header");
2619 	if (item_flags & MLX5_FLOW_LAYER_IPIP)
2620 		return rte_flow_error_set(error, EINVAL,
2621 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2622 					  "wrong tunnel type - IPv4 specified "
2623 					  "but IPv6 item provided");
2624 	if (item_flags & l3m)
2625 		return rte_flow_error_set(error, ENOTSUP,
2626 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2627 					  "multiple L3 layers not supported");
2628 	else if (item_flags & l4m)
2629 		return rte_flow_error_set(error, EINVAL,
2630 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2631 					  "L3 cannot follow an L4 layer.");
2632 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2633 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2634 		return rte_flow_error_set(error, EINVAL,
2635 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2636 					  "L3 cannot follow an NVGRE layer.");
2637 	if (!mask)
2638 		mask = &rte_flow_item_ipv6_mask;
2639 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2640 					acc_mask ? (const uint8_t *)acc_mask
2641 						 : (const uint8_t *)&nic_mask,
2642 					sizeof(struct rte_flow_item_ipv6),
2643 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2644 	if (ret < 0)
2645 		return ret;
2646 	return 0;
2647 }
2648 
2649 /**
2650  * Validate ESP item.
2651  *
2652  * @param[in] item
2653  *   Item specification.
2654  * @param[in] item_flags
2655  *   Bit-fields that holds the items detected until now.
2656  * @param[in] target_protocol
2657  *   The next protocol in the previous item.
2658  * @param[out] error
2659  *   Pointer to error structure.
2660  *
2661  * @return
2662  *   0 on success, a negative errno value otherwise and rte_errno is set.
2663  */
2664 int
2665 mlx5_flow_validate_item_esp(const struct rte_flow_item *item,
2666 			    uint64_t item_flags,
2667 			    uint8_t target_protocol,
2668 			    struct rte_flow_error *error)
2669 {
2670 	const struct rte_flow_item_esp *mask = item->mask;
2671 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2672 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2673 				      MLX5_FLOW_LAYER_OUTER_L3;
2674 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2675 				      MLX5_FLOW_LAYER_OUTER_L4;
2676 	int ret;
2677 
2678 	if (!(item_flags & l3m))
2679 		return rte_flow_error_set(error, EINVAL,
2680 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2681 					  "L3 is mandatory to filter on L4");
2682 	if (item_flags & l4m)
2683 		return rte_flow_error_set(error, EINVAL,
2684 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2685 					  "multiple L4 layers not supported");
2686 	if (target_protocol != 0xff && target_protocol != IPPROTO_ESP)
2687 		return rte_flow_error_set(error, EINVAL,
2688 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2689 					  "protocol filtering not compatible"
2690 					  " with ESP layer");
2691 	if (!mask)
2692 		mask = &rte_flow_item_esp_mask;
2693 	ret = mlx5_flow_item_acceptable
2694 		(item, (const uint8_t *)mask,
2695 		 (const uint8_t *)&rte_flow_item_esp_mask,
2696 		 sizeof(struct rte_flow_item_esp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2697 		 error);
2698 	if (ret < 0)
2699 		return ret;
2700 	return 0;
2701 }
2702 
2703 /**
2704  * Validate UDP item.
2705  *
2706  * @param[in] item
2707  *   Item specification.
2708  * @param[in] item_flags
2709  *   Bit-fields that holds the items detected until now.
2710  * @param[in] target_protocol
2711  *   The next protocol in the previous item.
2712  * @param[in] flow_mask
2713  *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
2714  * @param[out] error
2715  *   Pointer to error structure.
2716  *
2717  * @return
2718  *   0 on success, a negative errno value otherwise and rte_errno is set.
2719  */
2720 int
2721 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2722 			    uint64_t item_flags,
2723 			    uint8_t target_protocol,
2724 			    struct rte_flow_error *error)
2725 {
2726 	const struct rte_flow_item_udp *mask = item->mask;
2727 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2728 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2729 				      MLX5_FLOW_LAYER_OUTER_L3;
2730 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2731 				      MLX5_FLOW_LAYER_OUTER_L4;
2732 	int ret;
2733 
2734 	if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2735 		return rte_flow_error_set(error, EINVAL,
2736 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2737 					  "protocol filtering not compatible"
2738 					  " with UDP layer");
2739 	if (!(item_flags & l3m))
2740 		return rte_flow_error_set(error, EINVAL,
2741 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2742 					  "L3 is mandatory to filter on L4");
2743 	if (item_flags & l4m)
2744 		return rte_flow_error_set(error, EINVAL,
2745 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2746 					  "multiple L4 layers not supported");
2747 	if (!mask)
2748 		mask = &rte_flow_item_udp_mask;
2749 	ret = mlx5_flow_item_acceptable
2750 		(item, (const uint8_t *)mask,
2751 		 (const uint8_t *)&rte_flow_item_udp_mask,
2752 		 sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2753 		 error);
2754 	if (ret < 0)
2755 		return ret;
2756 	return 0;
2757 }
2758 
2759 /**
2760  * Validate TCP item.
2761  *
2762  * @param[in] item
2763  *   Item specification.
2764  * @param[in] item_flags
2765  *   Bit-fields that holds the items detected until now.
2766  * @param[in] target_protocol
2767  *   The next protocol in the previous item.
2768  * @param[out] error
2769  *   Pointer to error structure.
2770  *
2771  * @return
2772  *   0 on success, a negative errno value otherwise and rte_errno is set.
2773  */
2774 int
2775 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2776 			    uint64_t item_flags,
2777 			    uint8_t target_protocol,
2778 			    const struct rte_flow_item_tcp *flow_mask,
2779 			    struct rte_flow_error *error)
2780 {
2781 	const struct rte_flow_item_tcp *mask = item->mask;
2782 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2783 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2784 				      MLX5_FLOW_LAYER_OUTER_L3;
2785 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2786 				      MLX5_FLOW_LAYER_OUTER_L4;
2787 	int ret;
2788 
2789 	MLX5_ASSERT(flow_mask);
2790 	if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2791 		return rte_flow_error_set(error, EINVAL,
2792 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2793 					  "protocol filtering not compatible"
2794 					  " with TCP layer");
2795 	if (!(item_flags & l3m))
2796 		return rte_flow_error_set(error, EINVAL,
2797 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2798 					  "L3 is mandatory to filter on L4");
2799 	if (item_flags & l4m)
2800 		return rte_flow_error_set(error, EINVAL,
2801 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2802 					  "multiple L4 layers not supported");
2803 	if (!mask)
2804 		mask = &rte_flow_item_tcp_mask;
2805 	ret = mlx5_flow_item_acceptable
2806 		(item, (const uint8_t *)mask,
2807 		 (const uint8_t *)flow_mask,
2808 		 sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2809 		 error);
2810 	if (ret < 0)
2811 		return ret;
2812 	return 0;
2813 }
2814 
2815 /**
2816  * Validate VXLAN item.
2817  *
2818  * @param[in] dev
2819  *   Pointer to the Ethernet device structure.
2820  * @param[in] udp_dport
2821  *   UDP destination port
2822  * @param[in] item
2823  *   Item specification.
2824  * @param[in] item_flags
2825  *   Bit-fields that holds the items detected until now.
2826  * @param[in] attr
2827  *   Flow rule attributes.
2828  * @param[out] error
2829  *   Pointer to error structure.
2830  *
2831  * @return
2832  *   0 on success, a negative errno value otherwise and rte_errno is set.
2833  */
2834 int
2835 mlx5_flow_validate_item_vxlan(struct rte_eth_dev *dev,
2836 			      uint16_t udp_dport,
2837 			      const struct rte_flow_item *item,
2838 			      uint64_t item_flags,
2839 			      const struct rte_flow_attr *attr,
2840 			      struct rte_flow_error *error)
2841 {
2842 	const struct rte_flow_item_vxlan *spec = item->spec;
2843 	const struct rte_flow_item_vxlan *mask = item->mask;
2844 	int ret;
2845 	struct mlx5_priv *priv = dev->data->dev_private;
2846 	union vni {
2847 		uint32_t vlan_id;
2848 		uint8_t vni[4];
2849 	} id = { .vlan_id = 0, };
2850 	const struct rte_flow_item_vxlan nic_mask = {
2851 		.vni = "\xff\xff\xff",
2852 		.rsvd1 = 0xff,
2853 	};
2854 	const struct rte_flow_item_vxlan *valid_mask;
2855 
2856 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2857 		return rte_flow_error_set(error, ENOTSUP,
2858 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2859 					  "multiple tunnel layers not"
2860 					  " supported");
2861 	valid_mask = &rte_flow_item_vxlan_mask;
2862 	/*
2863 	 * Verify only UDPv4 is present as defined in
2864 	 * https://tools.ietf.org/html/rfc7348
2865 	 */
2866 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2867 		return rte_flow_error_set(error, EINVAL,
2868 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2869 					  "no outer UDP layer found");
2870 	if (!mask)
2871 		mask = &rte_flow_item_vxlan_mask;
2872 
2873 	if (priv->sh->steering_format_version !=
2874 	    MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
2875 	    !udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN) {
2876 		/* FDB domain & NIC domain non-zero group */
2877 		if ((attr->transfer || attr->group) && priv->sh->misc5_cap)
2878 			valid_mask = &nic_mask;
2879 		/* Group zero in NIC domain */
2880 		if (!attr->group && !attr->transfer &&
2881 		    priv->sh->tunnel_header_0_1)
2882 			valid_mask = &nic_mask;
2883 	}
2884 	ret = mlx5_flow_item_acceptable
2885 		(item, (const uint8_t *)mask,
2886 		 (const uint8_t *)valid_mask,
2887 		 sizeof(struct rte_flow_item_vxlan),
2888 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2889 	if (ret < 0)
2890 		return ret;
2891 	if (spec) {
2892 		memcpy(&id.vni[1], spec->vni, 3);
2893 		memcpy(&id.vni[1], mask->vni, 3);
2894 	}
2895 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2896 		return rte_flow_error_set(error, ENOTSUP,
2897 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2898 					  "VXLAN tunnel must be fully defined");
2899 	return 0;
2900 }
2901 
2902 /**
2903  * Validate VXLAN_GPE item.
2904  *
2905  * @param[in] item
2906  *   Item specification.
2907  * @param[in] item_flags
2908  *   Bit-fields that holds the items detected until now.
2909  * @param[in] priv
2910  *   Pointer to the private data structure.
2911  * @param[in] target_protocol
2912  *   The next protocol in the previous item.
2913  * @param[out] error
2914  *   Pointer to error structure.
2915  *
2916  * @return
2917  *   0 on success, a negative errno value otherwise and rte_errno is set.
2918  */
2919 int
2920 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
2921 				  uint64_t item_flags,
2922 				  struct rte_eth_dev *dev,
2923 				  struct rte_flow_error *error)
2924 {
2925 	struct mlx5_priv *priv = dev->data->dev_private;
2926 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
2927 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
2928 	int ret;
2929 	union vni {
2930 		uint32_t vlan_id;
2931 		uint8_t vni[4];
2932 	} id = { .vlan_id = 0, };
2933 
2934 	if (!priv->sh->config.l3_vxlan_en)
2935 		return rte_flow_error_set(error, ENOTSUP,
2936 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2937 					  "L3 VXLAN is not enabled by device"
2938 					  " parameter and/or not configured in"
2939 					  " firmware");
2940 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2941 		return rte_flow_error_set(error, ENOTSUP,
2942 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2943 					  "multiple tunnel layers not"
2944 					  " supported");
2945 	/*
2946 	 * Verify only UDPv4 is present as defined in
2947 	 * https://tools.ietf.org/html/rfc7348
2948 	 */
2949 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2950 		return rte_flow_error_set(error, EINVAL,
2951 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2952 					  "no outer UDP layer found");
2953 	if (!mask)
2954 		mask = &rte_flow_item_vxlan_gpe_mask;
2955 	ret = mlx5_flow_item_acceptable
2956 		(item, (const uint8_t *)mask,
2957 		 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2958 		 sizeof(struct rte_flow_item_vxlan_gpe),
2959 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2960 	if (ret < 0)
2961 		return ret;
2962 	if (spec) {
2963 		if (spec->protocol)
2964 			return rte_flow_error_set(error, ENOTSUP,
2965 						  RTE_FLOW_ERROR_TYPE_ITEM,
2966 						  item,
2967 						  "VxLAN-GPE protocol"
2968 						  " not supported");
2969 		memcpy(&id.vni[1], spec->vni, 3);
2970 		memcpy(&id.vni[1], mask->vni, 3);
2971 	}
2972 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2973 		return rte_flow_error_set(error, ENOTSUP,
2974 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2975 					  "VXLAN-GPE tunnel must be fully"
2976 					  " defined");
2977 	return 0;
2978 }
2979 /**
2980  * Validate GRE Key item.
2981  *
2982  * @param[in] item
2983  *   Item specification.
2984  * @param[in] item_flags
2985  *   Bit flags to mark detected items.
2986  * @param[in] gre_item
2987  *   Pointer to gre_item
2988  * @param[out] error
2989  *   Pointer to error structure.
2990  *
2991  * @return
2992  *   0 on success, a negative errno value otherwise and rte_errno is set.
2993  */
2994 int
2995 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2996 				uint64_t item_flags,
2997 				const struct rte_flow_item *gre_item,
2998 				struct rte_flow_error *error)
2999 {
3000 	const rte_be32_t *mask = item->mask;
3001 	int ret = 0;
3002 	rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
3003 	const struct rte_flow_item_gre *gre_spec;
3004 	const struct rte_flow_item_gre *gre_mask;
3005 
3006 	if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
3007 		return rte_flow_error_set(error, ENOTSUP,
3008 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3009 					  "Multiple GRE key not support");
3010 	if (!(item_flags & MLX5_FLOW_LAYER_GRE))
3011 		return rte_flow_error_set(error, ENOTSUP,
3012 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3013 					  "No preceding GRE header");
3014 	if (item_flags & MLX5_FLOW_LAYER_INNER)
3015 		return rte_flow_error_set(error, ENOTSUP,
3016 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3017 					  "GRE key following a wrong item");
3018 	gre_mask = gre_item->mask;
3019 	if (!gre_mask)
3020 		gre_mask = &rte_flow_item_gre_mask;
3021 	gre_spec = gre_item->spec;
3022 	if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
3023 			 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
3024 		return rte_flow_error_set(error, EINVAL,
3025 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3026 					  "Key bit must be on");
3027 
3028 	if (!mask)
3029 		mask = &gre_key_default_mask;
3030 	ret = mlx5_flow_item_acceptable
3031 		(item, (const uint8_t *)mask,
3032 		 (const uint8_t *)&gre_key_default_mask,
3033 		 sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3034 	return ret;
3035 }
3036 
3037 /**
3038  * Validate GRE optional item.
3039  *
3040  * @param[in] dev
3041  *   Pointer to the Ethernet device structure.
3042  * @param[in] item
3043  *   Item specification.
3044  * @param[in] item_flags
3045  *   Bit flags to mark detected items.
3046  * @param[in] attr
3047  *   Flow rule attributes.
3048  * @param[in] gre_item
3049  *   Pointer to gre_item
3050  * @param[out] error
3051  *   Pointer to error structure.
3052  *
3053  * @return
3054  *   0 on success, a negative errno value otherwise and rte_errno is set.
3055  */
3056 int
3057 mlx5_flow_validate_item_gre_option(struct rte_eth_dev *dev,
3058 				   const struct rte_flow_item *item,
3059 				   uint64_t item_flags,
3060 				   const struct rte_flow_attr *attr,
3061 				   const struct rte_flow_item *gre_item,
3062 				   struct rte_flow_error *error)
3063 {
3064 	const struct rte_flow_item_gre *gre_spec = gre_item->spec;
3065 	const struct rte_flow_item_gre *gre_mask = gre_item->mask;
3066 	const struct rte_flow_item_gre_opt *spec = item->spec;
3067 	const struct rte_flow_item_gre_opt *mask = item->mask;
3068 	struct mlx5_priv *priv = dev->data->dev_private;
3069 	int ret = 0;
3070 	struct rte_flow_item_gre_opt nic_mask = {
3071 		.checksum_rsvd = {
3072 			.checksum = RTE_BE16(UINT16_MAX),
3073 			.reserved1 = 0x0,
3074 		},
3075 		.key = {
3076 			.key = RTE_BE32(UINT32_MAX),
3077 		},
3078 		.sequence = {
3079 			.sequence = RTE_BE32(UINT32_MAX),
3080 		},
3081 	};
3082 
3083 	if (!(item_flags & MLX5_FLOW_LAYER_GRE))
3084 		return rte_flow_error_set(error, ENOTSUP,
3085 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3086 					  "No preceding GRE header");
3087 	if (item_flags & MLX5_FLOW_LAYER_INNER)
3088 		return rte_flow_error_set(error, ENOTSUP,
3089 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3090 					  "GRE option following a wrong item");
3091 	if (!spec || !mask)
3092 		return rte_flow_error_set(error, EINVAL,
3093 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3094 					  "At least one field gre_option(checksum/key/sequence) must be specified");
3095 	if (!gre_mask)
3096 		gre_mask = &rte_flow_item_gre_mask;
3097 	if (mask->checksum_rsvd.checksum)
3098 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x8000)) &&
3099 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x8000)))
3100 			return rte_flow_error_set(error, EINVAL,
3101 						  RTE_FLOW_ERROR_TYPE_ITEM,
3102 						  item,
3103 						  "Checksum bit must be on");
3104 	if (mask->key.key)
3105 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
3106 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
3107 			return rte_flow_error_set(error, EINVAL,
3108 						  RTE_FLOW_ERROR_TYPE_ITEM,
3109 						  item, "Key bit must be on");
3110 	if (mask->sequence.sequence)
3111 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x1000)) &&
3112 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x1000)))
3113 			return rte_flow_error_set(error, EINVAL,
3114 						  RTE_FLOW_ERROR_TYPE_ITEM,
3115 						  item,
3116 						  "Sequence bit must be on");
3117 	if (mask->checksum_rsvd.checksum || mask->sequence.sequence) {
3118 		if (priv->sh->steering_format_version ==
3119 		    MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
3120 		    ((attr->group || attr->transfer) &&
3121 		     !priv->sh->misc5_cap) ||
3122 		    (!(priv->sh->tunnel_header_0_1 &&
3123 		       priv->sh->tunnel_header_2_3) &&
3124 		    !attr->group && !attr->transfer))
3125 			return rte_flow_error_set(error, EINVAL,
3126 						  RTE_FLOW_ERROR_TYPE_ITEM,
3127 						  item,
3128 						  "Checksum/Sequence not supported");
3129 	}
3130 	ret = mlx5_flow_item_acceptable
3131 		(item, (const uint8_t *)mask,
3132 		 (const uint8_t *)&nic_mask,
3133 		 sizeof(struct rte_flow_item_gre_opt),
3134 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3135 	return ret;
3136 }
3137 
3138 /**
3139  * Validate GRE item.
3140  *
3141  * @param[in] item
3142  *   Item specification.
3143  * @param[in] item_flags
3144  *   Bit flags to mark detected items.
3145  * @param[in] target_protocol
3146  *   The next protocol in the previous item.
3147  * @param[out] error
3148  *   Pointer to error structure.
3149  *
3150  * @return
3151  *   0 on success, a negative errno value otherwise and rte_errno is set.
3152  */
3153 int
3154 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
3155 			    uint64_t item_flags,
3156 			    uint8_t target_protocol,
3157 			    struct rte_flow_error *error)
3158 {
3159 	const struct rte_flow_item_gre *spec __rte_unused = item->spec;
3160 	const struct rte_flow_item_gre *mask = item->mask;
3161 	int ret;
3162 	const struct rte_flow_item_gre nic_mask = {
3163 		.c_rsvd0_ver = RTE_BE16(0xB000),
3164 		.protocol = RTE_BE16(UINT16_MAX),
3165 	};
3166 
3167 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3168 		return rte_flow_error_set(error, EINVAL,
3169 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3170 					  "protocol filtering not compatible"
3171 					  " with this GRE layer");
3172 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3173 		return rte_flow_error_set(error, ENOTSUP,
3174 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3175 					  "multiple tunnel layers not"
3176 					  " supported");
3177 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3178 		return rte_flow_error_set(error, ENOTSUP,
3179 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3180 					  "L3 Layer is missing");
3181 	if (!mask)
3182 		mask = &rte_flow_item_gre_mask;
3183 	ret = mlx5_flow_item_acceptable
3184 		(item, (const uint8_t *)mask,
3185 		 (const uint8_t *)&nic_mask,
3186 		 sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
3187 		 error);
3188 	if (ret < 0)
3189 		return ret;
3190 #ifndef HAVE_MLX5DV_DR
3191 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
3192 	if (spec && (spec->protocol & mask->protocol))
3193 		return rte_flow_error_set(error, ENOTSUP,
3194 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3195 					  "without MPLS support the"
3196 					  " specification cannot be used for"
3197 					  " filtering");
3198 #endif
3199 #endif
3200 	return 0;
3201 }
3202 
3203 /**
3204  * Validate Geneve item.
3205  *
3206  * @param[in] item
3207  *   Item specification.
3208  * @param[in] itemFlags
3209  *   Bit-fields that holds the items detected until now.
3210  * @param[in] enPriv
3211  *   Pointer to the private data structure.
3212  * @param[out] error
3213  *   Pointer to error structure.
3214  *
3215  * @return
3216  *   0 on success, a negative errno value otherwise and rte_errno is set.
3217  */
3218 
3219 int
3220 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
3221 			       uint64_t item_flags,
3222 			       struct rte_eth_dev *dev,
3223 			       struct rte_flow_error *error)
3224 {
3225 	struct mlx5_priv *priv = dev->data->dev_private;
3226 	const struct rte_flow_item_geneve *spec = item->spec;
3227 	const struct rte_flow_item_geneve *mask = item->mask;
3228 	int ret;
3229 	uint16_t gbhdr;
3230 	uint8_t opt_len = priv->sh->cdev->config.hca_attr.geneve_max_opt_len ?
3231 			  MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
3232 	const struct rte_flow_item_geneve nic_mask = {
3233 		.ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
3234 		.vni = "\xff\xff\xff",
3235 		.protocol = RTE_BE16(UINT16_MAX),
3236 	};
3237 
3238 	if (!priv->sh->cdev->config.hca_attr.tunnel_stateless_geneve_rx)
3239 		return rte_flow_error_set(error, ENOTSUP,
3240 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3241 					  "L3 Geneve is not enabled by device"
3242 					  " parameter and/or not configured in"
3243 					  " firmware");
3244 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3245 		return rte_flow_error_set(error, ENOTSUP,
3246 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3247 					  "multiple tunnel layers not"
3248 					  " supported");
3249 	/*
3250 	 * Verify only UDPv4 is present as defined in
3251 	 * https://tools.ietf.org/html/rfc7348
3252 	 */
3253 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
3254 		return rte_flow_error_set(error, EINVAL,
3255 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3256 					  "no outer UDP layer found");
3257 	if (!mask)
3258 		mask = &rte_flow_item_geneve_mask;
3259 	ret = mlx5_flow_item_acceptable
3260 				  (item, (const uint8_t *)mask,
3261 				   (const uint8_t *)&nic_mask,
3262 				   sizeof(struct rte_flow_item_geneve),
3263 				   MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3264 	if (ret)
3265 		return ret;
3266 	if (spec) {
3267 		gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
3268 		if (MLX5_GENEVE_VER_VAL(gbhdr) ||
3269 		     MLX5_GENEVE_CRITO_VAL(gbhdr) ||
3270 		     MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
3271 			return rte_flow_error_set(error, ENOTSUP,
3272 						  RTE_FLOW_ERROR_TYPE_ITEM,
3273 						  item,
3274 						  "Geneve protocol unsupported"
3275 						  " fields are being used");
3276 		if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
3277 			return rte_flow_error_set
3278 					(error, ENOTSUP,
3279 					 RTE_FLOW_ERROR_TYPE_ITEM,
3280 					 item,
3281 					 "Unsupported Geneve options length");
3282 	}
3283 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3284 		return rte_flow_error_set
3285 				    (error, ENOTSUP,
3286 				     RTE_FLOW_ERROR_TYPE_ITEM, item,
3287 				     "Geneve tunnel must be fully defined");
3288 	return 0;
3289 }
3290 
3291 /**
3292  * Validate Geneve TLV option item.
3293  *
3294  * @param[in] item
3295  *   Item specification.
3296  * @param[in] last_item
3297  *   Previous validated item in the pattern items.
3298  * @param[in] geneve_item
3299  *   Previous GENEVE item specification.
3300  * @param[in] dev
3301  *   Pointer to the rte_eth_dev structure.
3302  * @param[out] error
3303  *   Pointer to error structure.
3304  *
3305  * @return
3306  *   0 on success, a negative errno value otherwise and rte_errno is set.
3307  */
3308 int
3309 mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item,
3310 				   uint64_t last_item,
3311 				   const struct rte_flow_item *geneve_item,
3312 				   struct rte_eth_dev *dev,
3313 				   struct rte_flow_error *error)
3314 {
3315 	struct mlx5_priv *priv = dev->data->dev_private;
3316 	struct mlx5_dev_ctx_shared *sh = priv->sh;
3317 	struct mlx5_geneve_tlv_option_resource *geneve_opt_resource;
3318 	struct mlx5_hca_attr *hca_attr = &sh->cdev->config.hca_attr;
3319 	uint8_t data_max_supported =
3320 			hca_attr->max_geneve_tlv_option_data_len * 4;
3321 	const struct rte_flow_item_geneve *geneve_spec;
3322 	const struct rte_flow_item_geneve *geneve_mask;
3323 	const struct rte_flow_item_geneve_opt *spec = item->spec;
3324 	const struct rte_flow_item_geneve_opt *mask = item->mask;
3325 	unsigned int i;
3326 	unsigned int data_len;
3327 	uint8_t tlv_option_len;
3328 	uint16_t optlen_m, optlen_v;
3329 	const struct rte_flow_item_geneve_opt full_mask = {
3330 		.option_class = RTE_BE16(0xffff),
3331 		.option_type = 0xff,
3332 		.option_len = 0x1f,
3333 	};
3334 
3335 	if (!mask)
3336 		mask = &rte_flow_item_geneve_opt_mask;
3337 	if (!spec)
3338 		return rte_flow_error_set
3339 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3340 			"Geneve TLV opt class/type/length must be specified");
3341 	if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK)
3342 		return rte_flow_error_set
3343 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3344 			"Geneve TLV opt length exceeds the limit (31)");
3345 	/* Check if class type and length masks are full. */
3346 	if (full_mask.option_class != mask->option_class ||
3347 	    full_mask.option_type != mask->option_type ||
3348 	    full_mask.option_len != (mask->option_len & full_mask.option_len))
3349 		return rte_flow_error_set
3350 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3351 			"Geneve TLV opt class/type/length masks must be full");
3352 	/* Check if length is supported */
3353 	if ((uint32_t)spec->option_len >
3354 			hca_attr->max_geneve_tlv_option_data_len)
3355 		return rte_flow_error_set
3356 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3357 			"Geneve TLV opt length not supported");
3358 	if (hca_attr->max_geneve_tlv_options > 1)
3359 		DRV_LOG(DEBUG,
3360 			"max_geneve_tlv_options supports more than 1 option");
3361 	/* Check GENEVE item preceding. */
3362 	if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE))
3363 		return rte_flow_error_set
3364 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3365 			"Geneve opt item must be preceded with Geneve item");
3366 	geneve_spec = geneve_item->spec;
3367 	geneve_mask = geneve_item->mask ? geneve_item->mask :
3368 					  &rte_flow_item_geneve_mask;
3369 	/* Check if GENEVE TLV option size doesn't exceed option length */
3370 	if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 ||
3371 			    geneve_spec->ver_opt_len_o_c_rsvd0)) {
3372 		tlv_option_len = spec->option_len & mask->option_len;
3373 		optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0);
3374 		optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v);
3375 		optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0);
3376 		optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m);
3377 		if ((optlen_v & optlen_m) <= tlv_option_len)
3378 			return rte_flow_error_set
3379 				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3380 				 "GENEVE TLV option length exceeds optlen");
3381 	}
3382 	/* Check if length is 0 or data is 0. */
3383 	if (spec->data == NULL || spec->option_len == 0)
3384 		return rte_flow_error_set
3385 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3386 			"Geneve TLV opt with zero data/length not supported");
3387 	/* Check not all data & mask are 0. */
3388 	data_len = spec->option_len * 4;
3389 	if (mask->data == NULL) {
3390 		for (i = 0; i < data_len; i++)
3391 			if (spec->data[i])
3392 				break;
3393 		if (i == data_len)
3394 			return rte_flow_error_set(error, ENOTSUP,
3395 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3396 				"Can't match on Geneve option data 0");
3397 	} else {
3398 		for (i = 0; i < data_len; i++)
3399 			if (spec->data[i] & mask->data[i])
3400 				break;
3401 		if (i == data_len)
3402 			return rte_flow_error_set(error, ENOTSUP,
3403 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3404 				"Can't match on Geneve option data and mask 0");
3405 		/* Check data mask supported. */
3406 		for (i = data_max_supported; i < data_len ; i++)
3407 			if (mask->data[i])
3408 				return rte_flow_error_set(error, ENOTSUP,
3409 					RTE_FLOW_ERROR_TYPE_ITEM, item,
3410 					"Data mask is of unsupported size");
3411 	}
3412 	/* Check GENEVE option is supported in NIC. */
3413 	if (!hca_attr->geneve_tlv_opt)
3414 		return rte_flow_error_set
3415 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3416 			"Geneve TLV opt not supported");
3417 	/* Check if we already have geneve option with different type/class. */
3418 	rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
3419 	geneve_opt_resource = sh->geneve_tlv_option_resource;
3420 	if (geneve_opt_resource != NULL)
3421 		if (geneve_opt_resource->option_class != spec->option_class ||
3422 		    geneve_opt_resource->option_type != spec->option_type ||
3423 		    geneve_opt_resource->length != spec->option_len) {
3424 			rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3425 			return rte_flow_error_set(error, ENOTSUP,
3426 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3427 				"Only one Geneve TLV option supported");
3428 		}
3429 	rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3430 	return 0;
3431 }
3432 
3433 /**
3434  * Validate MPLS item.
3435  *
3436  * @param[in] dev
3437  *   Pointer to the rte_eth_dev structure.
3438  * @param[in] item
3439  *   Item specification.
3440  * @param[in] item_flags
3441  *   Bit-fields that holds the items detected until now.
3442  * @param[in] prev_layer
3443  *   The protocol layer indicated in previous item.
3444  * @param[out] error
3445  *   Pointer to error structure.
3446  *
3447  * @return
3448  *   0 on success, a negative errno value otherwise and rte_errno is set.
3449  */
3450 int
3451 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
3452 			     const struct rte_flow_item *item __rte_unused,
3453 			     uint64_t item_flags __rte_unused,
3454 			     uint64_t prev_layer __rte_unused,
3455 			     struct rte_flow_error *error)
3456 {
3457 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
3458 	const struct rte_flow_item_mpls *mask = item->mask;
3459 	struct mlx5_priv *priv = dev->data->dev_private;
3460 	int ret;
3461 
3462 	if (!priv->sh->dev_cap.mpls_en)
3463 		return rte_flow_error_set(error, ENOTSUP,
3464 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3465 					  "MPLS not supported or"
3466 					  " disabled in firmware"
3467 					  " configuration.");
3468 	/* MPLS over UDP, GRE is allowed */
3469 	if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L4_UDP |
3470 			    MLX5_FLOW_LAYER_GRE |
3471 			    MLX5_FLOW_LAYER_GRE_KEY)))
3472 		return rte_flow_error_set(error, EINVAL,
3473 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3474 					  "protocol filtering not compatible"
3475 					  " with MPLS layer");
3476 	/* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
3477 	if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
3478 	    !(item_flags & MLX5_FLOW_LAYER_GRE))
3479 		return rte_flow_error_set(error, ENOTSUP,
3480 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3481 					  "multiple tunnel layers not"
3482 					  " supported");
3483 	if (!mask)
3484 		mask = &rte_flow_item_mpls_mask;
3485 	ret = mlx5_flow_item_acceptable
3486 		(item, (const uint8_t *)mask,
3487 		 (const uint8_t *)&rte_flow_item_mpls_mask,
3488 		 sizeof(struct rte_flow_item_mpls),
3489 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3490 	if (ret < 0)
3491 		return ret;
3492 	return 0;
3493 #else
3494 	return rte_flow_error_set(error, ENOTSUP,
3495 				  RTE_FLOW_ERROR_TYPE_ITEM, item,
3496 				  "MPLS is not supported by Verbs, please"
3497 				  " update.");
3498 #endif
3499 }
3500 
3501 /**
3502  * Validate NVGRE item.
3503  *
3504  * @param[in] item
3505  *   Item specification.
3506  * @param[in] item_flags
3507  *   Bit flags to mark detected items.
3508  * @param[in] target_protocol
3509  *   The next protocol in the previous item.
3510  * @param[out] error
3511  *   Pointer to error structure.
3512  *
3513  * @return
3514  *   0 on success, a negative errno value otherwise and rte_errno is set.
3515  */
3516 int
3517 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
3518 			      uint64_t item_flags,
3519 			      uint8_t target_protocol,
3520 			      struct rte_flow_error *error)
3521 {
3522 	const struct rte_flow_item_nvgre *mask = item->mask;
3523 	int ret;
3524 
3525 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3526 		return rte_flow_error_set(error, EINVAL,
3527 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3528 					  "protocol filtering not compatible"
3529 					  " with this GRE layer");
3530 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3531 		return rte_flow_error_set(error, ENOTSUP,
3532 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3533 					  "multiple tunnel layers not"
3534 					  " supported");
3535 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3536 		return rte_flow_error_set(error, ENOTSUP,
3537 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3538 					  "L3 Layer is missing");
3539 	if (!mask)
3540 		mask = &rte_flow_item_nvgre_mask;
3541 	ret = mlx5_flow_item_acceptable
3542 		(item, (const uint8_t *)mask,
3543 		 (const uint8_t *)&rte_flow_item_nvgre_mask,
3544 		 sizeof(struct rte_flow_item_nvgre),
3545 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3546 	if (ret < 0)
3547 		return ret;
3548 	return 0;
3549 }
3550 
3551 /**
3552  * Validate eCPRI item.
3553  *
3554  * @param[in] item
3555  *   Item specification.
3556  * @param[in] item_flags
3557  *   Bit-fields that holds the items detected until now.
3558  * @param[in] last_item
3559  *   Previous validated item in the pattern items.
3560  * @param[in] ether_type
3561  *   Type in the ethernet layer header (including dot1q).
3562  * @param[in] acc_mask
3563  *   Acceptable mask, if NULL default internal default mask
3564  *   will be used to check whether item fields are supported.
3565  * @param[out] error
3566  *   Pointer to error structure.
3567  *
3568  * @return
3569  *   0 on success, a negative errno value otherwise and rte_errno is set.
3570  */
3571 int
3572 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
3573 			      uint64_t item_flags,
3574 			      uint64_t last_item,
3575 			      uint16_t ether_type,
3576 			      const struct rte_flow_item_ecpri *acc_mask,
3577 			      struct rte_flow_error *error)
3578 {
3579 	const struct rte_flow_item_ecpri *mask = item->mask;
3580 	const struct rte_flow_item_ecpri nic_mask = {
3581 		.hdr = {
3582 			.common = {
3583 				.u32 =
3584 				RTE_BE32(((const struct rte_ecpri_common_hdr) {
3585 					.type = 0xFF,
3586 					}).u32),
3587 			},
3588 			.dummy[0] = 0xFFFFFFFF,
3589 		},
3590 	};
3591 	const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
3592 					MLX5_FLOW_LAYER_OUTER_VLAN);
3593 	struct rte_flow_item_ecpri mask_lo;
3594 
3595 	if (!(last_item & outer_l2_vlan) &&
3596 	    last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP)
3597 		return rte_flow_error_set(error, EINVAL,
3598 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3599 					  "eCPRI can only follow L2/VLAN layer or UDP layer");
3600 	if ((last_item & outer_l2_vlan) && ether_type &&
3601 	    ether_type != RTE_ETHER_TYPE_ECPRI)
3602 		return rte_flow_error_set(error, EINVAL,
3603 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3604 					  "eCPRI cannot follow L2/VLAN layer which ether type is not 0xAEFE");
3605 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3606 		return rte_flow_error_set(error, EINVAL,
3607 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3608 					  "eCPRI with tunnel is not supported right now");
3609 	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
3610 		return rte_flow_error_set(error, ENOTSUP,
3611 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3612 					  "multiple L3 layers not supported");
3613 	else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
3614 		return rte_flow_error_set(error, EINVAL,
3615 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3616 					  "eCPRI cannot coexist with a TCP layer");
3617 	/* In specification, eCPRI could be over UDP layer. */
3618 	else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
3619 		return rte_flow_error_set(error, EINVAL,
3620 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3621 					  "eCPRI over UDP layer is not yet supported right now");
3622 	/* Mask for type field in common header could be zero. */
3623 	if (!mask)
3624 		mask = &rte_flow_item_ecpri_mask;
3625 	mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
3626 	/* Input mask is in big-endian format. */
3627 	if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
3628 		return rte_flow_error_set(error, EINVAL,
3629 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3630 					  "partial mask is not supported for protocol");
3631 	else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
3632 		return rte_flow_error_set(error, EINVAL,
3633 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3634 					  "message header mask must be after a type mask");
3635 	return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
3636 					 acc_mask ? (const uint8_t *)acc_mask
3637 						  : (const uint8_t *)&nic_mask,
3638 					 sizeof(struct rte_flow_item_ecpri),
3639 					 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3640 }
3641 
3642 static int
3643 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
3644 		   const struct rte_flow_attr *attr __rte_unused,
3645 		   const struct rte_flow_item items[] __rte_unused,
3646 		   const struct rte_flow_action actions[] __rte_unused,
3647 		   bool external __rte_unused,
3648 		   int hairpin __rte_unused,
3649 		   struct rte_flow_error *error)
3650 {
3651 	return rte_flow_error_set(error, ENOTSUP,
3652 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3653 }
3654 
3655 static struct mlx5_flow *
3656 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
3657 		  const struct rte_flow_attr *attr __rte_unused,
3658 		  const struct rte_flow_item items[] __rte_unused,
3659 		  const struct rte_flow_action actions[] __rte_unused,
3660 		  struct rte_flow_error *error)
3661 {
3662 	rte_flow_error_set(error, ENOTSUP,
3663 			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3664 	return NULL;
3665 }
3666 
3667 static int
3668 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
3669 		    struct mlx5_flow *dev_flow __rte_unused,
3670 		    const struct rte_flow_attr *attr __rte_unused,
3671 		    const struct rte_flow_item items[] __rte_unused,
3672 		    const struct rte_flow_action actions[] __rte_unused,
3673 		    struct rte_flow_error *error)
3674 {
3675 	return rte_flow_error_set(error, ENOTSUP,
3676 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3677 }
3678 
3679 static int
3680 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
3681 		struct rte_flow *flow __rte_unused,
3682 		struct rte_flow_error *error)
3683 {
3684 	return rte_flow_error_set(error, ENOTSUP,
3685 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3686 }
3687 
3688 static void
3689 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
3690 		 struct rte_flow *flow __rte_unused)
3691 {
3692 }
3693 
3694 static void
3695 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
3696 		  struct rte_flow *flow __rte_unused)
3697 {
3698 }
3699 
3700 static int
3701 flow_null_query(struct rte_eth_dev *dev __rte_unused,
3702 		struct rte_flow *flow __rte_unused,
3703 		const struct rte_flow_action *actions __rte_unused,
3704 		void *data __rte_unused,
3705 		struct rte_flow_error *error)
3706 {
3707 	return rte_flow_error_set(error, ENOTSUP,
3708 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3709 }
3710 
3711 static int
3712 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused,
3713 		      uint32_t domains __rte_unused,
3714 		      uint32_t flags __rte_unused)
3715 {
3716 	return 0;
3717 }
3718 
3719 /* Void driver to protect from null pointer reference. */
3720 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
3721 	.validate = flow_null_validate,
3722 	.prepare = flow_null_prepare,
3723 	.translate = flow_null_translate,
3724 	.apply = flow_null_apply,
3725 	.remove = flow_null_remove,
3726 	.destroy = flow_null_destroy,
3727 	.query = flow_null_query,
3728 	.sync_domain = flow_null_sync_domain,
3729 };
3730 
3731 /**
3732  * Select flow driver type according to flow attributes and device
3733  * configuration.
3734  *
3735  * @param[in] dev
3736  *   Pointer to the dev structure.
3737  * @param[in] attr
3738  *   Pointer to the flow attributes.
3739  *
3740  * @return
3741  *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
3742  */
3743 static enum mlx5_flow_drv_type
3744 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
3745 {
3746 	struct mlx5_priv *priv = dev->data->dev_private;
3747 	/* The OS can determine first a specific flow type (DV, VERBS) */
3748 	enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
3749 
3750 	if (type != MLX5_FLOW_TYPE_MAX)
3751 		return type;
3752 	/*
3753 	 * Currently when dv_flow_en == 2, only HW steering engine is
3754 	 * supported. New engines can also be chosen here if ready.
3755 	 */
3756 	if (priv->sh->config.dv_flow_en == 2)
3757 		return MLX5_FLOW_TYPE_HW;
3758 	/* If no OS specific type - continue with DV/VERBS selection */
3759 	if (attr->transfer && priv->sh->config.dv_esw_en)
3760 		type = MLX5_FLOW_TYPE_DV;
3761 	if (!attr->transfer)
3762 		type = priv->sh->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
3763 						     MLX5_FLOW_TYPE_VERBS;
3764 	return type;
3765 }
3766 
3767 #define flow_get_drv_ops(type) flow_drv_ops[type]
3768 
3769 /**
3770  * Flow driver validation API. This abstracts calling driver specific functions.
3771  * The type of flow driver is determined according to flow attributes.
3772  *
3773  * @param[in] dev
3774  *   Pointer to the dev structure.
3775  * @param[in] attr
3776  *   Pointer to the flow attributes.
3777  * @param[in] items
3778  *   Pointer to the list of items.
3779  * @param[in] actions
3780  *   Pointer to the list of actions.
3781  * @param[in] external
3782  *   This flow rule is created by request external to PMD.
3783  * @param[in] hairpin
3784  *   Number of hairpin TX actions, 0 means classic flow.
3785  * @param[out] error
3786  *   Pointer to the error structure.
3787  *
3788  * @return
3789  *   0 on success, a negative errno value otherwise and rte_errno is set.
3790  */
3791 static inline int
3792 flow_drv_validate(struct rte_eth_dev *dev,
3793 		  const struct rte_flow_attr *attr,
3794 		  const struct rte_flow_item items[],
3795 		  const struct rte_flow_action actions[],
3796 		  bool external, int hairpin, struct rte_flow_error *error)
3797 {
3798 	const struct mlx5_flow_driver_ops *fops;
3799 	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
3800 
3801 	fops = flow_get_drv_ops(type);
3802 	return fops->validate(dev, attr, items, actions, external,
3803 			      hairpin, error);
3804 }
3805 
3806 /**
3807  * Flow driver preparation API. This abstracts calling driver specific
3808  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3809  * calculates the size of memory required for device flow, allocates the memory,
3810  * initializes the device flow and returns the pointer.
3811  *
3812  * @note
3813  *   This function initializes device flow structure such as dv or verbs in
3814  *   struct mlx5_flow. However, it is caller's responsibility to initialize the
3815  *   rest. For example, adding returning device flow to flow->dev_flow list and
3816  *   setting backward reference to the flow should be done out of this function.
3817  *   layers field is not filled either.
3818  *
3819  * @param[in] dev
3820  *   Pointer to the dev structure.
3821  * @param[in] attr
3822  *   Pointer to the flow attributes.
3823  * @param[in] items
3824  *   Pointer to the list of items.
3825  * @param[in] actions
3826  *   Pointer to the list of actions.
3827  * @param[in] flow_idx
3828  *   This memory pool index to the flow.
3829  * @param[out] error
3830  *   Pointer to the error structure.
3831  *
3832  * @return
3833  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
3834  */
3835 static inline struct mlx5_flow *
3836 flow_drv_prepare(struct rte_eth_dev *dev,
3837 		 const struct rte_flow *flow,
3838 		 const struct rte_flow_attr *attr,
3839 		 const struct rte_flow_item items[],
3840 		 const struct rte_flow_action actions[],
3841 		 uint32_t flow_idx,
3842 		 struct rte_flow_error *error)
3843 {
3844 	const struct mlx5_flow_driver_ops *fops;
3845 	enum mlx5_flow_drv_type type = flow->drv_type;
3846 	struct mlx5_flow *mlx5_flow = NULL;
3847 
3848 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3849 	fops = flow_get_drv_ops(type);
3850 	mlx5_flow = fops->prepare(dev, attr, items, actions, error);
3851 	if (mlx5_flow)
3852 		mlx5_flow->flow_idx = flow_idx;
3853 	return mlx5_flow;
3854 }
3855 
3856 /**
3857  * Flow driver translation API. This abstracts calling driver specific
3858  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3859  * translates a generic flow into a driver flow. flow_drv_prepare() must
3860  * precede.
3861  *
3862  * @note
3863  *   dev_flow->layers could be filled as a result of parsing during translation
3864  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
3865  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
3866  *   flow->actions could be overwritten even though all the expanded dev_flows
3867  *   have the same actions.
3868  *
3869  * @param[in] dev
3870  *   Pointer to the rte dev structure.
3871  * @param[in, out] dev_flow
3872  *   Pointer to the mlx5 flow.
3873  * @param[in] attr
3874  *   Pointer to the flow attributes.
3875  * @param[in] items
3876  *   Pointer to the list of items.
3877  * @param[in] actions
3878  *   Pointer to the list of actions.
3879  * @param[out] error
3880  *   Pointer to the error structure.
3881  *
3882  * @return
3883  *   0 on success, a negative errno value otherwise and rte_errno is set.
3884  */
3885 static inline int
3886 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
3887 		   const struct rte_flow_attr *attr,
3888 		   const struct rte_flow_item items[],
3889 		   const struct rte_flow_action actions[],
3890 		   struct rte_flow_error *error)
3891 {
3892 	const struct mlx5_flow_driver_ops *fops;
3893 	enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
3894 
3895 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3896 	fops = flow_get_drv_ops(type);
3897 	return fops->translate(dev, dev_flow, attr, items, actions, error);
3898 }
3899 
3900 /**
3901  * Flow driver apply API. This abstracts calling driver specific functions.
3902  * Parent flow (rte_flow) should have driver type (drv_type). It applies
3903  * translated driver flows on to device. flow_drv_translate() must precede.
3904  *
3905  * @param[in] dev
3906  *   Pointer to Ethernet device structure.
3907  * @param[in, out] flow
3908  *   Pointer to flow structure.
3909  * @param[out] error
3910  *   Pointer to error structure.
3911  *
3912  * @return
3913  *   0 on success, a negative errno value otherwise and rte_errno is set.
3914  */
3915 static inline int
3916 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3917 	       struct rte_flow_error *error)
3918 {
3919 	const struct mlx5_flow_driver_ops *fops;
3920 	enum mlx5_flow_drv_type type = flow->drv_type;
3921 
3922 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3923 	fops = flow_get_drv_ops(type);
3924 	return fops->apply(dev, flow, error);
3925 }
3926 
3927 /**
3928  * Flow driver destroy API. This abstracts calling driver specific functions.
3929  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3930  * on device and releases resources of the flow.
3931  *
3932  * @param[in] dev
3933  *   Pointer to Ethernet device.
3934  * @param[in, out] flow
3935  *   Pointer to flow structure.
3936  */
3937 static inline void
3938 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3939 {
3940 	const struct mlx5_flow_driver_ops *fops;
3941 	enum mlx5_flow_drv_type type = flow->drv_type;
3942 
3943 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3944 	fops = flow_get_drv_ops(type);
3945 	fops->destroy(dev, flow);
3946 }
3947 
3948 /**
3949  * Flow driver find RSS policy tbl API. This abstracts calling driver
3950  * specific functions. Parent flow (rte_flow) should have driver
3951  * type (drv_type). It will find the RSS policy table that has the rss_desc.
3952  *
3953  * @param[in] dev
3954  *   Pointer to Ethernet device.
3955  * @param[in, out] flow
3956  *   Pointer to flow structure.
3957  * @param[in] policy
3958  *   Pointer to meter policy table.
3959  * @param[in] rss_desc
3960  *   Pointer to rss_desc
3961  */
3962 static struct mlx5_flow_meter_sub_policy *
3963 flow_drv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
3964 		struct rte_flow *flow,
3965 		struct mlx5_flow_meter_policy *policy,
3966 		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS])
3967 {
3968 	const struct mlx5_flow_driver_ops *fops;
3969 	enum mlx5_flow_drv_type type = flow->drv_type;
3970 
3971 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3972 	fops = flow_get_drv_ops(type);
3973 	return fops->meter_sub_policy_rss_prepare(dev, policy, rss_desc);
3974 }
3975 
3976 /**
3977  * Flow driver color tag rule API. This abstracts calling driver
3978  * specific functions. Parent flow (rte_flow) should have driver
3979  * type (drv_type). It will create the color tag rules in hierarchy meter.
3980  *
3981  * @param[in] dev
3982  *   Pointer to Ethernet device.
3983  * @param[in, out] flow
3984  *   Pointer to flow structure.
3985  * @param[in] fm
3986  *   Pointer to flow meter structure.
3987  * @param[in] src_port
3988  *   The src port this extra rule should use.
3989  * @param[in] item
3990  *   The src port id match item.
3991  * @param[out] error
3992  *   Pointer to error structure.
3993  */
3994 static int
3995 flow_drv_mtr_hierarchy_rule_create(struct rte_eth_dev *dev,
3996 		struct rte_flow *flow,
3997 		struct mlx5_flow_meter_info *fm,
3998 		int32_t src_port,
3999 		const struct rte_flow_item *item,
4000 		struct rte_flow_error *error)
4001 {
4002 	const struct mlx5_flow_driver_ops *fops;
4003 	enum mlx5_flow_drv_type type = flow->drv_type;
4004 
4005 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
4006 	fops = flow_get_drv_ops(type);
4007 	return fops->meter_hierarchy_rule_create(dev, fm,
4008 						src_port, item, error);
4009 }
4010 
4011 /**
4012  * Get RSS action from the action list.
4013  *
4014  * @param[in] dev
4015  *   Pointer to Ethernet device.
4016  * @param[in] actions
4017  *   Pointer to the list of actions.
4018  * @param[in] flow
4019  *   Parent flow structure pointer.
4020  *
4021  * @return
4022  *   Pointer to the RSS action if exist, else return NULL.
4023  */
4024 static const struct rte_flow_action_rss*
4025 flow_get_rss_action(struct rte_eth_dev *dev,
4026 		    const struct rte_flow_action actions[])
4027 {
4028 	struct mlx5_priv *priv = dev->data->dev_private;
4029 	const struct rte_flow_action_rss *rss = NULL;
4030 	struct mlx5_meter_policy_action_container *acg;
4031 	struct mlx5_meter_policy_action_container *acy;
4032 
4033 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4034 		switch (actions->type) {
4035 		case RTE_FLOW_ACTION_TYPE_RSS:
4036 			rss = actions->conf;
4037 			break;
4038 		case RTE_FLOW_ACTION_TYPE_SAMPLE:
4039 		{
4040 			const struct rte_flow_action_sample *sample =
4041 								actions->conf;
4042 			const struct rte_flow_action *act = sample->actions;
4043 			for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
4044 				if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
4045 					rss = act->conf;
4046 			break;
4047 		}
4048 		case RTE_FLOW_ACTION_TYPE_METER:
4049 		{
4050 			uint32_t mtr_idx;
4051 			struct mlx5_flow_meter_info *fm;
4052 			struct mlx5_flow_meter_policy *policy;
4053 			const struct rte_flow_action_meter *mtr = actions->conf;
4054 
4055 			fm = mlx5_flow_meter_find(priv, mtr->mtr_id, &mtr_idx);
4056 			if (fm && !fm->def_policy) {
4057 				policy = mlx5_flow_meter_policy_find(dev,
4058 						fm->policy_id, NULL);
4059 				MLX5_ASSERT(policy);
4060 				if (policy->is_hierarchy) {
4061 					policy =
4062 				mlx5_flow_meter_hierarchy_get_final_policy(dev,
4063 									policy);
4064 					if (!policy)
4065 						return NULL;
4066 				}
4067 				if (policy->is_rss) {
4068 					acg =
4069 					&policy->act_cnt[RTE_COLOR_GREEN];
4070 					acy =
4071 					&policy->act_cnt[RTE_COLOR_YELLOW];
4072 					if (acg->fate_action ==
4073 					    MLX5_FLOW_FATE_SHARED_RSS)
4074 						rss = acg->rss->conf;
4075 					else if (acy->fate_action ==
4076 						 MLX5_FLOW_FATE_SHARED_RSS)
4077 						rss = acy->rss->conf;
4078 				}
4079 			}
4080 			break;
4081 		}
4082 		default:
4083 			break;
4084 		}
4085 	}
4086 	return rss;
4087 }
4088 
4089 /**
4090  * Get ASO age action by index.
4091  *
4092  * @param[in] dev
4093  *   Pointer to the Ethernet device structure.
4094  * @param[in] age_idx
4095  *   Index to the ASO age action.
4096  *
4097  * @return
4098  *   The specified ASO age action.
4099  */
4100 struct mlx5_aso_age_action*
4101 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx)
4102 {
4103 	uint16_t pool_idx = age_idx & UINT16_MAX;
4104 	uint16_t offset = (age_idx >> 16) & UINT16_MAX;
4105 	struct mlx5_priv *priv = dev->data->dev_private;
4106 	struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
4107 	struct mlx5_aso_age_pool *pool;
4108 
4109 	rte_rwlock_read_lock(&mng->resize_rwl);
4110 	pool = mng->pools[pool_idx];
4111 	rte_rwlock_read_unlock(&mng->resize_rwl);
4112 	return &pool->actions[offset - 1];
4113 }
4114 
4115 /* maps indirect action to translated direct in some actions array */
4116 struct mlx5_translated_action_handle {
4117 	struct rte_flow_action_handle *action; /**< Indirect action handle. */
4118 	int index; /**< Index in related array of rte_flow_action. */
4119 };
4120 
4121 /**
4122  * Translates actions of type RTE_FLOW_ACTION_TYPE_INDIRECT to related
4123  * direct action if translation possible.
4124  * This functionality used to run same execution path for both direct and
4125  * indirect actions on flow create. All necessary preparations for indirect
4126  * action handling should be performed on *handle* actions list returned
4127  * from this call.
4128  *
4129  * @param[in] dev
4130  *   Pointer to Ethernet device.
4131  * @param[in] actions
4132  *   List of actions to translate.
4133  * @param[out] handle
4134  *   List to store translated indirect action object handles.
4135  * @param[in, out] indir_n
4136  *   Size of *handle* array. On return should be updated with number of
4137  *   indirect actions retrieved from the *actions* list.
4138  * @param[out] translated_actions
4139  *   List of actions where all indirect actions were translated to direct
4140  *   if possible. NULL if no translation took place.
4141  * @param[out] error
4142  *   Pointer to the error structure.
4143  *
4144  * @return
4145  *   0 on success, a negative errno value otherwise and rte_errno is set.
4146  */
4147 static int
4148 flow_action_handles_translate(struct rte_eth_dev *dev,
4149 			      const struct rte_flow_action actions[],
4150 			      struct mlx5_translated_action_handle *handle,
4151 			      int *indir_n,
4152 			      struct rte_flow_action **translated_actions,
4153 			      struct rte_flow_error *error)
4154 {
4155 	struct mlx5_priv *priv = dev->data->dev_private;
4156 	struct rte_flow_action *translated = NULL;
4157 	size_t actions_size;
4158 	int n;
4159 	int copied_n = 0;
4160 	struct mlx5_translated_action_handle *handle_end = NULL;
4161 
4162 	for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) {
4163 		if (actions[n].type != RTE_FLOW_ACTION_TYPE_INDIRECT)
4164 			continue;
4165 		if (copied_n == *indir_n) {
4166 			return rte_flow_error_set
4167 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM,
4168 				 NULL, "too many shared actions");
4169 		}
4170 		rte_memcpy(&handle[copied_n].action, &actions[n].conf,
4171 			   sizeof(actions[n].conf));
4172 		handle[copied_n].index = n;
4173 		copied_n++;
4174 	}
4175 	n++;
4176 	*indir_n = copied_n;
4177 	if (!copied_n)
4178 		return 0;
4179 	actions_size = sizeof(struct rte_flow_action) * n;
4180 	translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY);
4181 	if (!translated) {
4182 		rte_errno = ENOMEM;
4183 		return -ENOMEM;
4184 	}
4185 	memcpy(translated, actions, actions_size);
4186 	for (handle_end = handle + copied_n; handle < handle_end; handle++) {
4187 		struct mlx5_shared_action_rss *shared_rss;
4188 		uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
4189 		uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
4190 		uint32_t idx = act_idx &
4191 			       ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
4192 
4193 		switch (type) {
4194 		case MLX5_INDIRECT_ACTION_TYPE_RSS:
4195 			shared_rss = mlx5_ipool_get
4196 			  (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx);
4197 			translated[handle->index].type =
4198 				RTE_FLOW_ACTION_TYPE_RSS;
4199 			translated[handle->index].conf =
4200 				&shared_rss->origin;
4201 			break;
4202 		case MLX5_INDIRECT_ACTION_TYPE_COUNT:
4203 			translated[handle->index].type =
4204 						(enum rte_flow_action_type)
4205 						MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
4206 			translated[handle->index].conf = (void *)(uintptr_t)idx;
4207 			break;
4208 		case MLX5_INDIRECT_ACTION_TYPE_AGE:
4209 			if (priv->sh->flow_hit_aso_en) {
4210 				translated[handle->index].type =
4211 					(enum rte_flow_action_type)
4212 					MLX5_RTE_FLOW_ACTION_TYPE_AGE;
4213 				translated[handle->index].conf =
4214 							 (void *)(uintptr_t)idx;
4215 				break;
4216 			}
4217 			/* Fall-through */
4218 		case MLX5_INDIRECT_ACTION_TYPE_CT:
4219 			if (priv->sh->ct_aso_en) {
4220 				translated[handle->index].type =
4221 					RTE_FLOW_ACTION_TYPE_CONNTRACK;
4222 				translated[handle->index].conf =
4223 							 (void *)(uintptr_t)idx;
4224 				break;
4225 			}
4226 			/* Fall-through */
4227 		default:
4228 			mlx5_free(translated);
4229 			return rte_flow_error_set
4230 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
4231 				 NULL, "invalid indirect action type");
4232 		}
4233 	}
4234 	*translated_actions = translated;
4235 	return 0;
4236 }
4237 
4238 /**
4239  * Get Shared RSS action from the action list.
4240  *
4241  * @param[in] dev
4242  *   Pointer to Ethernet device.
4243  * @param[in] shared
4244  *   Pointer to the list of actions.
4245  * @param[in] shared_n
4246  *   Actions list length.
4247  *
4248  * @return
4249  *   The MLX5 RSS action ID if exists, otherwise return 0.
4250  */
4251 static uint32_t
4252 flow_get_shared_rss_action(struct rte_eth_dev *dev,
4253 			   struct mlx5_translated_action_handle *handle,
4254 			   int shared_n)
4255 {
4256 	struct mlx5_translated_action_handle *handle_end;
4257 	struct mlx5_priv *priv = dev->data->dev_private;
4258 	struct mlx5_shared_action_rss *shared_rss;
4259 
4260 
4261 	for (handle_end = handle + shared_n; handle < handle_end; handle++) {
4262 		uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
4263 		uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
4264 		uint32_t idx = act_idx &
4265 			       ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
4266 		switch (type) {
4267 		case MLX5_INDIRECT_ACTION_TYPE_RSS:
4268 			shared_rss = mlx5_ipool_get
4269 				(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
4270 									   idx);
4271 			__atomic_add_fetch(&shared_rss->refcnt, 1,
4272 					   __ATOMIC_RELAXED);
4273 			return idx;
4274 		default:
4275 			break;
4276 		}
4277 	}
4278 	return 0;
4279 }
4280 
4281 static unsigned int
4282 find_graph_root(uint32_t rss_level)
4283 {
4284 	return rss_level < 2 ? MLX5_EXPANSION_ROOT :
4285 			       MLX5_EXPANSION_ROOT_OUTER;
4286 }
4287 
4288 /**
4289  *  Get layer flags from the prefix flow.
4290  *
4291  *  Some flows may be split to several subflows, the prefix subflow gets the
4292  *  match items and the suffix sub flow gets the actions.
4293  *  Some actions need the user defined match item flags to get the detail for
4294  *  the action.
4295  *  This function helps the suffix flow to get the item layer flags from prefix
4296  *  subflow.
4297  *
4298  * @param[in] dev_flow
4299  *   Pointer the created prefix subflow.
4300  *
4301  * @return
4302  *   The layers get from prefix subflow.
4303  */
4304 static inline uint64_t
4305 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
4306 {
4307 	uint64_t layers = 0;
4308 
4309 	/*
4310 	 * Layers bits could be localization, but usually the compiler will
4311 	 * help to do the optimization work for source code.
4312 	 * If no decap actions, use the layers directly.
4313 	 */
4314 	if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
4315 		return dev_flow->handle->layers;
4316 	/* Convert L3 layers with decap action. */
4317 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
4318 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
4319 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
4320 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
4321 	/* Convert L4 layers with decap action.  */
4322 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
4323 		layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
4324 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
4325 		layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
4326 	return layers;
4327 }
4328 
4329 /**
4330  * Get metadata split action information.
4331  *
4332  * @param[in] actions
4333  *   Pointer to the list of actions.
4334  * @param[out] qrss
4335  *   Pointer to the return pointer.
4336  * @param[out] qrss_type
4337  *   Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned
4338  *   if no QUEUE/RSS is found.
4339  * @param[out] encap_idx
4340  *   Pointer to the index of the encap action if exists, otherwise the last
4341  *   action index.
4342  *
4343  * @return
4344  *   Total number of actions.
4345  */
4346 static int
4347 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
4348 				       const struct rte_flow_action **qrss,
4349 				       int *encap_idx)
4350 {
4351 	const struct rte_flow_action_raw_encap *raw_encap;
4352 	int actions_n = 0;
4353 	int raw_decap_idx = -1;
4354 
4355 	*encap_idx = -1;
4356 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4357 		switch (actions->type) {
4358 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4359 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4360 			*encap_idx = actions_n;
4361 			break;
4362 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4363 			raw_decap_idx = actions_n;
4364 			break;
4365 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4366 			raw_encap = actions->conf;
4367 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4368 				*encap_idx = raw_decap_idx != -1 ?
4369 						      raw_decap_idx : actions_n;
4370 			break;
4371 		case RTE_FLOW_ACTION_TYPE_QUEUE:
4372 		case RTE_FLOW_ACTION_TYPE_RSS:
4373 			*qrss = actions;
4374 			break;
4375 		default:
4376 			break;
4377 		}
4378 		actions_n++;
4379 	}
4380 	if (*encap_idx == -1)
4381 		*encap_idx = actions_n;
4382 	/* Count RTE_FLOW_ACTION_TYPE_END. */
4383 	return actions_n + 1;
4384 }
4385 
4386 /**
4387  * Check if the action will change packet.
4388  *
4389  * @param dev
4390  *   Pointer to Ethernet device.
4391  * @param[in] type
4392  *   action type.
4393  *
4394  * @return
4395  *   true if action will change packet, false otherwise.
4396  */
4397 static bool flow_check_modify_action_type(struct rte_eth_dev *dev,
4398 					  enum rte_flow_action_type type)
4399 {
4400 	struct mlx5_priv *priv = dev->data->dev_private;
4401 
4402 	switch (type) {
4403 	case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
4404 	case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
4405 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
4406 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
4407 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
4408 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
4409 	case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
4410 	case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
4411 	case RTE_FLOW_ACTION_TYPE_DEC_TTL:
4412 	case RTE_FLOW_ACTION_TYPE_SET_TTL:
4413 	case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
4414 	case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
4415 	case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
4416 	case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
4417 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
4418 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
4419 	case RTE_FLOW_ACTION_TYPE_SET_META:
4420 	case RTE_FLOW_ACTION_TYPE_SET_TAG:
4421 	case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
4422 	case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4423 	case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4424 	case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4425 	case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4426 	case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4427 	case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4428 	case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4429 	case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4430 	case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4431 	case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
4432 		return true;
4433 	case RTE_FLOW_ACTION_TYPE_FLAG:
4434 	case RTE_FLOW_ACTION_TYPE_MARK:
4435 		if (priv->sh->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY)
4436 			return true;
4437 		else
4438 			return false;
4439 	default:
4440 		return false;
4441 	}
4442 }
4443 
4444 /**
4445  * Check meter action from the action list.
4446  *
4447  * @param dev
4448  *   Pointer to Ethernet device.
4449  * @param[in] actions
4450  *   Pointer to the list of actions.
4451  * @param[out] has_mtr
4452  *   Pointer to the meter exist flag.
4453  * @param[out] has_modify
4454  *   Pointer to the flag showing there's packet change action.
4455  * @param[out] meter_id
4456  *   Pointer to the meter id.
4457  *
4458  * @return
4459  *   Total number of actions.
4460  */
4461 static int
4462 flow_check_meter_action(struct rte_eth_dev *dev,
4463 			const struct rte_flow_action actions[],
4464 			bool *has_mtr, bool *has_modify, uint32_t *meter_id)
4465 {
4466 	const struct rte_flow_action_meter *mtr = NULL;
4467 	int actions_n = 0;
4468 
4469 	MLX5_ASSERT(has_mtr);
4470 	*has_mtr = false;
4471 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4472 		switch (actions->type) {
4473 		case RTE_FLOW_ACTION_TYPE_METER:
4474 			mtr = actions->conf;
4475 			*meter_id = mtr->mtr_id;
4476 			*has_mtr = true;
4477 			break;
4478 		default:
4479 			break;
4480 		}
4481 		if (!*has_mtr)
4482 			*has_modify |= flow_check_modify_action_type(dev,
4483 								actions->type);
4484 		actions_n++;
4485 	}
4486 	/* Count RTE_FLOW_ACTION_TYPE_END. */
4487 	return actions_n + 1;
4488 }
4489 
4490 /**
4491  * Check if the flow should be split due to hairpin.
4492  * The reason for the split is that in current HW we can't
4493  * support encap and push-vlan on Rx, so if a flow contains
4494  * these actions we move it to Tx.
4495  *
4496  * @param dev
4497  *   Pointer to Ethernet device.
4498  * @param[in] attr
4499  *   Flow rule attributes.
4500  * @param[in] actions
4501  *   Associated actions (list terminated by the END action).
4502  *
4503  * @return
4504  *   > 0 the number of actions and the flow should be split,
4505  *   0 when no split required.
4506  */
4507 static int
4508 flow_check_hairpin_split(struct rte_eth_dev *dev,
4509 			 const struct rte_flow_attr *attr,
4510 			 const struct rte_flow_action actions[])
4511 {
4512 	int queue_action = 0;
4513 	int action_n = 0;
4514 	int split = 0;
4515 	const struct rte_flow_action_queue *queue;
4516 	const struct rte_flow_action_rss *rss;
4517 	const struct rte_flow_action_raw_encap *raw_encap;
4518 	const struct rte_eth_hairpin_conf *conf;
4519 
4520 	if (!attr->ingress)
4521 		return 0;
4522 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4523 		switch (actions->type) {
4524 		case RTE_FLOW_ACTION_TYPE_QUEUE:
4525 			queue = actions->conf;
4526 			if (queue == NULL)
4527 				return 0;
4528 			conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
4529 			if (conf == NULL || conf->tx_explicit != 0)
4530 				return 0;
4531 			queue_action = 1;
4532 			action_n++;
4533 			break;
4534 		case RTE_FLOW_ACTION_TYPE_RSS:
4535 			rss = actions->conf;
4536 			if (rss == NULL || rss->queue_num == 0)
4537 				return 0;
4538 			conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
4539 			if (conf == NULL || conf->tx_explicit != 0)
4540 				return 0;
4541 			queue_action = 1;
4542 			action_n++;
4543 			break;
4544 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4545 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4546 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4547 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4548 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4549 			split++;
4550 			action_n++;
4551 			break;
4552 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4553 			raw_encap = actions->conf;
4554 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4555 				split++;
4556 			action_n++;
4557 			break;
4558 		default:
4559 			action_n++;
4560 			break;
4561 		}
4562 	}
4563 	if (split && queue_action)
4564 		return action_n;
4565 	return 0;
4566 }
4567 
4568 /* Declare flow create/destroy prototype in advance. */
4569 static uint32_t
4570 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4571 		 const struct rte_flow_attr *attr,
4572 		 const struct rte_flow_item items[],
4573 		 const struct rte_flow_action actions[],
4574 		 bool external, struct rte_flow_error *error);
4575 
4576 static void
4577 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4578 		  uint32_t flow_idx);
4579 
4580 int
4581 flow_dv_mreg_match_cb(void *tool_ctx __rte_unused,
4582 		      struct mlx5_list_entry *entry, void *cb_ctx)
4583 {
4584 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4585 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4586 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4587 
4588 	return mcp_res->mark_id != *(uint32_t *)(ctx->data);
4589 }
4590 
4591 struct mlx5_list_entry *
4592 flow_dv_mreg_create_cb(void *tool_ctx, void *cb_ctx)
4593 {
4594 	struct rte_eth_dev *dev = tool_ctx;
4595 	struct mlx5_priv *priv = dev->data->dev_private;
4596 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4597 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4598 	struct rte_flow_error *error = ctx->error;
4599 	uint32_t idx = 0;
4600 	int ret;
4601 	uint32_t mark_id = *(uint32_t *)(ctx->data);
4602 	struct rte_flow_attr attr = {
4603 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4604 		.ingress = 1,
4605 	};
4606 	struct mlx5_rte_flow_item_tag tag_spec = {
4607 		.data = mark_id,
4608 	};
4609 	struct rte_flow_item items[] = {
4610 		[1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
4611 	};
4612 	struct rte_flow_action_mark ftag = {
4613 		.id = mark_id,
4614 	};
4615 	struct mlx5_flow_action_copy_mreg cp_mreg = {
4616 		.dst = REG_B,
4617 		.src = REG_NON,
4618 	};
4619 	struct rte_flow_action_jump jump = {
4620 		.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4621 	};
4622 	struct rte_flow_action actions[] = {
4623 		[3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
4624 	};
4625 
4626 	/* Fill the register fields in the flow. */
4627 	ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
4628 	if (ret < 0)
4629 		return NULL;
4630 	tag_spec.id = ret;
4631 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4632 	if (ret < 0)
4633 		return NULL;
4634 	cp_mreg.src = ret;
4635 	/* Provide the full width of FLAG specific value. */
4636 	if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
4637 		tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
4638 	/* Build a new flow. */
4639 	if (mark_id != MLX5_DEFAULT_COPY_ID) {
4640 		items[0] = (struct rte_flow_item){
4641 			.type = (enum rte_flow_item_type)
4642 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4643 			.spec = &tag_spec,
4644 		};
4645 		items[1] = (struct rte_flow_item){
4646 			.type = RTE_FLOW_ITEM_TYPE_END,
4647 		};
4648 		actions[0] = (struct rte_flow_action){
4649 			.type = (enum rte_flow_action_type)
4650 				MLX5_RTE_FLOW_ACTION_TYPE_MARK,
4651 			.conf = &ftag,
4652 		};
4653 		actions[1] = (struct rte_flow_action){
4654 			.type = (enum rte_flow_action_type)
4655 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4656 			.conf = &cp_mreg,
4657 		};
4658 		actions[2] = (struct rte_flow_action){
4659 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
4660 			.conf = &jump,
4661 		};
4662 		actions[3] = (struct rte_flow_action){
4663 			.type = RTE_FLOW_ACTION_TYPE_END,
4664 		};
4665 	} else {
4666 		/* Default rule, wildcard match. */
4667 		attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR;
4668 		items[0] = (struct rte_flow_item){
4669 			.type = RTE_FLOW_ITEM_TYPE_END,
4670 		};
4671 		actions[0] = (struct rte_flow_action){
4672 			.type = (enum rte_flow_action_type)
4673 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4674 			.conf = &cp_mreg,
4675 		};
4676 		actions[1] = (struct rte_flow_action){
4677 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
4678 			.conf = &jump,
4679 		};
4680 		actions[2] = (struct rte_flow_action){
4681 			.type = RTE_FLOW_ACTION_TYPE_END,
4682 		};
4683 	}
4684 	/* Build a new entry. */
4685 	mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4686 	if (!mcp_res) {
4687 		rte_errno = ENOMEM;
4688 		return NULL;
4689 	}
4690 	mcp_res->idx = idx;
4691 	mcp_res->mark_id = mark_id;
4692 	/*
4693 	 * The copy Flows are not included in any list. There
4694 	 * ones are referenced from other Flows and can not
4695 	 * be applied, removed, deleted in arbitrary order
4696 	 * by list traversing.
4697 	 */
4698 	mcp_res->rix_flow = flow_list_create(dev, MLX5_FLOW_TYPE_MCP,
4699 					&attr, items, actions, false, error);
4700 	if (!mcp_res->rix_flow) {
4701 		mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx);
4702 		return NULL;
4703 	}
4704 	return &mcp_res->hlist_ent;
4705 }
4706 
4707 struct mlx5_list_entry *
4708 flow_dv_mreg_clone_cb(void *tool_ctx, struct mlx5_list_entry *oentry,
4709 		      void *cb_ctx __rte_unused)
4710 {
4711 	struct rte_eth_dev *dev = tool_ctx;
4712 	struct mlx5_priv *priv = dev->data->dev_private;
4713 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4714 	uint32_t idx = 0;
4715 
4716 	mcp_res = mlx5_ipool_malloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4717 	if (!mcp_res) {
4718 		rte_errno = ENOMEM;
4719 		return NULL;
4720 	}
4721 	memcpy(mcp_res, oentry, sizeof(*mcp_res));
4722 	mcp_res->idx = idx;
4723 	return &mcp_res->hlist_ent;
4724 }
4725 
4726 void
4727 flow_dv_mreg_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4728 {
4729 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4730 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4731 	struct rte_eth_dev *dev = tool_ctx;
4732 	struct mlx5_priv *priv = dev->data->dev_private;
4733 
4734 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4735 }
4736 
4737 /**
4738  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4739  *
4740  * As mark_id is unique, if there's already a registered flow for the mark_id,
4741  * return by increasing the reference counter of the resource. Otherwise, create
4742  * the resource (mcp_res) and flow.
4743  *
4744  * Flow looks like,
4745  *   - If ingress port is ANY and reg_c[1] is mark_id,
4746  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4747  *
4748  * For default flow (zero mark_id), flow is like,
4749  *   - If ingress port is ANY,
4750  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
4751  *
4752  * @param dev
4753  *   Pointer to Ethernet device.
4754  * @param mark_id
4755  *   ID of MARK action, zero means default flow for META.
4756  * @param[out] error
4757  *   Perform verbose error reporting if not NULL.
4758  *
4759  * @return
4760  *   Associated resource on success, NULL otherwise and rte_errno is set.
4761  */
4762 static struct mlx5_flow_mreg_copy_resource *
4763 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
4764 			  struct rte_flow_error *error)
4765 {
4766 	struct mlx5_priv *priv = dev->data->dev_private;
4767 	struct mlx5_list_entry *entry;
4768 	struct mlx5_flow_cb_ctx ctx = {
4769 		.dev = dev,
4770 		.error = error,
4771 		.data = &mark_id,
4772 	};
4773 
4774 	/* Check if already registered. */
4775 	MLX5_ASSERT(priv->mreg_cp_tbl);
4776 	entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx);
4777 	if (!entry)
4778 		return NULL;
4779 	return container_of(entry, struct mlx5_flow_mreg_copy_resource,
4780 			    hlist_ent);
4781 }
4782 
4783 void
4784 flow_dv_mreg_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4785 {
4786 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4787 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4788 	struct rte_eth_dev *dev = tool_ctx;
4789 	struct mlx5_priv *priv = dev->data->dev_private;
4790 
4791 	MLX5_ASSERT(mcp_res->rix_flow);
4792 	flow_list_destroy(dev, MLX5_FLOW_TYPE_MCP, mcp_res->rix_flow);
4793 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4794 }
4795 
4796 /**
4797  * Release flow in RX_CP_TBL.
4798  *
4799  * @param dev
4800  *   Pointer to Ethernet device.
4801  * @flow
4802  *   Parent flow for wich copying is provided.
4803  */
4804 static void
4805 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
4806 			  struct rte_flow *flow)
4807 {
4808 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4809 	struct mlx5_priv *priv = dev->data->dev_private;
4810 
4811 	if (!flow->rix_mreg_copy)
4812 		return;
4813 	mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
4814 				 flow->rix_mreg_copy);
4815 	if (!mcp_res || !priv->mreg_cp_tbl)
4816 		return;
4817 	MLX5_ASSERT(mcp_res->rix_flow);
4818 	mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
4819 	flow->rix_mreg_copy = 0;
4820 }
4821 
4822 /**
4823  * Remove the default copy action from RX_CP_TBL.
4824  *
4825  * This functions is called in the mlx5_dev_start(). No thread safe
4826  * is guaranteed.
4827  *
4828  * @param dev
4829  *   Pointer to Ethernet device.
4830  */
4831 static void
4832 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
4833 {
4834 	struct mlx5_list_entry *entry;
4835 	struct mlx5_priv *priv = dev->data->dev_private;
4836 	struct mlx5_flow_cb_ctx ctx;
4837 	uint32_t mark_id;
4838 
4839 	/* Check if default flow is registered. */
4840 	if (!priv->mreg_cp_tbl)
4841 		return;
4842 	mark_id = MLX5_DEFAULT_COPY_ID;
4843 	ctx.data = &mark_id;
4844 	entry = mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx);
4845 	if (!entry)
4846 		return;
4847 	mlx5_hlist_unregister(priv->mreg_cp_tbl, entry);
4848 }
4849 
4850 /**
4851  * Add the default copy action in in RX_CP_TBL.
4852  *
4853  * This functions is called in the mlx5_dev_start(). No thread safe
4854  * is guaranteed.
4855  *
4856  * @param dev
4857  *   Pointer to Ethernet device.
4858  * @param[out] error
4859  *   Perform verbose error reporting if not NULL.
4860  *
4861  * @return
4862  *   0 for success, negative value otherwise and rte_errno is set.
4863  */
4864 static int
4865 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
4866 				  struct rte_flow_error *error)
4867 {
4868 	struct mlx5_priv *priv = dev->data->dev_private;
4869 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4870 	struct mlx5_flow_cb_ctx ctx;
4871 	uint32_t mark_id;
4872 
4873 	/* Check whether extensive metadata feature is engaged. */
4874 	if (!priv->sh->config.dv_flow_en ||
4875 	    priv->sh->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4876 	    !mlx5_flow_ext_mreg_supported(dev) ||
4877 	    !priv->sh->dv_regc0_mask)
4878 		return 0;
4879 	/*
4880 	 * Add default mreg copy flow may be called multiple time, but
4881 	 * only be called once in stop. Avoid register it twice.
4882 	 */
4883 	mark_id = MLX5_DEFAULT_COPY_ID;
4884 	ctx.data = &mark_id;
4885 	if (mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx))
4886 		return 0;
4887 	mcp_res = flow_mreg_add_copy_action(dev, mark_id, error);
4888 	if (!mcp_res)
4889 		return -rte_errno;
4890 	return 0;
4891 }
4892 
4893 /**
4894  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4895  *
4896  * All the flow having Q/RSS action should be split by
4897  * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
4898  * performs the following,
4899  *   - CQE->flow_tag := reg_c[1] (MARK)
4900  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4901  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
4902  * but there should be a flow per each MARK ID set by MARK action.
4903  *
4904  * For the aforementioned reason, if there's a MARK action in flow's action
4905  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
4906  * the MARK ID to CQE's flow_tag like,
4907  *   - If reg_c[1] is mark_id,
4908  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4909  *
4910  * For SET_META action which stores value in reg_c[0], as the destination is
4911  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
4912  * MARK ID means the default flow. The default flow looks like,
4913  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4914  *
4915  * @param dev
4916  *   Pointer to Ethernet device.
4917  * @param flow
4918  *   Pointer to flow structure.
4919  * @param[in] actions
4920  *   Pointer to the list of actions.
4921  * @param[out] error
4922  *   Perform verbose error reporting if not NULL.
4923  *
4924  * @return
4925  *   0 on success, negative value otherwise and rte_errno is set.
4926  */
4927 static int
4928 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
4929 			    struct rte_flow *flow,
4930 			    const struct rte_flow_action *actions,
4931 			    struct rte_flow_error *error)
4932 {
4933 	struct mlx5_priv *priv = dev->data->dev_private;
4934 	struct mlx5_sh_config *config = &priv->sh->config;
4935 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4936 	const struct rte_flow_action_mark *mark;
4937 
4938 	/* Check whether extensive metadata feature is engaged. */
4939 	if (!config->dv_flow_en ||
4940 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4941 	    !mlx5_flow_ext_mreg_supported(dev) ||
4942 	    !priv->sh->dv_regc0_mask)
4943 		return 0;
4944 	/* Find MARK action. */
4945 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4946 		switch (actions->type) {
4947 		case RTE_FLOW_ACTION_TYPE_FLAG:
4948 			mcp_res = flow_mreg_add_copy_action
4949 				(dev, MLX5_FLOW_MARK_DEFAULT, error);
4950 			if (!mcp_res)
4951 				return -rte_errno;
4952 			flow->rix_mreg_copy = mcp_res->idx;
4953 			return 0;
4954 		case RTE_FLOW_ACTION_TYPE_MARK:
4955 			mark = (const struct rte_flow_action_mark *)
4956 				actions->conf;
4957 			mcp_res =
4958 				flow_mreg_add_copy_action(dev, mark->id, error);
4959 			if (!mcp_res)
4960 				return -rte_errno;
4961 			flow->rix_mreg_copy = mcp_res->idx;
4962 			return 0;
4963 		default:
4964 			break;
4965 		}
4966 	}
4967 	return 0;
4968 }
4969 
4970 #define MLX5_MAX_SPLIT_ACTIONS 24
4971 #define MLX5_MAX_SPLIT_ITEMS 24
4972 
4973 /**
4974  * Split the hairpin flow.
4975  * Since HW can't support encap and push-vlan on Rx, we move these
4976  * actions to Tx.
4977  * If the count action is after the encap then we also
4978  * move the count action. in this case the count will also measure
4979  * the outer bytes.
4980  *
4981  * @param dev
4982  *   Pointer to Ethernet device.
4983  * @param[in] actions
4984  *   Associated actions (list terminated by the END action).
4985  * @param[out] actions_rx
4986  *   Rx flow actions.
4987  * @param[out] actions_tx
4988  *   Tx flow actions..
4989  * @param[out] pattern_tx
4990  *   The pattern items for the Tx flow.
4991  * @param[out] flow_id
4992  *   The flow ID connected to this flow.
4993  *
4994  * @return
4995  *   0 on success.
4996  */
4997 static int
4998 flow_hairpin_split(struct rte_eth_dev *dev,
4999 		   const struct rte_flow_action actions[],
5000 		   struct rte_flow_action actions_rx[],
5001 		   struct rte_flow_action actions_tx[],
5002 		   struct rte_flow_item pattern_tx[],
5003 		   uint32_t flow_id)
5004 {
5005 	const struct rte_flow_action_raw_encap *raw_encap;
5006 	const struct rte_flow_action_raw_decap *raw_decap;
5007 	struct mlx5_rte_flow_action_set_tag *set_tag;
5008 	struct rte_flow_action *tag_action;
5009 	struct mlx5_rte_flow_item_tag *tag_item;
5010 	struct rte_flow_item *item;
5011 	char *addr;
5012 	int encap = 0;
5013 
5014 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5015 		switch (actions->type) {
5016 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
5017 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
5018 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5019 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5020 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
5021 			rte_memcpy(actions_tx, actions,
5022 			       sizeof(struct rte_flow_action));
5023 			actions_tx++;
5024 			break;
5025 		case RTE_FLOW_ACTION_TYPE_COUNT:
5026 			if (encap) {
5027 				rte_memcpy(actions_tx, actions,
5028 					   sizeof(struct rte_flow_action));
5029 				actions_tx++;
5030 			} else {
5031 				rte_memcpy(actions_rx, actions,
5032 					   sizeof(struct rte_flow_action));
5033 				actions_rx++;
5034 			}
5035 			break;
5036 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5037 			raw_encap = actions->conf;
5038 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) {
5039 				memcpy(actions_tx, actions,
5040 				       sizeof(struct rte_flow_action));
5041 				actions_tx++;
5042 				encap = 1;
5043 			} else {
5044 				rte_memcpy(actions_rx, actions,
5045 					   sizeof(struct rte_flow_action));
5046 				actions_rx++;
5047 			}
5048 			break;
5049 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5050 			raw_decap = actions->conf;
5051 			if (raw_decap->size < MLX5_ENCAPSULATION_DECISION_SIZE) {
5052 				memcpy(actions_tx, actions,
5053 				       sizeof(struct rte_flow_action));
5054 				actions_tx++;
5055 			} else {
5056 				rte_memcpy(actions_rx, actions,
5057 					   sizeof(struct rte_flow_action));
5058 				actions_rx++;
5059 			}
5060 			break;
5061 		default:
5062 			rte_memcpy(actions_rx, actions,
5063 				   sizeof(struct rte_flow_action));
5064 			actions_rx++;
5065 			break;
5066 		}
5067 	}
5068 	/* Add set meta action and end action for the Rx flow. */
5069 	tag_action = actions_rx;
5070 	tag_action->type = (enum rte_flow_action_type)
5071 			   MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5072 	actions_rx++;
5073 	rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
5074 	actions_rx++;
5075 	set_tag = (void *)actions_rx;
5076 	*set_tag = (struct mlx5_rte_flow_action_set_tag) {
5077 		.id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL),
5078 		.data = flow_id,
5079 	};
5080 	MLX5_ASSERT(set_tag->id > REG_NON);
5081 	tag_action->conf = set_tag;
5082 	/* Create Tx item list. */
5083 	rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
5084 	addr = (void *)&pattern_tx[2];
5085 	item = pattern_tx;
5086 	item->type = (enum rte_flow_item_type)
5087 		     MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5088 	tag_item = (void *)addr;
5089 	tag_item->data = flow_id;
5090 	tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
5091 	MLX5_ASSERT(set_tag->id > REG_NON);
5092 	item->spec = tag_item;
5093 	addr += sizeof(struct mlx5_rte_flow_item_tag);
5094 	tag_item = (void *)addr;
5095 	tag_item->data = UINT32_MAX;
5096 	tag_item->id = UINT16_MAX;
5097 	item->mask = tag_item;
5098 	item->last = NULL;
5099 	item++;
5100 	item->type = RTE_FLOW_ITEM_TYPE_END;
5101 	return 0;
5102 }
5103 
5104 /**
5105  * The last stage of splitting chain, just creates the subflow
5106  * without any modification.
5107  *
5108  * @param[in] dev
5109  *   Pointer to Ethernet device.
5110  * @param[in] flow
5111  *   Parent flow structure pointer.
5112  * @param[in, out] sub_flow
5113  *   Pointer to return the created subflow, may be NULL.
5114  * @param[in] attr
5115  *   Flow rule attributes.
5116  * @param[in] items
5117  *   Pattern specification (list terminated by the END pattern item).
5118  * @param[in] actions
5119  *   Associated actions (list terminated by the END action).
5120  * @param[in] flow_split_info
5121  *   Pointer to flow split info structure.
5122  * @param[out] error
5123  *   Perform verbose error reporting if not NULL.
5124  * @return
5125  *   0 on success, negative value otherwise
5126  */
5127 static int
5128 flow_create_split_inner(struct rte_eth_dev *dev,
5129 			struct rte_flow *flow,
5130 			struct mlx5_flow **sub_flow,
5131 			const struct rte_flow_attr *attr,
5132 			const struct rte_flow_item items[],
5133 			const struct rte_flow_action actions[],
5134 			struct mlx5_flow_split_info *flow_split_info,
5135 			struct rte_flow_error *error)
5136 {
5137 	struct mlx5_flow *dev_flow;
5138 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
5139 
5140 	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
5141 				    flow_split_info->flow_idx, error);
5142 	if (!dev_flow)
5143 		return -rte_errno;
5144 	dev_flow->flow = flow;
5145 	dev_flow->external = flow_split_info->external;
5146 	dev_flow->skip_scale = flow_split_info->skip_scale;
5147 	/* Subflow object was created, we must include one in the list. */
5148 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
5149 		      dev_flow->handle, next);
5150 	/*
5151 	 * If dev_flow is as one of the suffix flow, some actions in suffix
5152 	 * flow may need some user defined item layer flags, and pass the
5153 	 * Metadata rxq mark flag to suffix flow as well.
5154 	 */
5155 	if (flow_split_info->prefix_layers)
5156 		dev_flow->handle->layers = flow_split_info->prefix_layers;
5157 	if (flow_split_info->prefix_mark) {
5158 		MLX5_ASSERT(wks);
5159 		wks->mark = 1;
5160 	}
5161 	if (sub_flow)
5162 		*sub_flow = dev_flow;
5163 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5164 	dev_flow->dv.table_id = flow_split_info->table_id;
5165 #endif
5166 	return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
5167 }
5168 
5169 /**
5170  * Get the sub policy of a meter.
5171  *
5172  * @param[in] dev
5173  *   Pointer to Ethernet device.
5174  * @param[in] flow
5175  *   Parent flow structure pointer.
5176  * @param wks
5177  *   Pointer to thread flow work space.
5178  * @param[in] attr
5179  *   Flow rule attributes.
5180  * @param[in] items
5181  *   Pattern specification (list terminated by the END pattern item).
5182  * @param[out] error
5183  *   Perform verbose error reporting if not NULL.
5184  *
5185  * @return
5186  *   Pointer to the meter sub policy, NULL otherwise and rte_errno is set.
5187  */
5188 static struct mlx5_flow_meter_sub_policy *
5189 get_meter_sub_policy(struct rte_eth_dev *dev,
5190 		     struct rte_flow *flow,
5191 		     struct mlx5_flow_workspace *wks,
5192 		     const struct rte_flow_attr *attr,
5193 		     const struct rte_flow_item items[],
5194 		     struct rte_flow_error *error)
5195 {
5196 	struct mlx5_flow_meter_policy *policy;
5197 	struct mlx5_flow_meter_policy *final_policy;
5198 	struct mlx5_flow_meter_sub_policy *sub_policy = NULL;
5199 
5200 	policy = wks->policy;
5201 	final_policy = policy->is_hierarchy ? wks->final_policy : policy;
5202 	if (final_policy->is_rss || final_policy->is_queue) {
5203 		struct mlx5_flow_rss_desc rss_desc_v[MLX5_MTR_RTE_COLORS];
5204 		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS] = {0};
5205 		uint32_t i;
5206 
5207 		/*
5208 		 * This is a tmp dev_flow,
5209 		 * no need to register any matcher for it in translate.
5210 		 */
5211 		wks->skip_matcher_reg = 1;
5212 		for (i = 0; i < MLX5_MTR_RTE_COLORS; i++) {
5213 			struct mlx5_flow dev_flow = {0};
5214 			struct mlx5_flow_handle dev_handle = { {0} };
5215 			uint8_t fate = final_policy->act_cnt[i].fate_action;
5216 
5217 			if (fate == MLX5_FLOW_FATE_SHARED_RSS) {
5218 				const struct rte_flow_action_rss *rss_act =
5219 					final_policy->act_cnt[i].rss->conf;
5220 				struct rte_flow_action rss_actions[2] = {
5221 					[0] = {
5222 					.type = RTE_FLOW_ACTION_TYPE_RSS,
5223 					.conf = rss_act,
5224 					},
5225 					[1] = {
5226 					.type = RTE_FLOW_ACTION_TYPE_END,
5227 					.conf = NULL,
5228 					}
5229 				};
5230 
5231 				dev_flow.handle = &dev_handle;
5232 				dev_flow.ingress = attr->ingress;
5233 				dev_flow.flow = flow;
5234 				dev_flow.external = 0;
5235 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5236 				dev_flow.dv.transfer = attr->transfer;
5237 #endif
5238 				/**
5239 				 * Translate RSS action to get rss hash fields.
5240 				 */
5241 				if (flow_drv_translate(dev, &dev_flow, attr,
5242 						items, rss_actions, error))
5243 					goto exit;
5244 				rss_desc_v[i] = wks->rss_desc;
5245 				rss_desc_v[i].key_len = MLX5_RSS_HASH_KEY_LEN;
5246 				rss_desc_v[i].hash_fields =
5247 						dev_flow.hash_fields;
5248 				rss_desc_v[i].queue_num =
5249 						rss_desc_v[i].hash_fields ?
5250 						rss_desc_v[i].queue_num : 1;
5251 				rss_desc_v[i].tunnel =
5252 						!!(dev_flow.handle->layers &
5253 						   MLX5_FLOW_LAYER_TUNNEL);
5254 				/* Use the RSS queues in the containers. */
5255 				rss_desc_v[i].queue =
5256 					(uint16_t *)(uintptr_t)rss_act->queue;
5257 				rss_desc[i] = &rss_desc_v[i];
5258 			} else if (fate == MLX5_FLOW_FATE_QUEUE) {
5259 				/* This is queue action. */
5260 				rss_desc_v[i] = wks->rss_desc;
5261 				rss_desc_v[i].key_len = 0;
5262 				rss_desc_v[i].hash_fields = 0;
5263 				rss_desc_v[i].queue =
5264 					&final_policy->act_cnt[i].queue;
5265 				rss_desc_v[i].queue_num = 1;
5266 				rss_desc[i] = &rss_desc_v[i];
5267 			} else {
5268 				rss_desc[i] = NULL;
5269 			}
5270 		}
5271 		sub_policy = flow_drv_meter_sub_policy_rss_prepare(dev,
5272 						flow, policy, rss_desc);
5273 	} else {
5274 		enum mlx5_meter_domain mtr_domain =
5275 			attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5276 				(attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5277 						MLX5_MTR_DOMAIN_INGRESS);
5278 		sub_policy = policy->sub_policys[mtr_domain][0];
5279 	}
5280 	if (!sub_policy)
5281 		rte_flow_error_set(error, EINVAL,
5282 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5283 				   "Failed to get meter sub-policy.");
5284 exit:
5285 	return sub_policy;
5286 }
5287 
5288 /**
5289  * Split the meter flow.
5290  *
5291  * As meter flow will split to three sub flow, other than meter
5292  * action, the other actions make sense to only meter accepts
5293  * the packet. If it need to be dropped, no other additional
5294  * actions should be take.
5295  *
5296  * One kind of special action which decapsulates the L3 tunnel
5297  * header will be in the prefix sub flow, as not to take the
5298  * L3 tunnel header into account.
5299  *
5300  * @param[in] dev
5301  *   Pointer to Ethernet device.
5302  * @param[in] flow
5303  *   Parent flow structure pointer.
5304  * @param wks
5305  *   Pointer to thread flow work space.
5306  * @param[in] attr
5307  *   Flow rule attributes.
5308  * @param[in] items
5309  *   Pattern specification (list terminated by the END pattern item).
5310  * @param[out] sfx_items
5311  *   Suffix flow match items (list terminated by the END pattern item).
5312  * @param[in] actions
5313  *   Associated actions (list terminated by the END action).
5314  * @param[out] actions_sfx
5315  *   Suffix flow actions.
5316  * @param[out] actions_pre
5317  *   Prefix flow actions.
5318  * @param[out] mtr_flow_id
5319  *   Pointer to meter flow id.
5320  * @param[out] error
5321  *   Perform verbose error reporting if not NULL.
5322  *
5323  * @return
5324  *   0 on success, a negative errno value otherwise and rte_errno is set.
5325  */
5326 static int
5327 flow_meter_split_prep(struct rte_eth_dev *dev,
5328 		      struct rte_flow *flow,
5329 		      struct mlx5_flow_workspace *wks,
5330 		      const struct rte_flow_attr *attr,
5331 		      const struct rte_flow_item items[],
5332 		      struct rte_flow_item sfx_items[],
5333 		      const struct rte_flow_action actions[],
5334 		      struct rte_flow_action actions_sfx[],
5335 		      struct rte_flow_action actions_pre[],
5336 		      uint32_t *mtr_flow_id,
5337 		      struct rte_flow_error *error)
5338 {
5339 	struct mlx5_priv *priv = dev->data->dev_private;
5340 	struct mlx5_flow_meter_info *fm = wks->fm;
5341 	struct rte_flow_action *tag_action = NULL;
5342 	struct rte_flow_item *tag_item;
5343 	struct mlx5_rte_flow_action_set_tag *set_tag;
5344 	const struct rte_flow_action_raw_encap *raw_encap;
5345 	const struct rte_flow_action_raw_decap *raw_decap;
5346 	struct mlx5_rte_flow_item_tag *tag_item_spec;
5347 	struct mlx5_rte_flow_item_tag *tag_item_mask;
5348 	uint32_t tag_id = 0;
5349 	struct rte_flow_item *vlan_item_dst = NULL;
5350 	const struct rte_flow_item *vlan_item_src = NULL;
5351 	const struct rte_flow_item *orig_items = items;
5352 	struct rte_flow_action *hw_mtr_action;
5353 	struct rte_flow_action *action_pre_head = NULL;
5354 	int32_t flow_src_port = priv->representor_id;
5355 	bool mtr_first;
5356 	uint8_t mtr_id_offset = priv->mtr_reg_share ? MLX5_MTR_COLOR_BITS : 0;
5357 	uint8_t mtr_reg_bits = priv->mtr_reg_share ?
5358 				MLX5_MTR_IDLE_BITS_IN_COLOR_REG : MLX5_REG_BITS;
5359 	uint32_t flow_id = 0;
5360 	uint32_t flow_id_reversed = 0;
5361 	uint8_t flow_id_bits = 0;
5362 	bool after_meter = false;
5363 	int shift;
5364 
5365 	/* Prepare the suffix subflow items. */
5366 	tag_item = sfx_items++;
5367 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
5368 		struct mlx5_priv *port_priv;
5369 		const struct rte_flow_item_port_id *pid_v;
5370 		int item_type = items->type;
5371 
5372 		switch (item_type) {
5373 		case RTE_FLOW_ITEM_TYPE_PORT_ID:
5374 			pid_v = items->spec;
5375 			MLX5_ASSERT(pid_v);
5376 			port_priv = mlx5_port_to_eswitch_info(pid_v->id, false);
5377 			if (!port_priv)
5378 				return rte_flow_error_set(error,
5379 						rte_errno,
5380 						RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
5381 						pid_v,
5382 						"Failed to get port info.");
5383 			flow_src_port = port_priv->representor_id;
5384 			if (!fm->def_policy && wks->policy->is_hierarchy &&
5385 			    flow_src_port != priv->representor_id) {
5386 				if (flow_drv_mtr_hierarchy_rule_create(dev,
5387 								flow, fm,
5388 								flow_src_port,
5389 								items,
5390 								error))
5391 					return -rte_errno;
5392 			}
5393 			memcpy(sfx_items, items, sizeof(*sfx_items));
5394 			sfx_items++;
5395 			break;
5396 		case RTE_FLOW_ITEM_TYPE_VLAN:
5397 			/* Determine if copy vlan item below. */
5398 			vlan_item_src = items;
5399 			vlan_item_dst = sfx_items++;
5400 			vlan_item_dst->type = RTE_FLOW_ITEM_TYPE_VOID;
5401 			break;
5402 		default:
5403 			break;
5404 		}
5405 	}
5406 	sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
5407 	sfx_items++;
5408 	mtr_first = priv->sh->meter_aso_en &&
5409 		(attr->egress || (attr->transfer && flow_src_port != UINT16_MAX));
5410 	/* For ASO meter, meter must be before tag in TX direction. */
5411 	if (mtr_first) {
5412 		action_pre_head = actions_pre++;
5413 		/* Leave space for tag action. */
5414 		tag_action = actions_pre++;
5415 	}
5416 	/* Prepare the actions for prefix and suffix flow. */
5417 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5418 		struct rte_flow_action *action_cur = NULL;
5419 
5420 		switch (actions->type) {
5421 		case RTE_FLOW_ACTION_TYPE_METER:
5422 			if (mtr_first) {
5423 				action_cur = action_pre_head;
5424 			} else {
5425 				/* Leave space for tag action. */
5426 				tag_action = actions_pre++;
5427 				action_cur = actions_pre++;
5428 			}
5429 			after_meter = true;
5430 			break;
5431 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5432 		case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5433 			action_cur = actions_pre++;
5434 			break;
5435 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5436 			raw_encap = actions->conf;
5437 			if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
5438 				action_cur = actions_pre++;
5439 			break;
5440 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5441 			raw_decap = actions->conf;
5442 			if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
5443 				action_cur = actions_pre++;
5444 			break;
5445 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5446 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5447 			if (vlan_item_dst && vlan_item_src) {
5448 				memcpy(vlan_item_dst, vlan_item_src,
5449 					sizeof(*vlan_item_dst));
5450 				/*
5451 				 * Convert to internal match item, it is used
5452 				 * for vlan push and set vid.
5453 				 */
5454 				vlan_item_dst->type = (enum rte_flow_item_type)
5455 						MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
5456 			}
5457 			break;
5458 		case RTE_FLOW_ACTION_TYPE_COUNT:
5459 			if (fm->def_policy)
5460 				action_cur = after_meter ?
5461 						actions_sfx++ : actions_pre++;
5462 			break;
5463 		default:
5464 			break;
5465 		}
5466 		if (!action_cur)
5467 			action_cur = (fm->def_policy) ?
5468 					actions_sfx++ : actions_pre++;
5469 		memcpy(action_cur, actions, sizeof(struct rte_flow_action));
5470 	}
5471 	/* Add end action to the actions. */
5472 	actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
5473 	if (priv->sh->meter_aso_en) {
5474 		/**
5475 		 * For ASO meter, need to add an extra jump action explicitly,
5476 		 * to jump from meter to policer table.
5477 		 */
5478 		struct mlx5_flow_meter_sub_policy *sub_policy;
5479 		struct mlx5_flow_tbl_data_entry *tbl_data;
5480 
5481 		if (!fm->def_policy) {
5482 			sub_policy = get_meter_sub_policy(dev, flow, wks,
5483 							  attr, orig_items,
5484 							  error);
5485 			if (!sub_policy)
5486 				return -rte_errno;
5487 		} else {
5488 			enum mlx5_meter_domain mtr_domain =
5489 			attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5490 				(attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5491 						MLX5_MTR_DOMAIN_INGRESS);
5492 
5493 			sub_policy =
5494 			&priv->sh->mtrmng->def_policy[mtr_domain]->sub_policy;
5495 		}
5496 		tbl_data = container_of(sub_policy->tbl_rsc,
5497 					struct mlx5_flow_tbl_data_entry, tbl);
5498 		hw_mtr_action = actions_pre++;
5499 		hw_mtr_action->type = (enum rte_flow_action_type)
5500 				      MLX5_RTE_FLOW_ACTION_TYPE_JUMP;
5501 		hw_mtr_action->conf = tbl_data->jump.action;
5502 	}
5503 	actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
5504 	actions_pre++;
5505 	if (!tag_action)
5506 		return rte_flow_error_set(error, ENOMEM,
5507 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5508 					  NULL, "No tag action space.");
5509 	if (!mtr_flow_id) {
5510 		tag_action->type = RTE_FLOW_ACTION_TYPE_VOID;
5511 		goto exit;
5512 	}
5513 	/* Only default-policy Meter creates mtr flow id. */
5514 	if (fm->def_policy) {
5515 		mlx5_ipool_malloc(fm->flow_ipool, &tag_id);
5516 		if (!tag_id)
5517 			return rte_flow_error_set(error, ENOMEM,
5518 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5519 					"Failed to allocate meter flow id.");
5520 		flow_id = tag_id - 1;
5521 		flow_id_bits = (!flow_id) ? 1 :
5522 				(MLX5_REG_BITS - __builtin_clz(flow_id));
5523 		if ((flow_id_bits + priv->sh->mtrmng->max_mtr_bits) >
5524 		    mtr_reg_bits) {
5525 			mlx5_ipool_free(fm->flow_ipool, tag_id);
5526 			return rte_flow_error_set(error, EINVAL,
5527 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5528 					"Meter flow id exceeds max limit.");
5529 		}
5530 		if (flow_id_bits > priv->sh->mtrmng->max_mtr_flow_bits)
5531 			priv->sh->mtrmng->max_mtr_flow_bits = flow_id_bits;
5532 	}
5533 	/* Build tag actions and items for meter_id/meter flow_id. */
5534 	set_tag = (struct mlx5_rte_flow_action_set_tag *)actions_pre;
5535 	tag_item_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
5536 	tag_item_mask = tag_item_spec + 1;
5537 	/* Both flow_id and meter_id share the same register. */
5538 	*set_tag = (struct mlx5_rte_flow_action_set_tag) {
5539 		.id = (enum modify_reg)mlx5_flow_get_reg_id(dev, MLX5_MTR_ID,
5540 							    0, error),
5541 		.offset = mtr_id_offset,
5542 		.length = mtr_reg_bits,
5543 		.data = flow->meter,
5544 	};
5545 	/*
5546 	 * The color Reg bits used by flow_id are growing from
5547 	 * msb to lsb, so must do bit reverse for flow_id val in RegC.
5548 	 */
5549 	for (shift = 0; shift < flow_id_bits; shift++)
5550 		flow_id_reversed = (flow_id_reversed << 1) |
5551 				((flow_id >> shift) & 0x1);
5552 	set_tag->data |=
5553 		flow_id_reversed << (mtr_reg_bits - flow_id_bits);
5554 	tag_item_spec->id = set_tag->id;
5555 	tag_item_spec->data = set_tag->data << mtr_id_offset;
5556 	tag_item_mask->data = UINT32_MAX << mtr_id_offset;
5557 	tag_action->type = (enum rte_flow_action_type)
5558 				MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5559 	tag_action->conf = set_tag;
5560 	tag_item->type = (enum rte_flow_item_type)
5561 				MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5562 	tag_item->spec = tag_item_spec;
5563 	tag_item->last = NULL;
5564 	tag_item->mask = tag_item_mask;
5565 exit:
5566 	if (mtr_flow_id)
5567 		*mtr_flow_id = tag_id;
5568 	return 0;
5569 }
5570 
5571 /**
5572  * Split action list having QUEUE/RSS for metadata register copy.
5573  *
5574  * Once Q/RSS action is detected in user's action list, the flow action
5575  * should be split in order to copy metadata registers, which will happen in
5576  * RX_CP_TBL like,
5577  *   - CQE->flow_tag := reg_c[1] (MARK)
5578  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
5579  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
5580  * This is because the last action of each flow must be a terminal action
5581  * (QUEUE, RSS or DROP).
5582  *
5583  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
5584  * stored and kept in the mlx5_flow structure per each sub_flow.
5585  *
5586  * The Q/RSS action is replaced with,
5587  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
5588  * And the following JUMP action is added at the end,
5589  *   - JUMP, to RX_CP_TBL.
5590  *
5591  * A flow to perform remained Q/RSS action will be created in RX_ACT_TBL by
5592  * flow_create_split_metadata() routine. The flow will look like,
5593  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
5594  *
5595  * @param dev
5596  *   Pointer to Ethernet device.
5597  * @param[out] split_actions
5598  *   Pointer to store split actions to jump to CP_TBL.
5599  * @param[in] actions
5600  *   Pointer to the list of original flow actions.
5601  * @param[in] qrss
5602  *   Pointer to the Q/RSS action.
5603  * @param[in] actions_n
5604  *   Number of original actions.
5605  * @param[in] mtr_sfx
5606  *   Check if it is in meter suffix table.
5607  * @param[out] error
5608  *   Perform verbose error reporting if not NULL.
5609  *
5610  * @return
5611  *   non-zero unique flow_id on success, otherwise 0 and
5612  *   error/rte_error are set.
5613  */
5614 static uint32_t
5615 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
5616 			  struct rte_flow_action *split_actions,
5617 			  const struct rte_flow_action *actions,
5618 			  const struct rte_flow_action *qrss,
5619 			  int actions_n, int mtr_sfx,
5620 			  struct rte_flow_error *error)
5621 {
5622 	struct mlx5_priv *priv = dev->data->dev_private;
5623 	struct mlx5_rte_flow_action_set_tag *set_tag;
5624 	struct rte_flow_action_jump *jump;
5625 	const int qrss_idx = qrss - actions;
5626 	uint32_t flow_id = 0;
5627 	int ret = 0;
5628 
5629 	/*
5630 	 * Given actions will be split
5631 	 * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
5632 	 * - Add jump to mreg CP_TBL.
5633 	 * As a result, there will be one more action.
5634 	 */
5635 	memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
5636 	/* Count MLX5_RTE_FLOW_ACTION_TYPE_TAG. */
5637 	++actions_n;
5638 	set_tag = (void *)(split_actions + actions_n);
5639 	/*
5640 	 * If we are not the meter suffix flow, add the tag action.
5641 	 * Since meter suffix flow already has the tag added.
5642 	 */
5643 	if (!mtr_sfx) {
5644 		/*
5645 		 * Allocate the new subflow ID. This one is unique within
5646 		 * device and not shared with representors. Otherwise,
5647 		 * we would have to resolve multi-thread access synch
5648 		 * issue. Each flow on the shared device is appended
5649 		 * with source vport identifier, so the resulting
5650 		 * flows will be unique in the shared (by master and
5651 		 * representors) domain even if they have coinciding
5652 		 * IDs.
5653 		 */
5654 		mlx5_ipool_malloc(priv->sh->ipool
5655 				  [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id);
5656 		if (!flow_id)
5657 			return rte_flow_error_set(error, ENOMEM,
5658 						  RTE_FLOW_ERROR_TYPE_ACTION,
5659 						  NULL, "can't allocate id "
5660 						  "for split Q/RSS subflow");
5661 		/* Internal SET_TAG action to set flow ID. */
5662 		*set_tag = (struct mlx5_rte_flow_action_set_tag){
5663 			.data = flow_id,
5664 		};
5665 		ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
5666 		if (ret < 0)
5667 			return ret;
5668 		set_tag->id = ret;
5669 		/* Construct new actions array. */
5670 		/* Replace QUEUE/RSS action. */
5671 		split_actions[qrss_idx] = (struct rte_flow_action){
5672 			.type = (enum rte_flow_action_type)
5673 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5674 			.conf = set_tag,
5675 		};
5676 	} else {
5677 		/*
5678 		 * If we are the suffix flow of meter, tag already exist.
5679 		 * Set the QUEUE/RSS action to void.
5680 		 */
5681 		split_actions[qrss_idx].type = RTE_FLOW_ACTION_TYPE_VOID;
5682 	}
5683 	/* JUMP action to jump to mreg copy table (CP_TBL). */
5684 	jump = (void *)(set_tag + 1);
5685 	*jump = (struct rte_flow_action_jump){
5686 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
5687 	};
5688 	split_actions[actions_n - 2] = (struct rte_flow_action){
5689 		.type = RTE_FLOW_ACTION_TYPE_JUMP,
5690 		.conf = jump,
5691 	};
5692 	split_actions[actions_n - 1] = (struct rte_flow_action){
5693 		.type = RTE_FLOW_ACTION_TYPE_END,
5694 	};
5695 	return flow_id;
5696 }
5697 
5698 /**
5699  * Extend the given action list for Tx metadata copy.
5700  *
5701  * Copy the given action list to the ext_actions and add flow metadata register
5702  * copy action in order to copy reg_a set by WQE to reg_c[0].
5703  *
5704  * @param[out] ext_actions
5705  *   Pointer to the extended action list.
5706  * @param[in] actions
5707  *   Pointer to the list of actions.
5708  * @param[in] actions_n
5709  *   Number of actions in the list.
5710  * @param[out] error
5711  *   Perform verbose error reporting if not NULL.
5712  * @param[in] encap_idx
5713  *   The encap action index.
5714  *
5715  * @return
5716  *   0 on success, negative value otherwise
5717  */
5718 static int
5719 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
5720 		       struct rte_flow_action *ext_actions,
5721 		       const struct rte_flow_action *actions,
5722 		       int actions_n, struct rte_flow_error *error,
5723 		       int encap_idx)
5724 {
5725 	struct mlx5_flow_action_copy_mreg *cp_mreg =
5726 		(struct mlx5_flow_action_copy_mreg *)
5727 			(ext_actions + actions_n + 1);
5728 	int ret;
5729 
5730 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
5731 	if (ret < 0)
5732 		return ret;
5733 	cp_mreg->dst = ret;
5734 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
5735 	if (ret < 0)
5736 		return ret;
5737 	cp_mreg->src = ret;
5738 	if (encap_idx != 0)
5739 		memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
5740 	if (encap_idx == actions_n - 1) {
5741 		ext_actions[actions_n - 1] = (struct rte_flow_action){
5742 			.type = (enum rte_flow_action_type)
5743 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5744 			.conf = cp_mreg,
5745 		};
5746 		ext_actions[actions_n] = (struct rte_flow_action){
5747 			.type = RTE_FLOW_ACTION_TYPE_END,
5748 		};
5749 	} else {
5750 		ext_actions[encap_idx] = (struct rte_flow_action){
5751 			.type = (enum rte_flow_action_type)
5752 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5753 			.conf = cp_mreg,
5754 		};
5755 		memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
5756 				sizeof(*ext_actions) * (actions_n - encap_idx));
5757 	}
5758 	return 0;
5759 }
5760 
5761 /**
5762  * Check the match action from the action list.
5763  *
5764  * @param[in] actions
5765  *   Pointer to the list of actions.
5766  * @param[in] attr
5767  *   Flow rule attributes.
5768  * @param[in] action
5769  *   The action to be check if exist.
5770  * @param[out] match_action_pos
5771  *   Pointer to the position of the matched action if exists, otherwise is -1.
5772  * @param[out] qrss_action_pos
5773  *   Pointer to the position of the Queue/RSS action if exists, otherwise is -1.
5774  * @param[out] modify_after_mirror
5775  *   Pointer to the flag of modify action after FDB mirroring.
5776  *
5777  * @return
5778  *   > 0 the total number of actions.
5779  *   0 if not found match action in action list.
5780  */
5781 static int
5782 flow_check_match_action(const struct rte_flow_action actions[],
5783 			const struct rte_flow_attr *attr,
5784 			enum rte_flow_action_type action,
5785 			int *match_action_pos, int *qrss_action_pos,
5786 			int *modify_after_mirror)
5787 {
5788 	const struct rte_flow_action_sample *sample;
5789 	const struct rte_flow_action_raw_decap *decap;
5790 	int actions_n = 0;
5791 	uint32_t ratio = 0;
5792 	int sub_type = 0;
5793 	int flag = 0;
5794 	int fdb_mirror = 0;
5795 
5796 	*match_action_pos = -1;
5797 	*qrss_action_pos = -1;
5798 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5799 		if (actions->type == action) {
5800 			flag = 1;
5801 			*match_action_pos = actions_n;
5802 		}
5803 		switch (actions->type) {
5804 		case RTE_FLOW_ACTION_TYPE_QUEUE:
5805 		case RTE_FLOW_ACTION_TYPE_RSS:
5806 			*qrss_action_pos = actions_n;
5807 			break;
5808 		case RTE_FLOW_ACTION_TYPE_SAMPLE:
5809 			sample = actions->conf;
5810 			ratio = sample->ratio;
5811 			sub_type = ((const struct rte_flow_action *)
5812 					(sample->actions))->type;
5813 			if (ratio == 1 && attr->transfer)
5814 				fdb_mirror = 1;
5815 			break;
5816 		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
5817 		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
5818 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
5819 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
5820 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
5821 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
5822 		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
5823 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
5824 		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
5825 		case RTE_FLOW_ACTION_TYPE_SET_TTL:
5826 		case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
5827 		case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
5828 		case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
5829 		case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
5830 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
5831 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
5832 		case RTE_FLOW_ACTION_TYPE_FLAG:
5833 		case RTE_FLOW_ACTION_TYPE_MARK:
5834 		case RTE_FLOW_ACTION_TYPE_SET_META:
5835 		case RTE_FLOW_ACTION_TYPE_SET_TAG:
5836 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
5837 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5838 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5839 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
5840 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5841 		case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5842 		case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
5843 		case RTE_FLOW_ACTION_TYPE_METER:
5844 			if (fdb_mirror)
5845 				*modify_after_mirror = 1;
5846 			break;
5847 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5848 			decap = actions->conf;
5849 			while ((++actions)->type == RTE_FLOW_ACTION_TYPE_VOID)
5850 				;
5851 			actions_n++;
5852 			if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
5853 				const struct rte_flow_action_raw_encap *encap =
5854 								actions->conf;
5855 				if (decap->size <=
5856 					MLX5_ENCAPSULATION_DECISION_SIZE &&
5857 				    encap->size >
5858 					MLX5_ENCAPSULATION_DECISION_SIZE)
5859 					/* L3 encap. */
5860 					break;
5861 			}
5862 			if (fdb_mirror)
5863 				*modify_after_mirror = 1;
5864 			break;
5865 		default:
5866 			break;
5867 		}
5868 		actions_n++;
5869 	}
5870 	if (flag && fdb_mirror && !*modify_after_mirror) {
5871 		/* FDB mirroring uses the destination array to implement
5872 		 * instead of FLOW_SAMPLER object.
5873 		 */
5874 		if (sub_type != RTE_FLOW_ACTION_TYPE_END)
5875 			flag = 0;
5876 	}
5877 	/* Count RTE_FLOW_ACTION_TYPE_END. */
5878 	return flag ? actions_n + 1 : 0;
5879 }
5880 
5881 #define SAMPLE_SUFFIX_ITEM 3
5882 
5883 /**
5884  * Split the sample flow.
5885  *
5886  * As sample flow will split to two sub flow, sample flow with
5887  * sample action, the other actions will move to new suffix flow.
5888  *
5889  * Also add unique tag id with tag action in the sample flow,
5890  * the same tag id will be as match in the suffix flow.
5891  *
5892  * @param dev
5893  *   Pointer to Ethernet device.
5894  * @param[in] add_tag
5895  *   Add extra tag action flag.
5896  * @param[out] sfx_items
5897  *   Suffix flow match items (list terminated by the END pattern item).
5898  * @param[in] actions
5899  *   Associated actions (list terminated by the END action).
5900  * @param[out] actions_sfx
5901  *   Suffix flow actions.
5902  * @param[out] actions_pre
5903  *   Prefix flow actions.
5904  * @param[in] actions_n
5905  *  The total number of actions.
5906  * @param[in] sample_action_pos
5907  *   The sample action position.
5908  * @param[in] qrss_action_pos
5909  *   The Queue/RSS action position.
5910  * @param[in] jump_table
5911  *   Add extra jump action flag.
5912  * @param[out] error
5913  *   Perform verbose error reporting if not NULL.
5914  *
5915  * @return
5916  *   0 on success, or unique flow_id, a negative errno value
5917  *   otherwise and rte_errno is set.
5918  */
5919 static int
5920 flow_sample_split_prep(struct rte_eth_dev *dev,
5921 		       int add_tag,
5922 		       const struct rte_flow_item items[],
5923 		       struct rte_flow_item sfx_items[],
5924 		       const struct rte_flow_action actions[],
5925 		       struct rte_flow_action actions_sfx[],
5926 		       struct rte_flow_action actions_pre[],
5927 		       int actions_n,
5928 		       int sample_action_pos,
5929 		       int qrss_action_pos,
5930 		       int jump_table,
5931 		       struct rte_flow_error *error)
5932 {
5933 	struct mlx5_priv *priv = dev->data->dev_private;
5934 	struct mlx5_rte_flow_action_set_tag *set_tag;
5935 	struct mlx5_rte_flow_item_tag *tag_spec;
5936 	struct mlx5_rte_flow_item_tag *tag_mask;
5937 	struct rte_flow_action_jump *jump_action;
5938 	uint32_t tag_id = 0;
5939 	int append_index = 0;
5940 	int set_tag_idx = -1;
5941 	int index;
5942 	int ret;
5943 
5944 	if (sample_action_pos < 0)
5945 		return rte_flow_error_set(error, EINVAL,
5946 					  RTE_FLOW_ERROR_TYPE_ACTION,
5947 					  NULL, "invalid position of sample "
5948 					  "action in list");
5949 	/* Prepare the actions for prefix and suffix flow. */
5950 	if (add_tag) {
5951 		/* Update the new added tag action index preceding
5952 		 * the PUSH_VLAN or ENCAP action.
5953 		 */
5954 		const struct rte_flow_action_raw_encap *raw_encap;
5955 		const struct rte_flow_action *action = actions;
5956 		int encap_idx;
5957 		int action_idx = 0;
5958 		int raw_decap_idx = -1;
5959 		int push_vlan_idx = -1;
5960 		for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
5961 			switch (action->type) {
5962 			case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5963 				raw_decap_idx = action_idx;
5964 				break;
5965 			case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5966 				raw_encap = action->conf;
5967 				if (raw_encap->size >
5968 					MLX5_ENCAPSULATION_DECISION_SIZE) {
5969 					encap_idx = raw_decap_idx != -1 ?
5970 						    raw_decap_idx : action_idx;
5971 					if (encap_idx < sample_action_pos &&
5972 					    push_vlan_idx == -1)
5973 						set_tag_idx = encap_idx;
5974 				}
5975 				break;
5976 			case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
5977 			case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
5978 				encap_idx = action_idx;
5979 				if (encap_idx < sample_action_pos &&
5980 				    push_vlan_idx == -1)
5981 					set_tag_idx = encap_idx;
5982 				break;
5983 			case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5984 			case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5985 				push_vlan_idx = action_idx;
5986 				if (push_vlan_idx < sample_action_pos)
5987 					set_tag_idx = action_idx;
5988 				break;
5989 			default:
5990 				break;
5991 			}
5992 			action_idx++;
5993 		}
5994 	}
5995 	/* Prepare the actions for prefix and suffix flow. */
5996 	if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
5997 		index = qrss_action_pos;
5998 		/* Put the preceding the Queue/RSS action into prefix flow. */
5999 		if (index != 0)
6000 			memcpy(actions_pre, actions,
6001 			       sizeof(struct rte_flow_action) * index);
6002 		/* Put others preceding the sample action into prefix flow. */
6003 		if (sample_action_pos > index + 1)
6004 			memcpy(actions_pre + index, actions + index + 1,
6005 			       sizeof(struct rte_flow_action) *
6006 			       (sample_action_pos - index - 1));
6007 		index = sample_action_pos - 1;
6008 		/* Put Queue/RSS action into Suffix flow. */
6009 		memcpy(actions_sfx, actions + qrss_action_pos,
6010 		       sizeof(struct rte_flow_action));
6011 		actions_sfx++;
6012 	} else if (add_tag && set_tag_idx >= 0) {
6013 		if (set_tag_idx > 0)
6014 			memcpy(actions_pre, actions,
6015 			       sizeof(struct rte_flow_action) * set_tag_idx);
6016 		memcpy(actions_pre + set_tag_idx + 1, actions + set_tag_idx,
6017 		       sizeof(struct rte_flow_action) *
6018 		       (sample_action_pos - set_tag_idx));
6019 		index = sample_action_pos;
6020 	} else {
6021 		index = sample_action_pos;
6022 		if (index != 0)
6023 			memcpy(actions_pre, actions,
6024 			       sizeof(struct rte_flow_action) * index);
6025 	}
6026 	/* For CX5, add an extra tag action for NIC-RX and E-Switch ingress.
6027 	 * For CX6DX and above, metadata registers Cx preserve their value,
6028 	 * add an extra tag action for NIC-RX and E-Switch Domain.
6029 	 */
6030 	if (add_tag) {
6031 		/* Prepare the prefix tag action. */
6032 		append_index++;
6033 		set_tag = (void *)(actions_pre + actions_n + append_index);
6034 		ret = mlx5_flow_get_reg_id(dev, MLX5_SAMPLE_ID, 0, error);
6035 		/* Trust VF/SF on CX5 not supported meter so that the reserved
6036 		 * metadata regC is REG_NON, back to use application tag
6037 		 * index 0.
6038 		 */
6039 		if (unlikely(ret == REG_NON))
6040 			ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
6041 		if (ret < 0)
6042 			return ret;
6043 		mlx5_ipool_malloc(priv->sh->ipool
6044 				  [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id);
6045 		*set_tag = (struct mlx5_rte_flow_action_set_tag) {
6046 			.id = ret,
6047 			.data = tag_id,
6048 		};
6049 		/* Prepare the suffix subflow items. */
6050 		for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
6051 			if (items->type == RTE_FLOW_ITEM_TYPE_PORT_ID) {
6052 				memcpy(sfx_items, items, sizeof(*sfx_items));
6053 				sfx_items++;
6054 			}
6055 		}
6056 		tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
6057 		tag_spec->data = tag_id;
6058 		tag_spec->id = set_tag->id;
6059 		tag_mask = tag_spec + 1;
6060 		tag_mask->data = UINT32_MAX;
6061 		sfx_items[0] = (struct rte_flow_item){
6062 			.type = (enum rte_flow_item_type)
6063 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
6064 			.spec = tag_spec,
6065 			.last = NULL,
6066 			.mask = tag_mask,
6067 		};
6068 		sfx_items[1] = (struct rte_flow_item){
6069 			.type = (enum rte_flow_item_type)
6070 				RTE_FLOW_ITEM_TYPE_END,
6071 		};
6072 		/* Prepare the tag action in prefix subflow. */
6073 		set_tag_idx = (set_tag_idx == -1) ? index : set_tag_idx;
6074 		actions_pre[set_tag_idx] =
6075 			(struct rte_flow_action){
6076 			.type = (enum rte_flow_action_type)
6077 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
6078 			.conf = set_tag,
6079 		};
6080 		/* Update next sample position due to add one tag action */
6081 		index += 1;
6082 	}
6083 	/* Copy the sample action into prefix flow. */
6084 	memcpy(actions_pre + index, actions + sample_action_pos,
6085 	       sizeof(struct rte_flow_action));
6086 	index += 1;
6087 	/* For the modify action after the sample action in E-Switch mirroring,
6088 	 * Add the extra jump action in prefix subflow and jump into the next
6089 	 * table, then do the modify action in the new table.
6090 	 */
6091 	if (jump_table) {
6092 		/* Prepare the prefix jump action. */
6093 		append_index++;
6094 		jump_action = (void *)(actions_pre + actions_n + append_index);
6095 		jump_action->group = jump_table;
6096 		actions_pre[index++] =
6097 			(struct rte_flow_action){
6098 			.type = (enum rte_flow_action_type)
6099 				RTE_FLOW_ACTION_TYPE_JUMP,
6100 			.conf = jump_action,
6101 		};
6102 	}
6103 	actions_pre[index] = (struct rte_flow_action){
6104 		.type = (enum rte_flow_action_type)
6105 			RTE_FLOW_ACTION_TYPE_END,
6106 	};
6107 	/* Put the actions after sample into Suffix flow. */
6108 	memcpy(actions_sfx, actions + sample_action_pos + 1,
6109 	       sizeof(struct rte_flow_action) *
6110 	       (actions_n - sample_action_pos - 1));
6111 	return tag_id;
6112 }
6113 
6114 /**
6115  * The splitting for metadata feature.
6116  *
6117  * - Q/RSS action on NIC Rx should be split in order to pass by
6118  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
6119  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
6120  *
6121  * - All the actions on NIC Tx should have a mreg copy action to
6122  *   copy reg_a from WQE to reg_c[0].
6123  *
6124  * @param dev
6125  *   Pointer to Ethernet device.
6126  * @param[in] flow
6127  *   Parent flow structure pointer.
6128  * @param[in] attr
6129  *   Flow rule attributes.
6130  * @param[in] items
6131  *   Pattern specification (list terminated by the END pattern item).
6132  * @param[in] actions
6133  *   Associated actions (list terminated by the END action).
6134  * @param[in] flow_split_info
6135  *   Pointer to flow split info structure.
6136  * @param[out] error
6137  *   Perform verbose error reporting if not NULL.
6138  * @return
6139  *   0 on success, negative value otherwise
6140  */
6141 static int
6142 flow_create_split_metadata(struct rte_eth_dev *dev,
6143 			   struct rte_flow *flow,
6144 			   const struct rte_flow_attr *attr,
6145 			   const struct rte_flow_item items[],
6146 			   const struct rte_flow_action actions[],
6147 			   struct mlx5_flow_split_info *flow_split_info,
6148 			   struct rte_flow_error *error)
6149 {
6150 	struct mlx5_priv *priv = dev->data->dev_private;
6151 	struct mlx5_sh_config *config = &priv->sh->config;
6152 	const struct rte_flow_action *qrss = NULL;
6153 	struct rte_flow_action *ext_actions = NULL;
6154 	struct mlx5_flow *dev_flow = NULL;
6155 	uint32_t qrss_id = 0;
6156 	int mtr_sfx = 0;
6157 	size_t act_size;
6158 	int actions_n;
6159 	int encap_idx;
6160 	int ret;
6161 
6162 	/* Check whether extensive metadata feature is engaged. */
6163 	if (!config->dv_flow_en ||
6164 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
6165 	    !mlx5_flow_ext_mreg_supported(dev))
6166 		return flow_create_split_inner(dev, flow, NULL, attr, items,
6167 					       actions, flow_split_info, error);
6168 	actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
6169 							   &encap_idx);
6170 	if (qrss) {
6171 		/* Exclude hairpin flows from splitting. */
6172 		if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
6173 			const struct rte_flow_action_queue *queue;
6174 
6175 			queue = qrss->conf;
6176 			if (mlx5_rxq_is_hairpin(dev, queue->index))
6177 				qrss = NULL;
6178 		} else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
6179 			const struct rte_flow_action_rss *rss;
6180 
6181 			rss = qrss->conf;
6182 			if (mlx5_rxq_is_hairpin(dev, rss->queue[0]))
6183 				qrss = NULL;
6184 		}
6185 	}
6186 	if (qrss) {
6187 		/* Check if it is in meter suffix table. */
6188 		mtr_sfx = attr->group == (attr->transfer ?
6189 			  (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6190 			  MLX5_FLOW_TABLE_LEVEL_METER);
6191 		/*
6192 		 * Q/RSS action on NIC Rx should be split in order to pass by
6193 		 * the mreg copy table (RX_CP_TBL) and then it jumps to the
6194 		 * action table (RX_ACT_TBL) which has the split Q/RSS action.
6195 		 */
6196 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
6197 			   sizeof(struct rte_flow_action_set_tag) +
6198 			   sizeof(struct rte_flow_action_jump);
6199 		ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
6200 					  SOCKET_ID_ANY);
6201 		if (!ext_actions)
6202 			return rte_flow_error_set(error, ENOMEM,
6203 						  RTE_FLOW_ERROR_TYPE_ACTION,
6204 						  NULL, "no memory to split "
6205 						  "metadata flow");
6206 		/*
6207 		 * Create the new actions list with removed Q/RSS action
6208 		 * and appended set tag and jump to register copy table
6209 		 * (RX_CP_TBL). We should preallocate unique tag ID here
6210 		 * in advance, because it is needed for set tag action.
6211 		 */
6212 		qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
6213 						    qrss, actions_n,
6214 						    mtr_sfx, error);
6215 		if (!mtr_sfx && !qrss_id) {
6216 			ret = -rte_errno;
6217 			goto exit;
6218 		}
6219 	} else if (attr->egress && !attr->transfer) {
6220 		/*
6221 		 * All the actions on NIC Tx should have a metadata register
6222 		 * copy action to copy reg_a from WQE to reg_c[meta]
6223 		 */
6224 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
6225 			   sizeof(struct mlx5_flow_action_copy_mreg);
6226 		ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
6227 					  SOCKET_ID_ANY);
6228 		if (!ext_actions)
6229 			return rte_flow_error_set(error, ENOMEM,
6230 						  RTE_FLOW_ERROR_TYPE_ACTION,
6231 						  NULL, "no memory to split "
6232 						  "metadata flow");
6233 		/* Create the action list appended with copy register. */
6234 		ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
6235 					     actions_n, error, encap_idx);
6236 		if (ret < 0)
6237 			goto exit;
6238 	}
6239 	/* Add the unmodified original or prefix subflow. */
6240 	ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6241 				      items, ext_actions ? ext_actions :
6242 				      actions, flow_split_info, error);
6243 	if (ret < 0)
6244 		goto exit;
6245 	MLX5_ASSERT(dev_flow);
6246 	if (qrss) {
6247 		const struct rte_flow_attr q_attr = {
6248 			.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
6249 			.ingress = 1,
6250 		};
6251 		/* Internal PMD action to set register. */
6252 		struct mlx5_rte_flow_item_tag q_tag_spec = {
6253 			.data = qrss_id,
6254 			.id = REG_NON,
6255 		};
6256 		struct rte_flow_item q_items[] = {
6257 			{
6258 				.type = (enum rte_flow_item_type)
6259 					MLX5_RTE_FLOW_ITEM_TYPE_TAG,
6260 				.spec = &q_tag_spec,
6261 				.last = NULL,
6262 				.mask = NULL,
6263 			},
6264 			{
6265 				.type = RTE_FLOW_ITEM_TYPE_END,
6266 			},
6267 		};
6268 		struct rte_flow_action q_actions[] = {
6269 			{
6270 				.type = qrss->type,
6271 				.conf = qrss->conf,
6272 			},
6273 			{
6274 				.type = RTE_FLOW_ACTION_TYPE_END,
6275 			},
6276 		};
6277 		uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
6278 
6279 		/*
6280 		 * Configure the tag item only if there is no meter subflow.
6281 		 * Since tag is already marked in the meter suffix subflow
6282 		 * we can just use the meter suffix items as is.
6283 		 */
6284 		if (qrss_id) {
6285 			/* Not meter subflow. */
6286 			MLX5_ASSERT(!mtr_sfx);
6287 			/*
6288 			 * Put unique id in prefix flow due to it is destroyed
6289 			 * after suffix flow and id will be freed after there
6290 			 * is no actual flows with this id and identifier
6291 			 * reallocation becomes possible (for example, for
6292 			 * other flows in other threads).
6293 			 */
6294 			dev_flow->handle->split_flow_id = qrss_id;
6295 			ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
6296 						   error);
6297 			if (ret < 0)
6298 				goto exit;
6299 			q_tag_spec.id = ret;
6300 		}
6301 		dev_flow = NULL;
6302 		/* Add suffix subflow to execute Q/RSS. */
6303 		flow_split_info->prefix_layers = layers;
6304 		flow_split_info->prefix_mark = 0;
6305 		flow_split_info->table_id = 0;
6306 		ret = flow_create_split_inner(dev, flow, &dev_flow,
6307 					      &q_attr, mtr_sfx ? items :
6308 					      q_items, q_actions,
6309 					      flow_split_info, error);
6310 		if (ret < 0)
6311 			goto exit;
6312 		/* qrss ID should be freed if failed. */
6313 		qrss_id = 0;
6314 		MLX5_ASSERT(dev_flow);
6315 	}
6316 
6317 exit:
6318 	/*
6319 	 * We do not destroy the partially created sub_flows in case of error.
6320 	 * These ones are included into parent flow list and will be destroyed
6321 	 * by flow_drv_destroy.
6322 	 */
6323 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
6324 			qrss_id);
6325 	mlx5_free(ext_actions);
6326 	return ret;
6327 }
6328 
6329 /**
6330  * Create meter internal drop flow with the original pattern.
6331  *
6332  * @param dev
6333  *   Pointer to Ethernet device.
6334  * @param[in] flow
6335  *   Parent flow structure pointer.
6336  * @param[in] attr
6337  *   Flow rule attributes.
6338  * @param[in] items
6339  *   Pattern specification (list terminated by the END pattern item).
6340  * @param[in] flow_split_info
6341  *   Pointer to flow split info structure.
6342  * @param[in] fm
6343  *   Pointer to flow meter structure.
6344  * @param[out] error
6345  *   Perform verbose error reporting if not NULL.
6346  * @return
6347  *   0 on success, negative value otherwise
6348  */
6349 static uint32_t
6350 flow_meter_create_drop_flow_with_org_pattern(struct rte_eth_dev *dev,
6351 			struct rte_flow *flow,
6352 			const struct rte_flow_attr *attr,
6353 			const struct rte_flow_item items[],
6354 			struct mlx5_flow_split_info *flow_split_info,
6355 			struct mlx5_flow_meter_info *fm,
6356 			struct rte_flow_error *error)
6357 {
6358 	struct mlx5_flow *dev_flow = NULL;
6359 	struct rte_flow_attr drop_attr = *attr;
6360 	struct rte_flow_action drop_actions[3];
6361 	struct mlx5_flow_split_info drop_split_info = *flow_split_info;
6362 
6363 	MLX5_ASSERT(fm->drop_cnt);
6364 	drop_actions[0].type =
6365 		(enum rte_flow_action_type)MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
6366 	drop_actions[0].conf = (void *)(uintptr_t)fm->drop_cnt;
6367 	drop_actions[1].type = RTE_FLOW_ACTION_TYPE_DROP;
6368 	drop_actions[1].conf = NULL;
6369 	drop_actions[2].type = RTE_FLOW_ACTION_TYPE_END;
6370 	drop_actions[2].conf = NULL;
6371 	drop_split_info.external = false;
6372 	drop_split_info.skip_scale |= 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6373 	drop_split_info.table_id = MLX5_MTR_TABLE_ID_DROP;
6374 	drop_attr.group = MLX5_FLOW_TABLE_LEVEL_METER;
6375 	return flow_create_split_inner(dev, flow, &dev_flow,
6376 				&drop_attr, items, drop_actions,
6377 				&drop_split_info, error);
6378 }
6379 
6380 /**
6381  * The splitting for meter feature.
6382  *
6383  * - The meter flow will be split to two flows as prefix and
6384  *   suffix flow. The packets make sense only it pass the prefix
6385  *   meter action.
6386  *
6387  * - Reg_C_5 is used for the packet to match betweend prefix and
6388  *   suffix flow.
6389  *
6390  * @param dev
6391  *   Pointer to Ethernet device.
6392  * @param[in] flow
6393  *   Parent flow structure pointer.
6394  * @param[in] attr
6395  *   Flow rule attributes.
6396  * @param[in] items
6397  *   Pattern specification (list terminated by the END pattern item).
6398  * @param[in] actions
6399  *   Associated actions (list terminated by the END action).
6400  * @param[in] flow_split_info
6401  *   Pointer to flow split info structure.
6402  * @param[out] error
6403  *   Perform verbose error reporting if not NULL.
6404  * @return
6405  *   0 on success, negative value otherwise
6406  */
6407 static int
6408 flow_create_split_meter(struct rte_eth_dev *dev,
6409 			struct rte_flow *flow,
6410 			const struct rte_flow_attr *attr,
6411 			const struct rte_flow_item items[],
6412 			const struct rte_flow_action actions[],
6413 			struct mlx5_flow_split_info *flow_split_info,
6414 			struct rte_flow_error *error)
6415 {
6416 	struct mlx5_priv *priv = dev->data->dev_private;
6417 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6418 	struct rte_flow_action *sfx_actions = NULL;
6419 	struct rte_flow_action *pre_actions = NULL;
6420 	struct rte_flow_item *sfx_items = NULL;
6421 	struct mlx5_flow *dev_flow = NULL;
6422 	struct rte_flow_attr sfx_attr = *attr;
6423 	struct mlx5_flow_meter_info *fm = NULL;
6424 	uint8_t skip_scale_restore;
6425 	bool has_mtr = false;
6426 	bool has_modify = false;
6427 	bool set_mtr_reg = true;
6428 	bool is_mtr_hierarchy = false;
6429 	uint32_t meter_id = 0;
6430 	uint32_t mtr_idx = 0;
6431 	uint32_t mtr_flow_id = 0;
6432 	size_t act_size;
6433 	size_t item_size;
6434 	int actions_n = 0;
6435 	int ret = 0;
6436 
6437 	if (priv->mtr_en)
6438 		actions_n = flow_check_meter_action(dev, actions, &has_mtr,
6439 						    &has_modify, &meter_id);
6440 	if (has_mtr) {
6441 		if (flow->meter) {
6442 			fm = flow_dv_meter_find_by_idx(priv, flow->meter);
6443 			if (!fm)
6444 				return rte_flow_error_set(error, EINVAL,
6445 						RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6446 						NULL, "Meter not found.");
6447 		} else {
6448 			fm = mlx5_flow_meter_find(priv, meter_id, &mtr_idx);
6449 			if (!fm)
6450 				return rte_flow_error_set(error, EINVAL,
6451 						RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6452 						NULL, "Meter not found.");
6453 			ret = mlx5_flow_meter_attach(priv, fm,
6454 						     &sfx_attr, error);
6455 			if (ret)
6456 				return -rte_errno;
6457 			flow->meter = mtr_idx;
6458 		}
6459 		MLX5_ASSERT(wks);
6460 		wks->fm = fm;
6461 		if (!fm->def_policy) {
6462 			wks->policy = mlx5_flow_meter_policy_find(dev,
6463 								  fm->policy_id,
6464 								  NULL);
6465 			MLX5_ASSERT(wks->policy);
6466 			if (wks->policy->mark)
6467 				wks->mark = 1;
6468 			if (wks->policy->is_hierarchy) {
6469 				wks->final_policy =
6470 				mlx5_flow_meter_hierarchy_get_final_policy(dev,
6471 								wks->policy);
6472 				if (!wks->final_policy)
6473 					return rte_flow_error_set(error,
6474 					EINVAL,
6475 					RTE_FLOW_ERROR_TYPE_ACTION, NULL,
6476 				"Failed to find terminal policy of hierarchy.");
6477 				is_mtr_hierarchy = true;
6478 			}
6479 		}
6480 		/*
6481 		 * If it isn't default-policy Meter, and
6482 		 * 1. There's no action in flow to change
6483 		 *    packet (modify/encap/decap etc.), OR
6484 		 * 2. No drop count needed for this meter.
6485 		 * 3. It's not meter hierarchy.
6486 		 * Then no need to use regC to save meter id anymore.
6487 		 */
6488 		if (!fm->def_policy && !is_mtr_hierarchy &&
6489 		    (!has_modify || !fm->drop_cnt))
6490 			set_mtr_reg = false;
6491 		/* Prefix actions: meter, decap, encap, tag, jump, end, cnt. */
6492 #define METER_PREFIX_ACTION 7
6493 		act_size = (sizeof(struct rte_flow_action) *
6494 			    (actions_n + METER_PREFIX_ACTION)) +
6495 			   sizeof(struct mlx5_rte_flow_action_set_tag);
6496 		/* Suffix items: tag, vlan, port id, end. */
6497 #define METER_SUFFIX_ITEM 4
6498 		item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
6499 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
6500 		sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
6501 					  0, SOCKET_ID_ANY);
6502 		if (!sfx_actions)
6503 			return rte_flow_error_set(error, ENOMEM,
6504 						  RTE_FLOW_ERROR_TYPE_ACTION,
6505 						  NULL, "no memory to split "
6506 						  "meter flow");
6507 		sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
6508 			     act_size);
6509 		/* There's no suffix flow for meter of non-default policy. */
6510 		if (!fm->def_policy)
6511 			pre_actions = sfx_actions + 1;
6512 		else
6513 			pre_actions = sfx_actions + actions_n;
6514 		ret = flow_meter_split_prep(dev, flow, wks, &sfx_attr,
6515 					    items, sfx_items, actions,
6516 					    sfx_actions, pre_actions,
6517 					    (set_mtr_reg ? &mtr_flow_id : NULL),
6518 					    error);
6519 		if (ret) {
6520 			ret = -rte_errno;
6521 			goto exit;
6522 		}
6523 		/* Add the prefix subflow. */
6524 		skip_scale_restore = flow_split_info->skip_scale;
6525 		flow_split_info->skip_scale |=
6526 			1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6527 		ret = flow_create_split_inner(dev, flow, &dev_flow,
6528 					      attr, items, pre_actions,
6529 					      flow_split_info, error);
6530 		flow_split_info->skip_scale = skip_scale_restore;
6531 		if (ret) {
6532 			if (mtr_flow_id)
6533 				mlx5_ipool_free(fm->flow_ipool, mtr_flow_id);
6534 			ret = -rte_errno;
6535 			goto exit;
6536 		}
6537 		if (mtr_flow_id) {
6538 			dev_flow->handle->split_flow_id = mtr_flow_id;
6539 			dev_flow->handle->is_meter_flow_id = 1;
6540 		}
6541 		if (!fm->def_policy) {
6542 			if (!set_mtr_reg && fm->drop_cnt)
6543 				ret =
6544 			flow_meter_create_drop_flow_with_org_pattern(dev, flow,
6545 							&sfx_attr, items,
6546 							flow_split_info,
6547 							fm, error);
6548 			goto exit;
6549 		}
6550 		/* Setting the sfx group atrr. */
6551 		sfx_attr.group = sfx_attr.transfer ?
6552 				(MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6553 				 MLX5_FLOW_TABLE_LEVEL_METER;
6554 		flow_split_info->prefix_layers =
6555 				flow_get_prefix_layer_flags(dev_flow);
6556 		flow_split_info->prefix_mark |= wks->mark;
6557 		flow_split_info->table_id = MLX5_MTR_TABLE_ID_SUFFIX;
6558 	}
6559 	/* Add the prefix subflow. */
6560 	ret = flow_create_split_metadata(dev, flow,
6561 					 &sfx_attr, sfx_items ?
6562 					 sfx_items : items,
6563 					 sfx_actions ? sfx_actions : actions,
6564 					 flow_split_info, error);
6565 exit:
6566 	if (sfx_actions)
6567 		mlx5_free(sfx_actions);
6568 	return ret;
6569 }
6570 
6571 /**
6572  * The splitting for sample feature.
6573  *
6574  * Once Sample action is detected in the action list, the flow actions should
6575  * be split into prefix sub flow and suffix sub flow.
6576  *
6577  * The original items remain in the prefix sub flow, all actions preceding the
6578  * sample action and the sample action itself will be copied to the prefix
6579  * sub flow, the actions following the sample action will be copied to the
6580  * suffix sub flow, Queue action always be located in the suffix sub flow.
6581  *
6582  * In order to make the packet from prefix sub flow matches with suffix sub
6583  * flow, an extra tag action be added into prefix sub flow, and the suffix sub
6584  * flow uses tag item with the unique flow id.
6585  *
6586  * @param dev
6587  *   Pointer to Ethernet device.
6588  * @param[in] flow
6589  *   Parent flow structure pointer.
6590  * @param[in] attr
6591  *   Flow rule attributes.
6592  * @param[in] items
6593  *   Pattern specification (list terminated by the END pattern item).
6594  * @param[in] actions
6595  *   Associated actions (list terminated by the END action).
6596  * @param[in] flow_split_info
6597  *   Pointer to flow split info structure.
6598  * @param[out] error
6599  *   Perform verbose error reporting if not NULL.
6600  * @return
6601  *   0 on success, negative value otherwise
6602  */
6603 static int
6604 flow_create_split_sample(struct rte_eth_dev *dev,
6605 			 struct rte_flow *flow,
6606 			 const struct rte_flow_attr *attr,
6607 			 const struct rte_flow_item items[],
6608 			 const struct rte_flow_action actions[],
6609 			 struct mlx5_flow_split_info *flow_split_info,
6610 			 struct rte_flow_error *error)
6611 {
6612 	struct mlx5_priv *priv = dev->data->dev_private;
6613 	struct rte_flow_action *sfx_actions = NULL;
6614 	struct rte_flow_action *pre_actions = NULL;
6615 	struct rte_flow_item *sfx_items = NULL;
6616 	struct mlx5_flow *dev_flow = NULL;
6617 	struct rte_flow_attr sfx_attr = *attr;
6618 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6619 	struct mlx5_flow_dv_sample_resource *sample_res;
6620 	struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
6621 	struct mlx5_flow_tbl_resource *sfx_tbl;
6622 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6623 #endif
6624 	size_t act_size;
6625 	size_t item_size;
6626 	uint32_t fdb_tx = 0;
6627 	int32_t tag_id = 0;
6628 	int actions_n = 0;
6629 	int sample_action_pos;
6630 	int qrss_action_pos;
6631 	int add_tag = 0;
6632 	int modify_after_mirror = 0;
6633 	uint16_t jump_table = 0;
6634 	const uint32_t next_ft_step = 1;
6635 	int ret = 0;
6636 
6637 	if (priv->sampler_en)
6638 		actions_n = flow_check_match_action(actions, attr,
6639 					RTE_FLOW_ACTION_TYPE_SAMPLE,
6640 					&sample_action_pos, &qrss_action_pos,
6641 					&modify_after_mirror);
6642 	if (actions_n) {
6643 		/* The prefix actions must includes sample, tag, end. */
6644 		act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
6645 			   + sizeof(struct mlx5_rte_flow_action_set_tag);
6646 		item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
6647 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
6648 		sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
6649 					  item_size), 0, SOCKET_ID_ANY);
6650 		if (!sfx_actions)
6651 			return rte_flow_error_set(error, ENOMEM,
6652 						  RTE_FLOW_ERROR_TYPE_ACTION,
6653 						  NULL, "no memory to split "
6654 						  "sample flow");
6655 		/* The representor_id is UINT16_MAX for uplink. */
6656 		fdb_tx = (attr->transfer && priv->representor_id != UINT16_MAX);
6657 		/*
6658 		 * When reg_c_preserve is set, metadata registers Cx preserve
6659 		 * their value even through packet duplication.
6660 		 */
6661 		add_tag = (!fdb_tx ||
6662 			   priv->sh->cdev->config.hca_attr.reg_c_preserve);
6663 		if (add_tag)
6664 			sfx_items = (struct rte_flow_item *)((char *)sfx_actions
6665 					+ act_size);
6666 		if (modify_after_mirror)
6667 			jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
6668 				     next_ft_step;
6669 		pre_actions = sfx_actions + actions_n;
6670 		tag_id = flow_sample_split_prep(dev, add_tag, items, sfx_items,
6671 						actions, sfx_actions,
6672 						pre_actions, actions_n,
6673 						sample_action_pos,
6674 						qrss_action_pos, jump_table,
6675 						error);
6676 		if (tag_id < 0 || (add_tag && !tag_id)) {
6677 			ret = -rte_errno;
6678 			goto exit;
6679 		}
6680 		if (modify_after_mirror)
6681 			flow_split_info->skip_scale =
6682 					1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6683 		/* Add the prefix subflow. */
6684 		ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6685 					      items, pre_actions,
6686 					      flow_split_info, error);
6687 		if (ret) {
6688 			ret = -rte_errno;
6689 			goto exit;
6690 		}
6691 		dev_flow->handle->split_flow_id = tag_id;
6692 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6693 		if (!modify_after_mirror) {
6694 			/* Set the sfx group attr. */
6695 			sample_res = (struct mlx5_flow_dv_sample_resource *)
6696 						dev_flow->dv.sample_res;
6697 			sfx_tbl = (struct mlx5_flow_tbl_resource *)
6698 						sample_res->normal_path_tbl;
6699 			sfx_tbl_data = container_of(sfx_tbl,
6700 						struct mlx5_flow_tbl_data_entry,
6701 						tbl);
6702 			sfx_attr.group = sfx_attr.transfer ?
6703 			(sfx_tbl_data->level - 1) : sfx_tbl_data->level;
6704 		} else {
6705 			MLX5_ASSERT(attr->transfer);
6706 			sfx_attr.group = jump_table;
6707 		}
6708 		flow_split_info->prefix_layers =
6709 				flow_get_prefix_layer_flags(dev_flow);
6710 		MLX5_ASSERT(wks);
6711 		flow_split_info->prefix_mark |= wks->mark;
6712 		/* Suffix group level already be scaled with factor, set
6713 		 * MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1 to avoid scale
6714 		 * again in translation.
6715 		 */
6716 		flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6717 #endif
6718 	}
6719 	/* Add the suffix subflow. */
6720 	ret = flow_create_split_meter(dev, flow, &sfx_attr,
6721 				      sfx_items ? sfx_items : items,
6722 				      sfx_actions ? sfx_actions : actions,
6723 				      flow_split_info, error);
6724 exit:
6725 	if (sfx_actions)
6726 		mlx5_free(sfx_actions);
6727 	return ret;
6728 }
6729 
6730 /**
6731  * Split the flow to subflow set. The splitters might be linked
6732  * in the chain, like this:
6733  * flow_create_split_outer() calls:
6734  *   flow_create_split_meter() calls:
6735  *     flow_create_split_metadata(meter_subflow_0) calls:
6736  *       flow_create_split_inner(metadata_subflow_0)
6737  *       flow_create_split_inner(metadata_subflow_1)
6738  *       flow_create_split_inner(metadata_subflow_2)
6739  *     flow_create_split_metadata(meter_subflow_1) calls:
6740  *       flow_create_split_inner(metadata_subflow_0)
6741  *       flow_create_split_inner(metadata_subflow_1)
6742  *       flow_create_split_inner(metadata_subflow_2)
6743  *
6744  * This provide flexible way to add new levels of flow splitting.
6745  * The all of successfully created subflows are included to the
6746  * parent flow dev_flow list.
6747  *
6748  * @param dev
6749  *   Pointer to Ethernet device.
6750  * @param[in] flow
6751  *   Parent flow structure pointer.
6752  * @param[in] attr
6753  *   Flow rule attributes.
6754  * @param[in] items
6755  *   Pattern specification (list terminated by the END pattern item).
6756  * @param[in] actions
6757  *   Associated actions (list terminated by the END action).
6758  * @param[in] flow_split_info
6759  *   Pointer to flow split info structure.
6760  * @param[out] error
6761  *   Perform verbose error reporting if not NULL.
6762  * @return
6763  *   0 on success, negative value otherwise
6764  */
6765 static int
6766 flow_create_split_outer(struct rte_eth_dev *dev,
6767 			struct rte_flow *flow,
6768 			const struct rte_flow_attr *attr,
6769 			const struct rte_flow_item items[],
6770 			const struct rte_flow_action actions[],
6771 			struct mlx5_flow_split_info *flow_split_info,
6772 			struct rte_flow_error *error)
6773 {
6774 	int ret;
6775 
6776 	ret = flow_create_split_sample(dev, flow, attr, items,
6777 				       actions, flow_split_info, error);
6778 	MLX5_ASSERT(ret <= 0);
6779 	return ret;
6780 }
6781 
6782 static inline struct mlx5_flow_tunnel *
6783 flow_tunnel_from_rule(const struct mlx5_flow *flow)
6784 {
6785 	struct mlx5_flow_tunnel *tunnel;
6786 
6787 #pragma GCC diagnostic push
6788 #pragma GCC diagnostic ignored "-Wcast-qual"
6789 	tunnel = (typeof(tunnel))flow->tunnel;
6790 #pragma GCC diagnostic pop
6791 
6792 	return tunnel;
6793 }
6794 
6795 /**
6796  * Adjust flow RSS workspace if needed.
6797  *
6798  * @param wks
6799  *   Pointer to thread flow work space.
6800  * @param rss_desc
6801  *   Pointer to RSS descriptor.
6802  * @param[in] nrssq_num
6803  *   New RSS queue number.
6804  *
6805  * @return
6806  *   0 on success, -1 otherwise and rte_errno is set.
6807  */
6808 static int
6809 flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
6810 			  struct mlx5_flow_rss_desc *rss_desc,
6811 			  uint32_t nrssq_num)
6812 {
6813 	if (likely(nrssq_num <= wks->rssq_num))
6814 		return 0;
6815 	rss_desc->queue = realloc(rss_desc->queue,
6816 			  sizeof(*rss_desc->queue) * RTE_ALIGN(nrssq_num, 2));
6817 	if (!rss_desc->queue) {
6818 		rte_errno = ENOMEM;
6819 		return -1;
6820 	}
6821 	wks->rssq_num = RTE_ALIGN(nrssq_num, 2);
6822 	return 0;
6823 }
6824 
6825 /**
6826  * Create a flow and add it to @p list.
6827  *
6828  * @param dev
6829  *   Pointer to Ethernet device.
6830  * @param list
6831  *   Pointer to a TAILQ flow list. If this parameter NULL,
6832  *   no list insertion occurred, flow is just created,
6833  *   this is caller's responsibility to track the
6834  *   created flow.
6835  * @param[in] attr
6836  *   Flow rule attributes.
6837  * @param[in] items
6838  *   Pattern specification (list terminated by the END pattern item).
6839  * @param[in] actions
6840  *   Associated actions (list terminated by the END action).
6841  * @param[in] external
6842  *   This flow rule is created by request external to PMD.
6843  * @param[out] error
6844  *   Perform verbose error reporting if not NULL.
6845  *
6846  * @return
6847  *   A flow index on success, 0 otherwise and rte_errno is set.
6848  */
6849 static uint32_t
6850 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6851 		 const struct rte_flow_attr *attr,
6852 		 const struct rte_flow_item items[],
6853 		 const struct rte_flow_action original_actions[],
6854 		 bool external, struct rte_flow_error *error)
6855 {
6856 	struct mlx5_priv *priv = dev->data->dev_private;
6857 	struct rte_flow *flow = NULL;
6858 	struct mlx5_flow *dev_flow;
6859 	const struct rte_flow_action_rss *rss = NULL;
6860 	struct mlx5_translated_action_handle
6861 		indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6862 	int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6863 	union {
6864 		struct mlx5_flow_expand_rss buf;
6865 		uint8_t buffer[4096];
6866 	} expand_buffer;
6867 	union {
6868 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6869 		uint8_t buffer[2048];
6870 	} actions_rx;
6871 	union {
6872 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6873 		uint8_t buffer[2048];
6874 	} actions_hairpin_tx;
6875 	union {
6876 		struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
6877 		uint8_t buffer[2048];
6878 	} items_tx;
6879 	struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
6880 	struct mlx5_flow_rss_desc *rss_desc;
6881 	const struct rte_flow_action *p_actions_rx;
6882 	uint32_t i;
6883 	uint32_t idx = 0;
6884 	int hairpin_flow;
6885 	struct rte_flow_attr attr_tx = { .priority = 0 };
6886 	const struct rte_flow_action *actions;
6887 	struct rte_flow_action *translated_actions = NULL;
6888 	struct mlx5_flow_tunnel *tunnel;
6889 	struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
6890 	struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
6891 	struct mlx5_flow_split_info flow_split_info = {
6892 		.external = !!external,
6893 		.skip_scale = 0,
6894 		.flow_idx = 0,
6895 		.prefix_mark = 0,
6896 		.prefix_layers = 0,
6897 		.table_id = 0
6898 	};
6899 	int ret;
6900 
6901 	MLX5_ASSERT(wks);
6902 	rss_desc = &wks->rss_desc;
6903 	ret = flow_action_handles_translate(dev, original_actions,
6904 					    indir_actions,
6905 					    &indir_actions_n,
6906 					    &translated_actions, error);
6907 	if (ret < 0) {
6908 		MLX5_ASSERT(translated_actions == NULL);
6909 		return 0;
6910 	}
6911 	actions = translated_actions ? translated_actions : original_actions;
6912 	p_actions_rx = actions;
6913 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6914 	ret = flow_drv_validate(dev, attr, items, p_actions_rx,
6915 				external, hairpin_flow, error);
6916 	if (ret < 0)
6917 		goto error_before_hairpin_split;
6918 	flow = mlx5_ipool_zmalloc(priv->flows[type], &idx);
6919 	if (!flow) {
6920 		rte_errno = ENOMEM;
6921 		goto error_before_hairpin_split;
6922 	}
6923 	if (hairpin_flow > 0) {
6924 		if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
6925 			rte_errno = EINVAL;
6926 			goto error_before_hairpin_split;
6927 		}
6928 		flow_hairpin_split(dev, actions, actions_rx.actions,
6929 				   actions_hairpin_tx.actions, items_tx.items,
6930 				   idx);
6931 		p_actions_rx = actions_rx.actions;
6932 	}
6933 	flow_split_info.flow_idx = idx;
6934 	flow->drv_type = flow_get_drv_type(dev, attr);
6935 	MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
6936 		    flow->drv_type < MLX5_FLOW_TYPE_MAX);
6937 	memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
6938 	/* RSS Action only works on NIC RX domain */
6939 	if (attr->ingress && !attr->transfer)
6940 		rss = flow_get_rss_action(dev, p_actions_rx);
6941 	if (rss) {
6942 		if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
6943 			return 0;
6944 		/*
6945 		 * The following information is required by
6946 		 * mlx5_flow_hashfields_adjust() in advance.
6947 		 */
6948 		rss_desc->level = rss->level;
6949 		/* RSS type 0 indicates default RSS type (RTE_ETH_RSS_IP). */
6950 		rss_desc->types = !rss->types ? RTE_ETH_RSS_IP : rss->types;
6951 	}
6952 	flow->dev_handles = 0;
6953 	if (rss && rss->types) {
6954 		unsigned int graph_root;
6955 
6956 		graph_root = find_graph_root(rss->level);
6957 		ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
6958 					   items, rss->types,
6959 					   mlx5_support_expansion, graph_root);
6960 		MLX5_ASSERT(ret > 0 &&
6961 		       (unsigned int)ret < sizeof(expand_buffer.buffer));
6962 		if (rte_log_can_log(mlx5_logtype, RTE_LOG_DEBUG)) {
6963 			for (i = 0; i < buf->entries; ++i)
6964 				mlx5_dbg__print_pattern(buf->entry[i].pattern);
6965 		}
6966 	} else {
6967 		buf->entries = 1;
6968 		buf->entry[0].pattern = (void *)(uintptr_t)items;
6969 	}
6970 	rss_desc->shared_rss = flow_get_shared_rss_action(dev, indir_actions,
6971 						      indir_actions_n);
6972 	for (i = 0; i < buf->entries; ++i) {
6973 		/* Initialize flow split data. */
6974 		flow_split_info.prefix_layers = 0;
6975 		flow_split_info.prefix_mark = 0;
6976 		flow_split_info.skip_scale = 0;
6977 		/*
6978 		 * The splitter may create multiple dev_flows,
6979 		 * depending on configuration. In the simplest
6980 		 * case it just creates unmodified original flow.
6981 		 */
6982 		ret = flow_create_split_outer(dev, flow, attr,
6983 					      buf->entry[i].pattern,
6984 					      p_actions_rx, &flow_split_info,
6985 					      error);
6986 		if (ret < 0)
6987 			goto error;
6988 		if (is_flow_tunnel_steer_rule(wks->flows[0].tof_type)) {
6989 			ret = flow_tunnel_add_default_miss(dev, flow, attr,
6990 							   p_actions_rx,
6991 							   idx,
6992 							   wks->flows[0].tunnel,
6993 							   &default_miss_ctx,
6994 							   error);
6995 			if (ret < 0) {
6996 				mlx5_free(default_miss_ctx.queue);
6997 				goto error;
6998 			}
6999 		}
7000 	}
7001 	/* Create the tx flow. */
7002 	if (hairpin_flow) {
7003 		attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
7004 		attr_tx.ingress = 0;
7005 		attr_tx.egress = 1;
7006 		dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
7007 					 actions_hairpin_tx.actions,
7008 					 idx, error);
7009 		if (!dev_flow)
7010 			goto error;
7011 		dev_flow->flow = flow;
7012 		dev_flow->external = 0;
7013 		SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
7014 			      dev_flow->handle, next);
7015 		ret = flow_drv_translate(dev, dev_flow, &attr_tx,
7016 					 items_tx.items,
7017 					 actions_hairpin_tx.actions, error);
7018 		if (ret < 0)
7019 			goto error;
7020 	}
7021 	/*
7022 	 * Update the metadata register copy table. If extensive
7023 	 * metadata feature is enabled and registers are supported
7024 	 * we might create the extra rte_flow for each unique
7025 	 * MARK/FLAG action ID.
7026 	 *
7027 	 * The table is updated for ingress Flows only, because
7028 	 * the egress Flows belong to the different device and
7029 	 * copy table should be updated in peer NIC Rx domain.
7030 	 */
7031 	if (attr->ingress &&
7032 	    (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
7033 		ret = flow_mreg_update_copy_table(dev, flow, actions, error);
7034 		if (ret)
7035 			goto error;
7036 	}
7037 	/*
7038 	 * If the flow is external (from application) OR device is started,
7039 	 * OR mreg discover, then apply immediately.
7040 	 */
7041 	if (external || dev->data->dev_started ||
7042 	    (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
7043 	     attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
7044 		ret = flow_drv_apply(dev, flow, error);
7045 		if (ret < 0)
7046 			goto error;
7047 	}
7048 	flow->type = type;
7049 	flow_rxq_flags_set(dev, flow);
7050 	rte_free(translated_actions);
7051 	tunnel = flow_tunnel_from_rule(wks->flows);
7052 	if (tunnel) {
7053 		flow->tunnel = 1;
7054 		flow->tunnel_id = tunnel->tunnel_id;
7055 		__atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
7056 		mlx5_free(default_miss_ctx.queue);
7057 	}
7058 	mlx5_flow_pop_thread_workspace();
7059 	return idx;
7060 error:
7061 	MLX5_ASSERT(flow);
7062 	ret = rte_errno; /* Save rte_errno before cleanup. */
7063 	flow_mreg_del_copy_action(dev, flow);
7064 	flow_drv_destroy(dev, flow);
7065 	if (rss_desc->shared_rss)
7066 		__atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
7067 			mlx5_ipool_get
7068 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
7069 			rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
7070 	mlx5_ipool_free(priv->flows[type], idx);
7071 	rte_errno = ret; /* Restore rte_errno. */
7072 	ret = rte_errno;
7073 	rte_errno = ret;
7074 	mlx5_flow_pop_thread_workspace();
7075 error_before_hairpin_split:
7076 	rte_free(translated_actions);
7077 	return 0;
7078 }
7079 
7080 /**
7081  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
7082  * incoming packets to table 1.
7083  *
7084  * Other flow rules, requested for group n, will be created in
7085  * e-switch table n+1.
7086  * Jump action to e-switch group n will be created to group n+1.
7087  *
7088  * Used when working in switchdev mode, to utilise advantages of table 1
7089  * and above.
7090  *
7091  * @param dev
7092  *   Pointer to Ethernet device.
7093  *
7094  * @return
7095  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
7096  */
7097 struct rte_flow *
7098 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
7099 {
7100 	const struct rte_flow_attr attr = {
7101 		.group = 0,
7102 		.priority = 0,
7103 		.ingress = 1,
7104 		.egress = 0,
7105 		.transfer = 1,
7106 	};
7107 	const struct rte_flow_item pattern = {
7108 		.type = RTE_FLOW_ITEM_TYPE_END,
7109 	};
7110 	struct rte_flow_action_jump jump = {
7111 		.group = 1,
7112 	};
7113 	const struct rte_flow_action actions[] = {
7114 		{
7115 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
7116 			.conf = &jump,
7117 		},
7118 		{
7119 			.type = RTE_FLOW_ACTION_TYPE_END,
7120 		},
7121 	};
7122 	struct rte_flow_error error;
7123 
7124 	return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7125 						   &attr, &pattern,
7126 						   actions, false, &error);
7127 }
7128 
7129 /**
7130  * Create a dedicated flow rule on e-switch table 1, matches ESW manager
7131  * and sq number, directs all packets to peer vport.
7132  *
7133  * @param dev
7134  *   Pointer to Ethernet device.
7135  * @param txq
7136  *   Txq index.
7137  *
7138  * @return
7139  *   Flow ID on success, 0 otherwise and rte_errno is set.
7140  */
7141 uint32_t
7142 mlx5_flow_create_devx_sq_miss_flow(struct rte_eth_dev *dev, uint32_t txq)
7143 {
7144 	struct rte_flow_attr attr = {
7145 		.group = 0,
7146 		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7147 		.ingress = 1,
7148 		.egress = 0,
7149 		.transfer = 1,
7150 	};
7151 	struct rte_flow_item_port_id port_spec = {
7152 		.id = MLX5_PORT_ESW_MGR,
7153 	};
7154 	struct mlx5_rte_flow_item_tx_queue txq_spec = {
7155 		.queue = txq,
7156 	};
7157 	struct rte_flow_item pattern[] = {
7158 		{
7159 			.type = RTE_FLOW_ITEM_TYPE_PORT_ID,
7160 			.spec = &port_spec,
7161 		},
7162 		{
7163 			.type = (enum rte_flow_item_type)
7164 				MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
7165 			.spec = &txq_spec,
7166 		},
7167 		{
7168 			.type = RTE_FLOW_ITEM_TYPE_END,
7169 		},
7170 	};
7171 	struct rte_flow_action_jump jump = {
7172 		.group = 1,
7173 	};
7174 	struct rte_flow_action_port_id port = {
7175 		.id = dev->data->port_id,
7176 	};
7177 	struct rte_flow_action actions[] = {
7178 		{
7179 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
7180 			.conf = &jump,
7181 		},
7182 		{
7183 			.type = RTE_FLOW_ACTION_TYPE_END,
7184 		},
7185 	};
7186 	struct rte_flow_error error;
7187 
7188 	/*
7189 	 * Creates group 0, highest priority jump flow.
7190 	 * Matches txq to bypass kernel packets.
7191 	 */
7192 	if (flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern, actions,
7193 			     false, &error) == 0)
7194 		return 0;
7195 	/* Create group 1, lowest priority redirect flow for txq. */
7196 	attr.group = 1;
7197 	actions[0].conf = &port;
7198 	actions[0].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
7199 	return flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern,
7200 				actions, false, &error);
7201 }
7202 
7203 /**
7204  * Validate a flow supported by the NIC.
7205  *
7206  * @see rte_flow_validate()
7207  * @see rte_flow_ops
7208  */
7209 int
7210 mlx5_flow_validate(struct rte_eth_dev *dev,
7211 		   const struct rte_flow_attr *attr,
7212 		   const struct rte_flow_item items[],
7213 		   const struct rte_flow_action original_actions[],
7214 		   struct rte_flow_error *error)
7215 {
7216 	int hairpin_flow;
7217 	struct mlx5_translated_action_handle
7218 		indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
7219 	int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
7220 	const struct rte_flow_action *actions;
7221 	struct rte_flow_action *translated_actions = NULL;
7222 	int ret = flow_action_handles_translate(dev, original_actions,
7223 						indir_actions,
7224 						&indir_actions_n,
7225 						&translated_actions, error);
7226 
7227 	if (ret)
7228 		return ret;
7229 	actions = translated_actions ? translated_actions : original_actions;
7230 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
7231 	ret = flow_drv_validate(dev, attr, items, actions,
7232 				true, hairpin_flow, error);
7233 	rte_free(translated_actions);
7234 	return ret;
7235 }
7236 
7237 /**
7238  * Create a flow.
7239  *
7240  * @see rte_flow_create()
7241  * @see rte_flow_ops
7242  */
7243 struct rte_flow *
7244 mlx5_flow_create(struct rte_eth_dev *dev,
7245 		 const struct rte_flow_attr *attr,
7246 		 const struct rte_flow_item items[],
7247 		 const struct rte_flow_action actions[],
7248 		 struct rte_flow_error *error)
7249 {
7250 	struct mlx5_priv *priv = dev->data->dev_private;
7251 
7252 	if (priv->sh->config.dv_flow_en == 2) {
7253 		rte_flow_error_set(error, ENOTSUP,
7254 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7255 			  NULL,
7256 			  "Flow non-Q creation not supported");
7257 		return NULL;
7258 	}
7259 	/*
7260 	 * If the device is not started yet, it is not allowed to created a
7261 	 * flow from application. PMD default flows and traffic control flows
7262 	 * are not affected.
7263 	 */
7264 	if (unlikely(!dev->data->dev_started)) {
7265 		DRV_LOG(DEBUG, "port %u is not started when "
7266 			"inserting a flow", dev->data->port_id);
7267 		rte_flow_error_set(error, ENODEV,
7268 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7269 				   NULL,
7270 				   "port not started");
7271 		return NULL;
7272 	}
7273 
7274 	return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_GEN,
7275 						   attr, items, actions,
7276 						   true, error);
7277 }
7278 
7279 /**
7280  * Destroy a flow in a list.
7281  *
7282  * @param dev
7283  *   Pointer to Ethernet device.
7284  * @param[in] flow_idx
7285  *   Index of flow to destroy.
7286  */
7287 static void
7288 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7289 		  uint32_t flow_idx)
7290 {
7291 	struct mlx5_priv *priv = dev->data->dev_private;
7292 	struct rte_flow *flow = mlx5_ipool_get(priv->flows[type], flow_idx);
7293 
7294 	if (!flow)
7295 		return;
7296 	MLX5_ASSERT(flow->type == type);
7297 	/*
7298 	 * Update RX queue flags only if port is started, otherwise it is
7299 	 * already clean.
7300 	 */
7301 	if (dev->data->dev_started)
7302 		flow_rxq_flags_trim(dev, flow);
7303 	flow_drv_destroy(dev, flow);
7304 	if (flow->tunnel) {
7305 		struct mlx5_flow_tunnel *tunnel;
7306 
7307 		tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
7308 		RTE_VERIFY(tunnel);
7309 		if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
7310 			mlx5_flow_tunnel_free(dev, tunnel);
7311 	}
7312 	flow_mreg_del_copy_action(dev, flow);
7313 	mlx5_ipool_free(priv->flows[type], flow_idx);
7314 }
7315 
7316 /**
7317  * Destroy all flows.
7318  *
7319  * @param dev
7320  *   Pointer to Ethernet device.
7321  * @param type
7322  *   Flow type to be flushed.
7323  * @param active
7324  *   If flushing is called actively.
7325  */
7326 void
7327 mlx5_flow_list_flush(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7328 		     bool active)
7329 {
7330 	struct mlx5_priv *priv = dev->data->dev_private;
7331 	uint32_t num_flushed = 0, fidx = 1;
7332 	struct rte_flow *flow;
7333 
7334 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
7335 	if (priv->sh->config.dv_flow_en == 2 &&
7336 	    type == MLX5_FLOW_TYPE_GEN) {
7337 		flow_hw_q_flow_flush(dev, NULL);
7338 		return;
7339 	}
7340 #endif
7341 
7342 	MLX5_IPOOL_FOREACH(priv->flows[type], fidx, flow) {
7343 		flow_list_destroy(dev, type, fidx);
7344 		num_flushed++;
7345 	}
7346 	if (active) {
7347 		DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
7348 			dev->data->port_id, num_flushed);
7349 	}
7350 }
7351 
7352 /**
7353  * Stop all default actions for flows.
7354  *
7355  * @param dev
7356  *   Pointer to Ethernet device.
7357  */
7358 void
7359 mlx5_flow_stop_default(struct rte_eth_dev *dev)
7360 {
7361 	flow_mreg_del_default_copy_action(dev);
7362 	flow_rxq_flags_clear(dev);
7363 }
7364 
7365 /**
7366  * Start all default actions for flows.
7367  *
7368  * @param dev
7369  *   Pointer to Ethernet device.
7370  * @return
7371  *   0 on success, a negative errno value otherwise and rte_errno is set.
7372  */
7373 int
7374 mlx5_flow_start_default(struct rte_eth_dev *dev)
7375 {
7376 	struct rte_flow_error error;
7377 
7378 	/* Make sure default copy action (reg_c[0] -> reg_b) is created. */
7379 	return flow_mreg_add_default_copy_action(dev, &error);
7380 }
7381 
7382 /**
7383  * Release key of thread specific flow workspace data.
7384  */
7385 void
7386 flow_release_workspace(void *data)
7387 {
7388 	struct mlx5_flow_workspace *wks = data;
7389 	struct mlx5_flow_workspace *next;
7390 
7391 	while (wks) {
7392 		next = wks->next;
7393 		free(wks->rss_desc.queue);
7394 		free(wks);
7395 		wks = next;
7396 	}
7397 }
7398 
7399 /**
7400  * Get thread specific current flow workspace.
7401  *
7402  * @return pointer to thread specific flow workspace data, NULL on error.
7403  */
7404 struct mlx5_flow_workspace*
7405 mlx5_flow_get_thread_workspace(void)
7406 {
7407 	struct mlx5_flow_workspace *data;
7408 
7409 	data = mlx5_flow_os_get_specific_workspace();
7410 	MLX5_ASSERT(data && data->inuse);
7411 	if (!data || !data->inuse)
7412 		DRV_LOG(ERR, "flow workspace not initialized.");
7413 	return data;
7414 }
7415 
7416 /**
7417  * Allocate and init new flow workspace.
7418  *
7419  * @return pointer to flow workspace data, NULL on error.
7420  */
7421 static struct mlx5_flow_workspace*
7422 flow_alloc_thread_workspace(void)
7423 {
7424 	struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
7425 
7426 	if (!data) {
7427 		DRV_LOG(ERR, "Failed to allocate flow workspace "
7428 			"memory.");
7429 		return NULL;
7430 	}
7431 	data->rss_desc.queue = calloc(1,
7432 			sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
7433 	if (!data->rss_desc.queue)
7434 		goto err;
7435 	data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
7436 	return data;
7437 err:
7438 	free(data->rss_desc.queue);
7439 	free(data);
7440 	return NULL;
7441 }
7442 
7443 /**
7444  * Get new thread specific flow workspace.
7445  *
7446  * If current workspace inuse, create new one and set as current.
7447  *
7448  * @return pointer to thread specific flow workspace data, NULL on error.
7449  */
7450 static struct mlx5_flow_workspace*
7451 mlx5_flow_push_thread_workspace(void)
7452 {
7453 	struct mlx5_flow_workspace *curr;
7454 	struct mlx5_flow_workspace *data;
7455 
7456 	curr = mlx5_flow_os_get_specific_workspace();
7457 	if (!curr) {
7458 		data = flow_alloc_thread_workspace();
7459 		if (!data)
7460 			return NULL;
7461 	} else if (!curr->inuse) {
7462 		data = curr;
7463 	} else if (curr->next) {
7464 		data = curr->next;
7465 	} else {
7466 		data = flow_alloc_thread_workspace();
7467 		if (!data)
7468 			return NULL;
7469 		curr->next = data;
7470 		data->prev = curr;
7471 	}
7472 	data->inuse = 1;
7473 	data->flow_idx = 0;
7474 	/* Set as current workspace */
7475 	if (mlx5_flow_os_set_specific_workspace(data))
7476 		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
7477 	return data;
7478 }
7479 
7480 /**
7481  * Close current thread specific flow workspace.
7482  *
7483  * If previous workspace available, set it as current.
7484  *
7485  * @return pointer to thread specific flow workspace data, NULL on error.
7486  */
7487 static void
7488 mlx5_flow_pop_thread_workspace(void)
7489 {
7490 	struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
7491 
7492 	if (!data)
7493 		return;
7494 	if (!data->inuse) {
7495 		DRV_LOG(ERR, "Failed to close unused flow workspace.");
7496 		return;
7497 	}
7498 	data->inuse = 0;
7499 	if (!data->prev)
7500 		return;
7501 	if (mlx5_flow_os_set_specific_workspace(data->prev))
7502 		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
7503 }
7504 
7505 /**
7506  * Verify the flow list is empty
7507  *
7508  * @param dev
7509  *  Pointer to Ethernet device.
7510  *
7511  * @return the number of flows not released.
7512  */
7513 int
7514 mlx5_flow_verify(struct rte_eth_dev *dev __rte_unused)
7515 {
7516 	struct mlx5_priv *priv = dev->data->dev_private;
7517 	struct rte_flow *flow;
7518 	uint32_t idx = 0;
7519 	int ret = 0, i;
7520 
7521 	for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
7522 		MLX5_IPOOL_FOREACH(priv->flows[i], idx, flow) {
7523 			DRV_LOG(DEBUG, "port %u flow %p still referenced",
7524 				dev->data->port_id, (void *)flow);
7525 			ret++;
7526 		}
7527 	}
7528 	return ret;
7529 }
7530 
7531 /**
7532  * Enable default hairpin egress flow.
7533  *
7534  * @param dev
7535  *   Pointer to Ethernet device.
7536  * @param queue
7537  *   The queue index.
7538  *
7539  * @return
7540  *   0 on success, a negative errno value otherwise and rte_errno is set.
7541  */
7542 int
7543 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
7544 			    uint32_t queue)
7545 {
7546 	const struct rte_flow_attr attr = {
7547 		.egress = 1,
7548 		.priority = 0,
7549 	};
7550 	struct mlx5_rte_flow_item_tx_queue queue_spec = {
7551 		.queue = queue,
7552 	};
7553 	struct mlx5_rte_flow_item_tx_queue queue_mask = {
7554 		.queue = UINT32_MAX,
7555 	};
7556 	struct rte_flow_item items[] = {
7557 		{
7558 			.type = (enum rte_flow_item_type)
7559 				MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
7560 			.spec = &queue_spec,
7561 			.last = NULL,
7562 			.mask = &queue_mask,
7563 		},
7564 		{
7565 			.type = RTE_FLOW_ITEM_TYPE_END,
7566 		},
7567 	};
7568 	struct rte_flow_action_jump jump = {
7569 		.group = MLX5_HAIRPIN_TX_TABLE,
7570 	};
7571 	struct rte_flow_action actions[2];
7572 	uint32_t flow_idx;
7573 	struct rte_flow_error error;
7574 
7575 	actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
7576 	actions[0].conf = &jump;
7577 	actions[1].type = RTE_FLOW_ACTION_TYPE_END;
7578 	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7579 				    &attr, items, actions, false, &error);
7580 	if (!flow_idx) {
7581 		DRV_LOG(DEBUG,
7582 			"Failed to create ctrl flow: rte_errno(%d),"
7583 			" type(%d), message(%s)",
7584 			rte_errno, error.type,
7585 			error.message ? error.message : " (no stated reason)");
7586 		return -rte_errno;
7587 	}
7588 	return 0;
7589 }
7590 
7591 /**
7592  * Enable a control flow configured from the control plane.
7593  *
7594  * @param dev
7595  *   Pointer to Ethernet device.
7596  * @param eth_spec
7597  *   An Ethernet flow spec to apply.
7598  * @param eth_mask
7599  *   An Ethernet flow mask to apply.
7600  * @param vlan_spec
7601  *   A VLAN flow spec to apply.
7602  * @param vlan_mask
7603  *   A VLAN flow mask to apply.
7604  *
7605  * @return
7606  *   0 on success, a negative errno value otherwise and rte_errno is set.
7607  */
7608 int
7609 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
7610 		    struct rte_flow_item_eth *eth_spec,
7611 		    struct rte_flow_item_eth *eth_mask,
7612 		    struct rte_flow_item_vlan *vlan_spec,
7613 		    struct rte_flow_item_vlan *vlan_mask)
7614 {
7615 	struct mlx5_priv *priv = dev->data->dev_private;
7616 	const struct rte_flow_attr attr = {
7617 		.ingress = 1,
7618 		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7619 	};
7620 	struct rte_flow_item items[] = {
7621 		{
7622 			.type = RTE_FLOW_ITEM_TYPE_ETH,
7623 			.spec = eth_spec,
7624 			.last = NULL,
7625 			.mask = eth_mask,
7626 		},
7627 		{
7628 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
7629 					      RTE_FLOW_ITEM_TYPE_END,
7630 			.spec = vlan_spec,
7631 			.last = NULL,
7632 			.mask = vlan_mask,
7633 		},
7634 		{
7635 			.type = RTE_FLOW_ITEM_TYPE_END,
7636 		},
7637 	};
7638 	uint16_t queue[priv->reta_idx_n];
7639 	struct rte_flow_action_rss action_rss = {
7640 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
7641 		.level = 0,
7642 		.types = priv->rss_conf.rss_hf,
7643 		.key_len = priv->rss_conf.rss_key_len,
7644 		.queue_num = priv->reta_idx_n,
7645 		.key = priv->rss_conf.rss_key,
7646 		.queue = queue,
7647 	};
7648 	struct rte_flow_action actions[] = {
7649 		{
7650 			.type = RTE_FLOW_ACTION_TYPE_RSS,
7651 			.conf = &action_rss,
7652 		},
7653 		{
7654 			.type = RTE_FLOW_ACTION_TYPE_END,
7655 		},
7656 	};
7657 	uint32_t flow_idx;
7658 	struct rte_flow_error error;
7659 	unsigned int i;
7660 
7661 	if (!priv->reta_idx_n || !priv->rxqs_n) {
7662 		return 0;
7663 	}
7664 	if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
7665 		action_rss.types = 0;
7666 	for (i = 0; i != priv->reta_idx_n; ++i)
7667 		queue[i] = (*priv->reta_idx)[i];
7668 	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7669 				    &attr, items, actions, false, &error);
7670 	if (!flow_idx)
7671 		return -rte_errno;
7672 	return 0;
7673 }
7674 
7675 /**
7676  * Enable a flow control configured from the control plane.
7677  *
7678  * @param dev
7679  *   Pointer to Ethernet device.
7680  * @param eth_spec
7681  *   An Ethernet flow spec to apply.
7682  * @param eth_mask
7683  *   An Ethernet flow mask to apply.
7684  *
7685  * @return
7686  *   0 on success, a negative errno value otherwise and rte_errno is set.
7687  */
7688 int
7689 mlx5_ctrl_flow(struct rte_eth_dev *dev,
7690 	       struct rte_flow_item_eth *eth_spec,
7691 	       struct rte_flow_item_eth *eth_mask)
7692 {
7693 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
7694 }
7695 
7696 /**
7697  * Create default miss flow rule matching lacp traffic
7698  *
7699  * @param dev
7700  *   Pointer to Ethernet device.
7701  * @param eth_spec
7702  *   An Ethernet flow spec to apply.
7703  *
7704  * @return
7705  *   0 on success, a negative errno value otherwise and rte_errno is set.
7706  */
7707 int
7708 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
7709 {
7710 	/*
7711 	 * The LACP matching is done by only using ether type since using
7712 	 * a multicast dst mac causes kernel to give low priority to this flow.
7713 	 */
7714 	static const struct rte_flow_item_eth lacp_spec = {
7715 		.type = RTE_BE16(0x8809),
7716 	};
7717 	static const struct rte_flow_item_eth lacp_mask = {
7718 		.type = 0xffff,
7719 	};
7720 	const struct rte_flow_attr attr = {
7721 		.ingress = 1,
7722 	};
7723 	struct rte_flow_item items[] = {
7724 		{
7725 			.type = RTE_FLOW_ITEM_TYPE_ETH,
7726 			.spec = &lacp_spec,
7727 			.mask = &lacp_mask,
7728 		},
7729 		{
7730 			.type = RTE_FLOW_ITEM_TYPE_END,
7731 		},
7732 	};
7733 	struct rte_flow_action actions[] = {
7734 		{
7735 			.type = (enum rte_flow_action_type)
7736 				MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
7737 		},
7738 		{
7739 			.type = RTE_FLOW_ACTION_TYPE_END,
7740 		},
7741 	};
7742 	struct rte_flow_error error;
7743 	uint32_t flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7744 					&attr, items, actions,
7745 					false, &error);
7746 
7747 	if (!flow_idx)
7748 		return -rte_errno;
7749 	return 0;
7750 }
7751 
7752 /**
7753  * Destroy a flow.
7754  *
7755  * @see rte_flow_destroy()
7756  * @see rte_flow_ops
7757  */
7758 int
7759 mlx5_flow_destroy(struct rte_eth_dev *dev,
7760 		  struct rte_flow *flow,
7761 		  struct rte_flow_error *error __rte_unused)
7762 {
7763 	struct mlx5_priv *priv = dev->data->dev_private;
7764 
7765 	if (priv->sh->config.dv_flow_en == 2)
7766 		return rte_flow_error_set(error, ENOTSUP,
7767 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7768 			  NULL,
7769 			  "Flow non-Q destruction not supported");
7770 	flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN,
7771 				(uintptr_t)(void *)flow);
7772 	return 0;
7773 }
7774 
7775 /**
7776  * Destroy all flows.
7777  *
7778  * @see rte_flow_flush()
7779  * @see rte_flow_ops
7780  */
7781 int
7782 mlx5_flow_flush(struct rte_eth_dev *dev,
7783 		struct rte_flow_error *error __rte_unused)
7784 {
7785 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, false);
7786 	return 0;
7787 }
7788 
7789 /**
7790  * Isolated mode.
7791  *
7792  * @see rte_flow_isolate()
7793  * @see rte_flow_ops
7794  */
7795 int
7796 mlx5_flow_isolate(struct rte_eth_dev *dev,
7797 		  int enable,
7798 		  struct rte_flow_error *error)
7799 {
7800 	struct mlx5_priv *priv = dev->data->dev_private;
7801 
7802 	if (dev->data->dev_started) {
7803 		rte_flow_error_set(error, EBUSY,
7804 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7805 				   NULL,
7806 				   "port must be stopped first");
7807 		return -rte_errno;
7808 	}
7809 	priv->isolated = !!enable;
7810 	if (enable)
7811 		dev->dev_ops = &mlx5_dev_ops_isolate;
7812 	else
7813 		dev->dev_ops = &mlx5_dev_ops;
7814 
7815 	dev->rx_descriptor_status = mlx5_rx_descriptor_status;
7816 	dev->tx_descriptor_status = mlx5_tx_descriptor_status;
7817 
7818 	return 0;
7819 }
7820 
7821 /**
7822  * Query a flow.
7823  *
7824  * @see rte_flow_query()
7825  * @see rte_flow_ops
7826  */
7827 static int
7828 flow_drv_query(struct rte_eth_dev *dev,
7829 	       uint32_t flow_idx,
7830 	       const struct rte_flow_action *actions,
7831 	       void *data,
7832 	       struct rte_flow_error *error)
7833 {
7834 	struct mlx5_priv *priv = dev->data->dev_private;
7835 	const struct mlx5_flow_driver_ops *fops;
7836 	struct rte_flow *flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
7837 					       flow_idx);
7838 	enum mlx5_flow_drv_type ftype;
7839 
7840 	if (!flow) {
7841 		return rte_flow_error_set(error, ENOENT,
7842 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7843 			  NULL,
7844 			  "invalid flow handle");
7845 	}
7846 	ftype = flow->drv_type;
7847 	MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
7848 	fops = flow_get_drv_ops(ftype);
7849 
7850 	return fops->query(dev, flow, actions, data, error);
7851 }
7852 
7853 /**
7854  * Query a flow.
7855  *
7856  * @see rte_flow_query()
7857  * @see rte_flow_ops
7858  */
7859 int
7860 mlx5_flow_query(struct rte_eth_dev *dev,
7861 		struct rte_flow *flow,
7862 		const struct rte_flow_action *actions,
7863 		void *data,
7864 		struct rte_flow_error *error)
7865 {
7866 	int ret;
7867 	struct mlx5_priv *priv = dev->data->dev_private;
7868 
7869 	if (priv->sh->config.dv_flow_en == 2)
7870 		return rte_flow_error_set(error, ENOTSUP,
7871 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7872 			  NULL,
7873 			  "Flow non-Q query not supported");
7874 	ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
7875 			     error);
7876 	if (ret < 0)
7877 		return ret;
7878 	return 0;
7879 }
7880 
7881 /**
7882  * Get rte_flow callbacks.
7883  *
7884  * @param dev
7885  *   Pointer to Ethernet device structure.
7886  * @param ops
7887  *   Pointer to operation-specific structure.
7888  *
7889  * @return 0
7890  */
7891 int
7892 mlx5_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
7893 		  const struct rte_flow_ops **ops)
7894 {
7895 	*ops = &mlx5_flow_ops;
7896 	return 0;
7897 }
7898 
7899 /**
7900  * Validate meter policy actions.
7901  * Dispatcher for action type specific validation.
7902  *
7903  * @param[in] dev
7904  *   Pointer to the Ethernet device structure.
7905  * @param[in] action
7906  *   The meter policy action object to validate.
7907  * @param[in] attr
7908  *   Attributes of flow to determine steering domain.
7909  * @param[out] is_rss
7910  *   Is RSS or not.
7911  * @param[out] domain_bitmap
7912  *   Domain bitmap.
7913  * @param[out] is_def_policy
7914  *   Is default policy or not.
7915  * @param[out] error
7916  *   Perform verbose error reporting if not NULL. Initialized in case of
7917  *   error only.
7918  *
7919  * @return
7920  *   0 on success, otherwise negative errno value.
7921  */
7922 int
7923 mlx5_flow_validate_mtr_acts(struct rte_eth_dev *dev,
7924 			const struct rte_flow_action *actions[RTE_COLORS],
7925 			struct rte_flow_attr *attr,
7926 			bool *is_rss,
7927 			uint8_t *domain_bitmap,
7928 			uint8_t *policy_mode,
7929 			struct rte_mtr_error *error)
7930 {
7931 	const struct mlx5_flow_driver_ops *fops;
7932 
7933 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7934 	return fops->validate_mtr_acts(dev, actions, attr, is_rss,
7935 				       domain_bitmap, policy_mode, error);
7936 }
7937 
7938 /**
7939  * Destroy the meter table set.
7940  *
7941  * @param[in] dev
7942  *   Pointer to Ethernet device.
7943  * @param[in] mtr_policy
7944  *   Meter policy struct.
7945  */
7946 void
7947 mlx5_flow_destroy_mtr_acts(struct rte_eth_dev *dev,
7948 		      struct mlx5_flow_meter_policy *mtr_policy)
7949 {
7950 	const struct mlx5_flow_driver_ops *fops;
7951 
7952 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7953 	fops->destroy_mtr_acts(dev, mtr_policy);
7954 }
7955 
7956 /**
7957  * Create policy action, lock free,
7958  * (mutex should be acquired by caller).
7959  * Dispatcher for action type specific call.
7960  *
7961  * @param[in] dev
7962  *   Pointer to the Ethernet device structure.
7963  * @param[in] mtr_policy
7964  *   Meter policy struct.
7965  * @param[in] action
7966  *   Action specification used to create meter actions.
7967  * @param[out] error
7968  *   Perform verbose error reporting if not NULL. Initialized in case of
7969  *   error only.
7970  *
7971  * @return
7972  *   0 on success, otherwise negative errno value.
7973  */
7974 int
7975 mlx5_flow_create_mtr_acts(struct rte_eth_dev *dev,
7976 		      struct mlx5_flow_meter_policy *mtr_policy,
7977 		      const struct rte_flow_action *actions[RTE_COLORS],
7978 		      struct rte_mtr_error *error)
7979 {
7980 	const struct mlx5_flow_driver_ops *fops;
7981 
7982 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7983 	return fops->create_mtr_acts(dev, mtr_policy, actions, error);
7984 }
7985 
7986 /**
7987  * Create policy rules, lock free,
7988  * (mutex should be acquired by caller).
7989  * Dispatcher for action type specific call.
7990  *
7991  * @param[in] dev
7992  *   Pointer to the Ethernet device structure.
7993  * @param[in] mtr_policy
7994  *   Meter policy struct.
7995  *
7996  * @return
7997  *   0 on success, -1 otherwise.
7998  */
7999 int
8000 mlx5_flow_create_policy_rules(struct rte_eth_dev *dev,
8001 			     struct mlx5_flow_meter_policy *mtr_policy)
8002 {
8003 	const struct mlx5_flow_driver_ops *fops;
8004 
8005 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8006 	return fops->create_policy_rules(dev, mtr_policy);
8007 }
8008 
8009 /**
8010  * Destroy policy rules, lock free,
8011  * (mutex should be acquired by caller).
8012  * Dispatcher for action type specific call.
8013  *
8014  * @param[in] dev
8015  *   Pointer to the Ethernet device structure.
8016  * @param[in] mtr_policy
8017  *   Meter policy struct.
8018  */
8019 void
8020 mlx5_flow_destroy_policy_rules(struct rte_eth_dev *dev,
8021 			     struct mlx5_flow_meter_policy *mtr_policy)
8022 {
8023 	const struct mlx5_flow_driver_ops *fops;
8024 
8025 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8026 	fops->destroy_policy_rules(dev, mtr_policy);
8027 }
8028 
8029 /**
8030  * Destroy the default policy table set.
8031  *
8032  * @param[in] dev
8033  *   Pointer to Ethernet device.
8034  */
8035 void
8036 mlx5_flow_destroy_def_policy(struct rte_eth_dev *dev)
8037 {
8038 	const struct mlx5_flow_driver_ops *fops;
8039 
8040 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8041 	fops->destroy_def_policy(dev);
8042 }
8043 
8044 /**
8045  * Destroy the default policy table set.
8046  *
8047  * @param[in] dev
8048  *   Pointer to Ethernet device.
8049  *
8050  * @return
8051  *   0 on success, -1 otherwise.
8052  */
8053 int
8054 mlx5_flow_create_def_policy(struct rte_eth_dev *dev)
8055 {
8056 	const struct mlx5_flow_driver_ops *fops;
8057 
8058 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8059 	return fops->create_def_policy(dev);
8060 }
8061 
8062 /**
8063  * Create the needed meter and suffix tables.
8064  *
8065  * @param[in] dev
8066  *   Pointer to Ethernet device.
8067  *
8068  * @return
8069  *   0 on success, -1 otherwise.
8070  */
8071 int
8072 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
8073 			struct mlx5_flow_meter_info *fm,
8074 			uint32_t mtr_idx,
8075 			uint8_t domain_bitmap)
8076 {
8077 	const struct mlx5_flow_driver_ops *fops;
8078 
8079 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8080 	return fops->create_mtr_tbls(dev, fm, mtr_idx, domain_bitmap);
8081 }
8082 
8083 /**
8084  * Destroy the meter table set.
8085  *
8086  * @param[in] dev
8087  *   Pointer to Ethernet device.
8088  * @param[in] tbl
8089  *   Pointer to the meter table set.
8090  */
8091 void
8092 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
8093 			   struct mlx5_flow_meter_info *fm)
8094 {
8095 	const struct mlx5_flow_driver_ops *fops;
8096 
8097 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8098 	fops->destroy_mtr_tbls(dev, fm);
8099 }
8100 
8101 /**
8102  * Destroy the global meter drop table.
8103  *
8104  * @param[in] dev
8105  *   Pointer to Ethernet device.
8106  */
8107 void
8108 mlx5_flow_destroy_mtr_drop_tbls(struct rte_eth_dev *dev)
8109 {
8110 	const struct mlx5_flow_driver_ops *fops;
8111 
8112 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8113 	fops->destroy_mtr_drop_tbls(dev);
8114 }
8115 
8116 /**
8117  * Destroy the sub policy table with RX queue.
8118  *
8119  * @param[in] dev
8120  *   Pointer to Ethernet device.
8121  * @param[in] mtr_policy
8122  *   Pointer to meter policy table.
8123  */
8124 void
8125 mlx5_flow_destroy_sub_policy_with_rxq(struct rte_eth_dev *dev,
8126 		struct mlx5_flow_meter_policy *mtr_policy)
8127 {
8128 	const struct mlx5_flow_driver_ops *fops;
8129 
8130 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8131 	fops->destroy_sub_policy_with_rxq(dev, mtr_policy);
8132 }
8133 
8134 /**
8135  * Allocate the needed aso flow meter id.
8136  *
8137  * @param[in] dev
8138  *   Pointer to Ethernet device.
8139  *
8140  * @return
8141  *   Index to aso flow meter on success, NULL otherwise.
8142  */
8143 uint32_t
8144 mlx5_flow_mtr_alloc(struct rte_eth_dev *dev)
8145 {
8146 	const struct mlx5_flow_driver_ops *fops;
8147 
8148 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8149 	return fops->create_meter(dev);
8150 }
8151 
8152 /**
8153  * Free the aso flow meter id.
8154  *
8155  * @param[in] dev
8156  *   Pointer to Ethernet device.
8157  * @param[in] mtr_idx
8158  *  Index to aso flow meter to be free.
8159  *
8160  * @return
8161  *   0 on success.
8162  */
8163 void
8164 mlx5_flow_mtr_free(struct rte_eth_dev *dev, uint32_t mtr_idx)
8165 {
8166 	const struct mlx5_flow_driver_ops *fops;
8167 
8168 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8169 	fops->free_meter(dev, mtr_idx);
8170 }
8171 
8172 /**
8173  * Allocate a counter.
8174  *
8175  * @param[in] dev
8176  *   Pointer to Ethernet device structure.
8177  *
8178  * @return
8179  *   Index to allocated counter  on success, 0 otherwise.
8180  */
8181 uint32_t
8182 mlx5_counter_alloc(struct rte_eth_dev *dev)
8183 {
8184 	const struct mlx5_flow_driver_ops *fops;
8185 	struct rte_flow_attr attr = { .transfer = 0 };
8186 
8187 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8188 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8189 		return fops->counter_alloc(dev);
8190 	}
8191 	DRV_LOG(ERR,
8192 		"port %u counter allocate is not supported.",
8193 		 dev->data->port_id);
8194 	return 0;
8195 }
8196 
8197 /**
8198  * Free a counter.
8199  *
8200  * @param[in] dev
8201  *   Pointer to Ethernet device structure.
8202  * @param[in] cnt
8203  *   Index to counter to be free.
8204  */
8205 void
8206 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
8207 {
8208 	const struct mlx5_flow_driver_ops *fops;
8209 	struct rte_flow_attr attr = { .transfer = 0 };
8210 
8211 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8212 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8213 		fops->counter_free(dev, cnt);
8214 		return;
8215 	}
8216 	DRV_LOG(ERR,
8217 		"port %u counter free is not supported.",
8218 		 dev->data->port_id);
8219 }
8220 
8221 /**
8222  * Query counter statistics.
8223  *
8224  * @param[in] dev
8225  *   Pointer to Ethernet device structure.
8226  * @param[in] cnt
8227  *   Index to counter to query.
8228  * @param[in] clear
8229  *   Set to clear counter statistics.
8230  * @param[out] pkts
8231  *   The counter hits packets number to save.
8232  * @param[out] bytes
8233  *   The counter hits bytes number to save.
8234  *
8235  * @return
8236  *   0 on success, a negative errno value otherwise.
8237  */
8238 int
8239 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
8240 		   bool clear, uint64_t *pkts, uint64_t *bytes, void **action)
8241 {
8242 	const struct mlx5_flow_driver_ops *fops;
8243 	struct rte_flow_attr attr = { .transfer = 0 };
8244 
8245 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8246 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8247 		return fops->counter_query(dev, cnt, clear, pkts,
8248 					bytes, action);
8249 	}
8250 	DRV_LOG(ERR,
8251 		"port %u counter query is not supported.",
8252 		 dev->data->port_id);
8253 	return -ENOTSUP;
8254 }
8255 
8256 /**
8257  * Get information about HWS pre-configurable resources.
8258  *
8259  * @param[in] dev
8260  *   Pointer to the rte_eth_dev structure.
8261  * @param[out] port_info
8262  *   Pointer to port information.
8263  * @param[out] queue_info
8264  *   Pointer to queue information.
8265  * @param[out] error
8266  *   Pointer to error structure.
8267  *
8268  * @return
8269  *   0 on success, a negative errno value otherwise and rte_errno is set.
8270  */
8271 static int
8272 mlx5_flow_info_get(struct rte_eth_dev *dev,
8273 		   struct rte_flow_port_info *port_info,
8274 		   struct rte_flow_queue_info *queue_info,
8275 		   struct rte_flow_error *error)
8276 {
8277 	const struct mlx5_flow_driver_ops *fops;
8278 
8279 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8280 		return rte_flow_error_set(error, ENOTSUP,
8281 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8282 				NULL,
8283 				"info get with incorrect steering mode");
8284 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8285 	return fops->info_get(dev, port_info, queue_info, error);
8286 }
8287 
8288 /**
8289  * Configure port HWS resources.
8290  *
8291  * @param[in] dev
8292  *   Pointer to the rte_eth_dev structure.
8293  * @param[in] port_attr
8294  *   Port configuration attributes.
8295  * @param[in] nb_queue
8296  *   Number of queue.
8297  * @param[in] queue_attr
8298  *   Array that holds attributes for each flow queue.
8299  * @param[out] error
8300  *   Pointer to error structure.
8301  *
8302  * @return
8303  *   0 on success, a negative errno value otherwise and rte_errno is set.
8304  */
8305 static int
8306 mlx5_flow_port_configure(struct rte_eth_dev *dev,
8307 			 const struct rte_flow_port_attr *port_attr,
8308 			 uint16_t nb_queue,
8309 			 const struct rte_flow_queue_attr *queue_attr[],
8310 			 struct rte_flow_error *error)
8311 {
8312 	const struct mlx5_flow_driver_ops *fops;
8313 
8314 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8315 		return rte_flow_error_set(error, ENOTSUP,
8316 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8317 				NULL,
8318 				"port configure with incorrect steering mode");
8319 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8320 	return fops->configure(dev, port_attr, nb_queue, queue_attr, error);
8321 }
8322 
8323 /**
8324  * Create flow item template.
8325  *
8326  * @param[in] dev
8327  *   Pointer to the rte_eth_dev structure.
8328  * @param[in] attr
8329  *   Pointer to the item template attributes.
8330  * @param[in] items
8331  *   The template item pattern.
8332  * @param[out] error
8333  *   Pointer to error structure.
8334  *
8335  * @return
8336  *   0 on success, a negative errno value otherwise and rte_errno is set.
8337  */
8338 static struct rte_flow_pattern_template *
8339 mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
8340 		const struct rte_flow_pattern_template_attr *attr,
8341 		const struct rte_flow_item items[],
8342 		struct rte_flow_error *error)
8343 {
8344 	const struct mlx5_flow_driver_ops *fops;
8345 
8346 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8347 		rte_flow_error_set(error, ENOTSUP,
8348 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8349 				NULL,
8350 				"pattern create with incorrect steering mode");
8351 		return NULL;
8352 	}
8353 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8354 	return fops->pattern_template_create(dev, attr, items, error);
8355 }
8356 
8357 /**
8358  * Destroy flow item template.
8359  *
8360  * @param[in] dev
8361  *   Pointer to the rte_eth_dev structure.
8362  * @param[in] template
8363  *   Pointer to the item template to be destroyed.
8364  * @param[out] error
8365  *   Pointer to error structure.
8366  *
8367  * @return
8368  *   0 on success, a negative errno value otherwise and rte_errno is set.
8369  */
8370 static int
8371 mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
8372 				   struct rte_flow_pattern_template *template,
8373 				   struct rte_flow_error *error)
8374 {
8375 	const struct mlx5_flow_driver_ops *fops;
8376 
8377 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8378 		return rte_flow_error_set(error, ENOTSUP,
8379 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8380 				NULL,
8381 				"pattern destroy with incorrect steering mode");
8382 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8383 	return fops->pattern_template_destroy(dev, template, error);
8384 }
8385 
8386 /**
8387  * Create flow item template.
8388  *
8389  * @param[in] dev
8390  *   Pointer to the rte_eth_dev structure.
8391  * @param[in] attr
8392  *   Pointer to the action template attributes.
8393  * @param[in] actions
8394  *   Associated actions (list terminated by the END action).
8395  * @param[in] masks
8396  *   List of actions that marks which of the action's member is constant.
8397  * @param[out] error
8398  *   Pointer to error structure.
8399  *
8400  * @return
8401  *   0 on success, a negative errno value otherwise and rte_errno is set.
8402  */
8403 static struct rte_flow_actions_template *
8404 mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
8405 			const struct rte_flow_actions_template_attr *attr,
8406 			const struct rte_flow_action actions[],
8407 			const struct rte_flow_action masks[],
8408 			struct rte_flow_error *error)
8409 {
8410 	const struct mlx5_flow_driver_ops *fops;
8411 
8412 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8413 		rte_flow_error_set(error, ENOTSUP,
8414 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8415 				NULL,
8416 				"action create with incorrect steering mode");
8417 		return NULL;
8418 	}
8419 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8420 	return fops->actions_template_create(dev, attr, actions, masks, error);
8421 }
8422 
8423 /**
8424  * Destroy flow action template.
8425  *
8426  * @param[in] dev
8427  *   Pointer to the rte_eth_dev structure.
8428  * @param[in] template
8429  *   Pointer to the action template to be destroyed.
8430  * @param[out] error
8431  *   Pointer to error structure.
8432  *
8433  * @return
8434  *   0 on success, a negative errno value otherwise and rte_errno is set.
8435  */
8436 static int
8437 mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
8438 				   struct rte_flow_actions_template *template,
8439 				   struct rte_flow_error *error)
8440 {
8441 	const struct mlx5_flow_driver_ops *fops;
8442 
8443 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8444 		return rte_flow_error_set(error, ENOTSUP,
8445 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8446 				NULL,
8447 				"action destroy with incorrect steering mode");
8448 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8449 	return fops->actions_template_destroy(dev, template, error);
8450 }
8451 
8452 /**
8453  * Create flow table.
8454  *
8455  * @param[in] dev
8456  *   Pointer to the rte_eth_dev structure.
8457  * @param[in] attr
8458  *   Pointer to the table attributes.
8459  * @param[in] item_templates
8460  *   Item template array to be binded to the table.
8461  * @param[in] nb_item_templates
8462  *   Number of item template.
8463  * @param[in] action_templates
8464  *   Action template array to be binded to the table.
8465  * @param[in] nb_action_templates
8466  *   Number of action template.
8467  * @param[out] error
8468  *   Pointer to error structure.
8469  *
8470  * @return
8471  *    Table on success, NULL otherwise and rte_errno is set.
8472  */
8473 static struct rte_flow_template_table *
8474 mlx5_flow_table_create(struct rte_eth_dev *dev,
8475 		       const struct rte_flow_template_table_attr *attr,
8476 		       struct rte_flow_pattern_template *item_templates[],
8477 		       uint8_t nb_item_templates,
8478 		       struct rte_flow_actions_template *action_templates[],
8479 		       uint8_t nb_action_templates,
8480 		       struct rte_flow_error *error)
8481 {
8482 	const struct mlx5_flow_driver_ops *fops;
8483 
8484 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8485 		rte_flow_error_set(error, ENOTSUP,
8486 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8487 				NULL,
8488 				"table create with incorrect steering mode");
8489 		return NULL;
8490 	}
8491 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8492 	return fops->template_table_create(dev,
8493 					   attr,
8494 					   item_templates,
8495 					   nb_item_templates,
8496 					   action_templates,
8497 					   nb_action_templates,
8498 					   error);
8499 }
8500 
8501 /**
8502  * PMD destroy flow table.
8503  *
8504  * @param[in] dev
8505  *   Pointer to the rte_eth_dev structure.
8506  * @param[in] table
8507  *   Pointer to the table to be destroyed.
8508  * @param[out] error
8509  *   Pointer to error structure.
8510  *
8511  * @return
8512  *   0 on success, a negative errno value otherwise and rte_errno is set.
8513  */
8514 static int
8515 mlx5_flow_table_destroy(struct rte_eth_dev *dev,
8516 			struct rte_flow_template_table *table,
8517 			struct rte_flow_error *error)
8518 {
8519 	const struct mlx5_flow_driver_ops *fops;
8520 
8521 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8522 		return rte_flow_error_set(error, ENOTSUP,
8523 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8524 				NULL,
8525 				"table destroy with incorrect steering mode");
8526 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8527 	return fops->template_table_destroy(dev, table, error);
8528 }
8529 
8530 /**
8531  * Enqueue flow creation.
8532  *
8533  * @param[in] dev
8534  *   Pointer to the rte_eth_dev structure.
8535  * @param[in] queue_id
8536  *   The queue to create the flow.
8537  * @param[in] attr
8538  *   Pointer to the flow operation attributes.
8539  * @param[in] items
8540  *   Items with flow spec value.
8541  * @param[in] pattern_template_index
8542  *   The item pattern flow follows from the table.
8543  * @param[in] actions
8544  *   Action with flow spec value.
8545  * @param[in] action_template_index
8546  *   The action pattern flow follows from the table.
8547  * @param[in] user_data
8548  *   Pointer to the user_data.
8549  * @param[out] error
8550  *   Pointer to error structure.
8551  *
8552  * @return
8553  *    Flow pointer on success, NULL otherwise and rte_errno is set.
8554  */
8555 static struct rte_flow *
8556 mlx5_flow_async_flow_create(struct rte_eth_dev *dev,
8557 			    uint32_t queue_id,
8558 			    const struct rte_flow_op_attr *attr,
8559 			    struct rte_flow_template_table *table,
8560 			    const struct rte_flow_item items[],
8561 			    uint8_t pattern_template_index,
8562 			    const struct rte_flow_action actions[],
8563 			    uint8_t action_template_index,
8564 			    void *user_data,
8565 			    struct rte_flow_error *error)
8566 {
8567 	const struct mlx5_flow_driver_ops *fops;
8568 
8569 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8570 		rte_flow_error_set(error, ENOTSUP,
8571 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8572 				NULL,
8573 				"flow_q create with incorrect steering mode");
8574 		return NULL;
8575 	}
8576 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8577 	return fops->async_flow_create(dev, queue_id, attr, table,
8578 				       items, pattern_template_index,
8579 				       actions, action_template_index,
8580 				       user_data, error);
8581 }
8582 
8583 /**
8584  * Enqueue flow destruction.
8585  *
8586  * @param[in] dev
8587  *   Pointer to the rte_eth_dev structure.
8588  * @param[in] queue
8589  *   The queue to destroy the flow.
8590  * @param[in] attr
8591  *   Pointer to the flow operation attributes.
8592  * @param[in] flow
8593  *   Pointer to the flow to be destroyed.
8594  * @param[in] user_data
8595  *   Pointer to the user_data.
8596  * @param[out] error
8597  *   Pointer to error structure.
8598  *
8599  * @return
8600  *    0 on success, negative value otherwise and rte_errno is set.
8601  */
8602 static int
8603 mlx5_flow_async_flow_destroy(struct rte_eth_dev *dev,
8604 			     uint32_t queue,
8605 			     const struct rte_flow_op_attr *attr,
8606 			     struct rte_flow *flow,
8607 			     void *user_data,
8608 			     struct rte_flow_error *error)
8609 {
8610 	const struct mlx5_flow_driver_ops *fops;
8611 
8612 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8613 		return rte_flow_error_set(error, ENOTSUP,
8614 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8615 				NULL,
8616 				"flow_q destroy with incorrect steering mode");
8617 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8618 	return fops->async_flow_destroy(dev, queue, attr, flow,
8619 					user_data, error);
8620 }
8621 
8622 /**
8623  * Pull the enqueued flows.
8624  *
8625  * @param[in] dev
8626  *   Pointer to the rte_eth_dev structure.
8627  * @param[in] queue
8628  *   The queue to pull the result.
8629  * @param[in/out] res
8630  *   Array to save the results.
8631  * @param[in] n_res
8632  *   Available result with the array.
8633  * @param[out] error
8634  *   Pointer to error structure.
8635  *
8636  * @return
8637  *    Result number on success, negative value otherwise and rte_errno is set.
8638  */
8639 static int
8640 mlx5_flow_pull(struct rte_eth_dev *dev,
8641 	       uint32_t queue,
8642 	       struct rte_flow_op_result res[],
8643 	       uint16_t n_res,
8644 	       struct rte_flow_error *error)
8645 {
8646 	const struct mlx5_flow_driver_ops *fops;
8647 
8648 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8649 		return rte_flow_error_set(error, ENOTSUP,
8650 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8651 				NULL,
8652 				"flow_q pull with incorrect steering mode");
8653 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8654 	return fops->pull(dev, queue, res, n_res, error);
8655 }
8656 
8657 /**
8658  * Push the enqueued flows.
8659  *
8660  * @param[in] dev
8661  *   Pointer to the rte_eth_dev structure.
8662  * @param[in] queue
8663  *   The queue to push the flows.
8664  * @param[out] error
8665  *   Pointer to error structure.
8666  *
8667  * @return
8668  *    0 on success, negative value otherwise and rte_errno is set.
8669  */
8670 static int
8671 mlx5_flow_push(struct rte_eth_dev *dev,
8672 	       uint32_t queue,
8673 	       struct rte_flow_error *error)
8674 {
8675 	const struct mlx5_flow_driver_ops *fops;
8676 
8677 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8678 		return rte_flow_error_set(error, ENOTSUP,
8679 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8680 				NULL,
8681 				"flow_q push with incorrect steering mode");
8682 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8683 	return fops->push(dev, queue, error);
8684 }
8685 
8686 /**
8687  * Create shared action.
8688  *
8689  * @param[in] dev
8690  *   Pointer to the rte_eth_dev structure.
8691  * @param[in] queue
8692  *   Which queue to be used..
8693  * @param[in] attr
8694  *   Operation attribute.
8695  * @param[in] conf
8696  *   Indirect action configuration.
8697  * @param[in] action
8698  *   rte_flow action detail.
8699  * @param[in] user_data
8700  *   Pointer to the user_data.
8701  * @param[out] error
8702  *   Pointer to error structure.
8703  *
8704  * @return
8705  *   Action handle on success, NULL otherwise and rte_errno is set.
8706  */
8707 static struct rte_flow_action_handle *
8708 mlx5_flow_async_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
8709 				 const struct rte_flow_op_attr *attr,
8710 				 const struct rte_flow_indir_action_conf *conf,
8711 				 const struct rte_flow_action *action,
8712 				 void *user_data,
8713 				 struct rte_flow_error *error)
8714 {
8715 	const struct mlx5_flow_driver_ops *fops =
8716 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8717 
8718 	return fops->async_action_create(dev, queue, attr, conf, action,
8719 					 user_data, error);
8720 }
8721 
8722 /**
8723  * Update shared action.
8724  *
8725  * @param[in] dev
8726  *   Pointer to the rte_eth_dev structure.
8727  * @param[in] queue
8728  *   Which queue to be used..
8729  * @param[in] attr
8730  *   Operation attribute.
8731  * @param[in] handle
8732  *   Action handle to be updated.
8733  * @param[in] update
8734  *   Update value.
8735  * @param[in] user_data
8736  *   Pointer to the user_data.
8737  * @param[out] error
8738  *   Pointer to error structure.
8739  *
8740  * @return
8741  *   0 on success, negative value otherwise and rte_errno is set.
8742  */
8743 static int
8744 mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
8745 				     const struct rte_flow_op_attr *attr,
8746 				     struct rte_flow_action_handle *handle,
8747 				     const void *update,
8748 				     void *user_data,
8749 				     struct rte_flow_error *error)
8750 {
8751 	const struct mlx5_flow_driver_ops *fops =
8752 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8753 
8754 	return fops->async_action_update(dev, queue, attr, handle,
8755 					 update, user_data, error);
8756 }
8757 
8758 /**
8759  * Destroy shared action.
8760  *
8761  * @param[in] dev
8762  *   Pointer to the rte_eth_dev structure.
8763  * @param[in] queue
8764  *   Which queue to be used..
8765  * @param[in] attr
8766  *   Operation attribute.
8767  * @param[in] handle
8768  *   Action handle to be destroyed.
8769  * @param[in] user_data
8770  *   Pointer to the user_data.
8771  * @param[out] error
8772  *   Pointer to error structure.
8773  *
8774  * @return
8775  *   0 on success, negative value otherwise and rte_errno is set.
8776  */
8777 static int
8778 mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
8779 				      const struct rte_flow_op_attr *attr,
8780 				      struct rte_flow_action_handle *handle,
8781 				      void *user_data,
8782 				      struct rte_flow_error *error)
8783 {
8784 	const struct mlx5_flow_driver_ops *fops =
8785 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8786 
8787 	return fops->async_action_destroy(dev, queue, attr, handle,
8788 					  user_data, error);
8789 }
8790 
8791 /**
8792  * Allocate a new memory for the counter values wrapped by all the needed
8793  * management.
8794  *
8795  * @param[in] sh
8796  *   Pointer to mlx5_dev_ctx_shared object.
8797  *
8798  * @return
8799  *   0 on success, a negative errno value otherwise.
8800  */
8801 static int
8802 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
8803 {
8804 	struct mlx5_counter_stats_mem_mng *mem_mng;
8805 	volatile struct flow_counter_stats *raw_data;
8806 	int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
8807 	int size = (sizeof(struct flow_counter_stats) *
8808 			MLX5_COUNTERS_PER_POOL +
8809 			sizeof(struct mlx5_counter_stats_raw)) * raws_n +
8810 			sizeof(struct mlx5_counter_stats_mem_mng);
8811 	size_t pgsize = rte_mem_page_size();
8812 	uint8_t *mem;
8813 	int ret;
8814 	int i;
8815 
8816 	if (pgsize == (size_t)-1) {
8817 		DRV_LOG(ERR, "Failed to get mem page size");
8818 		rte_errno = ENOMEM;
8819 		return -ENOMEM;
8820 	}
8821 	mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
8822 	if (!mem) {
8823 		rte_errno = ENOMEM;
8824 		return -ENOMEM;
8825 	}
8826 	mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
8827 	size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
8828 	ret = mlx5_os_wrapped_mkey_create(sh->cdev->ctx, sh->cdev->pd,
8829 					  sh->cdev->pdn, mem, size,
8830 					  &mem_mng->wm);
8831 	if (ret) {
8832 		rte_errno = errno;
8833 		mlx5_free(mem);
8834 		return -rte_errno;
8835 	}
8836 	mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
8837 	raw_data = (volatile struct flow_counter_stats *)mem;
8838 	for (i = 0; i < raws_n; ++i) {
8839 		mem_mng->raws[i].mem_mng = mem_mng;
8840 		mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
8841 	}
8842 	for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
8843 		LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
8844 				 mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
8845 				 next);
8846 	LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
8847 	sh->cmng.mem_mng = mem_mng;
8848 	return 0;
8849 }
8850 
8851 /**
8852  * Set the statistic memory to the new counter pool.
8853  *
8854  * @param[in] sh
8855  *   Pointer to mlx5_dev_ctx_shared object.
8856  * @param[in] pool
8857  *   Pointer to the pool to set the statistic memory.
8858  *
8859  * @return
8860  *   0 on success, a negative errno value otherwise.
8861  */
8862 static int
8863 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
8864 			       struct mlx5_flow_counter_pool *pool)
8865 {
8866 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8867 	/* Resize statistic memory once used out. */
8868 	if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
8869 	    mlx5_flow_create_counter_stat_mem_mng(sh)) {
8870 		DRV_LOG(ERR, "Cannot resize counter stat mem.");
8871 		return -1;
8872 	}
8873 	rte_spinlock_lock(&pool->sl);
8874 	pool->raw = cmng->mem_mng->raws + pool->index %
8875 		    MLX5_CNT_CONTAINER_RESIZE;
8876 	rte_spinlock_unlock(&pool->sl);
8877 	pool->raw_hw = NULL;
8878 	return 0;
8879 }
8880 
8881 #define MLX5_POOL_QUERY_FREQ_US 1000000
8882 
8883 /**
8884  * Set the periodic procedure for triggering asynchronous batch queries for all
8885  * the counter pools.
8886  *
8887  * @param[in] sh
8888  *   Pointer to mlx5_dev_ctx_shared object.
8889  */
8890 void
8891 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
8892 {
8893 	uint32_t pools_n, us;
8894 
8895 	pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
8896 	us = MLX5_POOL_QUERY_FREQ_US / pools_n;
8897 	DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
8898 	if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
8899 		sh->cmng.query_thread_on = 0;
8900 		DRV_LOG(ERR, "Cannot reinitialize query alarm");
8901 	} else {
8902 		sh->cmng.query_thread_on = 1;
8903 	}
8904 }
8905 
8906 /**
8907  * The periodic procedure for triggering asynchronous batch queries for all the
8908  * counter pools. This function is probably called by the host thread.
8909  *
8910  * @param[in] arg
8911  *   The parameter for the alarm process.
8912  */
8913 void
8914 mlx5_flow_query_alarm(void *arg)
8915 {
8916 	struct mlx5_dev_ctx_shared *sh = arg;
8917 	int ret;
8918 	uint16_t pool_index = sh->cmng.pool_index;
8919 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8920 	struct mlx5_flow_counter_pool *pool;
8921 	uint16_t n_valid;
8922 
8923 	if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
8924 		goto set_alarm;
8925 	rte_spinlock_lock(&cmng->pool_update_sl);
8926 	pool = cmng->pools[pool_index];
8927 	n_valid = cmng->n_valid;
8928 	rte_spinlock_unlock(&cmng->pool_update_sl);
8929 	/* Set the statistic memory to the new created pool. */
8930 	if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool)))
8931 		goto set_alarm;
8932 	if (pool->raw_hw)
8933 		/* There is a pool query in progress. */
8934 		goto set_alarm;
8935 	pool->raw_hw =
8936 		LIST_FIRST(&sh->cmng.free_stat_raws);
8937 	if (!pool->raw_hw)
8938 		/* No free counter statistics raw memory. */
8939 		goto set_alarm;
8940 	/*
8941 	 * Identify the counters released between query trigger and query
8942 	 * handle more efficiently. The counter released in this gap period
8943 	 * should wait for a new round of query as the new arrived packets
8944 	 * will not be taken into account.
8945 	 */
8946 	pool->query_gen++;
8947 	ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
8948 					       MLX5_COUNTERS_PER_POOL,
8949 					       NULL, NULL,
8950 					       pool->raw_hw->mem_mng->wm.lkey,
8951 					       (void *)(uintptr_t)
8952 					       pool->raw_hw->data,
8953 					       sh->devx_comp,
8954 					       (uint64_t)(uintptr_t)pool);
8955 	if (ret) {
8956 		DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
8957 			" %d", pool->min_dcs->id);
8958 		pool->raw_hw = NULL;
8959 		goto set_alarm;
8960 	}
8961 	LIST_REMOVE(pool->raw_hw, next);
8962 	sh->cmng.pending_queries++;
8963 	pool_index++;
8964 	if (pool_index >= n_valid)
8965 		pool_index = 0;
8966 set_alarm:
8967 	sh->cmng.pool_index = pool_index;
8968 	mlx5_set_query_alarm(sh);
8969 }
8970 
8971 /**
8972  * Check and callback event for new aged flow in the counter pool
8973  *
8974  * @param[in] sh
8975  *   Pointer to mlx5_dev_ctx_shared object.
8976  * @param[in] pool
8977  *   Pointer to Current counter pool.
8978  */
8979 static void
8980 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
8981 		   struct mlx5_flow_counter_pool *pool)
8982 {
8983 	struct mlx5_priv *priv;
8984 	struct mlx5_flow_counter *cnt;
8985 	struct mlx5_age_info *age_info;
8986 	struct mlx5_age_param *age_param;
8987 	struct mlx5_counter_stats_raw *cur = pool->raw_hw;
8988 	struct mlx5_counter_stats_raw *prev = pool->raw;
8989 	const uint64_t curr_time = MLX5_CURR_TIME_SEC;
8990 	const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
8991 	uint16_t expected = AGE_CANDIDATE;
8992 	uint32_t i;
8993 
8994 	pool->time_of_last_age_check = curr_time;
8995 	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
8996 		cnt = MLX5_POOL_GET_CNT(pool, i);
8997 		age_param = MLX5_CNT_TO_AGE(cnt);
8998 		if (__atomic_load_n(&age_param->state,
8999 				    __ATOMIC_RELAXED) != AGE_CANDIDATE)
9000 			continue;
9001 		if (cur->data[i].hits != prev->data[i].hits) {
9002 			__atomic_store_n(&age_param->sec_since_last_hit, 0,
9003 					 __ATOMIC_RELAXED);
9004 			continue;
9005 		}
9006 		if (__atomic_add_fetch(&age_param->sec_since_last_hit,
9007 				       time_delta,
9008 				       __ATOMIC_RELAXED) <= age_param->timeout)
9009 			continue;
9010 		/**
9011 		 * Hold the lock first, or if between the
9012 		 * state AGE_TMOUT and tailq operation the
9013 		 * release happened, the release procedure
9014 		 * may delete a non-existent tailq node.
9015 		 */
9016 		priv = rte_eth_devices[age_param->port_id].data->dev_private;
9017 		age_info = GET_PORT_AGE_INFO(priv);
9018 		rte_spinlock_lock(&age_info->aged_sl);
9019 		if (__atomic_compare_exchange_n(&age_param->state, &expected,
9020 						AGE_TMOUT, false,
9021 						__ATOMIC_RELAXED,
9022 						__ATOMIC_RELAXED)) {
9023 			TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
9024 			MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
9025 		}
9026 		rte_spinlock_unlock(&age_info->aged_sl);
9027 	}
9028 	mlx5_age_event_prepare(sh);
9029 }
9030 
9031 /**
9032  * Handler for the HW respond about ready values from an asynchronous batch
9033  * query. This function is probably called by the host thread.
9034  *
9035  * @param[in] sh
9036  *   The pointer to the shared device context.
9037  * @param[in] async_id
9038  *   The Devx async ID.
9039  * @param[in] status
9040  *   The status of the completion.
9041  */
9042 void
9043 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
9044 				  uint64_t async_id, int status)
9045 {
9046 	struct mlx5_flow_counter_pool *pool =
9047 		(struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
9048 	struct mlx5_counter_stats_raw *raw_to_free;
9049 	uint8_t query_gen = pool->query_gen ^ 1;
9050 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
9051 	enum mlx5_counter_type cnt_type =
9052 		pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
9053 				MLX5_COUNTER_TYPE_ORIGIN;
9054 
9055 	if (unlikely(status)) {
9056 		raw_to_free = pool->raw_hw;
9057 	} else {
9058 		raw_to_free = pool->raw;
9059 		if (pool->is_aged)
9060 			mlx5_flow_aging_check(sh, pool);
9061 		rte_spinlock_lock(&pool->sl);
9062 		pool->raw = pool->raw_hw;
9063 		rte_spinlock_unlock(&pool->sl);
9064 		/* Be sure the new raw counters data is updated in memory. */
9065 		rte_io_wmb();
9066 		if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
9067 			rte_spinlock_lock(&cmng->csl[cnt_type]);
9068 			TAILQ_CONCAT(&cmng->counters[cnt_type],
9069 				     &pool->counters[query_gen], next);
9070 			rte_spinlock_unlock(&cmng->csl[cnt_type]);
9071 		}
9072 	}
9073 	LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
9074 	pool->raw_hw = NULL;
9075 	sh->cmng.pending_queries--;
9076 }
9077 
9078 static int
9079 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
9080 		    const struct flow_grp_info *grp_info,
9081 		    struct rte_flow_error *error)
9082 {
9083 	if (grp_info->transfer && grp_info->external &&
9084 	    grp_info->fdb_def_rule) {
9085 		if (group == UINT32_MAX)
9086 			return rte_flow_error_set
9087 						(error, EINVAL,
9088 						 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
9089 						 NULL,
9090 						 "group index not supported");
9091 		*table = group + 1;
9092 	} else {
9093 		*table = group;
9094 	}
9095 	DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
9096 	return 0;
9097 }
9098 
9099 /**
9100  * Translate the rte_flow group index to HW table value.
9101  *
9102  * If tunnel offload is disabled, all group ids converted to flow table
9103  * id using the standard method.
9104  * If tunnel offload is enabled, group id can be converted using the
9105  * standard or tunnel conversion method. Group conversion method
9106  * selection depends on flags in `grp_info` parameter:
9107  * - Internal (grp_info.external == 0) groups conversion uses the
9108  *   standard method.
9109  * - Group ids in JUMP action converted with the tunnel conversion.
9110  * - Group id in rule attribute conversion depends on a rule type and
9111  *   group id value:
9112  *   ** non zero group attributes converted with the tunnel method
9113  *   ** zero group attribute in non-tunnel rule is converted using the
9114  *      standard method - there's only one root table
9115  *   ** zero group attribute in steer tunnel rule is converted with the
9116  *      standard method - single root table
9117  *   ** zero group attribute in match tunnel rule is a special OvS
9118  *      case: that value is used for portability reasons. That group
9119  *      id is converted with the tunnel conversion method.
9120  *
9121  * @param[in] dev
9122  *   Port device
9123  * @param[in] tunnel
9124  *   PMD tunnel offload object
9125  * @param[in] group
9126  *   rte_flow group index value.
9127  * @param[out] table
9128  *   HW table value.
9129  * @param[in] grp_info
9130  *   flags used for conversion
9131  * @param[out] error
9132  *   Pointer to error structure.
9133  *
9134  * @return
9135  *   0 on success, a negative errno value otherwise and rte_errno is set.
9136  */
9137 int
9138 mlx5_flow_group_to_table(struct rte_eth_dev *dev,
9139 			 const struct mlx5_flow_tunnel *tunnel,
9140 			 uint32_t group, uint32_t *table,
9141 			 const struct flow_grp_info *grp_info,
9142 			 struct rte_flow_error *error)
9143 {
9144 	int ret;
9145 	bool standard_translation;
9146 
9147 	if (!grp_info->skip_scale && grp_info->external &&
9148 	    group < MLX5_MAX_TABLES_EXTERNAL)
9149 		group *= MLX5_FLOW_TABLE_FACTOR;
9150 	if (is_tunnel_offload_active(dev)) {
9151 		standard_translation = !grp_info->external ||
9152 					grp_info->std_tbl_fix;
9153 	} else {
9154 		standard_translation = true;
9155 	}
9156 	DRV_LOG(DEBUG,
9157 		"port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
9158 		dev->data->port_id, group, grp_info->transfer,
9159 		grp_info->external, grp_info->fdb_def_rule,
9160 		standard_translation ? "STANDARD" : "TUNNEL");
9161 	if (standard_translation)
9162 		ret = flow_group_to_table(dev->data->port_id, group, table,
9163 					  grp_info, error);
9164 	else
9165 		ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
9166 						      table, error);
9167 
9168 	return ret;
9169 }
9170 
9171 /**
9172  * Discover availability of metadata reg_c's.
9173  *
9174  * Iteratively use test flows to check availability.
9175  *
9176  * @param[in] dev
9177  *   Pointer to the Ethernet device structure.
9178  *
9179  * @return
9180  *   0 on success, a negative errno value otherwise and rte_errno is set.
9181  */
9182 int
9183 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
9184 {
9185 	struct mlx5_priv *priv = dev->data->dev_private;
9186 	enum modify_reg idx;
9187 	int n = 0;
9188 
9189 	/* reg_c[0] and reg_c[1] are reserved. */
9190 	priv->sh->flow_mreg_c[n++] = REG_C_0;
9191 	priv->sh->flow_mreg_c[n++] = REG_C_1;
9192 	/* Discover availability of other reg_c's. */
9193 	for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
9194 		struct rte_flow_attr attr = {
9195 			.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
9196 			.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
9197 			.ingress = 1,
9198 		};
9199 		struct rte_flow_item items[] = {
9200 			[0] = {
9201 				.type = RTE_FLOW_ITEM_TYPE_END,
9202 			},
9203 		};
9204 		struct rte_flow_action actions[] = {
9205 			[0] = {
9206 				.type = (enum rte_flow_action_type)
9207 					MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
9208 				.conf = &(struct mlx5_flow_action_copy_mreg){
9209 					.src = REG_C_1,
9210 					.dst = idx,
9211 				},
9212 			},
9213 			[1] = {
9214 				.type = RTE_FLOW_ACTION_TYPE_JUMP,
9215 				.conf = &(struct rte_flow_action_jump){
9216 					.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
9217 				},
9218 			},
9219 			[2] = {
9220 				.type = RTE_FLOW_ACTION_TYPE_END,
9221 			},
9222 		};
9223 		uint32_t flow_idx;
9224 		struct rte_flow *flow;
9225 		struct rte_flow_error error;
9226 
9227 		if (!priv->sh->config.dv_flow_en)
9228 			break;
9229 		/* Create internal flow, validation skips copy action. */
9230 		flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_GEN, &attr,
9231 					items, actions, false, &error);
9232 		flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
9233 				      flow_idx);
9234 		if (!flow)
9235 			continue;
9236 		priv->sh->flow_mreg_c[n++] = idx;
9237 		flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN, flow_idx);
9238 	}
9239 	for (; n < MLX5_MREG_C_NUM; ++n)
9240 		priv->sh->flow_mreg_c[n] = REG_NON;
9241 	priv->sh->metadata_regc_check_flag = 1;
9242 	return 0;
9243 }
9244 
9245 int
9246 save_dump_file(const uint8_t *data, uint32_t size,
9247 	uint32_t type, uint64_t id, void *arg, FILE *file)
9248 {
9249 	char line[BUF_SIZE];
9250 	uint32_t out = 0;
9251 	uint32_t k;
9252 	uint32_t actions_num;
9253 	struct rte_flow_query_count *count;
9254 
9255 	memset(line, 0, BUF_SIZE);
9256 	switch (type) {
9257 	case DR_DUMP_REC_TYPE_PMD_MODIFY_HDR:
9258 		actions_num = *(uint32_t *)(arg);
9259 		out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",%d,",
9260 				type, id, actions_num);
9261 		break;
9262 	case DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT:
9263 		out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",",
9264 				type, id);
9265 		break;
9266 	case DR_DUMP_REC_TYPE_PMD_COUNTER:
9267 		count = (struct rte_flow_query_count *)arg;
9268 		fprintf(file,
9269 			"%d,0x%" PRIx64 ",%" PRIu64 ",%" PRIu64 "\n",
9270 			type, id, count->hits, count->bytes);
9271 		return 0;
9272 	default:
9273 		return -1;
9274 	}
9275 
9276 	for (k = 0; k < size; k++) {
9277 		/* Make sure we do not overrun the line buffer length. */
9278 		if (out >= BUF_SIZE - 4) {
9279 			line[out] = '\0';
9280 			break;
9281 		}
9282 		out += snprintf(line + out, BUF_SIZE - out, "%02x",
9283 				(data[k]) & 0xff);
9284 	}
9285 	fprintf(file, "%s\n", line);
9286 	return 0;
9287 }
9288 
9289 int
9290 mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
9291 	struct rte_flow_query_count *count, struct rte_flow_error *error)
9292 {
9293 	struct rte_flow_action action[2];
9294 	enum mlx5_flow_drv_type ftype;
9295 	const struct mlx5_flow_driver_ops *fops;
9296 
9297 	if (!flow) {
9298 		return rte_flow_error_set(error, ENOENT,
9299 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9300 				NULL,
9301 				"invalid flow handle");
9302 	}
9303 	action[0].type = RTE_FLOW_ACTION_TYPE_COUNT;
9304 	action[1].type = RTE_FLOW_ACTION_TYPE_END;
9305 	if (flow->counter) {
9306 		memset(count, 0, sizeof(struct rte_flow_query_count));
9307 		ftype = (enum mlx5_flow_drv_type)(flow->drv_type);
9308 		MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN &&
9309 						ftype < MLX5_FLOW_TYPE_MAX);
9310 		fops = flow_get_drv_ops(ftype);
9311 		return fops->query(dev, flow, action, count, error);
9312 	}
9313 	return -1;
9314 }
9315 
9316 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9317 /**
9318  * Dump flow ipool data to file
9319  *
9320  * @param[in] dev
9321  *   The pointer to Ethernet device.
9322  * @param[in] file
9323  *   A pointer to a file for output.
9324  * @param[out] error
9325  *   Perform verbose error reporting if not NULL. PMDs initialize this
9326  *   structure in case of error only.
9327  * @return
9328  *   0 on success, a negative value otherwise.
9329  */
9330 int
9331 mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev,
9332 	struct rte_flow *flow, FILE *file,
9333 	struct rte_flow_error *error)
9334 {
9335 	struct mlx5_priv *priv = dev->data->dev_private;
9336 	struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
9337 	struct mlx5_flow_dv_encap_decap_resource *encap_decap;
9338 	uint32_t handle_idx;
9339 	struct mlx5_flow_handle *dh;
9340 	struct rte_flow_query_count count;
9341 	uint32_t actions_num;
9342 	const uint8_t *data;
9343 	size_t size;
9344 	uint64_t id;
9345 	uint32_t type;
9346 	void *action = NULL;
9347 
9348 	if (!flow) {
9349 		return rte_flow_error_set(error, ENOENT,
9350 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9351 				NULL,
9352 				"invalid flow handle");
9353 	}
9354 	handle_idx = flow->dev_handles;
9355 	/* query counter */
9356 	if (flow->counter &&
9357 	(!mlx5_counter_query(dev, flow->counter, false,
9358 	&count.hits, &count.bytes, &action)) && action) {
9359 		id = (uint64_t)(uintptr_t)action;
9360 		type = DR_DUMP_REC_TYPE_PMD_COUNTER;
9361 		save_dump_file(NULL, 0, type,
9362 			id, (void *)&count, file);
9363 	}
9364 
9365 	while (handle_idx) {
9366 		dh = mlx5_ipool_get(priv->sh->ipool
9367 				[MLX5_IPOOL_MLX5_FLOW], handle_idx);
9368 		if (!dh)
9369 			continue;
9370 		handle_idx = dh->next.next;
9371 
9372 		/* Get modify_hdr and encap_decap buf from ipools. */
9373 		encap_decap = NULL;
9374 		modify_hdr = dh->dvh.modify_hdr;
9375 
9376 		if (dh->dvh.rix_encap_decap) {
9377 			encap_decap = mlx5_ipool_get(priv->sh->ipool
9378 						[MLX5_IPOOL_DECAP_ENCAP],
9379 						dh->dvh.rix_encap_decap);
9380 		}
9381 		if (modify_hdr) {
9382 			data = (const uint8_t *)modify_hdr->actions;
9383 			size = (size_t)(modify_hdr->actions_num) * 8;
9384 			id = (uint64_t)(uintptr_t)modify_hdr->action;
9385 			actions_num = modify_hdr->actions_num;
9386 			type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
9387 			save_dump_file(data, size, type, id,
9388 						(void *)(&actions_num), file);
9389 		}
9390 		if (encap_decap) {
9391 			data = encap_decap->buf;
9392 			size = encap_decap->size;
9393 			id = (uint64_t)(uintptr_t)encap_decap->action;
9394 			type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
9395 			save_dump_file(data, size, type,
9396 						id, NULL, file);
9397 		}
9398 	}
9399 	return 0;
9400 }
9401 
9402 /**
9403  * Dump all flow's encap_decap/modify_hdr/counter data to file
9404  *
9405  * @param[in] dev
9406  *   The pointer to Ethernet device.
9407  * @param[in] file
9408  *   A pointer to a file for output.
9409  * @param[out] error
9410  *   Perform verbose error reporting if not NULL. PMDs initialize this
9411  *   structure in case of error only.
9412  * @return
9413  *   0 on success, a negative value otherwise.
9414  */
9415 static int
9416 mlx5_flow_dev_dump_sh_all(struct rte_eth_dev *dev,
9417 	FILE *file, struct rte_flow_error *error __rte_unused)
9418 {
9419 	struct mlx5_priv *priv = dev->data->dev_private;
9420 	struct mlx5_dev_ctx_shared *sh = priv->sh;
9421 	struct mlx5_hlist *h;
9422 	struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
9423 	struct mlx5_flow_dv_encap_decap_resource *encap_decap;
9424 	struct rte_flow_query_count count;
9425 	uint32_t actions_num;
9426 	const uint8_t *data;
9427 	size_t size;
9428 	uint64_t id;
9429 	uint32_t type;
9430 	uint32_t i;
9431 	uint32_t j;
9432 	struct mlx5_list_inconst *l_inconst;
9433 	struct mlx5_list_entry *e;
9434 	int lcore_index;
9435 	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
9436 	uint32_t max;
9437 	void *action;
9438 
9439 	/* encap_decap hlist is lcore_share, get global core cache. */
9440 	i = MLX5_LIST_GLOBAL;
9441 	h = sh->encaps_decaps;
9442 	if (h) {
9443 		for (j = 0; j <= h->mask; j++) {
9444 			l_inconst = &h->buckets[j].l;
9445 			if (!l_inconst || !l_inconst->cache[i])
9446 				continue;
9447 
9448 			e = LIST_FIRST(&l_inconst->cache[i]->h);
9449 			while (e) {
9450 				encap_decap =
9451 				(struct mlx5_flow_dv_encap_decap_resource *)e;
9452 				data = encap_decap->buf;
9453 				size = encap_decap->size;
9454 				id = (uint64_t)(uintptr_t)encap_decap->action;
9455 				type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
9456 				save_dump_file(data, size, type,
9457 					id, NULL, file);
9458 				e = LIST_NEXT(e, next);
9459 			}
9460 		}
9461 	}
9462 
9463 	/* get modify_hdr */
9464 	h = sh->modify_cmds;
9465 	if (h) {
9466 		lcore_index = rte_lcore_index(rte_lcore_id());
9467 		if (unlikely(lcore_index == -1)) {
9468 			lcore_index = MLX5_LIST_NLCORE;
9469 			rte_spinlock_lock(&h->l_const.lcore_lock);
9470 		}
9471 		i = lcore_index;
9472 
9473 		for (j = 0; j <= h->mask; j++) {
9474 			l_inconst = &h->buckets[j].l;
9475 			if (!l_inconst || !l_inconst->cache[i])
9476 				continue;
9477 
9478 			e = LIST_FIRST(&l_inconst->cache[i]->h);
9479 			while (e) {
9480 				modify_hdr =
9481 				(struct mlx5_flow_dv_modify_hdr_resource *)e;
9482 				data = (const uint8_t *)modify_hdr->actions;
9483 				size = (size_t)(modify_hdr->actions_num) * 8;
9484 				actions_num = modify_hdr->actions_num;
9485 				id = (uint64_t)(uintptr_t)modify_hdr->action;
9486 				type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
9487 				save_dump_file(data, size, type, id,
9488 						(void *)(&actions_num), file);
9489 				e = LIST_NEXT(e, next);
9490 			}
9491 		}
9492 
9493 		if (unlikely(lcore_index == MLX5_LIST_NLCORE))
9494 			rte_spinlock_unlock(&h->l_const.lcore_lock);
9495 	}
9496 
9497 	/* get counter */
9498 	MLX5_ASSERT(cmng->n_valid <= cmng->n);
9499 	max = MLX5_COUNTERS_PER_POOL * cmng->n_valid;
9500 	for (j = 1; j <= max; j++) {
9501 		action = NULL;
9502 		if ((!mlx5_counter_query(dev, j, false, &count.hits,
9503 		&count.bytes, &action)) && action) {
9504 			id = (uint64_t)(uintptr_t)action;
9505 			type = DR_DUMP_REC_TYPE_PMD_COUNTER;
9506 			save_dump_file(NULL, 0, type,
9507 					id, (void *)&count, file);
9508 		}
9509 	}
9510 	return 0;
9511 }
9512 #endif
9513 
9514 /**
9515  * Dump flow raw hw data to file
9516  *
9517  * @param[in] dev
9518  *    The pointer to Ethernet device.
9519  * @param[in] file
9520  *   A pointer to a file for output.
9521  * @param[out] error
9522  *   Perform verbose error reporting if not NULL. PMDs initialize this
9523  *   structure in case of error only.
9524  * @return
9525  *   0 on success, a negative value otherwise.
9526  */
9527 int
9528 mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow_idx,
9529 		   FILE *file,
9530 		   struct rte_flow_error *error __rte_unused)
9531 {
9532 	struct mlx5_priv *priv = dev->data->dev_private;
9533 	struct mlx5_dev_ctx_shared *sh = priv->sh;
9534 	uint32_t handle_idx;
9535 	int ret;
9536 	struct mlx5_flow_handle *dh;
9537 	struct rte_flow *flow;
9538 
9539 	if (!sh->config.dv_flow_en) {
9540 		if (fputs("device dv flow disabled\n", file) <= 0)
9541 			return -errno;
9542 		return -ENOTSUP;
9543 	}
9544 
9545 	/* dump all */
9546 	if (!flow_idx) {
9547 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9548 		if (mlx5_flow_dev_dump_sh_all(dev, file, error))
9549 			return -EINVAL;
9550 #endif
9551 		return mlx5_devx_cmd_flow_dump(sh->fdb_domain,
9552 					sh->rx_domain,
9553 					sh->tx_domain, file);
9554 	}
9555 	/* dump one */
9556 	flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
9557 			(uintptr_t)(void *)flow_idx);
9558 	if (!flow)
9559 		return -EINVAL;
9560 
9561 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9562 	mlx5_flow_dev_dump_ipool(dev, flow, file, error);
9563 #endif
9564 	handle_idx = flow->dev_handles;
9565 	while (handle_idx) {
9566 		dh = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
9567 				handle_idx);
9568 		if (!dh)
9569 			return -ENOENT;
9570 		if (dh->drv_flow) {
9571 			ret = mlx5_devx_cmd_flow_single_dump(dh->drv_flow,
9572 					file);
9573 			if (ret)
9574 				return -ENOENT;
9575 		}
9576 		handle_idx = dh->next.next;
9577 	}
9578 	return 0;
9579 }
9580 
9581 /**
9582  * Get aged-out flows.
9583  *
9584  * @param[in] dev
9585  *   Pointer to the Ethernet device structure.
9586  * @param[in] context
9587  *   The address of an array of pointers to the aged-out flows contexts.
9588  * @param[in] nb_countexts
9589  *   The length of context array pointers.
9590  * @param[out] error
9591  *   Perform verbose error reporting if not NULL. Initialized in case of
9592  *   error only.
9593  *
9594  * @return
9595  *   how many contexts get in success, otherwise negative errno value.
9596  *   if nb_contexts is 0, return the amount of all aged contexts.
9597  *   if nb_contexts is not 0 , return the amount of aged flows reported
9598  *   in the context array.
9599  */
9600 int
9601 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
9602 			uint32_t nb_contexts, struct rte_flow_error *error)
9603 {
9604 	const struct mlx5_flow_driver_ops *fops;
9605 	struct rte_flow_attr attr = { .transfer = 0 };
9606 
9607 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
9608 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
9609 		return fops->get_aged_flows(dev, contexts, nb_contexts,
9610 						    error);
9611 	}
9612 	DRV_LOG(ERR,
9613 		"port %u get aged flows is not supported.",
9614 		 dev->data->port_id);
9615 	return -ENOTSUP;
9616 }
9617 
9618 /* Wrapper for driver action_validate op callback */
9619 static int
9620 flow_drv_action_validate(struct rte_eth_dev *dev,
9621 			 const struct rte_flow_indir_action_conf *conf,
9622 			 const struct rte_flow_action *action,
9623 			 const struct mlx5_flow_driver_ops *fops,
9624 			 struct rte_flow_error *error)
9625 {
9626 	static const char err_msg[] = "indirect action validation unsupported";
9627 
9628 	if (!fops->action_validate) {
9629 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9630 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9631 				   NULL, err_msg);
9632 		return -rte_errno;
9633 	}
9634 	return fops->action_validate(dev, conf, action, error);
9635 }
9636 
9637 /**
9638  * Destroys the shared action by handle.
9639  *
9640  * @param dev
9641  *   Pointer to Ethernet device structure.
9642  * @param[in] handle
9643  *   Handle for the indirect action object to be destroyed.
9644  * @param[out] error
9645  *   Perform verbose error reporting if not NULL. PMDs initialize this
9646  *   structure in case of error only.
9647  *
9648  * @return
9649  *   0 on success, a negative errno value otherwise and rte_errno is set.
9650  *
9651  * @note: wrapper for driver action_create op callback.
9652  */
9653 static int
9654 mlx5_action_handle_destroy(struct rte_eth_dev *dev,
9655 			   struct rte_flow_action_handle *handle,
9656 			   struct rte_flow_error *error)
9657 {
9658 	static const char err_msg[] = "indirect action destruction unsupported";
9659 	struct rte_flow_attr attr = { .transfer = 0 };
9660 	const struct mlx5_flow_driver_ops *fops =
9661 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9662 
9663 	if (!fops->action_destroy) {
9664 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9665 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9666 				   NULL, err_msg);
9667 		return -rte_errno;
9668 	}
9669 	return fops->action_destroy(dev, handle, error);
9670 }
9671 
9672 /* Wrapper for driver action_destroy op callback */
9673 static int
9674 flow_drv_action_update(struct rte_eth_dev *dev,
9675 		       struct rte_flow_action_handle *handle,
9676 		       const void *update,
9677 		       const struct mlx5_flow_driver_ops *fops,
9678 		       struct rte_flow_error *error)
9679 {
9680 	static const char err_msg[] = "indirect action update unsupported";
9681 
9682 	if (!fops->action_update) {
9683 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9684 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9685 				   NULL, err_msg);
9686 		return -rte_errno;
9687 	}
9688 	return fops->action_update(dev, handle, update, error);
9689 }
9690 
9691 /* Wrapper for driver action_destroy op callback */
9692 static int
9693 flow_drv_action_query(struct rte_eth_dev *dev,
9694 		      const struct rte_flow_action_handle *handle,
9695 		      void *data,
9696 		      const struct mlx5_flow_driver_ops *fops,
9697 		      struct rte_flow_error *error)
9698 {
9699 	static const char err_msg[] = "indirect action query unsupported";
9700 
9701 	if (!fops->action_query) {
9702 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9703 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9704 				   NULL, err_msg);
9705 		return -rte_errno;
9706 	}
9707 	return fops->action_query(dev, handle, data, error);
9708 }
9709 
9710 /**
9711  * Create indirect action for reuse in multiple flow rules.
9712  *
9713  * @param dev
9714  *   Pointer to Ethernet device structure.
9715  * @param conf
9716  *   Pointer to indirect action object configuration.
9717  * @param[in] action
9718  *   Action configuration for indirect action object creation.
9719  * @param[out] error
9720  *   Perform verbose error reporting if not NULL. PMDs initialize this
9721  *   structure in case of error only.
9722  * @return
9723  *   A valid handle in case of success, NULL otherwise and rte_errno is set.
9724  */
9725 static struct rte_flow_action_handle *
9726 mlx5_action_handle_create(struct rte_eth_dev *dev,
9727 			  const struct rte_flow_indir_action_conf *conf,
9728 			  const struct rte_flow_action *action,
9729 			  struct rte_flow_error *error)
9730 {
9731 	static const char err_msg[] = "indirect action creation unsupported";
9732 	struct rte_flow_attr attr = { .transfer = 0 };
9733 	const struct mlx5_flow_driver_ops *fops =
9734 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9735 
9736 	if (flow_drv_action_validate(dev, conf, action, fops, error))
9737 		return NULL;
9738 	if (!fops->action_create) {
9739 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9740 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9741 				   NULL, err_msg);
9742 		return NULL;
9743 	}
9744 	return fops->action_create(dev, conf, action, error);
9745 }
9746 
9747 /**
9748  * Updates inplace the indirect action configuration pointed by *handle*
9749  * with the configuration provided as *update* argument.
9750  * The update of the indirect action configuration effects all flow rules
9751  * reusing the action via handle.
9752  *
9753  * @param dev
9754  *   Pointer to Ethernet device structure.
9755  * @param[in] handle
9756  *   Handle for the indirect action to be updated.
9757  * @param[in] update
9758  *   Action specification used to modify the action pointed by handle.
9759  *   *update* could be of same type with the action pointed by the *handle*
9760  *   handle argument, or some other structures like a wrapper, depending on
9761  *   the indirect action type.
9762  * @param[out] error
9763  *   Perform verbose error reporting if not NULL. PMDs initialize this
9764  *   structure in case of error only.
9765  *
9766  * @return
9767  *   0 on success, a negative errno value otherwise and rte_errno is set.
9768  */
9769 static int
9770 mlx5_action_handle_update(struct rte_eth_dev *dev,
9771 		struct rte_flow_action_handle *handle,
9772 		const void *update,
9773 		struct rte_flow_error *error)
9774 {
9775 	struct rte_flow_attr attr = { .transfer = 0 };
9776 	const struct mlx5_flow_driver_ops *fops =
9777 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9778 	int ret;
9779 
9780 	ret = flow_drv_action_validate(dev, NULL,
9781 			(const struct rte_flow_action *)update, fops, error);
9782 	if (ret)
9783 		return ret;
9784 	return flow_drv_action_update(dev, handle, update, fops,
9785 				      error);
9786 }
9787 
9788 /**
9789  * Query the indirect action by handle.
9790  *
9791  * This function allows retrieving action-specific data such as counters.
9792  * Data is gathered by special action which may be present/referenced in
9793  * more than one flow rule definition.
9794  *
9795  * see @RTE_FLOW_ACTION_TYPE_COUNT
9796  *
9797  * @param dev
9798  *   Pointer to Ethernet device structure.
9799  * @param[in] handle
9800  *   Handle for the indirect action to query.
9801  * @param[in, out] data
9802  *   Pointer to storage for the associated query data type.
9803  * @param[out] error
9804  *   Perform verbose error reporting if not NULL. PMDs initialize this
9805  *   structure in case of error only.
9806  *
9807  * @return
9808  *   0 on success, a negative errno value otherwise and rte_errno is set.
9809  */
9810 static int
9811 mlx5_action_handle_query(struct rte_eth_dev *dev,
9812 			 const struct rte_flow_action_handle *handle,
9813 			 void *data,
9814 			 struct rte_flow_error *error)
9815 {
9816 	struct rte_flow_attr attr = { .transfer = 0 };
9817 	const struct mlx5_flow_driver_ops *fops =
9818 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9819 
9820 	return flow_drv_action_query(dev, handle, data, fops, error);
9821 }
9822 
9823 /**
9824  * Destroy all indirect actions (shared RSS).
9825  *
9826  * @param dev
9827  *   Pointer to Ethernet device.
9828  *
9829  * @return
9830  *   0 on success, a negative errno value otherwise and rte_errno is set.
9831  */
9832 int
9833 mlx5_action_handle_flush(struct rte_eth_dev *dev)
9834 {
9835 	struct rte_flow_error error;
9836 	struct mlx5_priv *priv = dev->data->dev_private;
9837 	struct mlx5_shared_action_rss *shared_rss;
9838 	int ret = 0;
9839 	uint32_t idx;
9840 
9841 	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
9842 		      priv->rss_shared_actions, idx, shared_rss, next) {
9843 		ret |= mlx5_action_handle_destroy(dev,
9844 		       (struct rte_flow_action_handle *)(uintptr_t)idx, &error);
9845 	}
9846 	return ret;
9847 }
9848 
9849 /**
9850  * Validate existing indirect actions against current device configuration
9851  * and attach them to device resources.
9852  *
9853  * @param dev
9854  *   Pointer to Ethernet device.
9855  *
9856  * @return
9857  *   0 on success, a negative errno value otherwise and rte_errno is set.
9858  */
9859 int
9860 mlx5_action_handle_attach(struct rte_eth_dev *dev)
9861 {
9862 	struct mlx5_priv *priv = dev->data->dev_private;
9863 	int ret = 0;
9864 	struct mlx5_ind_table_obj *ind_tbl, *ind_tbl_last;
9865 
9866 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9867 		const char *message;
9868 		uint32_t queue_idx;
9869 
9870 		ret = mlx5_validate_rss_queues(dev, ind_tbl->queues,
9871 					       ind_tbl->queues_n,
9872 					       &message, &queue_idx);
9873 		if (ret != 0) {
9874 			DRV_LOG(ERR, "Port %u cannot use queue %u in RSS: %s",
9875 				dev->data->port_id, ind_tbl->queues[queue_idx],
9876 				message);
9877 			break;
9878 		}
9879 	}
9880 	if (ret != 0)
9881 		return ret;
9882 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9883 		ret = mlx5_ind_table_obj_attach(dev, ind_tbl);
9884 		if (ret != 0) {
9885 			DRV_LOG(ERR, "Port %u could not attach "
9886 				"indirection table obj %p",
9887 				dev->data->port_id, (void *)ind_tbl);
9888 			goto error;
9889 		}
9890 	}
9891 
9892 	return 0;
9893 error:
9894 	ind_tbl_last = ind_tbl;
9895 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9896 		if (ind_tbl == ind_tbl_last)
9897 			break;
9898 		if (mlx5_ind_table_obj_detach(dev, ind_tbl) != 0)
9899 			DRV_LOG(CRIT, "Port %u could not detach "
9900 				"indirection table obj %p on rollback",
9901 				dev->data->port_id, (void *)ind_tbl);
9902 	}
9903 	return ret;
9904 }
9905 
9906 /**
9907  * Detach indirect actions of the device from its resources.
9908  *
9909  * @param dev
9910  *   Pointer to Ethernet device.
9911  *
9912  * @return
9913  *   0 on success, a negative errno value otherwise and rte_errno is set.
9914  */
9915 int
9916 mlx5_action_handle_detach(struct rte_eth_dev *dev)
9917 {
9918 	struct mlx5_priv *priv = dev->data->dev_private;
9919 	int ret = 0;
9920 	struct mlx5_ind_table_obj *ind_tbl, *ind_tbl_last;
9921 
9922 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9923 		ret = mlx5_ind_table_obj_detach(dev, ind_tbl);
9924 		if (ret != 0) {
9925 			DRV_LOG(ERR, "Port %u could not detach "
9926 				"indirection table obj %p",
9927 				dev->data->port_id, (void *)ind_tbl);
9928 			goto error;
9929 		}
9930 	}
9931 	return 0;
9932 error:
9933 	ind_tbl_last = ind_tbl;
9934 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9935 		if (ind_tbl == ind_tbl_last)
9936 			break;
9937 		if (mlx5_ind_table_obj_attach(dev, ind_tbl) != 0)
9938 			DRV_LOG(CRIT, "Port %u could not attach "
9939 				"indirection table obj %p on rollback",
9940 				dev->data->port_id, (void *)ind_tbl);
9941 	}
9942 	return ret;
9943 }
9944 
9945 #ifndef HAVE_MLX5DV_DR
9946 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
9947 #else
9948 #define MLX5_DOMAIN_SYNC_FLOW \
9949 	(MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
9950 #endif
9951 
9952 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
9953 {
9954 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
9955 	const struct mlx5_flow_driver_ops *fops;
9956 	int ret;
9957 	struct rte_flow_attr attr = { .transfer = 0 };
9958 
9959 	fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9960 	ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
9961 	if (ret > 0)
9962 		ret = -ret;
9963 	return ret;
9964 }
9965 
9966 const struct mlx5_flow_tunnel *
9967 mlx5_get_tof(const struct rte_flow_item *item,
9968 	     const struct rte_flow_action *action,
9969 	     enum mlx5_tof_rule_type *rule_type)
9970 {
9971 	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
9972 		if (item->type == (typeof(item->type))
9973 				  MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL) {
9974 			*rule_type = MLX5_TUNNEL_OFFLOAD_MATCH_RULE;
9975 			return flow_items_to_tunnel(item);
9976 		}
9977 	}
9978 	for (; action->conf != RTE_FLOW_ACTION_TYPE_END; action++) {
9979 		if (action->type == (typeof(action->type))
9980 				    MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET) {
9981 			*rule_type = MLX5_TUNNEL_OFFLOAD_SET_RULE;
9982 			return flow_actions_to_tunnel(action);
9983 		}
9984 	}
9985 	return NULL;
9986 }
9987 
9988 /**
9989  * tunnel offload functionality is defined for DV environment only
9990  */
9991 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9992 __extension__
9993 union tunnel_offload_mark {
9994 	uint32_t val;
9995 	struct {
9996 		uint32_t app_reserve:8;
9997 		uint32_t table_id:15;
9998 		uint32_t transfer:1;
9999 		uint32_t _unused_:8;
10000 	};
10001 };
10002 
10003 static bool
10004 mlx5_access_tunnel_offload_db
10005 	(struct rte_eth_dev *dev,
10006 	 bool (*match)(struct rte_eth_dev *,
10007 		       struct mlx5_flow_tunnel *, const void *),
10008 	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
10009 	 void (*miss)(struct rte_eth_dev *, void *),
10010 	 void *ctx, bool lock_op);
10011 
10012 static int
10013 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
10014 			     struct rte_flow *flow,
10015 			     const struct rte_flow_attr *attr,
10016 			     const struct rte_flow_action *app_actions,
10017 			     uint32_t flow_idx,
10018 			     const struct mlx5_flow_tunnel *tunnel,
10019 			     struct tunnel_default_miss_ctx *ctx,
10020 			     struct rte_flow_error *error)
10021 {
10022 	struct mlx5_priv *priv = dev->data->dev_private;
10023 	struct mlx5_flow *dev_flow;
10024 	struct rte_flow_attr miss_attr = *attr;
10025 	const struct rte_flow_item miss_items[2] = {
10026 		{
10027 			.type = RTE_FLOW_ITEM_TYPE_ETH,
10028 			.spec = NULL,
10029 			.last = NULL,
10030 			.mask = NULL
10031 		},
10032 		{
10033 			.type = RTE_FLOW_ITEM_TYPE_END,
10034 			.spec = NULL,
10035 			.last = NULL,
10036 			.mask = NULL
10037 		}
10038 	};
10039 	union tunnel_offload_mark mark_id;
10040 	struct rte_flow_action_mark miss_mark;
10041 	struct rte_flow_action miss_actions[3] = {
10042 		[0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
10043 		[2] = { .type = RTE_FLOW_ACTION_TYPE_END,  .conf = NULL }
10044 	};
10045 	const struct rte_flow_action_jump *jump_data;
10046 	uint32_t i, flow_table = 0; /* prevent compilation warning */
10047 	struct flow_grp_info grp_info = {
10048 		.external = 1,
10049 		.transfer = attr->transfer,
10050 		.fdb_def_rule = !!priv->fdb_def_rule,
10051 		.std_tbl_fix = 0,
10052 	};
10053 	int ret;
10054 
10055 	if (!attr->transfer) {
10056 		uint32_t q_size;
10057 
10058 		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
10059 		q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
10060 		ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
10061 					 0, SOCKET_ID_ANY);
10062 		if (!ctx->queue)
10063 			return rte_flow_error_set
10064 				(error, ENOMEM,
10065 				RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10066 				NULL, "invalid default miss RSS");
10067 		ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
10068 		ctx->action_rss.level = 0,
10069 		ctx->action_rss.types = priv->rss_conf.rss_hf,
10070 		ctx->action_rss.key_len = priv->rss_conf.rss_key_len,
10071 		ctx->action_rss.queue_num = priv->reta_idx_n,
10072 		ctx->action_rss.key = priv->rss_conf.rss_key,
10073 		ctx->action_rss.queue = ctx->queue;
10074 		if (!priv->reta_idx_n || !priv->rxqs_n)
10075 			return rte_flow_error_set
10076 				(error, EINVAL,
10077 				RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10078 				NULL, "invalid port configuration");
10079 		if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
10080 			ctx->action_rss.types = 0;
10081 		for (i = 0; i != priv->reta_idx_n; ++i)
10082 			ctx->queue[i] = (*priv->reta_idx)[i];
10083 	} else {
10084 		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
10085 		ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
10086 	}
10087 	miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
10088 	for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
10089 	jump_data = app_actions->conf;
10090 	miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
10091 	miss_attr.group = jump_data->group;
10092 	ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
10093 				       &flow_table, &grp_info, error);
10094 	if (ret)
10095 		return rte_flow_error_set(error, EINVAL,
10096 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10097 					  NULL, "invalid tunnel id");
10098 	mark_id.app_reserve = 0;
10099 	mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
10100 	mark_id.transfer = !!attr->transfer;
10101 	mark_id._unused_ = 0;
10102 	miss_mark.id = mark_id.val;
10103 	dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
10104 				    miss_items, miss_actions, flow_idx, error);
10105 	if (!dev_flow)
10106 		return -rte_errno;
10107 	dev_flow->flow = flow;
10108 	dev_flow->external = true;
10109 	dev_flow->tunnel = tunnel;
10110 	dev_flow->tof_type = MLX5_TUNNEL_OFFLOAD_MISS_RULE;
10111 	/* Subflow object was created, we must include one in the list. */
10112 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
10113 		      dev_flow->handle, next);
10114 	DRV_LOG(DEBUG,
10115 		"port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
10116 		dev->data->port_id, tunnel->app_tunnel.type,
10117 		tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
10118 	ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
10119 				  miss_actions, error);
10120 	if (!ret)
10121 		ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
10122 						  error);
10123 
10124 	return ret;
10125 }
10126 
10127 static const struct mlx5_flow_tbl_data_entry  *
10128 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
10129 {
10130 	struct mlx5_priv *priv = dev->data->dev_private;
10131 	struct mlx5_dev_ctx_shared *sh = priv->sh;
10132 	struct mlx5_list_entry *he;
10133 	union tunnel_offload_mark mbits = { .val = mark };
10134 	union mlx5_flow_tbl_key table_key = {
10135 		{
10136 			.level = tunnel_id_to_flow_tbl(mbits.table_id),
10137 			.id = 0,
10138 			.reserved = 0,
10139 			.dummy = 0,
10140 			.is_fdb = !!mbits.transfer,
10141 			.is_egress = 0,
10142 		}
10143 	};
10144 	struct mlx5_flow_cb_ctx ctx = {
10145 		.data = &table_key.v64,
10146 	};
10147 
10148 	he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, &ctx);
10149 	return he ?
10150 	       container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL;
10151 }
10152 
10153 static void
10154 mlx5_flow_tunnel_grp2tbl_remove_cb(void *tool_ctx,
10155 				   struct mlx5_list_entry *entry)
10156 {
10157 	struct mlx5_dev_ctx_shared *sh = tool_ctx;
10158 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10159 
10160 	mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10161 			tunnel_flow_tbl_to_id(tte->flow_table));
10162 	mlx5_free(tte);
10163 }
10164 
10165 static int
10166 mlx5_flow_tunnel_grp2tbl_match_cb(void *tool_ctx __rte_unused,
10167 				  struct mlx5_list_entry *entry, void *cb_ctx)
10168 {
10169 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
10170 	union tunnel_tbl_key tbl = {
10171 		.val = *(uint64_t *)(ctx->data),
10172 	};
10173 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10174 
10175 	return tbl.tunnel_id != tte->tunnel_id || tbl.group != tte->group;
10176 }
10177 
10178 static struct mlx5_list_entry *
10179 mlx5_flow_tunnel_grp2tbl_create_cb(void *tool_ctx, void *cb_ctx)
10180 {
10181 	struct mlx5_dev_ctx_shared *sh = tool_ctx;
10182 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
10183 	struct tunnel_tbl_entry *tte;
10184 	union tunnel_tbl_key tbl = {
10185 		.val = *(uint64_t *)(ctx->data),
10186 	};
10187 
10188 	tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
10189 			  sizeof(*tte), 0,
10190 			  SOCKET_ID_ANY);
10191 	if (!tte)
10192 		goto err;
10193 	mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10194 			  &tte->flow_table);
10195 	if (tte->flow_table >= MLX5_MAX_TABLES) {
10196 		DRV_LOG(ERR, "Tunnel TBL ID %d exceed max limit.",
10197 			tte->flow_table);
10198 		mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10199 				tte->flow_table);
10200 		goto err;
10201 	} else if (!tte->flow_table) {
10202 		goto err;
10203 	}
10204 	tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
10205 	tte->tunnel_id = tbl.tunnel_id;
10206 	tte->group = tbl.group;
10207 	return &tte->hash;
10208 err:
10209 	if (tte)
10210 		mlx5_free(tte);
10211 	return NULL;
10212 }
10213 
10214 static struct mlx5_list_entry *
10215 mlx5_flow_tunnel_grp2tbl_clone_cb(void *tool_ctx __rte_unused,
10216 				  struct mlx5_list_entry *oentry,
10217 				  void *cb_ctx __rte_unused)
10218 {
10219 	struct tunnel_tbl_entry *tte = mlx5_malloc(MLX5_MEM_SYS, sizeof(*tte),
10220 						   0, SOCKET_ID_ANY);
10221 
10222 	if (!tte)
10223 		return NULL;
10224 	memcpy(tte, oentry, sizeof(*tte));
10225 	return &tte->hash;
10226 }
10227 
10228 static void
10229 mlx5_flow_tunnel_grp2tbl_clone_free_cb(void *tool_ctx __rte_unused,
10230 				       struct mlx5_list_entry *entry)
10231 {
10232 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10233 
10234 	mlx5_free(tte);
10235 }
10236 
10237 static uint32_t
10238 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
10239 				const struct mlx5_flow_tunnel *tunnel,
10240 				uint32_t group, uint32_t *table,
10241 				struct rte_flow_error *error)
10242 {
10243 	struct mlx5_list_entry *he;
10244 	struct tunnel_tbl_entry *tte;
10245 	union tunnel_tbl_key key = {
10246 		.tunnel_id = tunnel ? tunnel->tunnel_id : 0,
10247 		.group = group
10248 	};
10249 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10250 	struct mlx5_hlist *group_hash;
10251 	struct mlx5_flow_cb_ctx ctx = {
10252 		.data = &key.val,
10253 	};
10254 
10255 	group_hash = tunnel ? tunnel->groups : thub->groups;
10256 	he = mlx5_hlist_register(group_hash, key.val, &ctx);
10257 	if (!he)
10258 		return rte_flow_error_set(error, EINVAL,
10259 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
10260 					  NULL,
10261 					  "tunnel group index not supported");
10262 	tte = container_of(he, typeof(*tte), hash);
10263 	*table = tte->flow_table;
10264 	DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
10265 		dev->data->port_id, key.tunnel_id, group, *table);
10266 	return 0;
10267 }
10268 
10269 static void
10270 mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
10271 		      struct mlx5_flow_tunnel *tunnel)
10272 {
10273 	struct mlx5_priv *priv = dev->data->dev_private;
10274 	struct mlx5_indexed_pool *ipool;
10275 
10276 	DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
10277 		dev->data->port_id, tunnel->tunnel_id);
10278 	LIST_REMOVE(tunnel, chain);
10279 	mlx5_hlist_destroy(tunnel->groups);
10280 	ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
10281 	mlx5_ipool_free(ipool, tunnel->tunnel_id);
10282 }
10283 
10284 static bool
10285 mlx5_access_tunnel_offload_db
10286 	(struct rte_eth_dev *dev,
10287 	 bool (*match)(struct rte_eth_dev *,
10288 		       struct mlx5_flow_tunnel *, const void *),
10289 	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
10290 	 void (*miss)(struct rte_eth_dev *, void *),
10291 	 void *ctx, bool lock_op)
10292 {
10293 	bool verdict = false;
10294 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10295 	struct mlx5_flow_tunnel *tunnel;
10296 
10297 	rte_spinlock_lock(&thub->sl);
10298 	LIST_FOREACH(tunnel, &thub->tunnels, chain) {
10299 		verdict = match(dev, tunnel, (const void *)ctx);
10300 		if (verdict)
10301 			break;
10302 	}
10303 	if (!lock_op)
10304 		rte_spinlock_unlock(&thub->sl);
10305 	if (verdict && hit)
10306 		hit(dev, tunnel, ctx);
10307 	if (!verdict && miss)
10308 		miss(dev, ctx);
10309 	if (lock_op)
10310 		rte_spinlock_unlock(&thub->sl);
10311 
10312 	return verdict;
10313 }
10314 
10315 struct tunnel_db_find_tunnel_id_ctx {
10316 	uint32_t tunnel_id;
10317 	struct mlx5_flow_tunnel *tunnel;
10318 };
10319 
10320 static bool
10321 find_tunnel_id_match(struct rte_eth_dev *dev,
10322 		     struct mlx5_flow_tunnel *tunnel, const void *x)
10323 {
10324 	const struct tunnel_db_find_tunnel_id_ctx *ctx = x;
10325 
10326 	RTE_SET_USED(dev);
10327 	return tunnel->tunnel_id == ctx->tunnel_id;
10328 }
10329 
10330 static void
10331 find_tunnel_id_hit(struct rte_eth_dev *dev,
10332 		   struct mlx5_flow_tunnel *tunnel, void *x)
10333 {
10334 	struct tunnel_db_find_tunnel_id_ctx *ctx = x;
10335 	RTE_SET_USED(dev);
10336 	ctx->tunnel = tunnel;
10337 }
10338 
10339 static struct mlx5_flow_tunnel *
10340 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
10341 {
10342 	struct tunnel_db_find_tunnel_id_ctx ctx = {
10343 		.tunnel_id = id,
10344 	};
10345 
10346 	mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
10347 				      find_tunnel_id_hit, NULL, &ctx, true);
10348 
10349 	return ctx.tunnel;
10350 }
10351 
10352 static struct mlx5_flow_tunnel *
10353 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
10354 			  const struct rte_flow_tunnel *app_tunnel)
10355 {
10356 	struct mlx5_priv *priv = dev->data->dev_private;
10357 	struct mlx5_indexed_pool *ipool;
10358 	struct mlx5_flow_tunnel *tunnel;
10359 	uint32_t id;
10360 
10361 	ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
10362 	tunnel = mlx5_ipool_zmalloc(ipool, &id);
10363 	if (!tunnel)
10364 		return NULL;
10365 	if (id >= MLX5_MAX_TUNNELS) {
10366 		mlx5_ipool_free(ipool, id);
10367 		DRV_LOG(ERR, "Tunnel ID %d exceed max limit.", id);
10368 		return NULL;
10369 	}
10370 	tunnel->groups = mlx5_hlist_create("tunnel groups", 64, false, true,
10371 					   priv->sh,
10372 					   mlx5_flow_tunnel_grp2tbl_create_cb,
10373 					   mlx5_flow_tunnel_grp2tbl_match_cb,
10374 					   mlx5_flow_tunnel_grp2tbl_remove_cb,
10375 					   mlx5_flow_tunnel_grp2tbl_clone_cb,
10376 					mlx5_flow_tunnel_grp2tbl_clone_free_cb);
10377 	if (!tunnel->groups) {
10378 		mlx5_ipool_free(ipool, id);
10379 		return NULL;
10380 	}
10381 	/* initiate new PMD tunnel */
10382 	memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
10383 	tunnel->tunnel_id = id;
10384 	tunnel->action.type = (typeof(tunnel->action.type))
10385 			      MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
10386 	tunnel->action.conf = tunnel;
10387 	tunnel->item.type = (typeof(tunnel->item.type))
10388 			    MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
10389 	tunnel->item.spec = tunnel;
10390 	tunnel->item.last = NULL;
10391 	tunnel->item.mask = NULL;
10392 
10393 	DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
10394 		dev->data->port_id, tunnel->tunnel_id);
10395 
10396 	return tunnel;
10397 }
10398 
10399 struct tunnel_db_get_tunnel_ctx {
10400 	const struct rte_flow_tunnel *app_tunnel;
10401 	struct mlx5_flow_tunnel *tunnel;
10402 };
10403 
10404 static bool get_tunnel_match(struct rte_eth_dev *dev,
10405 			     struct mlx5_flow_tunnel *tunnel, const void *x)
10406 {
10407 	const struct tunnel_db_get_tunnel_ctx *ctx = x;
10408 
10409 	RTE_SET_USED(dev);
10410 	return !memcmp(ctx->app_tunnel, &tunnel->app_tunnel,
10411 		       sizeof(*ctx->app_tunnel));
10412 }
10413 
10414 static void get_tunnel_hit(struct rte_eth_dev *dev,
10415 			   struct mlx5_flow_tunnel *tunnel, void *x)
10416 {
10417 	/* called under tunnel spinlock protection */
10418 	struct tunnel_db_get_tunnel_ctx *ctx = x;
10419 
10420 	RTE_SET_USED(dev);
10421 	tunnel->refctn++;
10422 	ctx->tunnel = tunnel;
10423 }
10424 
10425 static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
10426 {
10427 	/* called under tunnel spinlock protection */
10428 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10429 	struct tunnel_db_get_tunnel_ctx *ctx = x;
10430 
10431 	rte_spinlock_unlock(&thub->sl);
10432 	ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
10433 	rte_spinlock_lock(&thub->sl);
10434 	if (ctx->tunnel) {
10435 		ctx->tunnel->refctn = 1;
10436 		LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
10437 	}
10438 }
10439 
10440 
10441 static int
10442 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
10443 		     const struct rte_flow_tunnel *app_tunnel,
10444 		     struct mlx5_flow_tunnel **tunnel)
10445 {
10446 	struct tunnel_db_get_tunnel_ctx ctx = {
10447 		.app_tunnel = app_tunnel,
10448 	};
10449 
10450 	mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
10451 				      get_tunnel_miss, &ctx, true);
10452 	*tunnel = ctx.tunnel;
10453 	return ctx.tunnel ? 0 : -ENOMEM;
10454 }
10455 
10456 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
10457 {
10458 	struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
10459 
10460 	if (!thub)
10461 		return;
10462 	if (!LIST_EMPTY(&thub->tunnels))
10463 		DRV_LOG(WARNING, "port %u tunnels present", port_id);
10464 	mlx5_hlist_destroy(thub->groups);
10465 	mlx5_free(thub);
10466 }
10467 
10468 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
10469 {
10470 	int err;
10471 	struct mlx5_flow_tunnel_hub *thub;
10472 
10473 	thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
10474 			   0, SOCKET_ID_ANY);
10475 	if (!thub)
10476 		return -ENOMEM;
10477 	LIST_INIT(&thub->tunnels);
10478 	rte_spinlock_init(&thub->sl);
10479 	thub->groups = mlx5_hlist_create("flow groups", 64,
10480 					 false, true, sh,
10481 					 mlx5_flow_tunnel_grp2tbl_create_cb,
10482 					 mlx5_flow_tunnel_grp2tbl_match_cb,
10483 					 mlx5_flow_tunnel_grp2tbl_remove_cb,
10484 					 mlx5_flow_tunnel_grp2tbl_clone_cb,
10485 					mlx5_flow_tunnel_grp2tbl_clone_free_cb);
10486 	if (!thub->groups) {
10487 		err = -rte_errno;
10488 		goto err;
10489 	}
10490 	sh->tunnel_hub = thub;
10491 
10492 	return 0;
10493 
10494 err:
10495 	if (thub->groups)
10496 		mlx5_hlist_destroy(thub->groups);
10497 	if (thub)
10498 		mlx5_free(thub);
10499 	return err;
10500 }
10501 
10502 static inline int
10503 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
10504 			  struct rte_flow_tunnel *tunnel,
10505 			  struct rte_flow_error *error)
10506 {
10507 	struct mlx5_priv *priv = dev->data->dev_private;
10508 
10509 	if (!priv->sh->config.dv_flow_en)
10510 		return rte_flow_error_set(error, ENOTSUP,
10511 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10512 					  "flow DV interface is off");
10513 	if (!is_tunnel_offload_active(dev))
10514 		return rte_flow_error_set(error, ENOTSUP,
10515 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10516 					  "tunnel offload was not activated");
10517 	if (!tunnel)
10518 		return rte_flow_error_set(error, EINVAL,
10519 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10520 					  "no application tunnel");
10521 	switch (tunnel->type) {
10522 	default:
10523 		return rte_flow_error_set(error, EINVAL,
10524 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10525 					  "unsupported tunnel type");
10526 	case RTE_FLOW_ITEM_TYPE_VXLAN:
10527 	case RTE_FLOW_ITEM_TYPE_GRE:
10528 	case RTE_FLOW_ITEM_TYPE_NVGRE:
10529 	case RTE_FLOW_ITEM_TYPE_GENEVE:
10530 		break;
10531 	}
10532 	return 0;
10533 }
10534 
10535 static int
10536 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
10537 		    struct rte_flow_tunnel *app_tunnel,
10538 		    struct rte_flow_action **actions,
10539 		    uint32_t *num_of_actions,
10540 		    struct rte_flow_error *error)
10541 {
10542 	struct mlx5_flow_tunnel *tunnel;
10543 	int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
10544 
10545 	if (ret)
10546 		return ret;
10547 	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
10548 	if (ret < 0) {
10549 		return rte_flow_error_set(error, ret,
10550 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10551 					  "failed to initialize pmd tunnel");
10552 	}
10553 	*actions = &tunnel->action;
10554 	*num_of_actions = 1;
10555 	return 0;
10556 }
10557 
10558 static int
10559 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
10560 		       struct rte_flow_tunnel *app_tunnel,
10561 		       struct rte_flow_item **items,
10562 		       uint32_t *num_of_items,
10563 		       struct rte_flow_error *error)
10564 {
10565 	struct mlx5_flow_tunnel *tunnel;
10566 	int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
10567 
10568 	if (ret)
10569 		return ret;
10570 	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
10571 	if (ret < 0) {
10572 		return rte_flow_error_set(error, ret,
10573 					  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
10574 					  "failed to initialize pmd tunnel");
10575 	}
10576 	*items = &tunnel->item;
10577 	*num_of_items = 1;
10578 	return 0;
10579 }
10580 
10581 struct tunnel_db_element_release_ctx {
10582 	struct rte_flow_item *items;
10583 	struct rte_flow_action *actions;
10584 	uint32_t num_elements;
10585 	struct rte_flow_error *error;
10586 	int ret;
10587 };
10588 
10589 static bool
10590 tunnel_element_release_match(struct rte_eth_dev *dev,
10591 			     struct mlx5_flow_tunnel *tunnel, const void *x)
10592 {
10593 	const struct tunnel_db_element_release_ctx *ctx = x;
10594 
10595 	RTE_SET_USED(dev);
10596 	if (ctx->num_elements != 1)
10597 		return false;
10598 	else if (ctx->items)
10599 		return ctx->items == &tunnel->item;
10600 	else if (ctx->actions)
10601 		return ctx->actions == &tunnel->action;
10602 
10603 	return false;
10604 }
10605 
10606 static void
10607 tunnel_element_release_hit(struct rte_eth_dev *dev,
10608 			   struct mlx5_flow_tunnel *tunnel, void *x)
10609 {
10610 	struct tunnel_db_element_release_ctx *ctx = x;
10611 	ctx->ret = 0;
10612 	if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
10613 		mlx5_flow_tunnel_free(dev, tunnel);
10614 }
10615 
10616 static void
10617 tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
10618 {
10619 	struct tunnel_db_element_release_ctx *ctx = x;
10620 	RTE_SET_USED(dev);
10621 	ctx->ret = rte_flow_error_set(ctx->error, EINVAL,
10622 				      RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
10623 				      "invalid argument");
10624 }
10625 
10626 static int
10627 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
10628 		       struct rte_flow_item *pmd_items,
10629 		       uint32_t num_items, struct rte_flow_error *err)
10630 {
10631 	struct tunnel_db_element_release_ctx ctx = {
10632 		.items = pmd_items,
10633 		.actions = NULL,
10634 		.num_elements = num_items,
10635 		.error = err,
10636 	};
10637 
10638 	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
10639 				      tunnel_element_release_hit,
10640 				      tunnel_element_release_miss, &ctx, false);
10641 
10642 	return ctx.ret;
10643 }
10644 
10645 static int
10646 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
10647 			 struct rte_flow_action *pmd_actions,
10648 			 uint32_t num_actions, struct rte_flow_error *err)
10649 {
10650 	struct tunnel_db_element_release_ctx ctx = {
10651 		.items = NULL,
10652 		.actions = pmd_actions,
10653 		.num_elements = num_actions,
10654 		.error = err,
10655 	};
10656 
10657 	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
10658 				      tunnel_element_release_hit,
10659 				      tunnel_element_release_miss, &ctx, false);
10660 
10661 	return ctx.ret;
10662 }
10663 
10664 static int
10665 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
10666 				  struct rte_mbuf *m,
10667 				  struct rte_flow_restore_info *info,
10668 				  struct rte_flow_error *err)
10669 {
10670 	uint64_t ol_flags = m->ol_flags;
10671 	const struct mlx5_flow_tbl_data_entry *tble;
10672 	const uint64_t mask = RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
10673 
10674 	if (!is_tunnel_offload_active(dev)) {
10675 		info->flags = 0;
10676 		return 0;
10677 	}
10678 
10679 	if ((ol_flags & mask) != mask)
10680 		goto err;
10681 	tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
10682 	if (!tble) {
10683 		DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
10684 			dev->data->port_id, m->hash.fdir.hi);
10685 		goto err;
10686 	}
10687 	MLX5_ASSERT(tble->tunnel);
10688 	memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
10689 	info->group_id = tble->group_id;
10690 	info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
10691 		      RTE_FLOW_RESTORE_INFO_GROUP_ID |
10692 		      RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
10693 
10694 	return 0;
10695 
10696 err:
10697 	return rte_flow_error_set(err, EINVAL,
10698 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10699 				  "failed to get restore info");
10700 }
10701 
10702 #else /* HAVE_IBV_FLOW_DV_SUPPORT */
10703 static int
10704 mlx5_flow_tunnel_decap_set(__rte_unused struct rte_eth_dev *dev,
10705 			   __rte_unused struct rte_flow_tunnel *app_tunnel,
10706 			   __rte_unused struct rte_flow_action **actions,
10707 			   __rte_unused uint32_t *num_of_actions,
10708 			   __rte_unused struct rte_flow_error *error)
10709 {
10710 	return -ENOTSUP;
10711 }
10712 
10713 static int
10714 mlx5_flow_tunnel_match(__rte_unused struct rte_eth_dev *dev,
10715 		       __rte_unused struct rte_flow_tunnel *app_tunnel,
10716 		       __rte_unused struct rte_flow_item **items,
10717 		       __rte_unused uint32_t *num_of_items,
10718 		       __rte_unused struct rte_flow_error *error)
10719 {
10720 	return -ENOTSUP;
10721 }
10722 
10723 static int
10724 mlx5_flow_tunnel_item_release(__rte_unused struct rte_eth_dev *dev,
10725 			      __rte_unused struct rte_flow_item *pmd_items,
10726 			      __rte_unused uint32_t num_items,
10727 			      __rte_unused struct rte_flow_error *err)
10728 {
10729 	return -ENOTSUP;
10730 }
10731 
10732 static int
10733 mlx5_flow_tunnel_action_release(__rte_unused struct rte_eth_dev *dev,
10734 				__rte_unused struct rte_flow_action *pmd_action,
10735 				__rte_unused uint32_t num_actions,
10736 				__rte_unused struct rte_flow_error *err)
10737 {
10738 	return -ENOTSUP;
10739 }
10740 
10741 static int
10742 mlx5_flow_tunnel_get_restore_info(__rte_unused struct rte_eth_dev *dev,
10743 				  __rte_unused struct rte_mbuf *m,
10744 				  __rte_unused struct rte_flow_restore_info *i,
10745 				  __rte_unused struct rte_flow_error *err)
10746 {
10747 	return -ENOTSUP;
10748 }
10749 
10750 static int
10751 flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev,
10752 			     __rte_unused struct rte_flow *flow,
10753 			     __rte_unused const struct rte_flow_attr *attr,
10754 			     __rte_unused const struct rte_flow_action *actions,
10755 			     __rte_unused uint32_t flow_idx,
10756 			     __rte_unused const struct mlx5_flow_tunnel *tunnel,
10757 			     __rte_unused struct tunnel_default_miss_ctx *ctx,
10758 			     __rte_unused struct rte_flow_error *error)
10759 {
10760 	return -ENOTSUP;
10761 }
10762 
10763 static struct mlx5_flow_tunnel *
10764 mlx5_find_tunnel_id(__rte_unused struct rte_eth_dev *dev,
10765 		    __rte_unused uint32_t id)
10766 {
10767 	return NULL;
10768 }
10769 
10770 static void
10771 mlx5_flow_tunnel_free(__rte_unused struct rte_eth_dev *dev,
10772 		      __rte_unused struct mlx5_flow_tunnel *tunnel)
10773 {
10774 }
10775 
10776 static uint32_t
10777 tunnel_flow_group_to_flow_table(__rte_unused struct rte_eth_dev *dev,
10778 				__rte_unused const struct mlx5_flow_tunnel *t,
10779 				__rte_unused uint32_t group,
10780 				__rte_unused uint32_t *table,
10781 				struct rte_flow_error *error)
10782 {
10783 	return rte_flow_error_set(error, ENOTSUP,
10784 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10785 				  "tunnel offload requires DV support");
10786 }
10787 
10788 void
10789 mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh,
10790 			__rte_unused  uint16_t port_id)
10791 {
10792 }
10793 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
10794 
10795 /* Flex flow item API */
10796 static struct rte_flow_item_flex_handle *
10797 mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
10798 			   const struct rte_flow_item_flex_conf *conf,
10799 			   struct rte_flow_error *error)
10800 {
10801 	static const char err_msg[] = "flex item creation unsupported";
10802 	struct mlx5_priv *priv = dev->data->dev_private;
10803 	struct rte_flow_attr attr = { .transfer = 0 };
10804 	const struct mlx5_flow_driver_ops *fops =
10805 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10806 
10807 	if (!priv->pci_dev) {
10808 		rte_flow_error_set(error, ENOTSUP,
10809 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10810 				   "create flex item on PF only");
10811 		return NULL;
10812 	}
10813 	switch (priv->pci_dev->id.device_id) {
10814 	case PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF:
10815 	case PCI_DEVICE_ID_MELLANOX_CONNECTX7BF:
10816 		break;
10817 	default:
10818 		rte_flow_error_set(error, ENOTSUP,
10819 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10820 				   "flex item available on BlueField ports only");
10821 		return NULL;
10822 	}
10823 	if (!fops->item_create) {
10824 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10825 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10826 				   NULL, err_msg);
10827 		return NULL;
10828 	}
10829 	return fops->item_create(dev, conf, error);
10830 }
10831 
10832 static int
10833 mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
10834 			    const struct rte_flow_item_flex_handle *handle,
10835 			    struct rte_flow_error *error)
10836 {
10837 	static const char err_msg[] = "flex item release unsupported";
10838 	struct rte_flow_attr attr = { .transfer = 0 };
10839 	const struct mlx5_flow_driver_ops *fops =
10840 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10841 
10842 	if (!fops->item_release) {
10843 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10844 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10845 				   NULL, err_msg);
10846 		return -rte_errno;
10847 	}
10848 	return fops->item_release(dev, handle, error);
10849 }
10850 
10851 static void
10852 mlx5_dbg__print_pattern(const struct rte_flow_item *item)
10853 {
10854 	int ret;
10855 	struct rte_flow_error error;
10856 
10857 	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
10858 		char *item_name;
10859 		ret = rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR, &item_name,
10860 				    sizeof(item_name),
10861 				    (void *)(uintptr_t)item->type, &error);
10862 		if (ret > 0)
10863 			printf("%s ", item_name);
10864 		else
10865 			printf("%d\n", (int)item->type);
10866 	}
10867 	printf("END\n");
10868 }
10869 
10870 static int
10871 mlx5_flow_is_std_vxlan_port(const struct rte_flow_item *udp_item)
10872 {
10873 	const struct rte_flow_item_udp *spec = udp_item->spec;
10874 	const struct rte_flow_item_udp *mask = udp_item->mask;
10875 	uint16_t udp_dport = 0;
10876 
10877 	if (spec != NULL) {
10878 		if (!mask)
10879 			mask = &rte_flow_item_udp_mask;
10880 		udp_dport = rte_be_to_cpu_16(spec->hdr.dst_port &
10881 				mask->hdr.dst_port);
10882 	}
10883 	return (!udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN);
10884 }
10885 
10886 static const struct mlx5_flow_expand_node *
10887 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
10888 		unsigned int item_idx,
10889 		const struct mlx5_flow_expand_node graph[],
10890 		const struct mlx5_flow_expand_node *node)
10891 {
10892 	const struct rte_flow_item *item = pattern + item_idx, *prev_item;
10893 
10894 	if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN &&
10895 			node != NULL &&
10896 			node->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
10897 		/*
10898 		 * The expansion node is VXLAN and it is also the last
10899 		 * expandable item in the pattern, so need to continue
10900 		 * expansion of the inner tunnel.
10901 		 */
10902 		MLX5_ASSERT(item_idx > 0);
10903 		prev_item = pattern + item_idx - 1;
10904 		MLX5_ASSERT(prev_item->type == RTE_FLOW_ITEM_TYPE_UDP);
10905 		if (mlx5_flow_is_std_vxlan_port(prev_item))
10906 			return &graph[MLX5_EXPANSION_STD_VXLAN];
10907 		return &graph[MLX5_EXPANSION_L3_VXLAN];
10908 	}
10909 	return node;
10910 }
10911 
10912 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
10913 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
10914 	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
10915 };
10916 
10917 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
10918 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
10919 	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
10920 	{ 9, 10, 11 }, { 12, 13, 14 },
10921 };
10922 
10923 /**
10924  * Discover the number of available flow priorities.
10925  *
10926  * @param dev
10927  *   Ethernet device.
10928  *
10929  * @return
10930  *   On success, number of available flow priorities.
10931  *   On failure, a negative errno-style code and rte_errno is set.
10932  */
10933 int
10934 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
10935 {
10936 	static const uint16_t vprio[] = {8, 16};
10937 	const struct mlx5_priv *priv = dev->data->dev_private;
10938 	const struct mlx5_flow_driver_ops *fops;
10939 	enum mlx5_flow_drv_type type;
10940 	int ret;
10941 
10942 	type = mlx5_flow_os_get_type();
10943 	if (type == MLX5_FLOW_TYPE_MAX) {
10944 		type = MLX5_FLOW_TYPE_VERBS;
10945 		if (priv->sh->cdev->config.devx && priv->sh->config.dv_flow_en)
10946 			type = MLX5_FLOW_TYPE_DV;
10947 	}
10948 	fops = flow_get_drv_ops(type);
10949 	if (fops->discover_priorities == NULL) {
10950 		DRV_LOG(ERR, "Priority discovery not supported");
10951 		rte_errno = ENOTSUP;
10952 		return -rte_errno;
10953 	}
10954 	ret = fops->discover_priorities(dev, vprio, RTE_DIM(vprio));
10955 	if (ret < 0)
10956 		return ret;
10957 	switch (ret) {
10958 	case 8:
10959 		ret = RTE_DIM(priority_map_3);
10960 		break;
10961 	case 16:
10962 		ret = RTE_DIM(priority_map_5);
10963 		break;
10964 	default:
10965 		rte_errno = ENOTSUP;
10966 		DRV_LOG(ERR,
10967 			"port %u maximum priority: %d expected 8/16",
10968 			dev->data->port_id, ret);
10969 		return -rte_errno;
10970 	}
10971 	DRV_LOG(INFO, "port %u supported flow priorities:"
10972 		" 0-%d for ingress or egress root table,"
10973 		" 0-%d for non-root table or transfer root table.",
10974 		dev->data->port_id, ret - 2,
10975 		MLX5_NON_ROOT_FLOW_MAX_PRIO - 1);
10976 	return ret;
10977 }
10978 
10979 /**
10980  * Adjust flow priority based on the highest layer and the request priority.
10981  *
10982  * @param[in] dev
10983  *   Pointer to the Ethernet device structure.
10984  * @param[in] priority
10985  *   The rule base priority.
10986  * @param[in] subpriority
10987  *   The priority based on the items.
10988  *
10989  * @return
10990  *   The new priority.
10991  */
10992 uint32_t
10993 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
10994 			  uint32_t subpriority)
10995 {
10996 	uint32_t res = 0;
10997 	struct mlx5_priv *priv = dev->data->dev_private;
10998 
10999 	switch (priv->sh->flow_max_priority) {
11000 	case RTE_DIM(priority_map_3):
11001 		res = priority_map_3[priority][subpriority];
11002 		break;
11003 	case RTE_DIM(priority_map_5):
11004 		res = priority_map_5[priority][subpriority];
11005 		break;
11006 	}
11007 	return  res;
11008 }
11009