xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision b7b78a089c454d42eb654360eeecb1e2f15e6cd8)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <netinet/in.h>
7 #include <sys/queue.h>
8 #include <stdalign.h>
9 #include <stdint.h>
10 #include <string.h>
11 #include <stdbool.h>
12 
13 #include <rte_common.h>
14 #include <rte_ether.h>
15 #include <ethdev_driver.h>
16 #include <rte_eal_paging.h>
17 #include <rte_flow.h>
18 #include <rte_cycles.h>
19 #include <rte_flow_driver.h>
20 #include <rte_malloc.h>
21 #include <rte_ip.h>
22 
23 #include <mlx5_glue.h>
24 #include <mlx5_devx_cmds.h>
25 #include <mlx5_prm.h>
26 #include <mlx5_malloc.h>
27 
28 #include "mlx5_defs.h"
29 #include "mlx5.h"
30 #include "mlx5_flow.h"
31 #include "mlx5_flow_os.h"
32 #include "mlx5_rxtx.h"
33 #include "mlx5_common_os.h"
34 #include "rte_pmd_mlx5.h"
35 
36 struct tunnel_default_miss_ctx {
37 	uint16_t *queue;
38 	__extension__
39 	union {
40 		struct rte_flow_action_rss action_rss;
41 		struct rte_flow_action_queue miss_queue;
42 		struct rte_flow_action_jump miss_jump;
43 		uint8_t raw[0];
44 	};
45 };
46 
47 static int
48 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
49 			     struct rte_flow *flow,
50 			     const struct rte_flow_attr *attr,
51 			     const struct rte_flow_action *app_actions,
52 			     uint32_t flow_idx,
53 			     struct tunnel_default_miss_ctx *ctx,
54 			     struct rte_flow_error *error);
55 static struct mlx5_flow_tunnel *
56 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id);
57 static void
58 mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel);
59 static uint32_t
60 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
61 				const struct mlx5_flow_tunnel *tunnel,
62 				uint32_t group, uint32_t *table,
63 				struct rte_flow_error *error);
64 
65 static struct mlx5_flow_workspace *mlx5_flow_push_thread_workspace(void);
66 static void mlx5_flow_pop_thread_workspace(void);
67 
68 
69 /** Device flow drivers. */
70 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
71 
72 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
73 
74 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
75 	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
76 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
77 	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
78 #endif
79 	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
80 	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
81 };
82 
83 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */
84 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
85 	(const int []){ \
86 		__VA_ARGS__, 0, \
87 	}
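/*
 * For example (illustrative only), MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
 * MLX5_EXPANSION_IPV6) expands to the compound literal
 * (const int []){ MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6, 0, },
 * i.e. a zero-terminated array of next-node indexes.
 */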
88 
89 /** Node object of input graph for mlx5_flow_expand_rss(). */
90 struct mlx5_flow_expand_node {
91 	const int *const next;
92 	/**<
93 	 * List of next node indexes. A zero value is interpreted as a terminator.
94 	 */
95 	const enum rte_flow_item_type type;
96 	/**< Pattern item type of current node. */
97 	uint64_t rss_types;
98 	/**<
99 	 * RSS types bit-field associated with this node
100 	 * (see ETH_RSS_* definitions).
101 	 */
102 };
103 
104 /** Object returned by mlx5_flow_expand_rss(). */
105 struct mlx5_flow_expand_rss {
106 	uint32_t entries;
107 	/**< Number of entries in @p entry[] (patterns and priorities). */
108 	struct {
109 		struct rte_flow_item *pattern; /**< Expanded pattern array. */
110 		uint32_t priority; /**< Priority offset for each expansion. */
111 	} entry[];
112 };
113 
114 static enum rte_flow_item_type
115 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
116 {
117 	enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID;
118 	uint16_t ether_type = 0;
119 	uint16_t ether_type_m;
120 	uint8_t ip_next_proto = 0;
121 	uint8_t ip_next_proto_m;
122 
123 	if (item == NULL || item->spec == NULL)
124 		return ret;
125 	switch (item->type) {
126 	case RTE_FLOW_ITEM_TYPE_ETH:
127 		if (item->mask)
128 			ether_type_m = ((const struct rte_flow_item_eth *)
129 						(item->mask))->type;
130 		else
131 			ether_type_m = rte_flow_item_eth_mask.type;
132 		if (ether_type_m != RTE_BE16(0xFFFF))
133 			break;
134 		ether_type = ((const struct rte_flow_item_eth *)
135 				(item->spec))->type;
136 		if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
137 			ret = RTE_FLOW_ITEM_TYPE_IPV4;
138 		else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
139 			ret = RTE_FLOW_ITEM_TYPE_IPV6;
140 		else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
141 			ret = RTE_FLOW_ITEM_TYPE_VLAN;
142 		else
143 			ret = RTE_FLOW_ITEM_TYPE_END;
144 		break;
145 	case RTE_FLOW_ITEM_TYPE_VLAN:
146 		if (item->mask)
147 			ether_type_m = ((const struct rte_flow_item_vlan *)
148 						(item->mask))->inner_type;
149 		else
150 			ether_type_m = rte_flow_item_vlan_mask.inner_type;
151 		if (ether_type_m != RTE_BE16(0xFFFF))
152 			break;
153 		ether_type = ((const struct rte_flow_item_vlan *)
154 				(item->spec))->inner_type;
155 		if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
156 			ret = RTE_FLOW_ITEM_TYPE_IPV4;
157 		else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
158 			ret = RTE_FLOW_ITEM_TYPE_IPV6;
159 		else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
160 			ret = RTE_FLOW_ITEM_TYPE_VLAN;
161 		else
162 			ret = RTE_FLOW_ITEM_TYPE_END;
163 		break;
164 	case RTE_FLOW_ITEM_TYPE_IPV4:
165 		if (item->mask)
166 			ip_next_proto_m = ((const struct rte_flow_item_ipv4 *)
167 					(item->mask))->hdr.next_proto_id;
168 		else
169 			ip_next_proto_m =
170 				rte_flow_item_ipv4_mask.hdr.next_proto_id;
171 		if (ip_next_proto_m != 0xFF)
172 			break;
173 		ip_next_proto = ((const struct rte_flow_item_ipv4 *)
174 				(item->spec))->hdr.next_proto_id;
175 		if (ip_next_proto == IPPROTO_UDP)
176 			ret = RTE_FLOW_ITEM_TYPE_UDP;
177 		else if (ip_next_proto == IPPROTO_TCP)
178 			ret = RTE_FLOW_ITEM_TYPE_TCP;
179 		else if (ip_next_proto == IPPROTO_IP)
180 			ret = RTE_FLOW_ITEM_TYPE_IPV4;
181 		else if (ip_next_proto == IPPROTO_IPV6)
182 			ret = RTE_FLOW_ITEM_TYPE_IPV6;
183 		else
184 			ret = RTE_FLOW_ITEM_TYPE_END;
185 		break;
186 	case RTE_FLOW_ITEM_TYPE_IPV6:
187 		if (item->mask)
188 			ip_next_proto_m = ((const struct rte_flow_item_ipv6 *)
189 						(item->mask))->hdr.proto;
190 		else
191 			ip_next_proto_m =
192 				rte_flow_item_ipv6_mask.hdr.proto;
193 		if (ip_next_proto_m != 0xFF)
194 			break;
195 		ip_next_proto = ((const struct rte_flow_item_ipv6 *)
196 				(item->spec))->hdr.proto;
197 		if (ip_next_proto == IPPROTO_UDP)
198 			ret = RTE_FLOW_ITEM_TYPE_UDP;
199 		else if (ip_next_proto == IPPROTO_TCP)
200 			ret = RTE_FLOW_ITEM_TYPE_TCP;
201 		else if (ip_next_proto == IPPROTO_IP)
202 			ret = RTE_FLOW_ITEM_TYPE_IPV4;
203 		else if (ip_next_proto == IPPROTO_IPV6)
204 			ret = RTE_FLOW_ITEM_TYPE_IPV6;
205 		else
206 			ret = RTE_FLOW_ITEM_TYPE_END;
207 		break;
208 	default:
209 		ret = RTE_FLOW_ITEM_TYPE_VOID;
210 		break;
211 	}
212 	return ret;
213 }
214 
215 #define MLX5_RSS_EXP_ELT_N 8
216 
217 /**
218  * Expand RSS flows into several possible flows according to the RSS hash
219  * fields requested and the driver capabilities.
220  *
221  * @param[out] buf
222  *   Buffer to store the result expansion.
223  * @param[in] size
224  *   Buffer size in bytes. If 0, @p buf can be NULL.
225  * @param[in] pattern
226  *   User flow pattern.
227  * @param[in] types
228  *   RSS types to expand (see ETH_RSS_* definitions).
229  * @param[in] graph
230  *   Input graph to expand @p pattern according to @p types.
231  * @param[in] graph_root_index
232  *   Index of root node in @p graph, typically 0.
233  *
234  * @return
235  *   A positive value representing the size of @p buf in bytes regardless of
236  *   @p size on success, a negative errno value otherwise and rte_errno is
237  *   set; the following errors are defined:
238  *
239  *   -E2BIG: the expansion graph @p graph is too deep.
240  */
241 static int
242 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
243 		     const struct rte_flow_item *pattern, uint64_t types,
244 		     const struct mlx5_flow_expand_node graph[],
245 		     int graph_root_index)
246 {
247 	const struct rte_flow_item *item;
248 	const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
249 	const int *next_node;
250 	const int *stack[MLX5_RSS_EXP_ELT_N];
251 	int stack_pos = 0;
252 	struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N];
253 	unsigned int i;
254 	size_t lsize;
255 	size_t user_pattern_size = 0;
256 	void *addr = NULL;
257 	const struct mlx5_flow_expand_node *next = NULL;
258 	struct rte_flow_item missed_item;
259 	int missed = 0;
260 	int elt = 0;
261 	const struct rte_flow_item *last_item = NULL;
262 
263 	memset(&missed_item, 0, sizeof(missed_item));
264 	lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
265 		MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]);
266 	if (lsize <= size) {
267 		buf->entry[0].priority = 0;
268 		buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N];
269 		buf->entries = 0;
270 		addr = buf->entry[0].pattern;
271 	}
272 	for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
273 		if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
274 			last_item = item;
275 		for (i = 0; node->next && node->next[i]; ++i) {
276 			next = &graph[node->next[i]];
277 			if (next->type == item->type)
278 				break;
279 		}
280 		if (next)
281 			node = next;
282 		user_pattern_size += sizeof(*item);
283 	}
284 	user_pattern_size += sizeof(*item); /* Handle END item. */
285 	lsize += user_pattern_size;
286 	/* Copy the user pattern in the first entry of the buffer. */
287 	if (lsize <= size) {
288 		rte_memcpy(addr, pattern, user_pattern_size);
289 		addr = (void *)(((uintptr_t)addr) + user_pattern_size);
290 		buf->entries = 1;
291 	}
292 	/* Start expanding. */
293 	memset(flow_items, 0, sizeof(flow_items));
294 	user_pattern_size -= sizeof(*item);
295 	/*
296 	 * Check if the last valid item has spec set; if so, derive the item
297 	 * type needed to complete the pattern so it can be used for expansion.
298 	 */
299 	missed_item.type = mlx5_flow_expand_rss_item_complete(last_item);
300 	if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
301 		/* Item type END indicates expansion is not required. */
302 		return lsize;
303 	}
304 	if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
305 		next = NULL;
306 		missed = 1;
307 		for (i = 0; node->next && node->next[i]; ++i) {
308 			next = &graph[node->next[i]];
309 			if (next->type == missed_item.type) {
310 				flow_items[0].type = missed_item.type;
311 				flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
312 				break;
313 			}
314 			next = NULL;
315 		}
316 	}
317 	if (next && missed) {
318 		elt = 2; /* missed item + item end. */
319 		node = next;
320 		lsize += elt * sizeof(*item) + user_pattern_size;
321 		if ((node->rss_types & types) && lsize <= size) {
322 			buf->entry[buf->entries].priority = 1;
323 			buf->entry[buf->entries].pattern = addr;
324 			buf->entries++;
325 			rte_memcpy(addr, buf->entry[0].pattern,
326 				   user_pattern_size);
327 			addr = (void *)(((uintptr_t)addr) + user_pattern_size);
328 			rte_memcpy(addr, flow_items, elt * sizeof(*item));
329 			addr = (void *)(((uintptr_t)addr) +
330 					elt * sizeof(*item));
331 		}
332 	}
333 	memset(flow_items, 0, sizeof(flow_items));
334 	next_node = node->next;
335 	stack[stack_pos] = next_node;
336 	node = next_node ? &graph[*next_node] : NULL;
337 	while (node) {
338 		flow_items[stack_pos].type = node->type;
339 		if (node->rss_types & types) {
340 			/*
341 			 * Compute the number of items to copy from the
342 			 * expansion and copy them.
343 			 * When stack_pos is 0, there is 1 element in it,
344 			 * plus the additional END item.
345 			 */
346 			elt = stack_pos + 2;
347 			flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
348 			lsize += elt * sizeof(*item) + user_pattern_size;
349 			if (lsize <= size) {
350 				size_t n = elt * sizeof(*item);
351 
352 				buf->entry[buf->entries].priority =
353 					stack_pos + 1 + missed;
354 				buf->entry[buf->entries].pattern = addr;
355 				buf->entries++;
356 				rte_memcpy(addr, buf->entry[0].pattern,
357 					   user_pattern_size);
358 				addr = (void *)(((uintptr_t)addr) +
359 						user_pattern_size);
360 				rte_memcpy(addr, &missed_item,
361 					   missed * sizeof(*item));
362 				addr = (void *)(((uintptr_t)addr) +
363 					missed * sizeof(*item));
364 				rte_memcpy(addr, flow_items, n);
365 				addr = (void *)(((uintptr_t)addr) + n);
366 			}
367 		}
368 		/* Go deeper. */
369 		if (node->next) {
370 			next_node = node->next;
371 			if (stack_pos++ >= MLX5_RSS_EXP_ELT_N - 2) {
372 				rte_errno = E2BIG;
373 				return -rte_errno;
374 			}
375 			stack[stack_pos] = next_node;
376 		} else if (*(next_node + 1)) {
377 			/* Follow up with the next possibility. */
378 			++next_node;
379 		} else {
380 			/* Move to the next path. */
381 			if (stack_pos)
382 				next_node = stack[--stack_pos];
383 			next_node++;
384 			stack[stack_pos] = next_node;
385 		}
386 		node = *next_node ? &graph[*next_node] : NULL;
387 	};
388 	/* No expanded flows but we have a missed item; create one rule for it. */
389 	if (buf->entries == 1 && missed != 0) {
390 		elt = 2;
391 		lsize += elt * sizeof(*item) + user_pattern_size;
392 		if (lsize <= size) {
393 			buf->entry[buf->entries].priority = 1;
394 			buf->entry[buf->entries].pattern = addr;
395 			buf->entries++;
396 			flow_items[0].type = missed_item.type;
397 			flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
398 			rte_memcpy(addr, buf->entry[0].pattern,
399 				   user_pattern_size);
400 			addr = (void *)(((uintptr_t)addr) + user_pattern_size);
401 			rte_memcpy(addr, flow_items, elt * sizeof(*item));
402 		}
403 	}
404 	return lsize;
405 }
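/*
 * Worked example (illustrative, assuming the mlx5_support_expansion graph
 * defined below with MLX5_EXPANSION_ROOT as root): a user pattern of
 * ETH / IPV4 / END with types ETH_RSS_NONFRAG_IPV4_UDP |
 * ETH_RSS_NONFRAG_IPV4_TCP expands into three entries stored back-to-back
 * in @p buf: the original pattern (priority offset 0), ETH / IPV4 / UDP and
 * ETH / IPV4 / TCP (priority offset 1 each).
 */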
406 
407 enum mlx5_expansion {
408 	MLX5_EXPANSION_ROOT,
409 	MLX5_EXPANSION_ROOT_OUTER,
410 	MLX5_EXPANSION_ROOT_ETH_VLAN,
411 	MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
412 	MLX5_EXPANSION_OUTER_ETH,
413 	MLX5_EXPANSION_OUTER_ETH_VLAN,
414 	MLX5_EXPANSION_OUTER_VLAN,
415 	MLX5_EXPANSION_OUTER_IPV4,
416 	MLX5_EXPANSION_OUTER_IPV4_UDP,
417 	MLX5_EXPANSION_OUTER_IPV4_TCP,
418 	MLX5_EXPANSION_OUTER_IPV6,
419 	MLX5_EXPANSION_OUTER_IPV6_UDP,
420 	MLX5_EXPANSION_OUTER_IPV6_TCP,
421 	MLX5_EXPANSION_VXLAN,
422 	MLX5_EXPANSION_VXLAN_GPE,
423 	MLX5_EXPANSION_GRE,
424 	MLX5_EXPANSION_MPLS,
425 	MLX5_EXPANSION_ETH,
426 	MLX5_EXPANSION_ETH_VLAN,
427 	MLX5_EXPANSION_VLAN,
428 	MLX5_EXPANSION_IPV4,
429 	MLX5_EXPANSION_IPV4_UDP,
430 	MLX5_EXPANSION_IPV4_TCP,
431 	MLX5_EXPANSION_IPV6,
432 	MLX5_EXPANSION_IPV6_UDP,
433 	MLX5_EXPANSION_IPV6_TCP,
434 };
435 
436 /** Supported expansion of items. */
437 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
438 	[MLX5_EXPANSION_ROOT] = {
439 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
440 						  MLX5_EXPANSION_IPV4,
441 						  MLX5_EXPANSION_IPV6),
442 		.type = RTE_FLOW_ITEM_TYPE_END,
443 	},
444 	[MLX5_EXPANSION_ROOT_OUTER] = {
445 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
446 						  MLX5_EXPANSION_OUTER_IPV4,
447 						  MLX5_EXPANSION_OUTER_IPV6),
448 		.type = RTE_FLOW_ITEM_TYPE_END,
449 	},
450 	[MLX5_EXPANSION_ROOT_ETH_VLAN] = {
451 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
452 		.type = RTE_FLOW_ITEM_TYPE_END,
453 	},
454 	[MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
455 		.next = MLX5_FLOW_EXPAND_RSS_NEXT
456 						(MLX5_EXPANSION_OUTER_ETH_VLAN),
457 		.type = RTE_FLOW_ITEM_TYPE_END,
458 	},
459 	[MLX5_EXPANSION_OUTER_ETH] = {
460 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
461 						  MLX5_EXPANSION_OUTER_IPV6,
462 						  MLX5_EXPANSION_MPLS),
463 		.type = RTE_FLOW_ITEM_TYPE_ETH,
464 		.rss_types = 0,
465 	},
466 	[MLX5_EXPANSION_OUTER_ETH_VLAN] = {
467 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
468 		.type = RTE_FLOW_ITEM_TYPE_ETH,
469 		.rss_types = 0,
470 	},
471 	[MLX5_EXPANSION_OUTER_VLAN] = {
472 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
473 						  MLX5_EXPANSION_OUTER_IPV6),
474 		.type = RTE_FLOW_ITEM_TYPE_VLAN,
475 	},
476 	[MLX5_EXPANSION_OUTER_IPV4] = {
477 		.next = MLX5_FLOW_EXPAND_RSS_NEXT
478 			(MLX5_EXPANSION_OUTER_IPV4_UDP,
479 			 MLX5_EXPANSION_OUTER_IPV4_TCP,
480 			 MLX5_EXPANSION_GRE,
481 			 MLX5_EXPANSION_IPV4,
482 			 MLX5_EXPANSION_IPV6),
483 		.type = RTE_FLOW_ITEM_TYPE_IPV4,
484 		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
485 			ETH_RSS_NONFRAG_IPV4_OTHER,
486 	},
487 	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
488 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
489 						  MLX5_EXPANSION_VXLAN_GPE),
490 		.type = RTE_FLOW_ITEM_TYPE_UDP,
491 		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
492 	},
493 	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
494 		.type = RTE_FLOW_ITEM_TYPE_TCP,
495 		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
496 	},
497 	[MLX5_EXPANSION_OUTER_IPV6] = {
498 		.next = MLX5_FLOW_EXPAND_RSS_NEXT
499 			(MLX5_EXPANSION_OUTER_IPV6_UDP,
500 			 MLX5_EXPANSION_OUTER_IPV6_TCP,
501 			 MLX5_EXPANSION_IPV4,
502 			 MLX5_EXPANSION_IPV6),
503 		.type = RTE_FLOW_ITEM_TYPE_IPV6,
504 		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
505 			ETH_RSS_NONFRAG_IPV6_OTHER,
506 	},
507 	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
508 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
509 						  MLX5_EXPANSION_VXLAN_GPE),
510 		.type = RTE_FLOW_ITEM_TYPE_UDP,
511 		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
512 	},
513 	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
514 		.type = RTE_FLOW_ITEM_TYPE_TCP,
515 		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
516 	},
517 	[MLX5_EXPANSION_VXLAN] = {
518 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
519 						  MLX5_EXPANSION_IPV4,
520 						  MLX5_EXPANSION_IPV6),
521 		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
522 	},
523 	[MLX5_EXPANSION_VXLAN_GPE] = {
524 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
525 						  MLX5_EXPANSION_IPV4,
526 						  MLX5_EXPANSION_IPV6),
527 		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
528 	},
529 	[MLX5_EXPANSION_GRE] = {
530 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
531 		.type = RTE_FLOW_ITEM_TYPE_GRE,
532 	},
533 	[MLX5_EXPANSION_MPLS] = {
534 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
535 						  MLX5_EXPANSION_IPV6),
536 		.type = RTE_FLOW_ITEM_TYPE_MPLS,
537 	},
538 	[MLX5_EXPANSION_ETH] = {
539 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
540 						  MLX5_EXPANSION_IPV6),
541 		.type = RTE_FLOW_ITEM_TYPE_ETH,
542 	},
543 	[MLX5_EXPANSION_ETH_VLAN] = {
544 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
545 		.type = RTE_FLOW_ITEM_TYPE_ETH,
546 	},
547 	[MLX5_EXPANSION_VLAN] = {
548 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
549 						  MLX5_EXPANSION_IPV6),
550 		.type = RTE_FLOW_ITEM_TYPE_VLAN,
551 	},
552 	[MLX5_EXPANSION_IPV4] = {
553 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
554 						  MLX5_EXPANSION_IPV4_TCP),
555 		.type = RTE_FLOW_ITEM_TYPE_IPV4,
556 		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
557 			ETH_RSS_NONFRAG_IPV4_OTHER,
558 	},
559 	[MLX5_EXPANSION_IPV4_UDP] = {
560 		.type = RTE_FLOW_ITEM_TYPE_UDP,
561 		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
562 	},
563 	[MLX5_EXPANSION_IPV4_TCP] = {
564 		.type = RTE_FLOW_ITEM_TYPE_TCP,
565 		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
566 	},
567 	[MLX5_EXPANSION_IPV6] = {
568 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
569 						  MLX5_EXPANSION_IPV6_TCP),
570 		.type = RTE_FLOW_ITEM_TYPE_IPV6,
571 		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
572 			ETH_RSS_NONFRAG_IPV6_OTHER,
573 	},
574 	[MLX5_EXPANSION_IPV6_UDP] = {
575 		.type = RTE_FLOW_ITEM_TYPE_UDP,
576 		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
577 	},
578 	[MLX5_EXPANSION_IPV6_TCP] = {
579 		.type = RTE_FLOW_ITEM_TYPE_TCP,
580 		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
581 	},
582 };
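/*
 * Minimal usage sketch of the expansion helpers above. Illustrative only and
 * not part of the driver control path; the function name and the 2048-byte
 * buffer size are assumptions made for the example.
 */
static void __rte_unused
mlx5_flow_expand_rss_usage_sketch(const struct rte_flow_item *pattern,
				  uint64_t rss_types)
{
	union {
		struct mlx5_flow_expand_rss buf;
		uint8_t buffer[2048];
	} expand_buffer;
	struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
	uint32_t i;
	int ret;

	ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
				   pattern, rss_types,
				   mlx5_support_expansion,
				   MLX5_EXPANSION_ROOT);
	if (ret < 0 || (size_t)ret > sizeof(expand_buffer.buffer))
		return; /* Expansion failed or would not fit in the buffer. */
	for (i = 0; i != buf->entries; ++i) {
		/*
		 * Each entry holds a complete item array terminated by
		 * RTE_FLOW_ITEM_TYPE_END and a priority offset for that
		 * expansion.
		 */
		(void)buf->entry[i].pattern;
		(void)buf->entry[i].priority;
	}
}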
583 
584 static struct rte_flow_shared_action *
585 mlx5_shared_action_create(struct rte_eth_dev *dev,
586 			  const struct rte_flow_shared_action_conf *conf,
587 			  const struct rte_flow_action *action,
588 			  struct rte_flow_error *error);
589 static int mlx5_shared_action_destroy
590 				(struct rte_eth_dev *dev,
591 				 struct rte_flow_shared_action *shared_action,
592 				 struct rte_flow_error *error);
593 static int mlx5_shared_action_update
594 				(struct rte_eth_dev *dev,
595 				 struct rte_flow_shared_action *shared_action,
596 				 const struct rte_flow_action *action,
597 				 struct rte_flow_error *error);
598 static int mlx5_shared_action_query
599 				(struct rte_eth_dev *dev,
600 				 const struct rte_flow_shared_action *action,
601 				 void *data,
602 				 struct rte_flow_error *error);
603 static int
604 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
605 		    struct rte_flow_tunnel *app_tunnel,
606 		    struct rte_flow_action **actions,
607 		    uint32_t *num_of_actions,
608 		    struct rte_flow_error *error);
609 static int
610 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
611 		       struct rte_flow_tunnel *app_tunnel,
612 		       struct rte_flow_item **items,
613 		       uint32_t *num_of_items,
614 		       struct rte_flow_error *error);
615 static int
616 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
617 			      struct rte_flow_item *pmd_items,
618 			      uint32_t num_items, struct rte_flow_error *err);
619 static int
620 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
621 				struct rte_flow_action *pmd_actions,
622 				uint32_t num_actions,
623 				struct rte_flow_error *err);
624 static int
625 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
626 				  struct rte_mbuf *m,
627 				  struct rte_flow_restore_info *info,
628 				  struct rte_flow_error *err);
629 
630 static const struct rte_flow_ops mlx5_flow_ops = {
631 	.validate = mlx5_flow_validate,
632 	.create = mlx5_flow_create,
633 	.destroy = mlx5_flow_destroy,
634 	.flush = mlx5_flow_flush,
635 	.isolate = mlx5_flow_isolate,
636 	.query = mlx5_flow_query,
637 	.dev_dump = mlx5_flow_dev_dump,
638 	.get_aged_flows = mlx5_flow_get_aged_flows,
639 	.shared_action_create = mlx5_shared_action_create,
640 	.shared_action_destroy = mlx5_shared_action_destroy,
641 	.shared_action_update = mlx5_shared_action_update,
642 	.shared_action_query = mlx5_shared_action_query,
643 	.tunnel_decap_set = mlx5_flow_tunnel_decap_set,
644 	.tunnel_match = mlx5_flow_tunnel_match,
645 	.tunnel_action_decap_release = mlx5_flow_tunnel_action_release,
646 	.tunnel_item_release = mlx5_flow_tunnel_item_release,
647 	.get_restore_info = mlx5_flow_tunnel_get_restore_info,
648 };
649 
650 /* Tunnel information. */
651 struct mlx5_flow_tunnel_info {
652 	uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
653 	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
654 };
655 
656 static struct mlx5_flow_tunnel_info tunnels_info[] = {
657 	{
658 		.tunnel = MLX5_FLOW_LAYER_VXLAN,
659 		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
660 	},
661 	{
662 		.tunnel = MLX5_FLOW_LAYER_GENEVE,
663 		.ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
664 	},
665 	{
666 		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
667 		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
668 	},
669 	{
670 		.tunnel = MLX5_FLOW_LAYER_GRE,
671 		.ptype = RTE_PTYPE_TUNNEL_GRE,
672 	},
673 	{
674 		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
675 		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
676 	},
677 	{
678 		.tunnel = MLX5_FLOW_LAYER_MPLS,
679 		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
680 	},
681 	{
682 		.tunnel = MLX5_FLOW_LAYER_NVGRE,
683 		.ptype = RTE_PTYPE_TUNNEL_NVGRE,
684 	},
685 	{
686 		.tunnel = MLX5_FLOW_LAYER_IPIP,
687 		.ptype = RTE_PTYPE_TUNNEL_IP,
688 	},
689 	{
690 		.tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
691 		.ptype = RTE_PTYPE_TUNNEL_IP,
692 	},
693 	{
694 		.tunnel = MLX5_FLOW_LAYER_GTP,
695 		.ptype = RTE_PTYPE_TUNNEL_GTPU,
696 	},
697 };
698 
699 
700 
701 /**
702  * Translate tag ID to register.
703  *
704  * @param[in] dev
705  *   Pointer to the Ethernet device structure.
706  * @param[in] feature
707  *   The feature that requests the register.
708  * @param[in] id
709  *   The requested register ID.
710  * @param[out] error
711  *   Error description in case of failure.
712  *
713  * @return
714  *   The requested register on success, a negative errno
715  *   value otherwise and rte_errno is set.
716  */
717 int
718 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
719 		     enum mlx5_feature_name feature,
720 		     uint32_t id,
721 		     struct rte_flow_error *error)
722 {
723 	struct mlx5_priv *priv = dev->data->dev_private;
724 	struct mlx5_dev_config *config = &priv->config;
725 	enum modify_reg start_reg;
726 	bool skip_mtr_reg = false;
727 
728 	switch (feature) {
729 	case MLX5_HAIRPIN_RX:
730 		return REG_B;
731 	case MLX5_HAIRPIN_TX:
732 		return REG_A;
733 	case MLX5_METADATA_RX:
734 		switch (config->dv_xmeta_en) {
735 		case MLX5_XMETA_MODE_LEGACY:
736 			return REG_B;
737 		case MLX5_XMETA_MODE_META16:
738 			return REG_C_0;
739 		case MLX5_XMETA_MODE_META32:
740 			return REG_C_1;
741 		}
742 		break;
743 	case MLX5_METADATA_TX:
744 		return REG_A;
745 	case MLX5_METADATA_FDB:
746 		switch (config->dv_xmeta_en) {
747 		case MLX5_XMETA_MODE_LEGACY:
748 			return REG_NON;
749 		case MLX5_XMETA_MODE_META16:
750 			return REG_C_0;
751 		case MLX5_XMETA_MODE_META32:
752 			return REG_C_1;
753 		}
754 		break;
755 	case MLX5_FLOW_MARK:
756 		switch (config->dv_xmeta_en) {
757 		case MLX5_XMETA_MODE_LEGACY:
758 			return REG_NON;
759 		case MLX5_XMETA_MODE_META16:
760 			return REG_C_1;
761 		case MLX5_XMETA_MODE_META32:
762 			return REG_C_0;
763 		}
764 		break;
765 	case MLX5_MTR_SFX:
766 		/*
767 		 * If meter color and flow match share one register, flow match
768 		 * should use the meter color register for match.
769 		 */
770 		if (priv->mtr_reg_share)
771 			return priv->mtr_color_reg;
772 		else
773 			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
774 			       REG_C_3;
775 	case MLX5_MTR_COLOR:
776 	case MLX5_ASO_FLOW_HIT: /* Both features use the same REG_C. */
777 		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
778 		return priv->mtr_color_reg;
779 	case MLX5_COPY_MARK:
780 		/*
781 		 * The metadata COPY_MARK register is used only in the meter
782 		 * suffix sub-flow when a meter is present; sharing it is safe.
783 		 */
784 		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
785 	case MLX5_APP_TAG:
786 		 * If the meter is enabled, it engages a register for color
787 		 * match and flow match. If the meter color match does not use
788 		 * REG_C_2, the REG_C_x used by the meter color match must be
789 		 * skipped.
790 		 * If the meter is disabled, all available registers can be used.
791 		 * If meter is disable, free to use all available registers.
792 		 */
793 		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
794 			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
795 		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
796 		if (id > (uint32_t)(REG_C_7 - start_reg))
797 			return rte_flow_error_set(error, EINVAL,
798 						  RTE_FLOW_ERROR_TYPE_ITEM,
799 						  NULL, "invalid tag id");
800 		if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
801 			return rte_flow_error_set(error, ENOTSUP,
802 						  RTE_FLOW_ERROR_TYPE_ITEM,
803 						  NULL, "unsupported tag id");
804 		/*
805 		 * This case means the meter is using a REG_C_x greater than 2.
806 		 * Take care not to conflict with meter color REG_C_x.
807 		 * If the available index REG_C_y >= REG_C_x, skip the
808 		 * color register.
809 		 */
810 		if (skip_mtr_reg && config->flow_mreg_c
811 		    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
812 			if (id >= (uint32_t)(REG_C_7 - start_reg))
813 				return rte_flow_error_set(error, EINVAL,
814 						       RTE_FLOW_ERROR_TYPE_ITEM,
815 							NULL, "invalid tag id");
816 			if (config->flow_mreg_c
817 			    [id + 1 + start_reg - REG_C_0] != REG_NON)
818 				return config->flow_mreg_c
819 					       [id + 1 + start_reg - REG_C_0];
820 			return rte_flow_error_set(error, ENOTSUP,
821 						  RTE_FLOW_ERROR_TYPE_ITEM,
822 						  NULL, "unsupported tag id");
823 		}
824 		return config->flow_mreg_c[id + start_reg - REG_C_0];
825 	}
826 	MLX5_ASSERT(false);
827 	return rte_flow_error_set(error, EINVAL,
828 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
829 				  NULL, "invalid feature name");
830 }
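/*
 * For example (illustrative, assuming dv_xmeta_en == MLX5_XMETA_MODE_META16
 * and no meter configured): MLX5_METADATA_RX resolves to REG_C_0,
 * MLX5_FLOW_MARK resolves to REG_C_1, and an MLX5_APP_TAG id N is served
 * from config->flow_mreg_c[N + 2], i.e. REG_C_2 upward.
 */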
831 
832 /**
833  * Check extensive flow metadata register support.
834  *
835  * @param dev
836  *   Pointer to rte_eth_dev structure.
837  *
838  * @return
839  *   True if device supports extensive flow metadata register, otherwise false.
840  */
841 bool
842 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
843 {
844 	struct mlx5_priv *priv = dev->data->dev_private;
845 	struct mlx5_dev_config *config = &priv->config;
846 
847 	/*
848 	 * Having an available reg_c can be regarded as supporting the
849 	 * extensive flow metadata registers, which implies:
850 	 * - metadata register copy action by modify header.
851 	 * - 16 modify header actions are supported.
852 	 * - reg_c's are preserved across different domains (FDB and NIC) on
853 	 *   packet loopback by flow lookup miss.
854 	 */
855 	return config->flow_mreg_c[2] != REG_NON;
856 }
857 
858 /**
859  * Get the lowest priority.
860  *
861  * @param[in] dev
862  *   Pointer to the Ethernet device structure.
863  * @param[in] attr
864  *   Pointer to device flow rule attributes.
865  *
866  * @return
867  *   The value of lowest priority of flow.
868  */
869 uint32_t
870 mlx5_get_lowest_priority(struct rte_eth_dev *dev,
871 			  const struct rte_flow_attr *attr)
872 {
873 	struct mlx5_priv *priv = dev->data->dev_private;
874 
875 	if (!attr->group && !attr->transfer)
876 		return priv->config.flow_prio - 2;
877 	return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1;
878 }
879 
880 /**
881  * Calculate matcher priority of the flow.
882  *
883  * @param[in] dev
884  *   Pointer to the Ethernet device structure.
885  * @param[in] attr
886  *   Pointer to device flow rule attributes.
887  * @param[in] subpriority
888  *   The priority based on the items.
889  * @return
890  *   The matcher priority of the flow.
891  */
892 uint16_t
893 mlx5_get_matcher_priority(struct rte_eth_dev *dev,
894 			  const struct rte_flow_attr *attr,
895 			  uint32_t subpriority)
896 {
897 	uint16_t priority = (uint16_t)attr->priority;
898 	struct mlx5_priv *priv = dev->data->dev_private;
899 
900 	if (!attr->group && !attr->transfer) {
901 		if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
902 			priority = priv->config.flow_prio - 1;
903 		return mlx5_os_flow_adjust_priority(dev, priority, subpriority);
904 	}
905 	if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
906 		priority = MLX5_NON_ROOT_FLOW_MAX_PRIO;
907 	return priority * 3 + subpriority;
908 }
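/*
 * For example (illustrative, assuming flow_prio == 16): a root-table flow
 * using MLX5_FLOW_LOWEST_PRIO_INDICATOR gets priority 15 before the OS
 * adjustment, while a non-root flow with attr->priority == 2 and
 * subpriority == 1 yields a matcher priority of 2 * 3 + 1 = 7.
 */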
909 
910 /**
911  * Verify the @p item specifications (spec, last, mask) are compatible with the
912  * NIC capabilities.
913  *
914  * @param[in] item
915  *   Item specification.
916  * @param[in] mask
917  *   @p item->mask or flow default bit-masks.
918  * @param[in] nic_mask
919  *   Bit-masks covering supported fields by the NIC to compare with user mask.
920  * @param[in] size
921  *   Bit-masks size in bytes.
922  * @param[in] range_accepted
923  *   True if range of values is accepted for specific fields, false otherwise.
924  * @param[out] error
925  *   Pointer to error structure.
926  *
927  * @return
928  *   0 on success, a negative errno value otherwise and rte_errno is set.
929  */
930 int
931 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
932 			  const uint8_t *mask,
933 			  const uint8_t *nic_mask,
934 			  unsigned int size,
935 			  bool range_accepted,
936 			  struct rte_flow_error *error)
937 {
938 	unsigned int i;
939 
940 	MLX5_ASSERT(nic_mask);
941 	for (i = 0; i < size; ++i)
942 		if ((nic_mask[i] | mask[i]) != nic_mask[i])
943 			return rte_flow_error_set(error, ENOTSUP,
944 						  RTE_FLOW_ERROR_TYPE_ITEM,
945 						  item,
946 						  "mask enables non supported"
947 						  " bits");
948 	if (!item->spec && (item->mask || item->last))
949 		return rte_flow_error_set(error, EINVAL,
950 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
951 					  "mask/last without a spec is not"
952 					  " supported");
953 	if (item->spec && item->last && !range_accepted) {
954 		uint8_t spec[size];
955 		uint8_t last[size];
956 		unsigned int i;
957 		int ret;
958 
959 		for (i = 0; i < size; ++i) {
960 			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
961 			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
962 		}
963 		ret = memcmp(spec, last, size);
964 		if (ret != 0)
965 			return rte_flow_error_set(error, EINVAL,
966 						  RTE_FLOW_ERROR_TYPE_ITEM,
967 						  item,
968 						  "range is not valid");
969 	}
970 	return 0;
971 }
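/*
 * Illustrative sketch of a hypothetical caller (not the driver's own UDP
 * validation): check a UDP item against a NIC mask that only supports exact
 * port matching, with value ranges rejected.
 */
static int __rte_unused
mlx5_flow_item_udp_acceptable_sketch(const struct rte_flow_item *item,
				     struct rte_flow_error *error)
{
	const struct rte_flow_item_udp nic_mask = {
		.hdr = {
			.src_port = RTE_BE16(0xffff),
			.dst_port = RTE_BE16(0xffff),
		},
	};
	const struct rte_flow_item_udp *mask =
		item->mask ? item->mask : &rte_flow_item_udp_mask;

	return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					 (const uint8_t *)&nic_mask,
					 sizeof(struct rte_flow_item_udp),
					 false /* range not accepted */,
					 error);
}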
972 
973 /**
974  * Adjust the hash fields according to the @p flow information.
975  *
976  * @param[in] rss_desc
977  *   Pointer to the RSS descriptor (struct mlx5_flow_rss_desc).
978  * @param[in] tunnel
979  *   1 when the hash field is for a tunnel item.
980  * @param[in] layer_types
981  *   ETH_RSS_* types.
982  * @param[in] hash_fields
983  *   Item hash fields.
984  *
985  * @return
986  *   The hash fields that should be used.
987  */
988 uint64_t
989 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
990 			    int tunnel __rte_unused, uint64_t layer_types,
991 			    uint64_t hash_fields)
992 {
993 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
994 	int rss_request_inner = rss_desc->level >= 2;
995 
996 	/* Check RSS hash level for tunnel. */
997 	if (tunnel && rss_request_inner)
998 		hash_fields |= IBV_RX_HASH_INNER;
999 	else if (tunnel || rss_request_inner)
1000 		return 0;
1001 #endif
1002 	/* Check if requested layer matches RSS hash fields. */
1003 	if (!(rss_desc->types & layer_types))
1004 		return 0;
1005 	return hash_fields;
1006 }
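/*
 * For example (illustrative): for the inner UDP part of a VXLAN flow with
 * rss_desc->level == 2 and ETH_RSS_UDP requested, IBV_RX_HASH_INNER is OR-ed
 * into the Verbs hash fields; if the requested RSS types do not cover the
 * layer (@p layer_types), 0 is returned and that layer is not hashed.
 */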
1007 
1008 /**
1009  * Look up and set the ptype in the Rx queue data. Only a single ptype can be
1010  * used; if several tunnel rules are used on this queue, the tunnel ptype will be
1011  * cleared.
1012  *
1013  * @param rxq_ctrl
1014  *   Rx queue to update.
1015  */
1016 static void
1017 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
1018 {
1019 	unsigned int i;
1020 	uint32_t tunnel_ptype = 0;
1021 
1022 	/* Look up for the ptype to use. */
1023 	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
1024 		if (!rxq_ctrl->flow_tunnels_n[i])
1025 			continue;
1026 		if (!tunnel_ptype) {
1027 			tunnel_ptype = tunnels_info[i].ptype;
1028 		} else {
1029 			tunnel_ptype = 0;
1030 			break;
1031 		}
1032 	}
1033 	rxq_ctrl->rxq.tunnel = tunnel_ptype;
1034 }
1035 
1036 /**
1037  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
1038  * flow.
1039  *
1040  * @param[in] dev
1041  *   Pointer to the Ethernet device structure.
1042  * @param[in] dev_handle
1043  *   Pointer to device flow handle structure.
1044  */
1045 static void
1046 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
1047 		       struct mlx5_flow_handle *dev_handle)
1048 {
1049 	struct mlx5_priv *priv = dev->data->dev_private;
1050 	const int mark = dev_handle->mark;
1051 	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1052 	struct mlx5_ind_table_obj *ind_tbl = NULL;
1053 	unsigned int i;
1054 
1055 	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1056 		struct mlx5_hrxq *hrxq;
1057 
1058 		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1059 			      dev_handle->rix_hrxq);
1060 		if (hrxq)
1061 			ind_tbl = hrxq->ind_table;
1062 	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1063 		struct mlx5_shared_action_rss *shared_rss;
1064 
1065 		shared_rss = mlx5_ipool_get
1066 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1067 			 dev_handle->rix_srss);
1068 		if (shared_rss)
1069 			ind_tbl = shared_rss->ind_tbl;
1070 	}
1071 	if (!ind_tbl)
1072 		return;
1073 	for (i = 0; i != ind_tbl->queues_n; ++i) {
1074 		int idx = ind_tbl->queues[i];
1075 		struct mlx5_rxq_ctrl *rxq_ctrl =
1076 			container_of((*priv->rxqs)[idx],
1077 				     struct mlx5_rxq_ctrl, rxq);
1078 
1079 		/*
1080 		 * To support metadata register copy on Tx loopback,
1081 		 * this must always be enabled (metadata may arrive
1082 		 * from another port, not only from local flows).
1083 		 */
1084 		if (priv->config.dv_flow_en &&
1085 		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
1086 		    mlx5_flow_ext_mreg_supported(dev)) {
1087 			rxq_ctrl->rxq.mark = 1;
1088 			rxq_ctrl->flow_mark_n = 1;
1089 		} else if (mark) {
1090 			rxq_ctrl->rxq.mark = 1;
1091 			rxq_ctrl->flow_mark_n++;
1092 		}
1093 		if (tunnel) {
1094 			unsigned int j;
1095 
1096 			/* Increase the counter matching the flow. */
1097 			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1098 				if ((tunnels_info[j].tunnel &
1099 				     dev_handle->layers) ==
1100 				    tunnels_info[j].tunnel) {
1101 					rxq_ctrl->flow_tunnels_n[j]++;
1102 					break;
1103 				}
1104 			}
1105 			flow_rxq_tunnel_ptype_update(rxq_ctrl);
1106 		}
1107 	}
1108 }
1109 
1110 /**
1111  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
1112  *
1113  * @param[in] dev
1114  *   Pointer to the Ethernet device structure.
1115  * @param[in] flow
1116  *   Pointer to flow structure.
1117  */
1118 static void
1119 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1120 {
1121 	struct mlx5_priv *priv = dev->data->dev_private;
1122 	uint32_t handle_idx;
1123 	struct mlx5_flow_handle *dev_handle;
1124 
1125 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1126 		       handle_idx, dev_handle, next)
1127 		flow_drv_rxq_flags_set(dev, dev_handle);
1128 }
1129 
1130 /**
1131  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1132  * device flow if no other flow uses it with the same kind of request.
1133  *
1134  * @param dev
1135  *   Pointer to Ethernet device.
1136  * @param[in] dev_handle
1137  *   Pointer to the device flow handle structure.
1138  */
1139 static void
1140 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
1141 			struct mlx5_flow_handle *dev_handle)
1142 {
1143 	struct mlx5_priv *priv = dev->data->dev_private;
1144 	const int mark = dev_handle->mark;
1145 	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1146 	struct mlx5_ind_table_obj *ind_tbl = NULL;
1147 	unsigned int i;
1148 
1149 	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1150 		struct mlx5_hrxq *hrxq;
1151 
1152 		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1153 			      dev_handle->rix_hrxq);
1154 		if (hrxq)
1155 			ind_tbl = hrxq->ind_table;
1156 	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1157 		struct mlx5_shared_action_rss *shared_rss;
1158 
1159 		shared_rss = mlx5_ipool_get
1160 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1161 			 dev_handle->rix_srss);
1162 		if (shared_rss)
1163 			ind_tbl = shared_rss->ind_tbl;
1164 	}
1165 	if (!ind_tbl)
1166 		return;
1167 	MLX5_ASSERT(dev->data->dev_started);
1168 	for (i = 0; i != ind_tbl->queues_n; ++i) {
1169 		int idx = ind_tbl->queues[i];
1170 		struct mlx5_rxq_ctrl *rxq_ctrl =
1171 			container_of((*priv->rxqs)[idx],
1172 				     struct mlx5_rxq_ctrl, rxq);
1173 
1174 		if (priv->config.dv_flow_en &&
1175 		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
1176 		    mlx5_flow_ext_mreg_supported(dev)) {
1177 			rxq_ctrl->rxq.mark = 1;
1178 			rxq_ctrl->flow_mark_n = 1;
1179 		} else if (mark) {
1180 			rxq_ctrl->flow_mark_n--;
1181 			rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
1182 		}
1183 		if (tunnel) {
1184 			unsigned int j;
1185 
1186 			/* Decrease the counter matching the flow. */
1187 			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1188 				if ((tunnels_info[j].tunnel &
1189 				     dev_handle->layers) ==
1190 				    tunnels_info[j].tunnel) {
1191 					rxq_ctrl->flow_tunnels_n[j]--;
1192 					break;
1193 				}
1194 			}
1195 			flow_rxq_tunnel_ptype_update(rxq_ctrl);
1196 		}
1197 	}
1198 }
1199 
1200 /**
1201  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1202  * @p flow if no other flow uses it with the same kind of request.
1203  *
1204  * @param dev
1205  *   Pointer to Ethernet device.
1206  * @param[in] flow
1207  *   Pointer to the flow.
1208  */
1209 static void
1210 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1211 {
1212 	struct mlx5_priv *priv = dev->data->dev_private;
1213 	uint32_t handle_idx;
1214 	struct mlx5_flow_handle *dev_handle;
1215 
1216 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1217 		       handle_idx, dev_handle, next)
1218 		flow_drv_rxq_flags_trim(dev, dev_handle);
1219 }
1220 
1221 /**
1222  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
1223  *
1224  * @param dev
1225  *   Pointer to Ethernet device.
1226  */
1227 static void
1228 flow_rxq_flags_clear(struct rte_eth_dev *dev)
1229 {
1230 	struct mlx5_priv *priv = dev->data->dev_private;
1231 	unsigned int i;
1232 
1233 	for (i = 0; i != priv->rxqs_n; ++i) {
1234 		struct mlx5_rxq_ctrl *rxq_ctrl;
1235 		unsigned int j;
1236 
1237 		if (!(*priv->rxqs)[i])
1238 			continue;
1239 		rxq_ctrl = container_of((*priv->rxqs)[i],
1240 					struct mlx5_rxq_ctrl, rxq);
1241 		rxq_ctrl->flow_mark_n = 0;
1242 		rxq_ctrl->rxq.mark = 0;
1243 		for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
1244 			rxq_ctrl->flow_tunnels_n[j] = 0;
1245 		rxq_ctrl->rxq.tunnel = 0;
1246 	}
1247 }
1248 
1249 /**
1250  * Set the Rx queue dynamic metadata (mask and offset) for a flow
1251  *
1252  * @param[in] dev
1253  *   Pointer to the Ethernet device structure.
1254  */
1255 void
1256 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
1257 {
1258 	struct mlx5_priv *priv = dev->data->dev_private;
1259 	struct mlx5_rxq_data *data;
1260 	unsigned int i;
1261 
1262 	for (i = 0; i != priv->rxqs_n; ++i) {
1263 		if (!(*priv->rxqs)[i])
1264 			continue;
1265 		data = (*priv->rxqs)[i];
1266 		if (!rte_flow_dynf_metadata_avail()) {
1267 			data->dynf_meta = 0;
1268 			data->flow_meta_mask = 0;
1269 			data->flow_meta_offset = -1;
1270 		} else {
1271 			data->dynf_meta = 1;
1272 			data->flow_meta_mask = rte_flow_dynf_metadata_mask;
1273 			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
1274 		}
1275 	}
1276 }
1277 
1278 /*
1279  * Return a pointer to the desired action in the list of actions.
1280  *
1281  * @param[in] actions
1282  *   The list of actions to search the action in.
1283  * @param[in] action
1284  *   The action to find.
1285  *
1286  * @return
1287  *   Pointer to the action in the list, if found. NULL otherwise.
1288  */
1289 const struct rte_flow_action *
1290 mlx5_flow_find_action(const struct rte_flow_action *actions,
1291 		      enum rte_flow_action_type action)
1292 {
1293 	if (actions == NULL)
1294 		return NULL;
1295 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
1296 		if (actions->type == action)
1297 			return actions;
1298 	return NULL;
1299 }
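/*
 * Usage example (illustrative):
 *   const struct rte_flow_action *rss =
 *           mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
 * returns the first RSS action in the list, or NULL if none is present.
 */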
1300 
1301 /*
1302  * Validate the flag action.
1303  *
1304  * @param[in] action_flags
1305  *   Bit-fields that holds the actions detected until now.
1306  * @param[in] attr
1307  *   Attributes of flow that includes this action.
1308  * @param[out] error
1309  *   Pointer to error structure.
1310  *
1311  * @return
1312  *   0 on success, a negative errno value otherwise and rte_errno is set.
1313  */
1314 int
1315 mlx5_flow_validate_action_flag(uint64_t action_flags,
1316 			       const struct rte_flow_attr *attr,
1317 			       struct rte_flow_error *error)
1318 {
1319 	if (action_flags & MLX5_FLOW_ACTION_MARK)
1320 		return rte_flow_error_set(error, EINVAL,
1321 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1322 					  "can't mark and flag in same flow");
1323 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
1324 		return rte_flow_error_set(error, EINVAL,
1325 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1326 					  "can't have 2 flag"
1327 					  " actions in same flow");
1328 	if (attr->egress)
1329 		return rte_flow_error_set(error, ENOTSUP,
1330 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1331 					  "flag action not supported for "
1332 					  "egress");
1333 	return 0;
1334 }
1335 
1336 /*
1337  * Validate the mark action.
1338  *
1339  * @param[in] action
1340  *   Pointer to the mark action.
1341  * @param[in] action_flags
1342  *   Bit-fields that holds the actions detected until now.
1343  * @param[in] attr
1344  *   Attributes of flow that includes this action.
1345  * @param[out] error
1346  *   Pointer to error structure.
1347  *
1348  * @return
1349  *   0 on success, a negative errno value otherwise and rte_errno is set.
1350  */
1351 int
1352 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1353 			       uint64_t action_flags,
1354 			       const struct rte_flow_attr *attr,
1355 			       struct rte_flow_error *error)
1356 {
1357 	const struct rte_flow_action_mark *mark = action->conf;
1358 
1359 	if (!mark)
1360 		return rte_flow_error_set(error, EINVAL,
1361 					  RTE_FLOW_ERROR_TYPE_ACTION,
1362 					  action,
1363 					  "configuration cannot be null");
1364 	if (mark->id >= MLX5_FLOW_MARK_MAX)
1365 		return rte_flow_error_set(error, EINVAL,
1366 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1367 					  &mark->id,
1368 					  "mark id must be in 0 <= id < "
1369 					  RTE_STR(MLX5_FLOW_MARK_MAX));
1370 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
1371 		return rte_flow_error_set(error, EINVAL,
1372 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1373 					  "can't flag and mark in same flow");
1374 	if (action_flags & MLX5_FLOW_ACTION_MARK)
1375 		return rte_flow_error_set(error, EINVAL,
1376 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1377 					  "can't have 2 mark actions in same"
1378 					  " flow");
1379 	if (attr->egress)
1380 		return rte_flow_error_set(error, ENOTSUP,
1381 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1382 					  "mark action not supported for "
1383 					  "egress");
1384 	return 0;
1385 }
1386 
1387 /*
1388  * Validate the drop action.
1389  *
1390  * @param[in] action_flags
1391  *   Bit-fields that holds the actions detected until now.
1392  * @param[in] attr
1393  *   Attributes of flow that includes this action.
1394  * @param[out] error
1395  *   Pointer to error structure.
1396  *
1397  * @return
1398  *   0 on success, a negative errno value otherwise and rte_errno is set.
1399  */
1400 int
1401 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1402 			       const struct rte_flow_attr *attr,
1403 			       struct rte_flow_error *error)
1404 {
1405 	if (attr->egress)
1406 		return rte_flow_error_set(error, ENOTSUP,
1407 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1408 					  "drop action not supported for "
1409 					  "egress");
1410 	return 0;
1411 }
1412 
1413 /*
1414  * Validate the queue action.
1415  *
1416  * @param[in] action
1417  *   Pointer to the queue action.
1418  * @param[in] action_flags
1419  *   Bit-fields that holds the actions detected until now.
1420  * @param[in] dev
1421  *   Pointer to the Ethernet device structure.
1422  * @param[in] attr
1423  *   Attributes of flow that includes this action.
1424  * @param[out] error
1425  *   Pointer to error structure.
1426  *
1427  * @return
1428  *   0 on success, a negative errno value otherwise and rte_errno is set.
1429  */
1430 int
1431 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1432 				uint64_t action_flags,
1433 				struct rte_eth_dev *dev,
1434 				const struct rte_flow_attr *attr,
1435 				struct rte_flow_error *error)
1436 {
1437 	struct mlx5_priv *priv = dev->data->dev_private;
1438 	const struct rte_flow_action_queue *queue = action->conf;
1439 
1440 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1441 		return rte_flow_error_set(error, EINVAL,
1442 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1443 					  "can't have 2 fate actions in"
1444 					  " same flow");
1445 	if (!priv->rxqs_n)
1446 		return rte_flow_error_set(error, EINVAL,
1447 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1448 					  NULL, "No Rx queues configured");
1449 	if (queue->index >= priv->rxqs_n)
1450 		return rte_flow_error_set(error, EINVAL,
1451 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1452 					  &queue->index,
1453 					  "queue index out of range");
1454 	if (!(*priv->rxqs)[queue->index])
1455 		return rte_flow_error_set(error, EINVAL,
1456 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1457 					  &queue->index,
1458 					  "queue is not configured");
1459 	if (attr->egress)
1460 		return rte_flow_error_set(error, ENOTSUP,
1461 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1462 					  "queue action not supported for "
1463 					  "egress");
1464 	return 0;
1465 }
1466 
1467 /*
1468  * Validate the rss action.
1469  *
1470  * @param[in] dev
1471  *   Pointer to the Ethernet device structure.
1472  * @param[in] action
1473  *   Pointer to the RSS action.
1474  * @param[out] error
1475  *   Pointer to error structure.
1476  *
1477  * @return
1478  *   0 on success, a negative errno value otherwise and rte_errno is set.
1479  */
1480 int
1481 mlx5_validate_action_rss(struct rte_eth_dev *dev,
1482 			 const struct rte_flow_action *action,
1483 			 struct rte_flow_error *error)
1484 {
1485 	struct mlx5_priv *priv = dev->data->dev_private;
1486 	const struct rte_flow_action_rss *rss = action->conf;
1487 	enum mlx5_rxq_type rxq_type = MLX5_RXQ_TYPE_UNDEFINED;
1488 	unsigned int i;
1489 
1490 	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1491 	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1492 		return rte_flow_error_set(error, ENOTSUP,
1493 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1494 					  &rss->func,
1495 					  "RSS hash function not supported");
1496 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1497 	if (rss->level > 2)
1498 #else
1499 	if (rss->level > 1)
1500 #endif
1501 		return rte_flow_error_set(error, ENOTSUP,
1502 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1503 					  &rss->level,
1504 					  "tunnel RSS is not supported");
1505 	/* allow RSS key_len 0 in case of NULL (default) RSS key. */
1506 	if (rss->key_len == 0 && rss->key != NULL)
1507 		return rte_flow_error_set(error, ENOTSUP,
1508 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1509 					  &rss->key_len,
1510 					  "RSS hash key length 0");
1511 	if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1512 		return rte_flow_error_set(error, ENOTSUP,
1513 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1514 					  &rss->key_len,
1515 					  "RSS hash key too small");
1516 	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1517 		return rte_flow_error_set(error, ENOTSUP,
1518 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1519 					  &rss->key_len,
1520 					  "RSS hash key too large");
1521 	if (rss->queue_num > priv->config.ind_table_max_size)
1522 		return rte_flow_error_set(error, ENOTSUP,
1523 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1524 					  &rss->queue_num,
1525 					  "number of queues too large");
1526 	if (rss->types & MLX5_RSS_HF_MASK)
1527 		return rte_flow_error_set(error, ENOTSUP,
1528 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1529 					  &rss->types,
1530 					  "some RSS protocols are not"
1531 					  " supported");
1532 	if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) &&
1533 	    !(rss->types & ETH_RSS_IP))
1534 		return rte_flow_error_set(error, EINVAL,
1535 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1536 					  "L3 partial RSS requested but L3 RSS"
1537 					  " type not specified");
1538 	if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) &&
1539 	    !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP)))
1540 		return rte_flow_error_set(error, EINVAL,
1541 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1542 					  "L4 partial RSS requested but L4 RSS"
1543 					  " type not specified");
1544 	if (!priv->rxqs_n)
1545 		return rte_flow_error_set(error, EINVAL,
1546 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1547 					  NULL, "No Rx queues configured");
1548 	if (!rss->queue_num)
1549 		return rte_flow_error_set(error, EINVAL,
1550 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1551 					  NULL, "No queues configured");
1552 	for (i = 0; i != rss->queue_num; ++i) {
1553 		struct mlx5_rxq_ctrl *rxq_ctrl;
1554 
1555 		if (rss->queue[i] >= priv->rxqs_n)
1556 			return rte_flow_error_set
1557 				(error, EINVAL,
1558 				 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1559 				 &rss->queue[i], "queue index out of range");
1560 		if (!(*priv->rxqs)[rss->queue[i]])
1561 			return rte_flow_error_set
1562 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1563 				 &rss->queue[i], "queue is not configured");
1564 		rxq_ctrl = container_of((*priv->rxqs)[rss->queue[i]],
1565 					struct mlx5_rxq_ctrl, rxq);
1566 		if (i == 0)
1567 			rxq_type = rxq_ctrl->type;
1568 		if (rxq_type != rxq_ctrl->type)
1569 			return rte_flow_error_set
1570 				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1571 				 &rss->queue[i],
1572 				 "combining hairpin and regular RSS queues is not supported");
1573 	}
1574 	return 0;
1575 }
1576 
1577 /*
1578  * Validate the rss action.
1579  *
1580  * @param[in] action
1581  *   Pointer to the RSS action.
1582  * @param[in] action_flags
1583  *   Bit-fields that holds the actions detected until now.
1584  * @param[in] dev
1585  *   Pointer to the Ethernet device structure.
1586  * @param[in] attr
1587  *   Attributes of flow that includes this action.
1588  * @param[in] item_flags
1589  *   Items that were detected.
1590  * @param[out] error
1591  *   Pointer to error structure.
1592  *
1593  * @return
1594  *   0 on success, a negative errno value otherwise and rte_errno is set.
1595  */
1596 int
1597 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1598 			      uint64_t action_flags,
1599 			      struct rte_eth_dev *dev,
1600 			      const struct rte_flow_attr *attr,
1601 			      uint64_t item_flags,
1602 			      struct rte_flow_error *error)
1603 {
1604 	const struct rte_flow_action_rss *rss = action->conf;
1605 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1606 	int ret;
1607 
1608 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1609 		return rte_flow_error_set(error, EINVAL,
1610 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1611 					  "can't have 2 fate actions"
1612 					  " in same flow");
1613 	ret = mlx5_validate_action_rss(dev, action, error);
1614 	if (ret)
1615 		return ret;
1616 	if (attr->egress)
1617 		return rte_flow_error_set(error, ENOTSUP,
1618 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1619 					  "rss action not supported for "
1620 					  "egress");
1621 	if (rss->level > 1 && !tunnel)
1622 		return rte_flow_error_set(error, EINVAL,
1623 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1624 					  "inner RSS is not supported for "
1625 					  "non-tunnel flows");
1626 	if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
1627 	    !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
1628 		return rte_flow_error_set(error, EINVAL,
1629 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1630 					  "RSS on eCPRI is not supported now");
1631 	}
1632 	return 0;
1633 }
1634 
1635 /*
1636  * Validate the default miss action.
1637  *
1638  * @param[in] action_flags
1639  *   Bit-fields that holds the actions detected until now.
1640  * @param[out] error
1641  *   Pointer to error structure.
1642  *
1643  * @return
1644  *   0 on success, a negative errno value otherwise and rte_errno is set.
1645  */
1646 int
1647 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
1648 				const struct rte_flow_attr *attr,
1649 				struct rte_flow_error *error)
1650 {
1651 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1652 		return rte_flow_error_set(error, EINVAL,
1653 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1654 					  "can't have 2 fate actions in"
1655 					  " same flow");
1656 	if (attr->egress)
1657 		return rte_flow_error_set(error, ENOTSUP,
1658 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1659 					  "default miss action not supported "
1660 					  "for egress");
1661 	if (attr->group)
1662 		return rte_flow_error_set(error, ENOTSUP,
1663 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
1664 					  "only group 0 is supported");
1665 	if (attr->transfer)
1666 		return rte_flow_error_set(error, ENOTSUP,
1667 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1668 					  NULL, "transfer is not supported");
1669 	return 0;
1670 }
1671 
1672 /*
1673  * Validate the count action.
1674  *
1675  * @param[in] dev
1676  *   Pointer to the Ethernet device structure.
1677  * @param[in] attr
1678  *   Attributes of flow that includes this action.
1679  * @param[out] error
1680  *   Pointer to error structure.
1681  *
1682  * @return
1683  *   0 on success, a negative errno value otherwise and rte_errno is set.
1684  */
1685 int
1686 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
1687 				const struct rte_flow_attr *attr,
1688 				struct rte_flow_error *error)
1689 {
1690 	if (attr->egress)
1691 		return rte_flow_error_set(error, ENOTSUP,
1692 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1693 					  "count action not supported for "
1694 					  "egress");
1695 	return 0;
1696 }
1697 
1698 /**
1699  * Verify the @p attributes will be correctly understood by the NIC and store
1700  * them in the @p flow if everything is correct.
1701  *
1702  * @param[in] dev
1703  *   Pointer to the Ethernet device structure.
1704  * @param[in] attributes
1705  *   Pointer to flow attributes
1706  * @param[out] error
1707  *   Pointer to error structure.
1708  *
1709  * @return
1710  *   0 on success, a negative errno value otherwise and rte_errno is set.
1711  */
1712 int
1713 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
1714 			      const struct rte_flow_attr *attributes,
1715 			      struct rte_flow_error *error)
1716 {
1717 	struct mlx5_priv *priv = dev->data->dev_private;
1718 	uint32_t priority_max = priv->config.flow_prio - 1;
1719 
1720 	if (attributes->group)
1721 		return rte_flow_error_set(error, ENOTSUP,
1722 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
1723 					  NULL, "groups are not supported");
1724 	if (attributes->priority != MLX5_FLOW_LOWEST_PRIO_INDICATOR &&
1725 	    attributes->priority >= priority_max)
1726 		return rte_flow_error_set(error, ENOTSUP,
1727 					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1728 					  NULL, "priority out of range");
1729 	if (attributes->egress)
1730 		return rte_flow_error_set(error, ENOTSUP,
1731 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1732 					  "egress is not supported");
1733 	if (attributes->transfer && !priv->config.dv_esw_en)
1734 		return rte_flow_error_set(error, ENOTSUP,
1735 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1736 					  NULL, "transfer is not supported");
1737 	if (!attributes->ingress)
1738 		return rte_flow_error_set(error, EINVAL,
1739 					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1740 					  NULL,
1741 					  "ingress attribute is mandatory");
1742 	return 0;
1743 }
1744 
1745 /**
1746  * Validate ICMP6 item.
1747  *
1748  * @param[in] item
1749  *   Item specification.
1750  * @param[in] item_flags
1751  *   Bit-fields that holds the items detected until now.
1752  * @param[in] target_protocol
1753  *   The next protocol in the previous item.
1754  * @param[out] error
1755  *   Pointer to error structure.
1756  *
1757  * @return
1758  *   0 on success, a negative errno value otherwise and rte_errno is set.
1759  */
1760 int
1761 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
1762 			       uint64_t item_flags,
1763 			       uint8_t target_protocol,
1764 			       struct rte_flow_error *error)
1765 {
1766 	const struct rte_flow_item_icmp6 *mask = item->mask;
1767 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1768 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1769 				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1770 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1771 				      MLX5_FLOW_LAYER_OUTER_L4;
1772 	int ret;
1773 
1774 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
1775 		return rte_flow_error_set(error, EINVAL,
1776 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1777 					  "protocol filtering not compatible"
1778 					  " with ICMP6 layer");
1779 	if (!(item_flags & l3m))
1780 		return rte_flow_error_set(error, EINVAL,
1781 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1782 					  "IPv6 is mandatory to filter on"
1783 					  " ICMP6");
1784 	if (item_flags & l4m)
1785 		return rte_flow_error_set(error, EINVAL,
1786 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1787 					  "multiple L4 layers not supported");
1788 	if (!mask)
1789 		mask = &rte_flow_item_icmp6_mask;
1790 	ret = mlx5_flow_item_acceptable
1791 		(item, (const uint8_t *)mask,
1792 		 (const uint8_t *)&rte_flow_item_icmp6_mask,
1793 		 sizeof(struct rte_flow_item_icmp6),
1794 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1795 	if (ret < 0)
1796 		return ret;
1797 	return 0;
1798 }
1799 
1800 /**
1801  * Validate ICMP item.
1802  *
1803  * @param[in] item
1804  *   Item specification.
1805  * @param[in] item_flags
1806  *   Bit-fields that holds the items detected until now.
1807  * @param[out] error
1808  *   Pointer to error structure.
1809  *
1810  * @return
1811  *   0 on success, a negative errno value otherwise and rte_errno is set.
1812  */
1813 int
1814 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
1815 			     uint64_t item_flags,
1816 			     uint8_t target_protocol,
1817 			     struct rte_flow_error *error)
1818 {
1819 	const struct rte_flow_item_icmp *mask = item->mask;
1820 	const struct rte_flow_item_icmp nic_mask = {
1821 		.hdr.icmp_type = 0xff,
1822 		.hdr.icmp_code = 0xff,
1823 		.hdr.icmp_ident = RTE_BE16(0xffff),
1824 		.hdr.icmp_seq_nb = RTE_BE16(0xffff),
1825 	};
1826 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1827 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1828 				      MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1829 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1830 				      MLX5_FLOW_LAYER_OUTER_L4;
1831 	int ret;
1832 
1833 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
1834 		return rte_flow_error_set(error, EINVAL,
1835 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1836 					  "protocol filtering not compatible"
1837 					  " with ICMP layer");
1838 	if (!(item_flags & l3m))
1839 		return rte_flow_error_set(error, EINVAL,
1840 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1841 					  "IPv4 is mandatory to filter"
1842 					  " on ICMP");
1843 	if (item_flags & l4m)
1844 		return rte_flow_error_set(error, EINVAL,
1845 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1846 					  "multiple L4 layers not supported");
1847 	if (!mask)
1848 		mask = &nic_mask;
1849 	ret = mlx5_flow_item_acceptable
1850 		(item, (const uint8_t *)mask,
1851 		 (const uint8_t *)&nic_mask,
1852 		 sizeof(struct rte_flow_item_icmp),
1853 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1854 	if (ret < 0)
1855 		return ret;
1856 	return 0;
1857 }
1858 
1859 /**
1860  * Validate Ethernet item.
1861  *
1862  * @param[in] item
1863  *   Item specification.
1864  * @param[in] item_flags
1865  *   Bit-fields that holds the items detected until now.
1866  * @param[out] error
1867  *   Pointer to error structure.
1868  *
1869  * @return
1870  *   0 on success, a negative errno value otherwise and rte_errno is set.
1871  */
1872 int
1873 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
1874 			    uint64_t item_flags, bool ext_vlan_sup,
1875 			    struct rte_flow_error *error)
1876 {
1877 	const struct rte_flow_item_eth *mask = item->mask;
1878 	const struct rte_flow_item_eth nic_mask = {
1879 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1880 		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1881 		.type = RTE_BE16(0xffff),
1882 		.has_vlan = ext_vlan_sup ? 1 : 0,
1883 	};
1884 	int ret;
1885 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1886 	const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2	:
1887 				       MLX5_FLOW_LAYER_OUTER_L2;
1888 
1889 	if (item_flags & ethm)
1890 		return rte_flow_error_set(error, ENOTSUP,
1891 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1892 					  "multiple L2 layers not supported");
1893 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
1894 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
1895 		return rte_flow_error_set(error, EINVAL,
1896 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1897 					  "L2 layer should not follow "
1898 					  "L3 layers");
1899 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
1900 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
1901 		return rte_flow_error_set(error, EINVAL,
1902 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1903 					  "L2 layer should not follow VLAN");
1904 	if (!mask)
1905 		mask = &rte_flow_item_eth_mask;
1906 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1907 					(const uint8_t *)&nic_mask,
1908 					sizeof(struct rte_flow_item_eth),
1909 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1910 	return ret;
1911 }
1912 
1913 /**
1914  * Validate VLAN item.
1915  *
1916  * @param[in] item
1917  *   Item specification.
1918  * @param[in] item_flags
1919  *   Bit-fields that holds the items detected until now.
1920  * @param[in] dev
1921  *   Ethernet device flow is being created on.
1922  * @param[out] error
1923  *   Pointer to error structure.
1924  *
1925  * @return
1926  *   0 on success, a negative errno value otherwise and rte_errno is set.
1927  */
1928 int
1929 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
1930 			     uint64_t item_flags,
1931 			     struct rte_eth_dev *dev,
1932 			     struct rte_flow_error *error)
1933 {
1934 	const struct rte_flow_item_vlan *spec = item->spec;
1935 	const struct rte_flow_item_vlan *mask = item->mask;
1936 	const struct rte_flow_item_vlan nic_mask = {
1937 		.tci = RTE_BE16(UINT16_MAX),
1938 		.inner_type = RTE_BE16(UINT16_MAX),
1939 	};
1940 	uint16_t vlan_tag = 0;
1941 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1942 	int ret;
1943 	const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
1944 					MLX5_FLOW_LAYER_INNER_L4) :
1945 				       (MLX5_FLOW_LAYER_OUTER_L3 |
1946 					MLX5_FLOW_LAYER_OUTER_L4);
1947 	const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1948 					MLX5_FLOW_LAYER_OUTER_VLAN;
1949 
1950 	if (item_flags & vlanm)
1951 		return rte_flow_error_set(error, EINVAL,
1952 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1953 					  "multiple VLAN layers not supported");
1954 	else if ((item_flags & l34m) != 0)
1955 		return rte_flow_error_set(error, EINVAL,
1956 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1957 					  "VLAN cannot follow L3/L4 layer");
1958 	if (!mask)
1959 		mask = &rte_flow_item_vlan_mask;
1960 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1961 					(const uint8_t *)&nic_mask,
1962 					sizeof(struct rte_flow_item_vlan),
1963 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1964 	if (ret)
1965 		return ret;
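	/*
	 * RTE_BE16(0x0fff) covers only the 12-bit VLAN ID (VID) part of
	 * the TCI; any other TCI mask cannot be honored when the VM
	 * workaround (vmwa) context checked below is in use.
	 */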
1966 	if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
1967 		struct mlx5_priv *priv = dev->data->dev_private;
1968 
1969 		if (priv->vmwa_context) {
1970 			/*
1971 			 * A non-NULL context means we run in a virtual machine
1972 			 * with SR-IOV enabled, so a VLAN interface must be
1973 			 * created to make the hypervisor set up the E-Switch
1974 			 * vport context correctly. We avoid creating multiple
1975 			 * VLAN interfaces, so a VLAN tag mask cannot be supported.
1976 			 */
1977 			return rte_flow_error_set(error, EINVAL,
1978 						  RTE_FLOW_ERROR_TYPE_ITEM,
1979 						  item,
1980 						  "VLAN tag mask is not"
1981 						  " supported in virtual"
1982 						  " environment");
1983 		}
1984 	}
1985 	if (spec) {
1986 		vlan_tag = spec->tci;
1987 		vlan_tag &= mask->tci;
1988 	}
1989 	/*
1990 	 * From verbs perspective an empty VLAN is equivalent
1991 	 * to a packet without VLAN layer.
1992 	 */
1993 	if (!vlan_tag)
1994 		return rte_flow_error_set(error, EINVAL,
1995 					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1996 					  item->spec,
1997 					  "VLAN cannot be empty");
1998 	return 0;
1999 }
2000 
2001 /**
2002  * Validate IPV4 item.
2003  *
2004  * @param[in] item
2005  *   Item specification.
2006  * @param[in] item_flags
2007  *   Bit-fields that holds the items detected until now.
2008  * @param[in] last_item
2009  *   Previous validated item in the pattern items.
2010  * @param[in] ether_type
2011  *   Type in the ethernet layer header (including dot1q).
2012  * @param[in] acc_mask
2013  *   Acceptable mask, if NULL the default internal mask
2014  *   will be used to check whether item fields are supported.
2015  * @param[in] range_accepted
2016  *   True if range of values is accepted for specific fields, false otherwise.
2017  * @param[out] error
2018  *   Pointer to error structure.
2019  *
2020  * @return
2021  *   0 on success, a negative errno value otherwise and rte_errno is set.
2022  */
2023 int
2024 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2025 			     uint64_t item_flags,
2026 			     uint64_t last_item,
2027 			     uint16_t ether_type,
2028 			     const struct rte_flow_item_ipv4 *acc_mask,
2029 			     bool range_accepted,
2030 			     struct rte_flow_error *error)
2031 {
2032 	const struct rte_flow_item_ipv4 *mask = item->mask;
2033 	const struct rte_flow_item_ipv4 *spec = item->spec;
2034 	const struct rte_flow_item_ipv4 nic_mask = {
2035 		.hdr = {
2036 			.src_addr = RTE_BE32(0xffffffff),
2037 			.dst_addr = RTE_BE32(0xffffffff),
2038 			.type_of_service = 0xff,
2039 			.next_proto_id = 0xff,
2040 		},
2041 	};
2042 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2043 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2044 				      MLX5_FLOW_LAYER_OUTER_L3;
2045 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2046 				      MLX5_FLOW_LAYER_OUTER_L4;
2047 	int ret;
2048 	uint8_t next_proto = 0xFF;
2049 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2050 				  MLX5_FLOW_LAYER_OUTER_VLAN |
2051 				  MLX5_FLOW_LAYER_INNER_VLAN);
2052 
2053 	if ((last_item & l2_vlan) && ether_type &&
2054 	    ether_type != RTE_ETHER_TYPE_IPV4)
2055 		return rte_flow_error_set(error, EINVAL,
2056 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2057 					  "IPv4 cannot follow L2/VLAN layer "
2058 					  "whose ether type is not IPv4");
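	/*
	 * If an IP-over-IP tunnel was already detected, a next protocol of
	 * IPIP or IPv6 here would start a second tunnel level, which is
	 * rejected below as multiple tunnels.
	 */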
2059 	if (item_flags & MLX5_FLOW_LAYER_IPIP) {
2060 		if (mask && spec)
2061 			next_proto = mask->hdr.next_proto_id &
2062 				     spec->hdr.next_proto_id;
2063 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2064 			return rte_flow_error_set(error, EINVAL,
2065 						  RTE_FLOW_ERROR_TYPE_ITEM,
2066 						  item,
2067 						  "multiple tunnel "
2068 						  "not supported");
2069 	}
2070 	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
2071 		return rte_flow_error_set(error, EINVAL,
2072 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2073 					  "wrong tunnel type - IPv6 specified "
2074 					  "but IPv4 item provided");
2075 	if (item_flags & l3m)
2076 		return rte_flow_error_set(error, ENOTSUP,
2077 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2078 					  "multiple L3 layers not supported");
2079 	else if (item_flags & l4m)
2080 		return rte_flow_error_set(error, EINVAL,
2081 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2082 					  "L3 cannot follow an L4 layer.");
2083 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2084 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2085 		return rte_flow_error_set(error, EINVAL,
2086 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2087 					  "L3 cannot follow an NVGRE layer.");
2088 	if (!mask)
2089 		mask = &rte_flow_item_ipv4_mask;
2090 	else if (mask->hdr.next_proto_id != 0 &&
2091 		 mask->hdr.next_proto_id != 0xff)
2092 		return rte_flow_error_set(error, EINVAL,
2093 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2094 					  "partial mask is not supported"
2095 					  " for protocol");
2096 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2097 					acc_mask ? (const uint8_t *)acc_mask
2098 						 : (const uint8_t *)&nic_mask,
2099 					sizeof(struct rte_flow_item_ipv4),
2100 					range_accepted, error);
2101 	if (ret < 0)
2102 		return ret;
2103 	return 0;
2104 }
2105 
2106 /**
2107  * Validate IPV6 item.
2108  *
2109  * @param[in] item
2110  *   Item specification.
2111  * @param[in] item_flags
2112  *   Bit-fields that holds the items detected until now.
2113  * @param[in] last_item
2114  *   Previous validated item in the pattern items.
2115  * @param[in] ether_type
2116  *   Type in the ethernet layer header (including dot1q).
2117  * @param[in] acc_mask
2118  *   Acceptable mask, if NULL the default internal mask
2119  *   will be used to check whether item fields are supported.
2120  * @param[out] error
2121  *   Pointer to error structure.
2122  *
2123  * @return
2124  *   0 on success, a negative errno value otherwise and rte_errno is set.
2125  */
2126 int
2127 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2128 			     uint64_t item_flags,
2129 			     uint64_t last_item,
2130 			     uint16_t ether_type,
2131 			     const struct rte_flow_item_ipv6 *acc_mask,
2132 			     struct rte_flow_error *error)
2133 {
2134 	const struct rte_flow_item_ipv6 *mask = item->mask;
2135 	const struct rte_flow_item_ipv6 *spec = item->spec;
2136 	const struct rte_flow_item_ipv6 nic_mask = {
2137 		.hdr = {
2138 			.src_addr =
2139 				"\xff\xff\xff\xff\xff\xff\xff\xff"
2140 				"\xff\xff\xff\xff\xff\xff\xff\xff",
2141 			.dst_addr =
2142 				"\xff\xff\xff\xff\xff\xff\xff\xff"
2143 				"\xff\xff\xff\xff\xff\xff\xff\xff",
2144 			.vtc_flow = RTE_BE32(0xffffffff),
2145 			.proto = 0xff,
2146 		},
2147 	};
2148 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2149 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2150 				      MLX5_FLOW_LAYER_OUTER_L3;
2151 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2152 				      MLX5_FLOW_LAYER_OUTER_L4;
2153 	int ret;
2154 	uint8_t next_proto = 0xFF;
2155 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2156 				  MLX5_FLOW_LAYER_OUTER_VLAN |
2157 				  MLX5_FLOW_LAYER_INNER_VLAN);
2158 
2159 	if ((last_item & l2_vlan) && ether_type &&
2160 	    ether_type != RTE_ETHER_TYPE_IPV6)
2161 		return rte_flow_error_set(error, EINVAL,
2162 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2163 					  "IPv6 cannot follow L2/VLAN layer "
2164 					  "whose ether type is not IPv6");
2165 	if (mask && mask->hdr.proto == UINT8_MAX && spec)
2166 		next_proto = spec->hdr.proto;
2167 	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) {
2168 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2169 			return rte_flow_error_set(error, EINVAL,
2170 						  RTE_FLOW_ERROR_TYPE_ITEM,
2171 						  item,
2172 						  "multiple tunnel "
2173 						  "not supported");
2174 	}
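	/*
	 * The proto field cannot carry an IPv6 extension header number,
	 * matching on extension headers is not supported.
	 */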
2175 	if (next_proto == IPPROTO_HOPOPTS  ||
2176 	    next_proto == IPPROTO_ROUTING  ||
2177 	    next_proto == IPPROTO_FRAGMENT ||
2178 	    next_proto == IPPROTO_ESP	   ||
2179 	    next_proto == IPPROTO_AH	   ||
2180 	    next_proto == IPPROTO_DSTOPTS)
2181 		return rte_flow_error_set(error, EINVAL,
2182 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2183 					  "IPv6 proto (next header) should "
2184 					  "not be set as extension header");
2185 	if (item_flags & MLX5_FLOW_LAYER_IPIP)
2186 		return rte_flow_error_set(error, EINVAL,
2187 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2188 					  "wrong tunnel type - IPv4 specified "
2189 					  "but IPv6 item provided");
2190 	if (item_flags & l3m)
2191 		return rte_flow_error_set(error, ENOTSUP,
2192 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2193 					  "multiple L3 layers not supported");
2194 	else if (item_flags & l4m)
2195 		return rte_flow_error_set(error, EINVAL,
2196 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2197 					  "L3 cannot follow an L4 layer.");
2198 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2199 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2200 		return rte_flow_error_set(error, EINVAL,
2201 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2202 					  "L3 cannot follow an NVGRE layer.");
2203 	if (!mask)
2204 		mask = &rte_flow_item_ipv6_mask;
2205 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2206 					acc_mask ? (const uint8_t *)acc_mask
2207 						 : (const uint8_t *)&nic_mask,
2208 					sizeof(struct rte_flow_item_ipv6),
2209 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2210 	if (ret < 0)
2211 		return ret;
2212 	return 0;
2213 }
2214 
2215 /**
2216  * Validate UDP item.
2217  *
2218  * @param[in] item
2219  *   Item specification.
2220  * @param[in] item_flags
2221  *   Bit-fields that holds the items detected until now.
2222  * @param[in] target_protocol
2223  *   The next protocol in the previous item.
2226  * @param[out] error
2227  *   Pointer to error structure.
2228  *
2229  * @return
2230  *   0 on success, a negative errno value otherwise and rte_errno is set.
2231  */
2232 int
2233 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2234 			    uint64_t item_flags,
2235 			    uint8_t target_protocol,
2236 			    struct rte_flow_error *error)
2237 {
2238 	const struct rte_flow_item_udp *mask = item->mask;
2239 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2240 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2241 				      MLX5_FLOW_LAYER_OUTER_L3;
2242 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2243 				      MLX5_FLOW_LAYER_OUTER_L4;
2244 	int ret;
2245 
2246 	if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2247 		return rte_flow_error_set(error, EINVAL,
2248 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2249 					  "protocol filtering not compatible"
2250 					  " with UDP layer");
2251 	if (!(item_flags & l3m))
2252 		return rte_flow_error_set(error, EINVAL,
2253 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2254 					  "L3 is mandatory to filter on L4");
2255 	if (item_flags & l4m)
2256 		return rte_flow_error_set(error, EINVAL,
2257 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2258 					  "multiple L4 layers not supported");
2259 	if (!mask)
2260 		mask = &rte_flow_item_udp_mask;
2261 	ret = mlx5_flow_item_acceptable
2262 		(item, (const uint8_t *)mask,
2263 		 (const uint8_t *)&rte_flow_item_udp_mask,
2264 		 sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2265 		 error);
2266 	if (ret < 0)
2267 		return ret;
2268 	return 0;
2269 }
2270 
2271 /**
2272  * Validate TCP item.
2273  *
2274  * @param[in] item
2275  *   Item specification.
2276  * @param[in] item_flags
2277  *   Bit-fields that holds the items detected until now.
2278  * @param[in] target_protocol
2279  *   The next protocol in the previous item.
 * @param[in] flow_mask
 *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
2280  * @param[out] error
2281  *   Pointer to error structure.
2282  *
2283  * @return
2284  *   0 on success, a negative errno value otherwise and rte_errno is set.
2285  */
2286 int
2287 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2288 			    uint64_t item_flags,
2289 			    uint8_t target_protocol,
2290 			    const struct rte_flow_item_tcp *flow_mask,
2291 			    struct rte_flow_error *error)
2292 {
2293 	const struct rte_flow_item_tcp *mask = item->mask;
2294 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2295 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2296 				      MLX5_FLOW_LAYER_OUTER_L3;
2297 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2298 				      MLX5_FLOW_LAYER_OUTER_L4;
2299 	int ret;
2300 
2301 	MLX5_ASSERT(flow_mask);
2302 	if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2303 		return rte_flow_error_set(error, EINVAL,
2304 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2305 					  "protocol filtering not compatible"
2306 					  " with TCP layer");
2307 	if (!(item_flags & l3m))
2308 		return rte_flow_error_set(error, EINVAL,
2309 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2310 					  "L3 is mandatory to filter on L4");
2311 	if (item_flags & l4m)
2312 		return rte_flow_error_set(error, EINVAL,
2313 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2314 					  "multiple L4 layers not supported");
2315 	if (!mask)
2316 		mask = &rte_flow_item_tcp_mask;
2317 	ret = mlx5_flow_item_acceptable
2318 		(item, (const uint8_t *)mask,
2319 		 (const uint8_t *)flow_mask,
2320 		 sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2321 		 error);
2322 	if (ret < 0)
2323 		return ret;
2324 	return 0;
2325 }
2326 
2327 /**
2328  * Validate VXLAN item.
2329  *
2330  * @param[in] item
2331  *   Item specification.
2332  * @param[in] item_flags
2333  *   Bit-fields that holds the items detected until now.
2334  * @param[in] target_protocol
2335  *   The next protocol in the previous item.
2336  * @param[out] error
2337  *   Pointer to error structure.
2338  *
2339  * @return
2340  *   0 on success, a negative errno value otherwise and rte_errno is set.
2341  */
2342 int
2343 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
2344 			      uint64_t item_flags,
2345 			      struct rte_flow_error *error)
2346 {
2347 	const struct rte_flow_item_vxlan *spec = item->spec;
2348 	const struct rte_flow_item_vxlan *mask = item->mask;
2349 	int ret;
2350 	union vni {
2351 		uint32_t vlan_id;
2352 		uint8_t vni[4];
2353 	} id = { .vlan_id = 0, };
2354 
2355 
2356 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2357 		return rte_flow_error_set(error, ENOTSUP,
2358 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2359 					  "multiple tunnel layers not"
2360 					  " supported");
2361 	/*
2362 	 * Verify only UDPv4 is present as defined in
2363 	 * https://tools.ietf.org/html/rfc7348
2364 	 */
2365 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2366 		return rte_flow_error_set(error, EINVAL,
2367 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2368 					  "no outer UDP layer found");
2369 	if (!mask)
2370 		mask = &rte_flow_item_vxlan_mask;
2371 	ret = mlx5_flow_item_acceptable
2372 		(item, (const uint8_t *)mask,
2373 		 (const uint8_t *)&rte_flow_item_vxlan_mask,
2374 		 sizeof(struct rte_flow_item_vxlan),
2375 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2376 	if (ret < 0)
2377 		return ret;
2378 	if (spec) {
2379 		memcpy(&id.vni[1], spec->vni, 3);
2380 		memcpy(&id.vni[1], mask->vni, 3);
2381 	}
2382 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2383 		return rte_flow_error_set(error, ENOTSUP,
2384 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2385 					  "VXLAN tunnel must be fully defined");
2386 	return 0;
2387 }
2388 
2389 /**
2390  * Validate VXLAN_GPE item.
2391  *
2392  * @param[in] item
2393  *   Item specification.
2394  * @param[in] item_flags
2395  *   Bit-fields that holds the items detected until now.
2396  * @param[in] priv
2397  *   Pointer to the private data structure.
2398  * @param[in] target_protocol
2399  *   The next protocol in the previous item.
2400  * @param[out] error
2401  *   Pointer to error structure.
2402  *
2403  * @return
2404  *   0 on success, a negative errno value otherwise and rte_errno is set.
2405  */
2406 int
2407 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
2408 				  uint64_t item_flags,
2409 				  struct rte_eth_dev *dev,
2410 				  struct rte_flow_error *error)
2411 {
2412 	struct mlx5_priv *priv = dev->data->dev_private;
2413 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
2414 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
2415 	int ret;
2416 	union vni {
2417 		uint32_t vlan_id;
2418 		uint8_t vni[4];
2419 	} id = { .vlan_id = 0, };
2420 
2421 	if (!priv->config.l3_vxlan_en)
2422 		return rte_flow_error_set(error, ENOTSUP,
2423 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2424 					  "L3 VXLAN is not enabled by device"
2425 					  " parameter and/or not configured in"
2426 					  " firmware");
2427 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2428 		return rte_flow_error_set(error, ENOTSUP,
2429 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2430 					  "multiple tunnel layers not"
2431 					  " supported");
2432 	/*
2433 	 * Verify an outer UDP layer is present, VXLAN-GPE is carried
2434 	 * over UDP (draft-ietf-nvo3-vxlan-gpe).
2435 	 */
2436 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2437 		return rte_flow_error_set(error, EINVAL,
2438 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2439 					  "no outer UDP layer found");
2440 	if (!mask)
2441 		mask = &rte_flow_item_vxlan_gpe_mask;
2442 	ret = mlx5_flow_item_acceptable
2443 		(item, (const uint8_t *)mask,
2444 		 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2445 		 sizeof(struct rte_flow_item_vxlan_gpe),
2446 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2447 	if (ret < 0)
2448 		return ret;
2449 	if (spec) {
2450 		if (spec->protocol)
2451 			return rte_flow_error_set(error, ENOTSUP,
2452 						  RTE_FLOW_ERROR_TYPE_ITEM,
2453 						  item,
2454 						  "VXLAN-GPE protocol"
2455 						  " not supported");
2456 		memcpy(&id.vni[1], spec->vni, 3);
2457 		memcpy(&id.vni[1], mask->vni, 3);
2458 	}
2459 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2460 		return rte_flow_error_set(error, ENOTSUP,
2461 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2462 					  "VXLAN-GPE tunnel must be fully"
2463 					  " defined");
2464 	return 0;
2465 }

2466 /**
2467  * Validate GRE Key item.
2468  *
2469  * @param[in] item
2470  *   Item specification.
2471  * @param[in] item_flags
2472  *   Bit flags to mark detected items.
2473  * @param[in] gre_item
2474  *   Pointer to gre_item
2475  * @param[out] error
2476  *   Pointer to error structure.
2477  *
2478  * @return
2479  *   0 on success, a negative errno value otherwise and rte_errno is set.
2480  */
2481 int
2482 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2483 				uint64_t item_flags,
2484 				const struct rte_flow_item *gre_item,
2485 				struct rte_flow_error *error)
2486 {
2487 	const rte_be32_t *mask = item->mask;
2488 	int ret = 0;
2489 	rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2490 	const struct rte_flow_item_gre *gre_spec;
2491 	const struct rte_flow_item_gre *gre_mask;
2492 
2493 	if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2494 		return rte_flow_error_set(error, ENOTSUP,
2495 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2496 					  "Multiple GRE keys not supported");
2497 	if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2498 		return rte_flow_error_set(error, ENOTSUP,
2499 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2500 					  "No preceding GRE header");
2501 	if (item_flags & MLX5_FLOW_LAYER_INNER)
2502 		return rte_flow_error_set(error, ENOTSUP,
2503 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2504 					  "GRE key following a wrong item");
2505 	gre_mask = gre_item->mask;
2506 	if (!gre_mask)
2507 		gre_mask = &rte_flow_item_gre_mask;
2508 	gre_spec = gre_item->spec;
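	/* RTE_BE16(0x2000) selects the K (key present) bit in c_rsvd0_ver. */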
2509 	if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2510 			 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2511 		return rte_flow_error_set(error, EINVAL,
2512 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2513 					  "Key bit must be on");
2514 
2515 	if (!mask)
2516 		mask = &gre_key_default_mask;
2517 	ret = mlx5_flow_item_acceptable
2518 		(item, (const uint8_t *)mask,
2519 		 (const uint8_t *)&gre_key_default_mask,
2520 		 sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2521 	return ret;
2522 }
2523 
2524 /**
2525  * Validate GRE item.
2526  *
2527  * @param[in] item
2528  *   Item specification.
2529  * @param[in] item_flags
2530  *   Bit flags to mark detected items.
2531  * @param[in] target_protocol
2532  *   The next protocol in the previous item.
2533  * @param[out] error
2534  *   Pointer to error structure.
2535  *
2536  * @return
2537  *   0 on success, a negative errno value otherwise and rte_errno is set.
2538  */
2539 int
2540 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2541 			    uint64_t item_flags,
2542 			    uint8_t target_protocol,
2543 			    struct rte_flow_error *error)
2544 {
2545 	const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2546 	const struct rte_flow_item_gre *mask = item->mask;
2547 	int ret;
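	/*
	 * The nic_mask below allows matching only the C, K and S flag bits
	 * (0xB000) and the GRE protocol field.
	 */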
2548 	const struct rte_flow_item_gre nic_mask = {
2549 		.c_rsvd0_ver = RTE_BE16(0xB000),
2550 		.protocol = RTE_BE16(UINT16_MAX),
2551 	};
2552 
2553 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2554 		return rte_flow_error_set(error, EINVAL,
2555 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2556 					  "protocol filtering not compatible"
2557 					  " with this GRE layer");
2558 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2559 		return rte_flow_error_set(error, ENOTSUP,
2560 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2561 					  "multiple tunnel layers not"
2562 					  " supported");
2563 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2564 		return rte_flow_error_set(error, ENOTSUP,
2565 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2566 					  "L3 Layer is missing");
2567 	if (!mask)
2568 		mask = &rte_flow_item_gre_mask;
2569 	ret = mlx5_flow_item_acceptable
2570 		(item, (const uint8_t *)mask,
2571 		 (const uint8_t *)&nic_mask,
2572 		 sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2573 		 error);
2574 	if (ret < 0)
2575 		return ret;
2576 #ifndef HAVE_MLX5DV_DR
2577 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2578 	if (spec && (spec->protocol & mask->protocol))
2579 		return rte_flow_error_set(error, ENOTSUP,
2580 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2581 					  "without MPLS support the"
2582 					  " specification cannot be used for"
2583 					  " filtering");
2584 #endif
2585 #endif
2586 	return 0;
2587 }
2588 
2589 /**
2590  * Validate Geneve item.
2591  *
2592  * @param[in] item
2593  *   Item specification.
2594  * @param[in] item_flags
2595  *   Bit-fields that holds the items detected until now.
2596  * @param[in] dev
2597  *   Pointer to the Ethernet device structure.
2598  * @param[out] error
2599  *   Pointer to error structure.
2600  *
2601  * @return
2602  *   0 on success, a negative errno value otherwise and rte_errno is set.
2603  */
2604 
2605 int
2606 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2607 			       uint64_t item_flags,
2608 			       struct rte_eth_dev *dev,
2609 			       struct rte_flow_error *error)
2610 {
2611 	struct mlx5_priv *priv = dev->data->dev_private;
2612 	const struct rte_flow_item_geneve *spec = item->spec;
2613 	const struct rte_flow_item_geneve *mask = item->mask;
2614 	int ret;
2615 	uint16_t gbhdr;
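	/*
	 * The geneve_max_opt_len capability selects which of the two
	 * supported option length limits applies to this device.
	 */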
2616 	uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2617 			  MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2618 	const struct rte_flow_item_geneve nic_mask = {
2619 		.ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2620 		.vni = "\xff\xff\xff",
2621 		.protocol = RTE_BE16(UINT16_MAX),
2622 	};
2623 
2624 	if (!priv->config.hca_attr.tunnel_stateless_geneve_rx)
2625 		return rte_flow_error_set(error, ENOTSUP,
2626 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2627 					  "L3 Geneve is not enabled by device"
2628 					  " parameter and/or not configured in"
2629 					  " firmware");
2630 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2631 		return rte_flow_error_set(error, ENOTSUP,
2632 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2633 					  "multiple tunnel layers not"
2634 					  " supported");
2635 	/*
2636 	 * Verify an outer UDP layer is present, Geneve is carried
2637 	 * over UDP (RFC 8926).
2638 	 */
2639 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2640 		return rte_flow_error_set(error, EINVAL,
2641 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2642 					  "no outer UDP layer found");
2643 	if (!mask)
2644 		mask = &rte_flow_item_geneve_mask;
2645 	ret = mlx5_flow_item_acceptable
2646 				  (item, (const uint8_t *)mask,
2647 				   (const uint8_t *)&nic_mask,
2648 				   sizeof(struct rte_flow_item_geneve),
2649 				   MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2650 	if (ret)
2651 		return ret;
2652 	if (spec) {
2653 		gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2654 		if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2655 		     MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2656 		     MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2657 			return rte_flow_error_set(error, ENOTSUP,
2658 						  RTE_FLOW_ERROR_TYPE_ITEM,
2659 						  item,
2660 						  "Geneve protocol unsupported"
2661 						  " fields are being used");
2662 		if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2663 			return rte_flow_error_set
2664 					(error, ENOTSUP,
2665 					 RTE_FLOW_ERROR_TYPE_ITEM,
2666 					 item,
2667 					 "Unsupported Geneve options length");
2668 	}
2669 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2670 		return rte_flow_error_set
2671 				    (error, ENOTSUP,
2672 				     RTE_FLOW_ERROR_TYPE_ITEM, item,
2673 				     "Geneve tunnel must be fully defined");
2674 	return 0;
2675 }
2676 
2677 /**
2678  * Validate Geneve TLV option item.
2679  *
2680  * @param[in] item
2681  *   Item specification.
2682  * @param[in] last_item
2683  *   Previous validated item in the pattern items.
2684  * @param[in] geneve_item
2685  *   Previous GENEVE item specification.
2686  * @param[in] dev
2687  *   Pointer to the rte_eth_dev structure.
2688  * @param[out] error
2689  *   Pointer to error structure.
2690  *
2691  * @return
2692  *   0 on success, a negative errno value otherwise and rte_errno is set.
2693  */
2694 int
2695 mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item,
2696 				   uint64_t last_item,
2697 				   const struct rte_flow_item *geneve_item,
2698 				   struct rte_eth_dev *dev,
2699 				   struct rte_flow_error *error)
2700 {
2701 	struct mlx5_priv *priv = dev->data->dev_private;
2702 	struct mlx5_dev_ctx_shared *sh = priv->sh;
2703 	struct mlx5_geneve_tlv_option_resource *geneve_opt_resource;
2704 	struct mlx5_hca_attr *hca_attr = &priv->config.hca_attr;
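	/* The HCA reports the maximum TLV data length in 4-byte words. */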
2705 	uint8_t data_max_supported =
2706 			hca_attr->max_geneve_tlv_option_data_len * 4;
2707 	struct mlx5_dev_config *config = &priv->config;
2708 	const struct rte_flow_item_geneve *geneve_spec;
2709 	const struct rte_flow_item_geneve *geneve_mask;
2710 	const struct rte_flow_item_geneve_opt *spec = item->spec;
2711 	const struct rte_flow_item_geneve_opt *mask = item->mask;
2712 	unsigned int i;
2713 	unsigned int data_len;
2714 	uint8_t tlv_option_len;
2715 	uint16_t optlen_m, optlen_v;
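	/* option_len is a 5-bit field in the GENEVE option header, hence 0x1f. */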
2716 	const struct rte_flow_item_geneve_opt full_mask = {
2717 		.option_class = RTE_BE16(0xffff),
2718 		.option_type = 0xff,
2719 		.option_len = 0x1f,
2720 	};
2721 
2722 	if (!mask)
2723 		mask = &rte_flow_item_geneve_opt_mask;
2724 	if (!spec)
2725 		return rte_flow_error_set
2726 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2727 			"Geneve TLV opt class/type/length must be specified");
2728 	if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK)
2729 		return rte_flow_error_set
2730 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2731 			"Geneve TLV opt length exceeds the limit (31)");
2732 	/* Check if class type and length masks are full. */
2733 	if (full_mask.option_class != mask->option_class ||
2734 	    full_mask.option_type != mask->option_type ||
2735 	    full_mask.option_len != (mask->option_len & full_mask.option_len))
2736 		return rte_flow_error_set
2737 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2738 			"Geneve TLV opt class/type/length masks must be full");
2739 	/* Check if length is supported */
2740 	if ((uint32_t)spec->option_len >
2741 			config->hca_attr.max_geneve_tlv_option_data_len)
2742 		return rte_flow_error_set
2743 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2744 			"Geneve TLV opt length not supported");
2745 	if (config->hca_attr.max_geneve_tlv_options > 1)
2746 		DRV_LOG(DEBUG,
2747 			"max_geneve_tlv_options supports more than 1 option");
2748 	/* Check GENEVE item preceding. */
2749 	if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE))
2750 		return rte_flow_error_set
2751 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2752 			"Geneve opt item must be preceded by Geneve item");
2753 	geneve_spec = geneve_item->spec;
2754 	geneve_mask = geneve_item->mask ? geneve_item->mask :
2755 					  &rte_flow_item_geneve_mask;
2756 	/* Check if GENEVE TLV option size doesn't exceed option length */
2757 	if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 ||
2758 			    geneve_spec->ver_opt_len_o_c_rsvd0)) {
2759 		tlv_option_len = spec->option_len & mask->option_len;
2760 		optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0);
2761 		optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v);
2762 		optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0);
2763 		optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m);
2764 		if ((optlen_v & optlen_m) <= tlv_option_len)
2765 			return rte_flow_error_set
2766 				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2767 				 "GENEVE TLV option length exceeds optlen");
2768 	}
2769 	/* Check if length is 0 or data is 0. */
2770 	if (spec->data == NULL || spec->option_len == 0)
2771 		return rte_flow_error_set
2772 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2773 			"Geneve TLV opt with zero data/length not supported");
2774 	/* Check not all data & mask are 0. */
2775 	data_len = spec->option_len * 4;
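	/* option_len counts 4-byte words, data_len is in bytes. */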
2776 	if (mask->data == NULL) {
2777 		for (i = 0; i < data_len; i++)
2778 			if (spec->data[i])
2779 				break;
2780 		if (i == data_len)
2781 			return rte_flow_error_set(error, ENOTSUP,
2782 				RTE_FLOW_ERROR_TYPE_ITEM, item,
2783 				"Can't match on Geneve option data 0");
2784 	} else {
2785 		for (i = 0; i < data_len; i++)
2786 			if (spec->data[i] & mask->data[i])
2787 				break;
2788 		if (i == data_len)
2789 			return rte_flow_error_set(error, ENOTSUP,
2790 				RTE_FLOW_ERROR_TYPE_ITEM, item,
2791 				"Can't match on Geneve option data and mask 0");
2792 		/* Check data mask supported. */
2793 		for (i = data_max_supported; i < data_len ; i++)
2794 			if (mask->data[i])
2795 				return rte_flow_error_set(error, ENOTSUP,
2796 					RTE_FLOW_ERROR_TYPE_ITEM, item,
2797 					"Data mask is of unsupported size");
2798 	}
2799 	/* Check GENEVE option is supported in NIC. */
2800 	if (!config->hca_attr.geneve_tlv_opt)
2801 		return rte_flow_error_set
2802 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2803 			"Geneve TLV opt not supported");
2804 	/* Check if we already have geneve option with different type/class. */
2805 	rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
2806 	geneve_opt_resource = sh->geneve_tlv_option_resource;
2807 	if (geneve_opt_resource != NULL)
2808 		if (geneve_opt_resource->option_class != spec->option_class ||
2809 		    geneve_opt_resource->option_type != spec->option_type ||
2810 		    geneve_opt_resource->length != spec->option_len) {
2811 			rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
2812 			return rte_flow_error_set(error, ENOTSUP,
2813 				RTE_FLOW_ERROR_TYPE_ITEM, item,
2814 				"Only one Geneve TLV option supported");
2815 		}
2816 	rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
2817 	return 0;
2818 }
2819 
2820 /**
2821  * Validate MPLS item.
2822  *
2823  * @param[in] dev
2824  *   Pointer to the rte_eth_dev structure.
2825  * @param[in] item
2826  *   Item specification.
2827  * @param[in] item_flags
2828  *   Bit-fields that holds the items detected until now.
2829  * @param[in] prev_layer
2830  *   The protocol layer indicated in previous item.
2831  * @param[out] error
2832  *   Pointer to error structure.
2833  *
2834  * @return
2835  *   0 on success, a negative errno value otherwise and rte_errno is set.
2836  */
2837 int
2838 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
2839 			     const struct rte_flow_item *item __rte_unused,
2840 			     uint64_t item_flags __rte_unused,
2841 			     uint64_t prev_layer __rte_unused,
2842 			     struct rte_flow_error *error)
2843 {
2844 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
2845 	const struct rte_flow_item_mpls *mask = item->mask;
2846 	struct mlx5_priv *priv = dev->data->dev_private;
2847 	int ret;
2848 
2849 	if (!priv->config.mpls_en)
2850 		return rte_flow_error_set(error, ENOTSUP,
2851 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2852 					  "MPLS not supported or"
2853 					  " disabled in firmware"
2854 					  " configuration.");
2855 	/* MPLS over IP, UDP, GRE is allowed */
2856 	if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 |
2857 			    MLX5_FLOW_LAYER_OUTER_L4_UDP |
2858 			    MLX5_FLOW_LAYER_GRE |
2859 			    MLX5_FLOW_LAYER_GRE_KEY)))
2860 		return rte_flow_error_set(error, EINVAL,
2861 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2862 					  "protocol filtering not compatible"
2863 					  " with MPLS layer");
2864 	/* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
2865 	if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
2866 	    !(item_flags & MLX5_FLOW_LAYER_GRE))
2867 		return rte_flow_error_set(error, ENOTSUP,
2868 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2869 					  "multiple tunnel layers not"
2870 					  " supported");
2871 	if (!mask)
2872 		mask = &rte_flow_item_mpls_mask;
2873 	ret = mlx5_flow_item_acceptable
2874 		(item, (const uint8_t *)mask,
2875 		 (const uint8_t *)&rte_flow_item_mpls_mask,
2876 		 sizeof(struct rte_flow_item_mpls),
2877 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2878 	if (ret < 0)
2879 		return ret;
2880 	return 0;
2881 #else
2882 	return rte_flow_error_set(error, ENOTSUP,
2883 				  RTE_FLOW_ERROR_TYPE_ITEM, item,
2884 				  "MPLS is not supported by Verbs, please"
2885 				  " update.");
2886 #endif
2887 }
2888 
2889 /**
2890  * Validate NVGRE item.
2891  *
2892  * @param[in] item
2893  *   Item specification.
2894  * @param[in] item_flags
2895  *   Bit flags to mark detected items.
2896  * @param[in] target_protocol
2897  *   The next protocol in the previous item.
2898  * @param[out] error
2899  *   Pointer to error structure.
2900  *
2901  * @return
2902  *   0 on success, a negative errno value otherwise and rte_errno is set.
2903  */
2904 int
2905 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
2906 			      uint64_t item_flags,
2907 			      uint8_t target_protocol,
2908 			      struct rte_flow_error *error)
2909 {
2910 	const struct rte_flow_item_nvgre *mask = item->mask;
2911 	int ret;
2912 
2913 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2914 		return rte_flow_error_set(error, EINVAL,
2915 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2916 					  "protocol filtering not compatible"
2917 					  " with this GRE layer");
2918 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2919 		return rte_flow_error_set(error, ENOTSUP,
2920 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2921 					  "multiple tunnel layers not"
2922 					  " supported");
2923 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2924 		return rte_flow_error_set(error, ENOTSUP,
2925 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2926 					  "L3 Layer is missing");
2927 	if (!mask)
2928 		mask = &rte_flow_item_nvgre_mask;
2929 	ret = mlx5_flow_item_acceptable
2930 		(item, (const uint8_t *)mask,
2931 		 (const uint8_t *)&rte_flow_item_nvgre_mask,
2932 		 sizeof(struct rte_flow_item_nvgre),
2933 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2934 	if (ret < 0)
2935 		return ret;
2936 	return 0;
2937 }
2938 
2939 /**
2940  * Validate eCPRI item.
2941  *
2942  * @param[in] item
2943  *   Item specification.
2944  * @param[in] item_flags
2945  *   Bit-fields that holds the items detected until now.
2946  * @param[in] last_item
2947  *   Previous validated item in the pattern items.
2948  * @param[in] ether_type
2949  *   Type in the ethernet layer header (including dot1q).
2950  * @param[in] acc_mask
2951  *   Acceptable mask, if NULL the default internal mask
2952  *   will be used to check whether item fields are supported.
2953  * @param[out] error
2954  *   Pointer to error structure.
2955  *
2956  * @return
2957  *   0 on success, a negative errno value otherwise and rte_errno is set.
2958  */
2959 int
2960 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
2961 			      uint64_t item_flags,
2962 			      uint64_t last_item,
2963 			      uint16_t ether_type,
2964 			      const struct rte_flow_item_ecpri *acc_mask,
2965 			      struct rte_flow_error *error)
2966 {
2967 	const struct rte_flow_item_ecpri *mask = item->mask;
2968 	const struct rte_flow_item_ecpri nic_mask = {
2969 		.hdr = {
2970 			.common = {
2971 				.u32 =
2972 				RTE_BE32(((const struct rte_ecpri_common_hdr) {
2973 					.type = 0xFF,
2974 					}).u32),
2975 			},
2976 			.dummy[0] = 0xFFFFFFFF,
2977 		},
2978 	};
2979 	const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
2980 					MLX5_FLOW_LAYER_OUTER_VLAN);
2981 	struct rte_flow_item_ecpri mask_lo;
2982 
2983 	if (!(last_item & outer_l2_vlan) &&
2984 	    last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP)
2985 		return rte_flow_error_set(error, EINVAL,
2986 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2987 					  "eCPRI can only follow L2/VLAN layer or UDP layer");
2988 	if ((last_item & outer_l2_vlan) && ether_type &&
2989 	    ether_type != RTE_ETHER_TYPE_ECPRI)
2990 		return rte_flow_error_set(error, EINVAL,
2991 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2992 					  "eCPRI cannot follow L2/VLAN layer whose ether type is not 0xAEFE");
2993 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2994 		return rte_flow_error_set(error, EINVAL,
2995 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2996 					  "eCPRI with tunnel is not supported right now");
2997 	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
2998 		return rte_flow_error_set(error, ENOTSUP,
2999 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3000 					  "multiple L3 layers not supported");
3001 	else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
3002 		return rte_flow_error_set(error, EINVAL,
3003 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3004 					  "eCPRI cannot coexist with a TCP layer");
3005 	/* In specification, eCPRI could be over UDP layer. */
3006 	else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
3007 		return rte_flow_error_set(error, EINVAL,
3008 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3009 					  "eCPRI over UDP layer is not supported right now");
3010 	/* Mask for type field in common header could be zero. */
3011 	if (!mask)
3012 		mask = &rte_flow_item_ecpri_mask;
3013 	/* Input mask is in big-endian format. */
3014 	mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
3015 	if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
3016 		return rte_flow_error_set(error, EINVAL,
3017 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3018 					  "partial mask is not supported for protocol");
3019 	else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
3020 		return rte_flow_error_set(error, EINVAL,
3021 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3022 					  "message header mask must be after a type mask");
3023 	return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
3024 					 acc_mask ? (const uint8_t *)acc_mask
3025 						  : (const uint8_t *)&nic_mask,
3026 					 sizeof(struct rte_flow_item_ecpri),
3027 					 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3028 }
3029 
3030 /**
3031  * Release resources related to the QUEUE/RSS action split.
3032  *
3033  * @param dev
3034  *   Pointer to Ethernet device.
3035  * @param flow
3036  *   Flow to release id's from.
3037  */
3038 static void
3039 flow_mreg_split_qrss_release(struct rte_eth_dev *dev,
3040 			     struct rte_flow *flow)
3041 {
3042 	struct mlx5_priv *priv = dev->data->dev_private;
3043 	uint32_t handle_idx;
3044 	struct mlx5_flow_handle *dev_handle;
3045 
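	/*
	 * Walk every device handle attached to the flow and return any
	 * Q/RSS split flow id it holds to the dedicated ipool.
	 */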
3046 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
3047 		       handle_idx, dev_handle, next)
3048 		if (dev_handle->split_flow_id)
3049 			mlx5_ipool_free(priv->sh->ipool
3050 					[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
3051 					dev_handle->split_flow_id);
3052 }
3053 
3054 static int
3055 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
3056 		   const struct rte_flow_attr *attr __rte_unused,
3057 		   const struct rte_flow_item items[] __rte_unused,
3058 		   const struct rte_flow_action actions[] __rte_unused,
3059 		   bool external __rte_unused,
3060 		   int hairpin __rte_unused,
3061 		   struct rte_flow_error *error)
3062 {
3063 	return rte_flow_error_set(error, ENOTSUP,
3064 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3065 }
3066 
3067 static struct mlx5_flow *
3068 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
3069 		  const struct rte_flow_attr *attr __rte_unused,
3070 		  const struct rte_flow_item items[] __rte_unused,
3071 		  const struct rte_flow_action actions[] __rte_unused,
3072 		  struct rte_flow_error *error)
3073 {
3074 	rte_flow_error_set(error, ENOTSUP,
3075 			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3076 	return NULL;
3077 }
3078 
3079 static int
3080 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
3081 		    struct mlx5_flow *dev_flow __rte_unused,
3082 		    const struct rte_flow_attr *attr __rte_unused,
3083 		    const struct rte_flow_item items[] __rte_unused,
3084 		    const struct rte_flow_action actions[] __rte_unused,
3085 		    struct rte_flow_error *error)
3086 {
3087 	return rte_flow_error_set(error, ENOTSUP,
3088 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3089 }
3090 
3091 static int
3092 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
3093 		struct rte_flow *flow __rte_unused,
3094 		struct rte_flow_error *error)
3095 {
3096 	return rte_flow_error_set(error, ENOTSUP,
3097 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3098 }
3099 
3100 static void
3101 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
3102 		 struct rte_flow *flow __rte_unused)
3103 {
3104 }
3105 
3106 static void
3107 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
3108 		  struct rte_flow *flow __rte_unused)
3109 {
3110 }
3111 
3112 static int
3113 flow_null_query(struct rte_eth_dev *dev __rte_unused,
3114 		struct rte_flow *flow __rte_unused,
3115 		const struct rte_flow_action *actions __rte_unused,
3116 		void *data __rte_unused,
3117 		struct rte_flow_error *error)
3118 {
3119 	return rte_flow_error_set(error, ENOTSUP,
3120 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3121 }
3122 
3123 static int
3124 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused,
3125 		      uint32_t domains __rte_unused,
3126 		      uint32_t flags __rte_unused)
3127 {
3128 	return 0;
3129 }
3130 
3131 /* Void driver to protect from null pointer reference. */
3132 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
3133 	.validate = flow_null_validate,
3134 	.prepare = flow_null_prepare,
3135 	.translate = flow_null_translate,
3136 	.apply = flow_null_apply,
3137 	.remove = flow_null_remove,
3138 	.destroy = flow_null_destroy,
3139 	.query = flow_null_query,
3140 	.sync_domain = flow_null_sync_domain,
3141 };
3142 
3143 /**
3144  * Select flow driver type according to flow attributes and device
3145  * configuration.
3146  *
3147  * @param[in] dev
3148  *   Pointer to the dev structure.
3149  * @param[in] attr
3150  *   Pointer to the flow attributes.
3151  *
3152  * @return
3153  *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
3154  */
3155 static enum mlx5_flow_drv_type
3156 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
3157 {
3158 	struct mlx5_priv *priv = dev->data->dev_private;
3159 	/* The OS layer may first determine a specific flow type (DV, VERBS). */
3160 	enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
3161 
3162 	if (type != MLX5_FLOW_TYPE_MAX)
3163 		return type;
3164 	/* If no OS specific type - continue with DV/VERBS selection */
3165 	if (attr->transfer && priv->config.dv_esw_en)
3166 		type = MLX5_FLOW_TYPE_DV;
3167 	if (!attr->transfer)
3168 		type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
3169 						 MLX5_FLOW_TYPE_VERBS;
3170 	return type;
3171 }
3172 
3173 #define flow_get_drv_ops(type) flow_drv_ops[type]
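
/*
 * Illustrative dispatch sketch only (this is what the wrappers below do),
 * with `dev`, `attr` and the item/action arrays being locals of a
 * hypothetical caller:
 *
 *	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
 *	const struct mlx5_flow_driver_ops *fops = flow_get_drv_ops(type);
 *
 *	ret = fops->validate(dev, attr, items, actions, external,
 *			     hairpin, error);
 */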
3174 
3175 /**
3176  * Flow driver validation API. This abstracts calling driver specific functions.
3177  * The type of flow driver is determined according to flow attributes.
3178  *
3179  * @param[in] dev
3180  *   Pointer to the dev structure.
3181  * @param[in] attr
3182  *   Pointer to the flow attributes.
3183  * @param[in] items
3184  *   Pointer to the list of items.
3185  * @param[in] actions
3186  *   Pointer to the list of actions.
3187  * @param[in] external
3188  *   This flow rule is created by a request external to the PMD.
3189  * @param[in] hairpin
3190  *   Number of hairpin TX actions, 0 means classic flow.
3191  * @param[out] error
3192  *   Pointer to the error structure.
3193  *
3194  * @return
3195  *   0 on success, a negative errno value otherwise and rte_errno is set.
3196  */
3197 static inline int
3198 flow_drv_validate(struct rte_eth_dev *dev,
3199 		  const struct rte_flow_attr *attr,
3200 		  const struct rte_flow_item items[],
3201 		  const struct rte_flow_action actions[],
3202 		  bool external, int hairpin, struct rte_flow_error *error)
3203 {
3204 	const struct mlx5_flow_driver_ops *fops;
3205 	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
3206 
3207 	fops = flow_get_drv_ops(type);
3208 	return fops->validate(dev, attr, items, actions, external,
3209 			      hairpin, error);
3210 }
3211 
3212 /**
3213  * Flow driver preparation API. This abstracts calling driver specific
3214  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3215  * calculates the size of memory required for device flow, allocates the memory,
3216  * initializes the device flow and returns the pointer.
3217  *
3218  * @note
3219  *   This function initializes device flow structure such as dv or verbs in
3220  *   struct mlx5_flow. However, it is caller's responsibility to initialize the
3221  *   rest. For example, adding returning device flow to flow->dev_flow list and
3222  *   setting backward reference to the flow should be done out of this function.
3223  *   layers field is not filled either.
3224  *
3225  * @param[in] dev
3226  *   Pointer to the dev structure.
3227  * @param[in] attr
3228  *   Pointer to the flow attributes.
3229  * @param[in] items
3230  *   Pointer to the list of items.
3231  * @param[in] actions
3232  *   Pointer to the list of actions.
3233  * @param[in] flow_idx
 *   Memory pool index of the flow.
3235  * @param[out] error
3236  *   Pointer to the error structure.
3237  *
3238  * @return
3239  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
3240  */
3241 static inline struct mlx5_flow *
3242 flow_drv_prepare(struct rte_eth_dev *dev,
3243 		 const struct rte_flow *flow,
3244 		 const struct rte_flow_attr *attr,
3245 		 const struct rte_flow_item items[],
3246 		 const struct rte_flow_action actions[],
3247 		 uint32_t flow_idx,
3248 		 struct rte_flow_error *error)
3249 {
3250 	const struct mlx5_flow_driver_ops *fops;
3251 	enum mlx5_flow_drv_type type = flow->drv_type;
3252 	struct mlx5_flow *mlx5_flow = NULL;
3253 
3254 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3255 	fops = flow_get_drv_ops(type);
3256 	mlx5_flow = fops->prepare(dev, attr, items, actions, error);
3257 	if (mlx5_flow)
3258 		mlx5_flow->flow_idx = flow_idx;
3259 	return mlx5_flow;
3260 }
3261 
3262 /**
3263  * Flow driver translation API. This abstracts calling driver specific
3264  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3265  * translates a generic flow into a driver flow. flow_drv_prepare() must
3266  * precede.
3267  *
3268  * @note
3269  *   dev_flow->layers could be filled as a result of parsing during translation
3270  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
3271  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
3272  *   flow->actions could be overwritten even though all the expanded dev_flows
3273  *   have the same actions.
3274  *
3275  * @param[in] dev
3276  *   Pointer to the rte dev structure.
3277  * @param[in, out] dev_flow
3278  *   Pointer to the mlx5 flow.
3279  * @param[in] attr
3280  *   Pointer to the flow attributes.
3281  * @param[in] items
3282  *   Pointer to the list of items.
3283  * @param[in] actions
3284  *   Pointer to the list of actions.
3285  * @param[out] error
3286  *   Pointer to the error structure.
3287  *
3288  * @return
3289  *   0 on success, a negative errno value otherwise and rte_errno is set.
3290  */
3291 static inline int
3292 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
3293 		   const struct rte_flow_attr *attr,
3294 		   const struct rte_flow_item items[],
3295 		   const struct rte_flow_action actions[],
3296 		   struct rte_flow_error *error)
3297 {
3298 	const struct mlx5_flow_driver_ops *fops;
3299 	enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
3300 
3301 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3302 	fops = flow_get_drv_ops(type);
3303 	return fops->translate(dev, dev_flow, attr, items, actions, error);
3304 }
3305 
3306 /**
3307  * Flow driver apply API. This abstracts calling driver specific functions.
3308  * Parent flow (rte_flow) should have driver type (drv_type). It applies
3309  * translated driver flows on to device. flow_drv_translate() must precede.
3310  *
3311  * @param[in] dev
3312  *   Pointer to Ethernet device structure.
3313  * @param[in, out] flow
3314  *   Pointer to flow structure.
3315  * @param[out] error
3316  *   Pointer to error structure.
3317  *
3318  * @return
3319  *   0 on success, a negative errno value otherwise and rte_errno is set.
3320  */
3321 static inline int
3322 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3323 	       struct rte_flow_error *error)
3324 {
3325 	const struct mlx5_flow_driver_ops *fops;
3326 	enum mlx5_flow_drv_type type = flow->drv_type;
3327 
3328 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3329 	fops = flow_get_drv_ops(type);
3330 	return fops->apply(dev, flow, error);
3331 }
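
/*
 * Illustrative sketch only (not called by the driver): the per-flow driver
 * pipeline chains the wrappers above as validate -> prepare -> translate ->
 * apply, with flow_drv_destroy() as the teardown counterpart. The real entry
 * point, flow_list_create(), additionally links the device flow into
 * flow->dev_handles and handles flow splitting; this simplified helper and
 * its name are assumptions for illustration.
 */
static inline int
flow_drv_pipeline_sketch(struct rte_eth_dev *dev, struct rte_flow *flow,
			 const struct rte_flow_attr *attr,
			 const struct rte_flow_item items[],
			 const struct rte_flow_action actions[],
			 uint32_t flow_idx, struct rte_flow_error *error)
{
	struct mlx5_flow *dev_flow;
	int ret;

	/* Reject unsupported attribute/item/action combinations early. */
	ret = flow_drv_validate(dev, attr, items, actions, true, 0, error);
	if (ret < 0)
		return ret;
	/* Allocate and initialize the driver-specific device flow. */
	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
				    flow_idx, error);
	if (!dev_flow)
		return -rte_errno;
	dev_flow->flow = flow;
	/* Translate the generic flow into the driver representation. */
	ret = flow_drv_translate(dev, dev_flow, attr, items, actions, error);
	if (ret)
		return ret;
	/* Finally program the translated flow onto the device. */
	return flow_drv_apply(dev, flow, error);
}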
3332 
3333 /**
3334  * Flow driver destroy API. This abstracts calling driver specific functions.
3335  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3336  * on device and releases resources of the flow.
3337  *
3338  * @param[in] dev
3339  *   Pointer to Ethernet device.
3340  * @param[in, out] flow
3341  *   Pointer to flow structure.
3342  */
3343 static inline void
3344 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3345 {
3346 	const struct mlx5_flow_driver_ops *fops;
3347 	enum mlx5_flow_drv_type type = flow->drv_type;
3348 
3349 	flow_mreg_split_qrss_release(dev, flow);
3350 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3351 	fops = flow_get_drv_ops(type);
3352 	fops->destroy(dev, flow);
3353 }
3354 
3355 /**
3356  * Get RSS action from the action list.
3357  *
3358  * @param[in] actions
3359  *   Pointer to the list of actions.
3360  *
3361  * @return
 *   Pointer to the RSS action if it exists, NULL otherwise.
3363  */
3364 static const struct rte_flow_action_rss*
3365 flow_get_rss_action(const struct rte_flow_action actions[])
3366 {
3367 	const struct rte_flow_action_rss *rss = NULL;
3368 
3369 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3370 		switch (actions->type) {
3371 		case RTE_FLOW_ACTION_TYPE_RSS:
3372 			rss = actions->conf;
3373 			break;
3374 		case RTE_FLOW_ACTION_TYPE_SAMPLE:
3375 		{
3376 			const struct rte_flow_action_sample *sample =
3377 								actions->conf;
3378 			const struct rte_flow_action *act = sample->actions;
3379 			for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
3380 				if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
3381 					rss = act->conf;
3382 			break;
3383 		}
3384 		default:
3385 			break;
3386 		}
3387 	}
3388 	return rss;
3389 }
3390 
3391 /**
3392  * Get ASO age action by index.
3393  *
3394  * @param[in] dev
3395  *   Pointer to the Ethernet device structure.
3396  * @param[in] age_idx
3397  *   Index to the ASO age action.
3398  *
3399  * @return
3400  *   The specified ASO age action.
3401  */
3402 struct mlx5_aso_age_action*
3403 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx)
3404 {
3405 	uint16_t pool_idx = age_idx & UINT16_MAX;
3406 	uint16_t offset = (age_idx >> 16) & UINT16_MAX;
3407 	struct mlx5_priv *priv = dev->data->dev_private;
3408 	struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
3409 	struct mlx5_aso_age_pool *pool = mng->pools[pool_idx];
3410 
3411 	return &pool->actions[offset - 1];
3412 }
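
/*
 * Illustrative sketch (assumption, unused by the driver): the 32-bit ASO age
 * index decoded above packs the pool index into the lower 16 bits and the
 * 1-based action offset into the upper 16 bits. Composing an index is the
 * mirror operation; the helper name is hypothetical.
 */
static inline uint32_t
flow_aso_age_idx_compose_sketch(uint16_t pool_idx, uint16_t offset)
{
	/* The offset is 1-based; flow_aso_age_get_by_idx() subtracts 1. */
	return ((uint32_t)offset << 16) | pool_idx;
}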
3413 
/* Maps a shared action to its translated non-shared action in an actions array. */
3415 struct mlx5_translated_shared_action {
3416 	struct rte_flow_shared_action *action; /**< Shared action */
3417 	int index; /**< Index in related array of rte_flow_action */
3418 };
3419 
3420 /**
 * Translates actions of type RTE_FLOW_ACTION_TYPE_SHARED to the related
 * non-shared actions if translation is possible.
 * This functionality is used to run the same execution path for both shared
 * and non-shared actions on flow create. All necessary preparations for
 * shared action handling should be performed on the *shared* actions list
 * returned from this call.
3427  *
3428  * @param[in] dev
3429  *   Pointer to Ethernet device.
3430  * @param[in] actions
3431  *   List of actions to translate.
3432  * @param[out] shared
3433  *   List to store translated shared actions.
3434  * @param[in, out] shared_n
 *   Size of the *shared* array. On return it is updated with the number of
 *   shared actions retrieved from the *actions* list.
 * @param[out] translated_actions
 *   List of actions where all shared actions were translated to non-shared
 *   ones if possible. NULL if no translation took place.
3440  * @param[out] error
3441  *   Pointer to the error structure.
3442  *
3443  * @return
3444  *   0 on success, a negative errno value otherwise and rte_errno is set.
3445  */
3446 static int
3447 flow_shared_actions_translate(struct rte_eth_dev *dev,
3448 			      const struct rte_flow_action actions[],
3449 			      struct mlx5_translated_shared_action *shared,
3450 			      int *shared_n,
3451 			      struct rte_flow_action **translated_actions,
3452 			      struct rte_flow_error *error)
3453 {
3454 	struct mlx5_priv *priv = dev->data->dev_private;
3455 	struct rte_flow_action *translated = NULL;
3456 	size_t actions_size;
3457 	int n;
3458 	int copied_n = 0;
3459 	struct mlx5_translated_shared_action *shared_end = NULL;
3460 
3461 	for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) {
3462 		if (actions[n].type != RTE_FLOW_ACTION_TYPE_SHARED)
3463 			continue;
3464 		if (copied_n == *shared_n) {
3465 			return rte_flow_error_set
3466 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM,
3467 				 NULL, "too many shared actions");
3468 		}
3469 		rte_memcpy(&shared[copied_n].action, &actions[n].conf,
3470 			   sizeof(actions[n].conf));
3471 		shared[copied_n].index = n;
3472 		copied_n++;
3473 	}
3474 	n++;
3475 	*shared_n = copied_n;
3476 	if (!copied_n)
3477 		return 0;
3478 	actions_size = sizeof(struct rte_flow_action) * n;
3479 	translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY);
3480 	if (!translated) {
3481 		rte_errno = ENOMEM;
3482 		return -ENOMEM;
3483 	}
3484 	memcpy(translated, actions, actions_size);
3485 	for (shared_end = shared + copied_n; shared < shared_end; shared++) {
3486 		struct mlx5_shared_action_rss *shared_rss;
3487 		uint32_t act_idx = (uint32_t)(uintptr_t)shared->action;
3488 		uint32_t type = act_idx >> MLX5_SHARED_ACTION_TYPE_OFFSET;
3489 		uint32_t idx = act_idx & ((1u << MLX5_SHARED_ACTION_TYPE_OFFSET)
3490 									   - 1);
3491 
3492 		switch (type) {
3493 		case MLX5_SHARED_ACTION_TYPE_RSS:
3494 			shared_rss = mlx5_ipool_get
3495 			  (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx);
3496 			translated[shared->index].type =
3497 				RTE_FLOW_ACTION_TYPE_RSS;
3498 			translated[shared->index].conf =
3499 				&shared_rss->origin;
3500 			break;
3501 		case MLX5_SHARED_ACTION_TYPE_AGE:
3502 			if (priv->sh->flow_hit_aso_en) {
3503 				translated[shared->index].type =
3504 					(enum rte_flow_action_type)
3505 					MLX5_RTE_FLOW_ACTION_TYPE_AGE;
3506 				translated[shared->index].conf =
3507 							 (void *)(uintptr_t)idx;
3508 				break;
3509 			}
3510 			/* Fall-through */
3511 		default:
3512 			mlx5_free(translated);
3513 			return rte_flow_error_set
3514 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
3515 				 NULL, "invalid shared action type");
3516 		}
3517 	}
3518 	*translated_actions = translated;
3519 	return 0;
3520 }
3521 
3522 /**
3523  * Get Shared RSS action from the action list.
3524  *
3525  * @param[in] dev
3526  *   Pointer to Ethernet device.
3527  * @param[in] shared
3528  *   Pointer to the list of actions.
3529  * @param[in] shared_n
3530  *   Actions list length.
3531  *
3532  * @return
 *   The MLX5 RSS action ID if it exists, 0 otherwise.
3534  */
3535 static uint32_t
3536 flow_get_shared_rss_action(struct rte_eth_dev *dev,
3537 			   struct mlx5_translated_shared_action *shared,
3538 			   int shared_n)
3539 {
3540 	struct mlx5_translated_shared_action *shared_end;
3541 	struct mlx5_priv *priv = dev->data->dev_private;
3542 	struct mlx5_shared_action_rss *shared_rss;
3543 
3544 
3545 	for (shared_end = shared + shared_n; shared < shared_end; shared++) {
3546 		uint32_t act_idx = (uint32_t)(uintptr_t)shared->action;
3547 		uint32_t type = act_idx >> MLX5_SHARED_ACTION_TYPE_OFFSET;
3548 		uint32_t idx = act_idx &
3549 				   ((1u << MLX5_SHARED_ACTION_TYPE_OFFSET) - 1);
3550 		switch (type) {
3551 		case MLX5_SHARED_ACTION_TYPE_RSS:
3552 			shared_rss = mlx5_ipool_get
3553 				(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
3554 									   idx);
3555 			__atomic_add_fetch(&shared_rss->refcnt, 1,
3556 					   __ATOMIC_RELAXED);
3557 			return idx;
3558 		default:
3559 			break;
3560 		}
3561 	}
3562 	return 0;
3563 }
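
/*
 * Illustrative sketch (assumption, unused by the driver): both
 * flow_shared_actions_translate() and flow_get_shared_rss_action() treat the
 * shared action handle as an opaque 32-bit value with the action type stored
 * above MLX5_SHARED_ACTION_TYPE_OFFSET and the ipool index stored below it.
 * The helper below mirrors that decoding; its name is hypothetical.
 */
static inline void
flow_shared_action_decode_sketch(const struct rte_flow_shared_action *action,
				 uint32_t *type, uint32_t *idx)
{
	uint32_t act_idx = (uint32_t)(uintptr_t)action;

	*type = act_idx >> MLX5_SHARED_ACTION_TYPE_OFFSET;
	*idx = act_idx & ((1u << MLX5_SHARED_ACTION_TYPE_OFFSET) - 1);
}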
3564 
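/**
 * Find the RSS expansion graph root node for the given pattern.
 *
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] rss_level
 *   RSS encapsulation level (2 and above select the MLX5_EXPANSION_ROOT_OUTER*
 *   roots).
 *
 * @return
 *   Root node index for mlx5_flow_expand_rss().
 */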
3565 static unsigned int
3566 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
3567 {
3568 	const struct rte_flow_item *item;
3569 	unsigned int has_vlan = 0;
3570 
3571 	for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
3572 		if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
3573 			has_vlan = 1;
3574 			break;
3575 		}
3576 	}
3577 	if (has_vlan)
3578 		return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
3579 				       MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
3580 	return rss_level < 2 ? MLX5_EXPANSION_ROOT :
3581 			       MLX5_EXPANSION_ROOT_OUTER;
3582 }
3583 
3584 /**
3585  *  Get layer flags from the prefix flow.
3586  *
 *  Some flows may be split into several subflows: the prefix subflow gets the
 *  match items and the suffix subflow gets the actions.
 *  Some actions need the user-defined match item flags to get the details for
 *  the action.
 *  This function helps the suffix flow to get the item layer flags from the
 *  prefix subflow.
3593  *
3594  * @param[in] dev_flow
 *   Pointer to the created prefix subflow.
 *
 * @return
 *   The layers obtained from the prefix subflow.
3599  */
3600 static inline uint64_t
3601 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
3602 {
3603 	uint64_t layers = 0;
3604 
3605 	/*
	 * The layer bits could be cached in a local variable, but the compiler
	 * usually performs this optimization anyway.
	 * If there is no decap action, use the layers directly.
3609 	 */
3610 	if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
3611 		return dev_flow->handle->layers;
3612 	/* Convert L3 layers with decap action. */
3613 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
3614 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
3615 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
3616 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
	/* Convert L4 layers with decap action. */
3618 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
3619 		layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
3620 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
3621 		layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
3622 	return layers;
3623 }
3624 
3625 /**
3626  * Get metadata split action information.
3627  *
3628  * @param[in] actions
3629  *   Pointer to the list of actions.
 * @param[out] qrss
 *   Pointer to the return pointer. It is set to the QUEUE/RSS action if one
 *   is found in the list, otherwise it is left untouched.
 * @param[out] encap_idx
 *   Pointer to the index of the encap action if it exists, otherwise the last
 *   action index.
3638  *
3639  * @return
3640  *   Total number of actions.
3641  */
3642 static int
3643 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
3644 				       const struct rte_flow_action **qrss,
3645 				       int *encap_idx)
3646 {
3647 	const struct rte_flow_action_raw_encap *raw_encap;
3648 	int actions_n = 0;
3649 	int raw_decap_idx = -1;
3650 
3651 	*encap_idx = -1;
3652 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3653 		switch (actions->type) {
3654 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3655 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3656 			*encap_idx = actions_n;
3657 			break;
3658 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3659 			raw_decap_idx = actions_n;
3660 			break;
3661 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3662 			raw_encap = actions->conf;
3663 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
3664 				*encap_idx = raw_decap_idx != -1 ?
3665 						      raw_decap_idx : actions_n;
3666 			break;
3667 		case RTE_FLOW_ACTION_TYPE_QUEUE:
3668 		case RTE_FLOW_ACTION_TYPE_RSS:
3669 			*qrss = actions;
3670 			break;
3671 		default:
3672 			break;
3673 		}
3674 		actions_n++;
3675 	}
3676 	if (*encap_idx == -1)
3677 		*encap_idx = actions_n;
3678 	/* Count RTE_FLOW_ACTION_TYPE_END. */
3679 	return actions_n + 1;
3680 }
3681 
3682 /**
3683  * Check meter action from the action list.
3684  *
3685  * @param[in] actions
3686  *   Pointer to the list of actions.
3687  * @param[out] mtr
 *   Pointer to the meter existence flag.
3689  *
3690  * @return
3691  *   Total number of actions.
3692  */
3693 static int
3694 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr)
3695 {
3696 	int actions_n = 0;
3697 
3698 	MLX5_ASSERT(mtr);
3699 	*mtr = 0;
3700 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3701 		switch (actions->type) {
3702 		case RTE_FLOW_ACTION_TYPE_METER:
3703 			*mtr = 1;
3704 			break;
3705 		default:
3706 			break;
3707 		}
3708 		actions_n++;
3709 	}
3710 	/* Count RTE_FLOW_ACTION_TYPE_END. */
3711 	return actions_n + 1;
3712 }
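
/*
 * Usage sketch (illustrative, hypothetical helper): callers typically only
 * need the boolean answer of whether a meter action is present, while the
 * returned action count is used for sizing the split buffers.
 */
static inline bool
flow_has_meter_action_sketch(const struct rte_flow_action actions[])
{
	uint32_t mtr = 0;

	(void)flow_check_meter_action(actions, &mtr);
	return mtr != 0;
}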
3713 
3714 /**
3715  * Check if the flow should be split due to hairpin.
 * The reason for the split is that current HW can't support
 * encap and push-VLAN on Rx, so if a flow contains these
 * actions we move them to Tx.
3719  *
3720  * @param dev
3721  *   Pointer to Ethernet device.
3722  * @param[in] attr
3723  *   Flow rule attributes.
3724  * @param[in] actions
3725  *   Associated actions (list terminated by the END action).
3726  *
3727  * @return
 *   > 0 the number of actions if the flow should be split,
3729  *   0 when no split required.
3730  */
3731 static int
3732 flow_check_hairpin_split(struct rte_eth_dev *dev,
3733 			 const struct rte_flow_attr *attr,
3734 			 const struct rte_flow_action actions[])
3735 {
3736 	int queue_action = 0;
3737 	int action_n = 0;
3738 	int split = 0;
3739 	const struct rte_flow_action_queue *queue;
3740 	const struct rte_flow_action_rss *rss;
3741 	const struct rte_flow_action_raw_encap *raw_encap;
3742 	const struct rte_eth_hairpin_conf *conf;
3743 
3744 	if (!attr->ingress)
3745 		return 0;
3746 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3747 		switch (actions->type) {
3748 		case RTE_FLOW_ACTION_TYPE_QUEUE:
3749 			queue = actions->conf;
3750 			if (queue == NULL)
3751 				return 0;
3752 			conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
3753 			if (conf == NULL || conf->tx_explicit != 0)
3754 				return 0;
3755 			queue_action = 1;
3756 			action_n++;
3757 			break;
3758 		case RTE_FLOW_ACTION_TYPE_RSS:
3759 			rss = actions->conf;
3760 			if (rss == NULL || rss->queue_num == 0)
3761 				return 0;
3762 			conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
3763 			if (conf == NULL || conf->tx_explicit != 0)
3764 				return 0;
3765 			queue_action = 1;
3766 			action_n++;
3767 			break;
3768 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3769 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3770 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3771 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3772 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
3773 			split++;
3774 			action_n++;
3775 			break;
3776 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3777 			raw_encap = actions->conf;
3778 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
3779 				split++;
3780 			action_n++;
3781 			break;
3782 		default:
3783 			action_n++;
3784 			break;
3785 		}
3786 	}
3787 	if (split && queue_action)
3788 		return action_n;
3789 	return 0;
3790 }
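
/*
 * Usage sketch (illustrative, hypothetical helper): a positive return value
 * from flow_check_hairpin_split() both signals that the flow must be split
 * and reports how many actions the Rx/Tx action buffers must accommodate.
 */
static inline bool
flow_needs_hairpin_split_sketch(struct rte_eth_dev *dev,
				const struct rte_flow_attr *attr,
				const struct rte_flow_action actions[])
{
	return flow_check_hairpin_split(dev, attr, actions) > 0;
}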
3791 
3792 /* Declare flow create/destroy prototype in advance. */
3793 static uint32_t
3794 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
3795 		 const struct rte_flow_attr *attr,
3796 		 const struct rte_flow_item items[],
3797 		 const struct rte_flow_action actions[],
3798 		 bool external, struct rte_flow_error *error);
3799 
3800 static void
3801 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
3802 		  uint32_t flow_idx);
3803 
3804 int
3805 flow_dv_mreg_match_cb(struct mlx5_hlist *list __rte_unused,
3806 		      struct mlx5_hlist_entry *entry,
3807 		      uint64_t key, void *cb_ctx __rte_unused)
3808 {
3809 	struct mlx5_flow_mreg_copy_resource *mcp_res =
3810 		container_of(entry, typeof(*mcp_res), hlist_ent);
3811 
3812 	return mcp_res->mark_id != key;
3813 }
3814 
3815 struct mlx5_hlist_entry *
3816 flow_dv_mreg_create_cb(struct mlx5_hlist *list, uint64_t key,
3817 		       void *cb_ctx)
3818 {
3819 	struct rte_eth_dev *dev = list->ctx;
3820 	struct mlx5_priv *priv = dev->data->dev_private;
3821 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
3822 	struct mlx5_flow_mreg_copy_resource *mcp_res;
3823 	struct rte_flow_error *error = ctx->error;
3824 	uint32_t idx = 0;
3825 	int ret;
3826 	uint32_t mark_id = key;
3827 	struct rte_flow_attr attr = {
3828 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
3829 		.ingress = 1,
3830 	};
3831 	struct mlx5_rte_flow_item_tag tag_spec = {
3832 		.data = mark_id,
3833 	};
3834 	struct rte_flow_item items[] = {
3835 		[1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
3836 	};
3837 	struct rte_flow_action_mark ftag = {
3838 		.id = mark_id,
3839 	};
3840 	struct mlx5_flow_action_copy_mreg cp_mreg = {
3841 		.dst = REG_B,
3842 		.src = REG_NON,
3843 	};
3844 	struct rte_flow_action_jump jump = {
3845 		.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
3846 	};
3847 	struct rte_flow_action actions[] = {
3848 		[3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
3849 	};
3850 
	/* Fill the register fields in the flow. */
3852 	ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
3853 	if (ret < 0)
3854 		return NULL;
3855 	tag_spec.id = ret;
3856 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
3857 	if (ret < 0)
3858 		return NULL;
3859 	cp_mreg.src = ret;
3860 	/* Provide the full width of FLAG specific value. */
3861 	if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
3862 		tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
3863 	/* Build a new flow. */
3864 	if (mark_id != MLX5_DEFAULT_COPY_ID) {
3865 		items[0] = (struct rte_flow_item){
3866 			.type = (enum rte_flow_item_type)
3867 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
3868 			.spec = &tag_spec,
3869 		};
3870 		items[1] = (struct rte_flow_item){
3871 			.type = RTE_FLOW_ITEM_TYPE_END,
3872 		};
3873 		actions[0] = (struct rte_flow_action){
3874 			.type = (enum rte_flow_action_type)
3875 				MLX5_RTE_FLOW_ACTION_TYPE_MARK,
3876 			.conf = &ftag,
3877 		};
3878 		actions[1] = (struct rte_flow_action){
3879 			.type = (enum rte_flow_action_type)
3880 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3881 			.conf = &cp_mreg,
3882 		};
3883 		actions[2] = (struct rte_flow_action){
3884 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
3885 			.conf = &jump,
3886 		};
3887 		actions[3] = (struct rte_flow_action){
3888 			.type = RTE_FLOW_ACTION_TYPE_END,
3889 		};
3890 	} else {
3891 		/* Default rule, wildcard match. */
3892 		attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR;
3893 		items[0] = (struct rte_flow_item){
3894 			.type = RTE_FLOW_ITEM_TYPE_END,
3895 		};
3896 		actions[0] = (struct rte_flow_action){
3897 			.type = (enum rte_flow_action_type)
3898 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3899 			.conf = &cp_mreg,
3900 		};
3901 		actions[1] = (struct rte_flow_action){
3902 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
3903 			.conf = &jump,
3904 		};
3905 		actions[2] = (struct rte_flow_action){
3906 			.type = RTE_FLOW_ACTION_TYPE_END,
3907 		};
3908 	}
3909 	/* Build a new entry. */
3910 	mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
3911 	if (!mcp_res) {
3912 		rte_errno = ENOMEM;
3913 		return NULL;
3914 	}
3915 	mcp_res->idx = idx;
3916 	mcp_res->mark_id = mark_id;
3917 	/*
	 * The copy flows are not included in any list. These
	 * flows are referenced from other flows and cannot
	 * be applied, removed, or deleted in arbitrary order
	 * by list traversal.
3922 	 */
3923 	mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items,
3924 					 actions, false, error);
3925 	if (!mcp_res->rix_flow) {
3926 		mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx);
3927 		return NULL;
3928 	}
3929 	return &mcp_res->hlist_ent;
3930 }
3931 
3932 /**
3933  * Add a flow of copying flow metadata registers in RX_CP_TBL.
3934  *
3935  * As mark_id is unique, if there's already a registered flow for the mark_id,
3936  * return by increasing the reference counter of the resource. Otherwise, create
3937  * the resource (mcp_res) and flow.
3938  *
3939  * Flow looks like,
3940  *   - If ingress port is ANY and reg_c[1] is mark_id,
3941  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3942  *
3943  * For default flow (zero mark_id), flow is like,
3944  *   - If ingress port is ANY,
3945  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
3946  *
3947  * @param dev
3948  *   Pointer to Ethernet device.
3949  * @param mark_id
3950  *   ID of MARK action, zero means default flow for META.
3951  * @param[out] error
3952  *   Perform verbose error reporting if not NULL.
3953  *
3954  * @return
3955  *   Associated resource on success, NULL otherwise and rte_errno is set.
3956  */
3957 static struct mlx5_flow_mreg_copy_resource *
3958 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
3959 			  struct rte_flow_error *error)
3960 {
3961 	struct mlx5_priv *priv = dev->data->dev_private;
3962 	struct mlx5_hlist_entry *entry;
3963 	struct mlx5_flow_cb_ctx ctx = {
3964 		.dev = dev,
3965 		.error = error,
3966 	};
3967 
3968 	/* Check if already registered. */
3969 	MLX5_ASSERT(priv->mreg_cp_tbl);
3970 	entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx);
3971 	if (!entry)
3972 		return NULL;
3973 	return container_of(entry, struct mlx5_flow_mreg_copy_resource,
3974 			    hlist_ent);
3975 }
3976 
3977 void
3978 flow_dv_mreg_remove_cb(struct mlx5_hlist *list, struct mlx5_hlist_entry *entry)
3979 {
3980 	struct mlx5_flow_mreg_copy_resource *mcp_res =
3981 		container_of(entry, typeof(*mcp_res), hlist_ent);
3982 	struct rte_eth_dev *dev = list->ctx;
3983 	struct mlx5_priv *priv = dev->data->dev_private;
3984 
3985 	MLX5_ASSERT(mcp_res->rix_flow);
3986 	flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3987 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3988 }
3989 
3990 /**
3991  * Release flow in RX_CP_TBL.
3992  *
3993  * @param dev
3994  *   Pointer to Ethernet device.
 * @param flow
 *   Parent flow for which copying is provided.
3997  */
3998 static void
3999 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
4000 			  struct rte_flow *flow)
4001 {
4002 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4003 	struct mlx5_priv *priv = dev->data->dev_private;
4004 
4005 	if (!flow->rix_mreg_copy)
4006 		return;
4007 	mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
4008 				 flow->rix_mreg_copy);
4009 	if (!mcp_res || !priv->mreg_cp_tbl)
4010 		return;
4011 	MLX5_ASSERT(mcp_res->rix_flow);
4012 	mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
4013 	flow->rix_mreg_copy = 0;
4014 }
4015 
4016 /**
4017  * Remove the default copy action from RX_CP_TBL.
4018  *
 * This function is called in mlx5_dev_start(). No thread safety
 * is guaranteed.
4021  *
4022  * @param dev
4023  *   Pointer to Ethernet device.
4024  */
4025 static void
4026 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
4027 {
4028 	struct mlx5_hlist_entry *entry;
4029 	struct mlx5_priv *priv = dev->data->dev_private;
4030 
4031 	/* Check if default flow is registered. */
4032 	if (!priv->mreg_cp_tbl)
4033 		return;
4034 	entry = mlx5_hlist_lookup(priv->mreg_cp_tbl,
4035 				  MLX5_DEFAULT_COPY_ID, NULL);
4036 	if (!entry)
4037 		return;
4038 	mlx5_hlist_unregister(priv->mreg_cp_tbl, entry);
4039 }
4040 
4041 /**
 * Add the default copy action in RX_CP_TBL.
4043  *
 * This function is called in mlx5_dev_start(). No thread safety
 * is guaranteed.
4046  *
4047  * @param dev
4048  *   Pointer to Ethernet device.
4049  * @param[out] error
4050  *   Perform verbose error reporting if not NULL.
4051  *
4052  * @return
4053  *   0 for success, negative value otherwise and rte_errno is set.
4054  */
4055 static int
4056 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
4057 				  struct rte_flow_error *error)
4058 {
4059 	struct mlx5_priv *priv = dev->data->dev_private;
4060 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4061 
4062 	/* Check whether extensive metadata feature is engaged. */
4063 	if (!priv->config.dv_flow_en ||
4064 	    priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4065 	    !mlx5_flow_ext_mreg_supported(dev) ||
4066 	    !priv->sh->dv_regc0_mask)
4067 		return 0;
4068 	/*
	 * Adding the default mreg copy flow may be called multiple times,
	 * but it is only removed once in stop. Avoid registering it twice.
4071 	 */
4072 	if (mlx5_hlist_lookup(priv->mreg_cp_tbl, MLX5_DEFAULT_COPY_ID, NULL))
4073 		return 0;
4074 	mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error);
4075 	if (!mcp_res)
4076 		return -rte_errno;
4077 	return 0;
4078 }
4079 
4080 /**
4081  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4082  *
 * All the flows having a Q/RSS action should be split by
4084  * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
4085  * performs the following,
4086  *   - CQE->flow_tag := reg_c[1] (MARK)
4087  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4088  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
 * but there should be a flow for each MARK ID set by the MARK action.
4090  *
4091  * For the aforementioned reason, if there's a MARK action in flow's action
4092  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
4093  * the MARK ID to CQE's flow_tag like,
4094  *   - If reg_c[1] is mark_id,
4095  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4096  *
4097  * For SET_META action which stores value in reg_c[0], as the destination is
4098  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
4099  * MARK ID means the default flow. The default flow looks like,
4100  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4101  *
4102  * @param dev
4103  *   Pointer to Ethernet device.
4104  * @param flow
4105  *   Pointer to flow structure.
4106  * @param[in] actions
4107  *   Pointer to the list of actions.
4108  * @param[out] error
4109  *   Perform verbose error reporting if not NULL.
4110  *
4111  * @return
4112  *   0 on success, negative value otherwise and rte_errno is set.
4113  */
4114 static int
4115 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
4116 			    struct rte_flow *flow,
4117 			    const struct rte_flow_action *actions,
4118 			    struct rte_flow_error *error)
4119 {
4120 	struct mlx5_priv *priv = dev->data->dev_private;
4121 	struct mlx5_dev_config *config = &priv->config;
4122 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4123 	const struct rte_flow_action_mark *mark;
4124 
4125 	/* Check whether extensive metadata feature is engaged. */
4126 	if (!config->dv_flow_en ||
4127 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4128 	    !mlx5_flow_ext_mreg_supported(dev) ||
4129 	    !priv->sh->dv_regc0_mask)
4130 		return 0;
4131 	/* Find MARK action. */
4132 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4133 		switch (actions->type) {
4134 		case RTE_FLOW_ACTION_TYPE_FLAG:
4135 			mcp_res = flow_mreg_add_copy_action
4136 				(dev, MLX5_FLOW_MARK_DEFAULT, error);
4137 			if (!mcp_res)
4138 				return -rte_errno;
4139 			flow->rix_mreg_copy = mcp_res->idx;
4140 			return 0;
4141 		case RTE_FLOW_ACTION_TYPE_MARK:
4142 			mark = (const struct rte_flow_action_mark *)
4143 				actions->conf;
4144 			mcp_res =
4145 				flow_mreg_add_copy_action(dev, mark->id, error);
4146 			if (!mcp_res)
4147 				return -rte_errno;
4148 			flow->rix_mreg_copy = mcp_res->idx;
4149 			return 0;
4150 		default:
4151 			break;
4152 		}
4153 	}
4154 	return 0;
4155 }
4156 
4157 #define MLX5_MAX_SPLIT_ACTIONS 24
4158 #define MLX5_MAX_SPLIT_ITEMS 24
4159 
4160 /**
4161  * Split the hairpin flow.
4162  * Since HW can't support encap and push-vlan on Rx, we move these
4163  * actions to Tx.
4164  * If the count action is after the encap then we also
 * move the count action. In this case the count will also measure
4166  * the outer bytes.
4167  *
4168  * @param dev
4169  *   Pointer to Ethernet device.
4170  * @param[in] actions
4171  *   Associated actions (list terminated by the END action).
4172  * @param[out] actions_rx
4173  *   Rx flow actions.
4174  * @param[out] actions_tx
 *   Tx flow actions.
4176  * @param[out] pattern_tx
4177  *   The pattern items for the Tx flow.
4178  * @param[out] flow_id
4179  *   The flow ID connected to this flow.
4180  *
4181  * @return
4182  *   0 on success.
4183  */
4184 static int
4185 flow_hairpin_split(struct rte_eth_dev *dev,
4186 		   const struct rte_flow_action actions[],
4187 		   struct rte_flow_action actions_rx[],
4188 		   struct rte_flow_action actions_tx[],
4189 		   struct rte_flow_item pattern_tx[],
4190 		   uint32_t flow_id)
4191 {
4192 	const struct rte_flow_action_raw_encap *raw_encap;
4193 	const struct rte_flow_action_raw_decap *raw_decap;
4194 	struct mlx5_rte_flow_action_set_tag *set_tag;
4195 	struct rte_flow_action *tag_action;
4196 	struct mlx5_rte_flow_item_tag *tag_item;
4197 	struct rte_flow_item *item;
4198 	char *addr;
4199 	int encap = 0;
4200 
4201 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4202 		switch (actions->type) {
4203 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4204 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4205 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4206 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4207 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4208 			rte_memcpy(actions_tx, actions,
4209 			       sizeof(struct rte_flow_action));
4210 			actions_tx++;
4211 			break;
4212 		case RTE_FLOW_ACTION_TYPE_COUNT:
4213 			if (encap) {
4214 				rte_memcpy(actions_tx, actions,
4215 					   sizeof(struct rte_flow_action));
4216 				actions_tx++;
4217 			} else {
4218 				rte_memcpy(actions_rx, actions,
4219 					   sizeof(struct rte_flow_action));
4220 				actions_rx++;
4221 			}
4222 			break;
4223 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4224 			raw_encap = actions->conf;
4225 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) {
4226 				memcpy(actions_tx, actions,
4227 				       sizeof(struct rte_flow_action));
4228 				actions_tx++;
4229 				encap = 1;
4230 			} else {
4231 				rte_memcpy(actions_rx, actions,
4232 					   sizeof(struct rte_flow_action));
4233 				actions_rx++;
4234 			}
4235 			break;
4236 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4237 			raw_decap = actions->conf;
4238 			if (raw_decap->size < MLX5_ENCAPSULATION_DECISION_SIZE) {
4239 				memcpy(actions_tx, actions,
4240 				       sizeof(struct rte_flow_action));
4241 				actions_tx++;
4242 			} else {
4243 				rte_memcpy(actions_rx, actions,
4244 					   sizeof(struct rte_flow_action));
4245 				actions_rx++;
4246 			}
4247 			break;
4248 		default:
4249 			rte_memcpy(actions_rx, actions,
4250 				   sizeof(struct rte_flow_action));
4251 			actions_rx++;
4252 			break;
4253 		}
4254 	}
4255 	/* Add set meta action and end action for the Rx flow. */
4256 	tag_action = actions_rx;
4257 	tag_action->type = (enum rte_flow_action_type)
4258 			   MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4259 	actions_rx++;
4260 	rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
4261 	actions_rx++;
4262 	set_tag = (void *)actions_rx;
4263 	set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL);
4264 	MLX5_ASSERT(set_tag->id > REG_NON);
4265 	set_tag->data = flow_id;
4266 	tag_action->conf = set_tag;
4267 	/* Create Tx item list. */
4268 	rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
4269 	addr = (void *)&pattern_tx[2];
4270 	item = pattern_tx;
4271 	item->type = (enum rte_flow_item_type)
4272 		     MLX5_RTE_FLOW_ITEM_TYPE_TAG;
4273 	tag_item = (void *)addr;
4274 	tag_item->data = flow_id;
4275 	tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
	MLX5_ASSERT(tag_item->id > REG_NON);
4277 	item->spec = tag_item;
4278 	addr += sizeof(struct mlx5_rte_flow_item_tag);
4279 	tag_item = (void *)addr;
4280 	tag_item->data = UINT32_MAX;
4281 	tag_item->id = UINT16_MAX;
4282 	item->mask = tag_item;
4283 	item->last = NULL;
4284 	item++;
4285 	item->type = RTE_FLOW_ITEM_TYPE_END;
4286 	return 0;
4287 }
4288 
4289 /**
 * The last stage of the splitting chain: it just creates the subflow
 * without any modification.
4292  *
4293  * @param[in] dev
4294  *   Pointer to Ethernet device.
4295  * @param[in] flow
4296  *   Parent flow structure pointer.
4297  * @param[in, out] sub_flow
4298  *   Pointer to return the created subflow, may be NULL.
4299  * @param[in] attr
4300  *   Flow rule attributes.
4301  * @param[in] items
4302  *   Pattern specification (list terminated by the END pattern item).
4303  * @param[in] actions
4304  *   Associated actions (list terminated by the END action).
4305  * @param[in] flow_split_info
4306  *   Pointer to flow split info structure.
4307  * @param[out] error
4308  *   Perform verbose error reporting if not NULL.
4309  * @return
4310  *   0 on success, negative value otherwise
4311  */
4312 static int
4313 flow_create_split_inner(struct rte_eth_dev *dev,
4314 			struct rte_flow *flow,
4315 			struct mlx5_flow **sub_flow,
4316 			const struct rte_flow_attr *attr,
4317 			const struct rte_flow_item items[],
4318 			const struct rte_flow_action actions[],
4319 			struct mlx5_flow_split_info *flow_split_info,
4320 			struct rte_flow_error *error)
4321 {
4322 	struct mlx5_flow *dev_flow;
4323 
4324 	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
4325 				    flow_split_info->flow_idx, error);
4326 	if (!dev_flow)
4327 		return -rte_errno;
4328 	dev_flow->flow = flow;
4329 	dev_flow->external = flow_split_info->external;
4330 	dev_flow->skip_scale = flow_split_info->skip_scale;
4331 	/* Subflow object was created, we must include one in the list. */
4332 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
4333 		      dev_flow->handle, next);
4334 	/*
	 * If dev_flow is one of the suffix flows, some actions in the suffix
	 * flow may need some user-defined item layer flags, and the metadata
	 * Rx queue mark flag is passed to the suffix flow as well.
4338 	 */
4339 	if (flow_split_info->prefix_layers)
4340 		dev_flow->handle->layers = flow_split_info->prefix_layers;
4341 	if (flow_split_info->prefix_mark)
4342 		dev_flow->handle->mark = 1;
4343 	if (sub_flow)
4344 		*sub_flow = dev_flow;
4345 	return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
4346 }
4347 
4348 /**
4349  * Split the meter flow.
4350  *
 * As the meter flow will be split into three subflows, apart from the
 * meter action, the other actions only make sense once the meter accepts
 * the packet. If the packet needs to be dropped, no additional actions
 * should be taken.
 *
 * One kind of special action, which decapsulates the L3 tunnel header,
 * will be in the prefix subflow, so as not to take the L3 tunnel header
 * into account.
4359  *
4360  * @param dev
4361  *   Pointer to Ethernet device.
4362  * @param[in] items
4363  *   Pattern specification (list terminated by the END pattern item).
4364  * @param[out] sfx_items
4365  *   Suffix flow match items (list terminated by the END pattern item).
4366  * @param[in] actions
4367  *   Associated actions (list terminated by the END action).
4368  * @param[out] actions_sfx
4369  *   Suffix flow actions.
4370  * @param[out] actions_pre
4371  *   Prefix flow actions.
 *
 * @return
 *   The allocated flow id (tag id) on success, 0 otherwise.
4379  */
4380 static int
4381 flow_meter_split_prep(struct rte_eth_dev *dev,
4382 		 const struct rte_flow_item items[],
4383 		 struct rte_flow_item sfx_items[],
4384 		 const struct rte_flow_action actions[],
4385 		 struct rte_flow_action actions_sfx[],
4386 		 struct rte_flow_action actions_pre[])
4387 {
4388 	struct mlx5_priv *priv = dev->data->dev_private;
4389 	struct rte_flow_action *tag_action = NULL;
4390 	struct rte_flow_item *tag_item;
4391 	struct mlx5_rte_flow_action_set_tag *set_tag;
4392 	struct rte_flow_error error;
4393 	const struct rte_flow_action_raw_encap *raw_encap;
4394 	const struct rte_flow_action_raw_decap *raw_decap;
4395 	struct mlx5_rte_flow_item_tag *tag_spec;
4396 	struct mlx5_rte_flow_item_tag *tag_mask;
4397 	uint32_t tag_id = 0;
4398 	bool copy_vlan = false;
4399 
4400 	/* Prepare the actions for prefix and suffix flow. */
4401 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4402 		struct rte_flow_action **action_cur = NULL;
4403 
4404 		switch (actions->type) {
4405 		case RTE_FLOW_ACTION_TYPE_METER:
4406 			/* Add the extra tag action first. */
4407 			tag_action = actions_pre;
4408 			tag_action->type = (enum rte_flow_action_type)
4409 					   MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4410 			actions_pre++;
4411 			action_cur = &actions_pre;
4412 			break;
4413 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4414 		case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4415 			action_cur = &actions_pre;
4416 			break;
4417 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4418 			raw_encap = actions->conf;
4419 			if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
4420 				action_cur = &actions_pre;
4421 			break;
4422 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4423 			raw_decap = actions->conf;
4424 			if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4425 				action_cur = &actions_pre;
4426 			break;
4427 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4428 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4429 			copy_vlan = true;
4430 			break;
4431 		default:
4432 			break;
4433 		}
4434 		if (!action_cur)
4435 			action_cur = &actions_sfx;
4436 		memcpy(*action_cur, actions, sizeof(struct rte_flow_action));
4437 		(*action_cur)++;
4438 	}
4439 	/* Add end action to the actions. */
4440 	actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
4441 	actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
4442 	actions_pre++;
4443 	/* Set the tag. */
4444 	set_tag = (void *)actions_pre;
4445 	set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
4446 	mlx5_ipool_malloc(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
4447 			  &tag_id);
4448 	if (tag_id >= (1 << (sizeof(tag_id) * 8 - MLX5_MTR_COLOR_BITS))) {
4449 		DRV_LOG(ERR, "Port %u meter flow id exceed max limit.",
4450 			dev->data->port_id);
4451 		mlx5_ipool_free(priv->sh->ipool
4452 				[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], tag_id);
4453 		return 0;
4454 	} else if (!tag_id) {
4455 		return 0;
4456 	}
4457 	set_tag->data = tag_id << MLX5_MTR_COLOR_BITS;
	MLX5_ASSERT(tag_action);
4459 	tag_action->conf = set_tag;
4460 	/* Prepare the suffix subflow items. */
4461 	tag_item = sfx_items++;
4462 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
4463 		int item_type = items->type;
4464 
4465 		switch (item_type) {
4466 		case RTE_FLOW_ITEM_TYPE_PORT_ID:
4467 			memcpy(sfx_items, items, sizeof(*sfx_items));
4468 			sfx_items++;
4469 			break;
4470 		case RTE_FLOW_ITEM_TYPE_VLAN:
4471 			if (copy_vlan) {
4472 				memcpy(sfx_items, items, sizeof(*sfx_items));
4473 				/*
4474 				 * Convert to internal match item, it is used
4475 				 * for vlan push and set vid.
4476 				 */
4477 				sfx_items->type = (enum rte_flow_item_type)
4478 						  MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
4479 				sfx_items++;
4480 			}
4481 			break;
4482 		default:
4483 			break;
4484 		}
4485 	}
4486 	sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
4487 	sfx_items++;
4488 	tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
4489 	tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS;
4490 	tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
4491 	tag_mask = tag_spec + 1;
4492 	tag_mask->data = 0xffffff00;
4493 	tag_item->type = (enum rte_flow_item_type)
4494 			 MLX5_RTE_FLOW_ITEM_TYPE_TAG;
4495 	tag_item->spec = tag_spec;
4496 	tag_item->last = NULL;
4497 	tag_item->mask = tag_mask;
4498 	return tag_id;
4499 }
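
/*
 * Illustrative sketch (assumption, hypothetical helper): the meter split
 * keeps the low MLX5_MTR_COLOR_BITS bits of the tag register for the meter
 * color and stores the allocated flow id above them, which is why the suffix
 * match mask above clears the color bits.
 */
static inline uint32_t
flow_meter_sfx_tag_data_sketch(uint32_t tag_id)
{
	/* Same shift as used for both set_tag->data and tag_spec->data. */
	return tag_id << MLX5_MTR_COLOR_BITS;
}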
4500 
4501 /**
4502  * Split action list having QUEUE/RSS for metadata register copy.
4503  *
4504  * Once Q/RSS action is detected in user's action list, the flow action
4505  * should be split in order to copy metadata registers, which will happen in
4506  * RX_CP_TBL like,
4507  *   - CQE->flow_tag := reg_c[1] (MARK)
4508  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4509  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
4510  * This is because the last action of each flow must be a terminal action
4511  * (QUEUE, RSS or DROP).
4512  *
4513  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
4514  * stored and kept in the mlx5_flow structure per each sub_flow.
4515  *
4516  * The Q/RSS action is replaced with,
4517  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
4518  * And the following JUMP action is added at the end,
4519  *   - JUMP, to RX_CP_TBL.
4520  *
4521  * A flow to perform remained Q/RSS action will be created in RX_ACT_TBL by
4522  * flow_create_split_metadata() routine. The flow will look like,
4523  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
4524  *
4525  * @param dev
4526  *   Pointer to Ethernet device.
4527  * @param[out] split_actions
4528  *   Pointer to store split actions to jump to CP_TBL.
4529  * @param[in] actions
4530  *   Pointer to the list of original flow actions.
4531  * @param[in] qrss
4532  *   Pointer to the Q/RSS action.
4533  * @param[in] actions_n
4534  *   Number of original actions.
4535  * @param[out] error
4536  *   Perform verbose error reporting if not NULL.
4537  *
4538  * @return
4539  *   non-zero unique flow_id on success, otherwise 0 and
4540  *   error/rte_error are set.
4541  */
4542 static uint32_t
4543 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
4544 			  struct rte_flow_action *split_actions,
4545 			  const struct rte_flow_action *actions,
4546 			  const struct rte_flow_action *qrss,
4547 			  int actions_n, struct rte_flow_error *error)
4548 {
4549 	struct mlx5_priv *priv = dev->data->dev_private;
4550 	struct mlx5_rte_flow_action_set_tag *set_tag;
4551 	struct rte_flow_action_jump *jump;
4552 	const int qrss_idx = qrss - actions;
4553 	uint32_t flow_id = 0;
4554 	int ret = 0;
4555 
4556 	/*
4557 	 * Given actions will be split
4558 	 * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
4559 	 * - Add jump to mreg CP_TBL.
4560 	 * As a result, there will be one more action.
4561 	 */
4562 	++actions_n;
4563 	memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
4564 	set_tag = (void *)(split_actions + actions_n);
4565 	/*
	 * If the tag action is not set to void (it means we are not the meter
	 * suffix flow), add the tag action, since the meter suffix flow
	 * already has the tag added.
4569 	 */
4570 	if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) {
4571 		/*
4572 		 * Allocate the new subflow ID. This one is unique within
4573 		 * device and not shared with representors. Otherwise,
4574 		 * we would have to resolve multi-thread access synch
4575 		 * issue. Each flow on the shared device is appended
4576 		 * with source vport identifier, so the resulting
4577 		 * flows will be unique in the shared (by master and
4578 		 * representors) domain even if they have coinciding
4579 		 * IDs.
4580 		 */
4581 		mlx5_ipool_malloc(priv->sh->ipool
4582 				  [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id);
4583 		if (!flow_id)
4584 			return rte_flow_error_set(error, ENOMEM,
4585 						  RTE_FLOW_ERROR_TYPE_ACTION,
4586 						  NULL, "can't allocate id "
4587 						  "for split Q/RSS subflow");
4588 		/* Internal SET_TAG action to set flow ID. */
4589 		*set_tag = (struct mlx5_rte_flow_action_set_tag){
4590 			.data = flow_id,
4591 		};
4592 		ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
4593 		if (ret < 0)
4594 			return ret;
4595 		set_tag->id = ret;
4596 		/* Construct new actions array. */
4597 		/* Replace QUEUE/RSS action. */
4598 		split_actions[qrss_idx] = (struct rte_flow_action){
4599 			.type = (enum rte_flow_action_type)
4600 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
4601 			.conf = set_tag,
4602 		};
4603 	}
4604 	/* JUMP action to jump to mreg copy table (CP_TBL). */
4605 	jump = (void *)(set_tag + 1);
4606 	*jump = (struct rte_flow_action_jump){
4607 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4608 	};
4609 	split_actions[actions_n - 2] = (struct rte_flow_action){
4610 		.type = RTE_FLOW_ACTION_TYPE_JUMP,
4611 		.conf = jump,
4612 	};
4613 	split_actions[actions_n - 1] = (struct rte_flow_action){
4614 		.type = RTE_FLOW_ACTION_TYPE_END,
4615 	};
4616 	return flow_id;
4617 }
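
/*
 * Buffer sizing sketch (illustrative, hypothetical helper): the layout used
 * by flow_mreg_split_qrss_prep() above appends one extra action plus the
 * SET_TAG and JUMP configurations past the copied action array, so a caller
 * allocating split_actions needs at least this much room. The real caller
 * computes the size itself; this is only an assumption-level summary.
 */
static inline size_t
flow_mreg_split_qrss_buf_size_sketch(int actions_n)
{
	return sizeof(struct rte_flow_action) * (actions_n + 1) +
	       sizeof(struct mlx5_rte_flow_action_set_tag) +
	       sizeof(struct rte_flow_action_jump);
}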
4618 
4619 /**
4620  * Extend the given action list for Tx metadata copy.
4621  *
4622  * Copy the given action list to the ext_actions and add flow metadata register
4623  * copy action in order to copy reg_a set by WQE to reg_c[0].
4624  *
 * @param dev
 *   Pointer to the Ethernet device.
 * @param[out] ext_actions
4626  *   Pointer to the extended action list.
4627  * @param[in] actions
4628  *   Pointer to the list of actions.
4629  * @param[in] actions_n
4630  *   Number of actions in the list.
4631  * @param[out] error
4632  *   Perform verbose error reporting if not NULL.
4633  * @param[in] encap_idx
 *   The encap action index.
4635  *
4636  * @return
4637  *   0 on success, negative value otherwise
4638  */
4639 static int
4640 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
4641 		       struct rte_flow_action *ext_actions,
4642 		       const struct rte_flow_action *actions,
4643 		       int actions_n, struct rte_flow_error *error,
4644 		       int encap_idx)
4645 {
4646 	struct mlx5_flow_action_copy_mreg *cp_mreg =
4647 		(struct mlx5_flow_action_copy_mreg *)
4648 			(ext_actions + actions_n + 1);
4649 	int ret;
4650 
4651 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4652 	if (ret < 0)
4653 		return ret;
4654 	cp_mreg->dst = ret;
4655 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
4656 	if (ret < 0)
4657 		return ret;
4658 	cp_mreg->src = ret;
4659 	if (encap_idx != 0)
4660 		memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
4661 	if (encap_idx == actions_n - 1) {
4662 		ext_actions[actions_n - 1] = (struct rte_flow_action){
4663 			.type = (enum rte_flow_action_type)
4664 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4665 			.conf = cp_mreg,
4666 		};
4667 		ext_actions[actions_n] = (struct rte_flow_action){
4668 			.type = RTE_FLOW_ACTION_TYPE_END,
4669 		};
4670 	} else {
4671 		ext_actions[encap_idx] = (struct rte_flow_action){
4672 			.type = (enum rte_flow_action_type)
4673 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4674 			.conf = cp_mreg,
4675 		};
4676 		memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
4677 				sizeof(*ext_actions) * (actions_n - encap_idx));
4678 	}
4679 	return 0;
4680 }
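
/*
 * Buffer sizing sketch (illustrative, hypothetical helper): the extended
 * action list built by flow_mreg_tx_copy_prep() holds the original actions_n
 * entries plus the inserted metadata copy action, followed by the copy_mreg
 * configuration appended at ext_actions + actions_n + 1.
 */
static inline size_t
flow_mreg_tx_copy_buf_size_sketch(int actions_n)
{
	return sizeof(struct rte_flow_action) * (actions_n + 1) +
	       sizeof(struct mlx5_flow_action_copy_mreg);
}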
4681 
4682 /**
4683  * Check the match action from the action list.
4684  *
4685  * @param[in] actions
4686  *   Pointer to the list of actions.
4687  * @param[in] attr
4688  *   Flow rule attributes.
4689  * @param[in] action
 *   The action to be checked for existence.
 * @param[out] match_action_pos
 *   Pointer to the position of the matched action if it exists, -1 otherwise.
 * @param[out] qrss_action_pos
 *   Pointer to the position of the Queue/RSS action if it exists, -1 otherwise.
4695  * @param[out] modify_after_mirror
4696  *   Pointer to the flag of modify action after FDB mirroring.
4697  *
4698  * @return
 *   > 0 the total number of actions if the match action is found.
 *   0 if the match action is not found in the action list.
4701  */
4702 static int
4703 flow_check_match_action(const struct rte_flow_action actions[],
4704 			const struct rte_flow_attr *attr,
4705 			enum rte_flow_action_type action,
4706 			int *match_action_pos, int *qrss_action_pos,
4707 			int *modify_after_mirror)
4708 {
4709 	const struct rte_flow_action_sample *sample;
4710 	int actions_n = 0;
4711 	uint32_t ratio = 0;
4712 	int sub_type = 0;
4713 	int flag = 0;
4714 	int fdb_mirror = 0;
4715 
4716 	*match_action_pos = -1;
4717 	*qrss_action_pos = -1;
4718 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4719 		if (actions->type == action) {
4720 			flag = 1;
4721 			*match_action_pos = actions_n;
4722 		}
4723 		switch (actions->type) {
4724 		case RTE_FLOW_ACTION_TYPE_QUEUE:
4725 		case RTE_FLOW_ACTION_TYPE_RSS:
4726 			*qrss_action_pos = actions_n;
4727 			break;
4728 		case RTE_FLOW_ACTION_TYPE_SAMPLE:
4729 			sample = actions->conf;
4730 			ratio = sample->ratio;
4731 			sub_type = ((const struct rte_flow_action *)
4732 					(sample->actions))->type;
4733 			if (ratio == 1 && attr->transfer)
4734 				fdb_mirror = 1;
4735 			break;
4736 		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
4737 		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
4738 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
4739 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
4740 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
4741 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
4742 		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
4743 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
4744 		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
4745 		case RTE_FLOW_ACTION_TYPE_SET_TTL:
4746 		case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
4747 		case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
4748 		case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
4749 		case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
4750 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
4751 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
4752 		case RTE_FLOW_ACTION_TYPE_FLAG:
4753 		case RTE_FLOW_ACTION_TYPE_MARK:
4754 		case RTE_FLOW_ACTION_TYPE_SET_META:
4755 		case RTE_FLOW_ACTION_TYPE_SET_TAG:
4756 			if (fdb_mirror)
4757 				*modify_after_mirror = 1;
4758 			break;
4759 		default:
4760 			break;
4761 		}
4762 		actions_n++;
4763 	}
4764 	if (flag && fdb_mirror && !*modify_after_mirror) {
		/* FDB mirroring is implemented with the destination array
		 * instead of the FLOW_SAMPLER object.
4767 		 */
4768 		if (sub_type != RTE_FLOW_ACTION_TYPE_END)
4769 			flag = 0;
4770 	}
4771 	/* Count RTE_FLOW_ACTION_TYPE_END. */
4772 	return flag ? actions_n + 1 : 0;
4773 }
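
/*
 * Usage sketch (illustrative, hypothetical helper): the sample split path
 * looks up the position of the SAMPLE action (and of any Queue/RSS action)
 * before deciding how to split, treating a zero return as "no split needed".
 */
static inline int
flow_find_sample_action_sketch(const struct rte_flow_action actions[],
			       const struct rte_flow_attr *attr,
			       int *sample_pos, int *qrss_pos,
			       int *modify_after_mirror)
{
	return flow_check_match_action(actions, attr,
				       RTE_FLOW_ACTION_TYPE_SAMPLE,
				       sample_pos, qrss_pos,
				       modify_after_mirror);
}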
4774 
4775 #define SAMPLE_SUFFIX_ITEM 2
4776 
4777 /**
4778  * Split the sample flow.
4779  *
 * As the sample flow will be split into two subflows, the sample flow keeps
 * the sample action while the other actions are moved to a new suffix flow.
 *
 * A unique tag id is also added with a tag action in the sample flow; the
 * same tag id is used as a match in the suffix flow.
4785  *
4786  * @param dev
4787  *   Pointer to Ethernet device.
4788  * @param[in] add_tag
4789  *   Add extra tag action flag.
4790  * @param[out] sfx_items
4791  *   Suffix flow match items (list terminated by the END pattern item).
4792  * @param[in] actions
4793  *   Associated actions (list terminated by the END action).
4794  * @param[out] actions_sfx
4795  *   Suffix flow actions.
4796  * @param[out] actions_pre
4797  *   Prefix flow actions.
4798  * @param[in] actions_n
4799  *  The total number of actions.
4800  * @param[in] sample_action_pos
4801  *   The sample action position.
4802  * @param[in] qrss_action_pos
4803  *   The Queue/RSS action position.
4804  * @param[in] jump_table
4805  *   Add extra jump action flag.
4806  * @param[out] error
4807  *   Perform verbose error reporting if not NULL.
4808  *
4809  * @return
 *   The unique tag id (flow_id) on success, 0 when no extra tag is added,
 *   a negative errno value otherwise and rte_errno is set.
4812  */
4813 static int
4814 flow_sample_split_prep(struct rte_eth_dev *dev,
4815 		       int add_tag,
4816 		       struct rte_flow_item sfx_items[],
4817 		       const struct rte_flow_action actions[],
4818 		       struct rte_flow_action actions_sfx[],
4819 		       struct rte_flow_action actions_pre[],
4820 		       int actions_n,
4821 		       int sample_action_pos,
4822 		       int qrss_action_pos,
4823 		       int jump_table,
4824 		       struct rte_flow_error *error)
4825 {
4826 	struct mlx5_priv *priv = dev->data->dev_private;
4827 	struct mlx5_rte_flow_action_set_tag *set_tag;
4828 	struct mlx5_rte_flow_item_tag *tag_spec;
4829 	struct mlx5_rte_flow_item_tag *tag_mask;
4830 	struct rte_flow_action_jump *jump_action;
4831 	uint32_t tag_id = 0;
4832 	int index;
4833 	int append_index = 0;
4834 	int ret;
4835 
4836 	if (sample_action_pos < 0)
4837 		return rte_flow_error_set(error, EINVAL,
4838 					  RTE_FLOW_ERROR_TYPE_ACTION,
4839 					  NULL, "invalid position of sample "
4840 					  "action in list");
4841 	/* Prepare the actions for prefix and suffix flow. */
4842 	if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
4843 		index = qrss_action_pos;
		/* Put the actions preceding the Queue/RSS action into the
		 * prefix flow.
		 */
4845 		if (index != 0)
4846 			memcpy(actions_pre, actions,
4847 			       sizeof(struct rte_flow_action) * index);
		/* Put the other actions preceding the sample action into the
		 * prefix flow.
		 */
4849 		if (sample_action_pos > index + 1)
4850 			memcpy(actions_pre + index, actions + index + 1,
4851 			       sizeof(struct rte_flow_action) *
4852 			       (sample_action_pos - index - 1));
4853 		index = sample_action_pos - 1;
4854 		/* Put Queue/RSS action into Suffix flow. */
4855 		memcpy(actions_sfx, actions + qrss_action_pos,
4856 		       sizeof(struct rte_flow_action));
4857 		actions_sfx++;
4858 	} else {
4859 		index = sample_action_pos;
4860 		if (index != 0)
4861 			memcpy(actions_pre, actions,
4862 			       sizeof(struct rte_flow_action) * index);
4863 	}
4864 	/* For CX5, add an extra tag action for NIC-RX and E-Switch ingress.
4865 	 * For CX6DX and above, metadata registers Cx preserve their value,
4866 	 * so add the extra tag action for both NIC-RX and the E-Switch domain.
4867 	 */
4868 	if (add_tag) {
4869 		/* Prepare the prefix tag action. */
4870 		append_index++;
4871 		set_tag = (void *)(actions_pre + actions_n + append_index);
4872 		ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
4873 		if (ret < 0)
4874 			return ret;
4875 		set_tag->id = ret;
4876 		mlx5_ipool_malloc(priv->sh->ipool
4877 				  [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id);
4878 		set_tag->data = tag_id;
4879 		/* Prepare the suffix subflow items. */
4880 		tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
4881 		tag_spec->data = tag_id;
4882 		tag_spec->id = set_tag->id;
4883 		tag_mask = tag_spec + 1;
4884 		tag_mask->data = UINT32_MAX;
4885 		sfx_items[0] = (struct rte_flow_item){
4886 			.type = (enum rte_flow_item_type)
4887 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4888 			.spec = tag_spec,
4889 			.last = NULL,
4890 			.mask = tag_mask,
4891 		};
4892 		sfx_items[1] = (struct rte_flow_item){
4893 			.type = (enum rte_flow_item_type)
4894 				RTE_FLOW_ITEM_TYPE_END,
4895 		};
4896 		/* Prepare the tag action in prefix subflow. */
4897 		actions_pre[index++] =
4898 			(struct rte_flow_action){
4899 			.type = (enum rte_flow_action_type)
4900 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
4901 			.conf = set_tag,
4902 		};
4903 	}
4904 	memcpy(actions_pre + index, actions + sample_action_pos,
4905 	       sizeof(struct rte_flow_action));
4906 	index += 1;
4907 	/* For a modify action after the sample action in E-Switch mirroring,
4908 	 * add an extra jump action in the prefix subflow to jump into the next
4909 	 * table, then do the modify action in the new table.
4910 	 */
4911 	if (jump_table) {
4912 		/* Prepare the prefix jump action. */
4913 		append_index++;
4914 		jump_action = (void *)(actions_pre + actions_n + append_index);
4915 		jump_action->group = jump_table;
4916 		actions_pre[index++] =
4917 			(struct rte_flow_action){
4918 			.type = (enum rte_flow_action_type)
4919 				RTE_FLOW_ACTION_TYPE_JUMP,
4920 			.conf = jump_action,
4921 		};
4922 	}
4923 	actions_pre[index] = (struct rte_flow_action){
4924 		.type = (enum rte_flow_action_type)
4925 			RTE_FLOW_ACTION_TYPE_END,
4926 	};
4927 	/* Put the actions after sample into Suffix flow. */
4928 	memcpy(actions_sfx, actions + sample_action_pos + 1,
4929 	       sizeof(struct rte_flow_action) *
4930 	       (actions_n - sample_action_pos - 1));
4931 	return tag_id;
4932 }
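/*
 * Editor's note - illustrative layout (not part of the driver), assuming
 * an input list actions = { A0, A1, SAMPLE, A2, END } with no Queue/RSS
 * action before SAMPLE, add_tag set and jump_table == 0:
 *
 *   actions_pre: { A0, A1, TAG(set_tag, data=tag_id), SAMPLE, END }
 *   actions_sfx: { A2, END }
 *   sfx_items:   { TAG(spec.data=tag_id, mask=UINT32_MAX), END }
 *
 * The unique tag_id written by the prefix TAG action and matched by the
 * suffix TAG item is what links the two subflows, and it is the value
 * returned to the caller.
 */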
4933 
4934 /**
4935  * The splitting for metadata feature.
4936  *
4937  * - A Q/RSS action on NIC Rx should be split in order to pass through
4938  *   the mreg copy table (RX_CP_TBL) and then jump to the
4939  *   action table (RX_ACT_TBL), which has the split Q/RSS action.
4940  *
4941  * - All the actions on NIC Tx should have a mreg copy action to
4942  *   copy reg_a from WQE to reg_c[0].
4943  *
4944  * @param dev
4945  *   Pointer to Ethernet device.
4946  * @param[in] flow
4947  *   Parent flow structure pointer.
4948  * @param[in] attr
4949  *   Flow rule attributes.
4950  * @param[in] items
4951  *   Pattern specification (list terminated by the END pattern item).
4952  * @param[in] actions
4953  *   Associated actions (list terminated by the END action).
4954  * @param[in] flow_split_info
4955  *   Pointer to flow split info structure.
4956  * @param[out] error
4957  *   Perform verbose error reporting if not NULL.
4958  * @return
4959  *   0 on success, negative value otherwise
4960  */
4961 static int
4962 flow_create_split_metadata(struct rte_eth_dev *dev,
4963 			   struct rte_flow *flow,
4964 			   const struct rte_flow_attr *attr,
4965 			   const struct rte_flow_item items[],
4966 			   const struct rte_flow_action actions[],
4967 			   struct mlx5_flow_split_info *flow_split_info,
4968 			   struct rte_flow_error *error)
4969 {
4970 	struct mlx5_priv *priv = dev->data->dev_private;
4971 	struct mlx5_dev_config *config = &priv->config;
4972 	const struct rte_flow_action *qrss = NULL;
4973 	struct rte_flow_action *ext_actions = NULL;
4974 	struct mlx5_flow *dev_flow = NULL;
4975 	uint32_t qrss_id = 0;
4976 	int mtr_sfx = 0;
4977 	size_t act_size;
4978 	int actions_n;
4979 	int encap_idx;
4980 	int ret;
4981 
4982 	/* Check whether extensive metadata feature is engaged. */
4983 	if (!config->dv_flow_en ||
4984 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4985 	    !mlx5_flow_ext_mreg_supported(dev))
4986 		return flow_create_split_inner(dev, flow, NULL, attr, items,
4987 					       actions, flow_split_info, error);
4988 	actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
4989 							   &encap_idx);
4990 	if (qrss) {
4991 		/* Exclude hairpin flows from splitting. */
4992 		if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
4993 			const struct rte_flow_action_queue *queue;
4994 
4995 			queue = qrss->conf;
4996 			if (mlx5_rxq_get_type(dev, queue->index) ==
4997 			    MLX5_RXQ_TYPE_HAIRPIN)
4998 				qrss = NULL;
4999 		} else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
5000 			const struct rte_flow_action_rss *rss;
5001 
5002 			rss = qrss->conf;
5003 			if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
5004 			    MLX5_RXQ_TYPE_HAIRPIN)
5005 				qrss = NULL;
5006 		}
5007 	}
5008 	if (qrss) {
5009 		/* Check if it is in meter suffix table. */
5010 		mtr_sfx = attr->group == (attr->transfer ?
5011 			  (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
5012 			  MLX5_FLOW_TABLE_LEVEL_SUFFIX);
5013 		/*
5014 		 * A Q/RSS action on NIC Rx should be split in order to pass
5015 		 * through the mreg copy table (RX_CP_TBL) and then jump to the
5016 		 * action table (RX_ACT_TBL), which has the split Q/RSS action.
5017 		 */
5018 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
5019 			   sizeof(struct rte_flow_action_set_tag) +
5020 			   sizeof(struct rte_flow_action_jump);
5021 		ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
5022 					  SOCKET_ID_ANY);
5023 		if (!ext_actions)
5024 			return rte_flow_error_set(error, ENOMEM,
5025 						  RTE_FLOW_ERROR_TYPE_ACTION,
5026 						  NULL, "no memory to split "
5027 						  "metadata flow");
5028 		/*
5029 		 * If we are the meter suffix flow, the tag already exists.
5030 		 * Set the tag action to void.
5031 		 */
5032 		if (mtr_sfx)
5033 			ext_actions[qrss - actions].type =
5034 						RTE_FLOW_ACTION_TYPE_VOID;
5035 		else
5036 			ext_actions[qrss - actions].type =
5037 						(enum rte_flow_action_type)
5038 						MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5039 		/*
5040 		 * Create the new actions list with removed Q/RSS action
5041 		 * Create the new actions list with the Q/RSS action removed
5042 		 * and a set tag plus a jump to the register copy table
5043 		 * (RX_CP_TBL) appended. The unique tag ID is preallocated
5044 		 * here because it is needed for the set tag action.
5045 		qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
5046 						    qrss, actions_n, error);
5047 		if (!mtr_sfx && !qrss_id) {
5048 			ret = -rte_errno;
5049 			goto exit;
5050 		}
5051 	} else if (attr->egress && !attr->transfer) {
5052 		/*
5053 		 * All the actions on NIC Tx should have a metadata register
5054 		 * copy action to copy reg_a from WQE to reg_c[meta]
5055 		 */
5056 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
5057 			   sizeof(struct mlx5_flow_action_copy_mreg);
5058 		ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
5059 					  SOCKET_ID_ANY);
5060 		if (!ext_actions)
5061 			return rte_flow_error_set(error, ENOMEM,
5062 						  RTE_FLOW_ERROR_TYPE_ACTION,
5063 						  NULL, "no memory to split "
5064 						  "metadata flow");
5065 		/* Create the action list appended with copy register. */
5066 		ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
5067 					     actions_n, error, encap_idx);
5068 		if (ret < 0)
5069 			goto exit;
5070 	}
5071 	/* Add the unmodified original or prefix subflow. */
5072 	ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
5073 				      items, ext_actions ? ext_actions :
5074 				      actions, flow_split_info, error);
5075 	if (ret < 0)
5076 		goto exit;
5077 	MLX5_ASSERT(dev_flow);
5078 	if (qrss) {
5079 		const struct rte_flow_attr q_attr = {
5080 			.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
5081 			.ingress = 1,
5082 		};
5083 		/* Internal PMD action to set register. */
5084 		struct mlx5_rte_flow_item_tag q_tag_spec = {
5085 			.data = qrss_id,
5086 			.id = REG_NON,
5087 		};
5088 		struct rte_flow_item q_items[] = {
5089 			{
5090 				.type = (enum rte_flow_item_type)
5091 					MLX5_RTE_FLOW_ITEM_TYPE_TAG,
5092 				.spec = &q_tag_spec,
5093 				.last = NULL,
5094 				.mask = NULL,
5095 			},
5096 			{
5097 				.type = RTE_FLOW_ITEM_TYPE_END,
5098 			},
5099 		};
5100 		struct rte_flow_action q_actions[] = {
5101 			{
5102 				.type = qrss->type,
5103 				.conf = qrss->conf,
5104 			},
5105 			{
5106 				.type = RTE_FLOW_ACTION_TYPE_END,
5107 			},
5108 		};
5109 		uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
5110 
5111 		/*
5112 		 * Configure the tag item only if there is no meter subflow.
5113 		 * Since tag is already marked in the meter suffix subflow
5114 		 * we can just use the meter suffix items as is.
5115 		 */
5116 		if (qrss_id) {
5117 			/* Not meter subflow. */
5118 			MLX5_ASSERT(!mtr_sfx);
5119 			/*
5120 			 * Put the unique id in the prefix flow because it is
5121 			 * destroyed after the suffix flow; the id is freed only
5122 			 * when no actual flows use it anymore, at which point
5123 			 * identifier reallocation becomes possible (for example,
5124 			 * for other flows in other threads).
5125 			 */
5126 			dev_flow->handle->split_flow_id = qrss_id;
5127 			ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
5128 						   error);
5129 			if (ret < 0)
5130 				goto exit;
5131 			q_tag_spec.id = ret;
5132 		}
5133 		dev_flow = NULL;
5134 		/* Add suffix subflow to execute Q/RSS. */
5135 		flow_split_info->prefix_layers = layers;
5136 		flow_split_info->prefix_mark = 0;
5137 		ret = flow_create_split_inner(dev, flow, &dev_flow,
5138 					      &q_attr, mtr_sfx ? items :
5139 					      q_items, q_actions,
5140 					      flow_split_info, error);
5141 		if (ret < 0)
5142 			goto exit;
5143 		/* The qrss ID should be freed only on failure; clear it so the exit path keeps it. */
5144 		qrss_id = 0;
5145 		MLX5_ASSERT(dev_flow);
5146 	}
5147 
5148 exit:
5149 	/*
5150 	 * We do not destroy the partially created sub_flows in case of error.
5151 	 * They are included in the parent flow list and will be destroyed
5152 	 * by flow_drv_destroy.
5153 	 */
5154 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
5155 			qrss_id);
5156 	mlx5_free(ext_actions);
5157 	return ret;
5158 }
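/*
 * Editor's note - illustrative result (not part of the driver) for a NIC
 * Rx rule { items / A0, RSS, END } with extensive metadata enabled and
 * no meter suffix involved:
 *
 *   prefix subflow: original items / { A0, internal TAG carrying qrss_id,
 *                   JUMP to the register copy table (RX_CP_TBL), END },
 *                   as prepared by flow_mreg_split_qrss_prep()
 *   suffix subflow: group MLX5_FLOW_MREG_ACT_TABLE_GROUP,
 *                   { TAG item matching qrss_id, END } / { RSS, END }
 *
 * The preallocated qrss_id is stored in the prefix flow handle and freed
 * once no flows reference it anymore.
 */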
5159 
5160 /**
5161  * The splitting for meter feature.
5162  *
5163  * - The meter flow will be split into two flows: a prefix flow and
5164  *   a suffix flow. Packets are only meaningful if they pass the
5165  *   prefix meter action.
5166  *
5167  * - Reg_C_5 is used to match the packet between the prefix and the
5168  *   suffix flow.
5169  *
5170  * @param dev
5171  *   Pointer to Ethernet device.
5172  * @param[in] flow
5173  *   Parent flow structure pointer.
5174  * @param[in] attr
5175  *   Flow rule attributes.
5176  * @param[in] items
5177  *   Pattern specification (list terminated by the END pattern item).
5178  * @param[in] actions
5179  *   Associated actions (list terminated by the END action).
5180  * @param[in] flow_split_info
5181  *   Pointer to flow split info structure.
5182  * @param[out] error
5183  *   Perform verbose error reporting if not NULL.
5184  * @return
5185  *   0 on success, negative value otherwise
5186  */
5187 static int
5188 flow_create_split_meter(struct rte_eth_dev *dev,
5189 			struct rte_flow *flow,
5190 			const struct rte_flow_attr *attr,
5191 			const struct rte_flow_item items[],
5192 			const struct rte_flow_action actions[],
5193 			struct mlx5_flow_split_info *flow_split_info,
5194 			struct rte_flow_error *error)
5195 {
5196 	struct mlx5_priv *priv = dev->data->dev_private;
5197 	struct rte_flow_action *sfx_actions = NULL;
5198 	struct rte_flow_action *pre_actions = NULL;
5199 	struct rte_flow_item *sfx_items = NULL;
5200 	struct mlx5_flow *dev_flow = NULL;
5201 	struct rte_flow_attr sfx_attr = *attr;
5202 	uint32_t mtr = 0;
5203 	uint32_t mtr_tag_id = 0;
5204 	size_t act_size;
5205 	size_t item_size;
5206 	int actions_n = 0;
5207 	int ret;
5208 
5209 	if (priv->mtr_en)
5210 		actions_n = flow_check_meter_action(actions, &mtr);
5211 	if (mtr) {
5212 		/* The five prefix actions: meter, decap, encap, tag, end. */
5213 		act_size = sizeof(struct rte_flow_action) * (actions_n + 5) +
5214 			   sizeof(struct mlx5_rte_flow_action_set_tag);
5215 		/* tag, vlan, port id, end. */
5216 #define METER_SUFFIX_ITEM 4
5217 		item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
5218 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
5219 		sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
5220 					  0, SOCKET_ID_ANY);
5221 		if (!sfx_actions)
5222 			return rte_flow_error_set(error, ENOMEM,
5223 						  RTE_FLOW_ERROR_TYPE_ACTION,
5224 						  NULL, "no memory to split "
5225 						  "meter flow");
5226 		sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
5227 			     act_size);
5228 		pre_actions = sfx_actions + actions_n;
5229 		mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items,
5230 						   actions, sfx_actions,
5231 						   pre_actions);
5232 		if (!mtr_tag_id) {
5233 			ret = -rte_errno;
5234 			goto exit;
5235 		}
5236 		/* Add the prefix subflow. */
5237 		flow_split_info->prefix_mark = 0;
5238 		ret = flow_create_split_inner(dev, flow, &dev_flow,
5239 					      attr, items, pre_actions,
5240 					      flow_split_info, error);
5241 		if (ret) {
5242 			ret = -rte_errno;
5243 			goto exit;
5244 		}
5245 		dev_flow->handle->split_flow_id = mtr_tag_id;
5246 		/* Set the sfx group attr. */
5247 		sfx_attr.group = sfx_attr.transfer ?
5248 				(MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
5249 				 MLX5_FLOW_TABLE_LEVEL_SUFFIX;
5250 		flow_split_info->prefix_layers =
5251 				flow_get_prefix_layer_flags(dev_flow);
5252 		flow_split_info->prefix_mark = dev_flow->handle->mark;
5253 	}
5254 	/* Add the suffix subflow. */
5255 	ret = flow_create_split_metadata(dev, flow,
5256 					 &sfx_attr, sfx_items ?
5257 					 sfx_items : items,
5258 					 sfx_actions ? sfx_actions : actions,
5259 					 flow_split_info, error);
5260 exit:
5261 	if (sfx_actions)
5262 		mlx5_free(sfx_actions);
5263 	return ret;
5264 }
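/*
 * Editor's note - buffer layout sketch (not part of the driver): the
 * single allocation of act_size + item_size bytes done above is roughly
 * carved up as follows:
 *
 *   sfx_actions                  suffix action array
 *   sfx_actions + actions_n      pre_actions: prefix actions
 *                                (meter, decap, encap, tag, end)
 *   ...                          mlx5_rte_flow_action_set_tag storage
 *   (char *)sfx_actions
 *            + act_size          sfx_items: METER_SUFFIX_ITEM pattern
 *                                items plus two mlx5_rte_flow_item_tag
 *                                entries (spec and mask)
 */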
5265 
5266 /**
5267  * The splitting for sample feature.
5268  *
5269  * Once a Sample action is detected in the action list, the flow actions
5270  * are split into a prefix sub flow and a suffix sub flow.
5271  *
5272  * The original items remain in the prefix sub flow; all actions preceding
5273  * the sample action and the sample action itself are copied to the prefix
5274  * sub flow, while the actions following the sample action are copied to
5275  * the suffix sub flow. A Queue action is always located in the suffix.
5276  *
5277  * To make the packet from the prefix sub flow match the suffix sub flow,
5278  * an extra tag action is added to the prefix sub flow, and the suffix sub
5279  * flow uses a tag item with the unique flow id.
5280  *
5281  * @param dev
5282  *   Pointer to Ethernet device.
5283  * @param[in] flow
5284  *   Parent flow structure pointer.
5285  * @param[in] attr
5286  *   Flow rule attributes.
5287  * @param[in] items
5288  *   Pattern specification (list terminated by the END pattern item).
5289  * @param[in] actions
5290  *   Associated actions (list terminated by the END action).
5291  * @param[in] flow_split_info
5292  *   Pointer to flow split info structure.
5293  * @param[out] error
5294  *   Perform verbose error reporting if not NULL.
5295  * @return
5296  *   0 on success, negative value otherwise
5297  */
5298 static int
5299 flow_create_split_sample(struct rte_eth_dev *dev,
5300 			 struct rte_flow *flow,
5301 			 const struct rte_flow_attr *attr,
5302 			 const struct rte_flow_item items[],
5303 			 const struct rte_flow_action actions[],
5304 			 struct mlx5_flow_split_info *flow_split_info,
5305 			 struct rte_flow_error *error)
5306 {
5307 	struct mlx5_priv *priv = dev->data->dev_private;
5308 	struct rte_flow_action *sfx_actions = NULL;
5309 	struct rte_flow_action *pre_actions = NULL;
5310 	struct rte_flow_item *sfx_items = NULL;
5311 	struct mlx5_flow *dev_flow = NULL;
5312 	struct rte_flow_attr sfx_attr = *attr;
5313 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5314 	struct mlx5_flow_dv_sample_resource *sample_res;
5315 	struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
5316 	struct mlx5_flow_tbl_resource *sfx_tbl;
5317 #endif
5318 	size_t act_size;
5319 	size_t item_size;
5320 	uint32_t fdb_tx = 0;
5321 	int32_t tag_id = 0;
5322 	int actions_n = 0;
5323 	int sample_action_pos;
5324 	int qrss_action_pos;
5325 	int add_tag = 0;
5326 	int modify_after_mirror = 0;
5327 	uint16_t jump_table = 0;
5328 	const uint32_t next_ft_step = 1;
5329 	int ret = 0;
5330 
5331 	if (priv->sampler_en)
5332 		actions_n = flow_check_match_action(actions, attr,
5333 					RTE_FLOW_ACTION_TYPE_SAMPLE,
5334 					&sample_action_pos, &qrss_action_pos,
5335 					&modify_after_mirror);
5336 	if (actions_n) {
5337 		/* The prefix actions must include sample, tag, end. */
5338 		act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
5339 			   + sizeof(struct mlx5_rte_flow_action_set_tag);
5340 		item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
5341 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
5342 		sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
5343 					  item_size), 0, SOCKET_ID_ANY);
5344 		if (!sfx_actions)
5345 			return rte_flow_error_set(error, ENOMEM,
5346 						  RTE_FLOW_ERROR_TYPE_ACTION,
5347 						  NULL, "no memory to split "
5348 						  "sample flow");
5349 		/* The representor_id is -1 for uplink. */
5350 		fdb_tx = (attr->transfer && priv->representor_id != -1);
5351 		/*
5352 		 * When reg_c_preserve is set, metadata registers Cx preserve
5353 		 * their value even through packet duplication.
5354 		 */
5355 		add_tag = (!fdb_tx || priv->config.hca_attr.reg_c_preserve);
5356 		if (add_tag)
5357 			sfx_items = (struct rte_flow_item *)((char *)sfx_actions
5358 					+ act_size);
5359 		if (modify_after_mirror)
5360 			jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
5361 				     next_ft_step;
5362 		pre_actions = sfx_actions + actions_n;
5363 		tag_id = flow_sample_split_prep(dev, add_tag, sfx_items,
5364 						actions, sfx_actions,
5365 						pre_actions, actions_n,
5366 						sample_action_pos,
5367 						qrss_action_pos, jump_table,
5368 						error);
5369 		if (tag_id < 0 || (add_tag && !tag_id)) {
5370 			ret = -rte_errno;
5371 			goto exit;
5372 		}
5373 		if (modify_after_mirror)
5374 			flow_split_info->skip_scale =
5375 					1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
5376 		/* Add the prefix subflow. */
5377 		ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
5378 					      items, pre_actions,
5379 					      flow_split_info, error);
5380 		if (ret) {
5381 			ret = -rte_errno;
5382 			goto exit;
5383 		}
5384 		dev_flow->handle->split_flow_id = tag_id;
5385 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5386 		if (!modify_after_mirror) {
5387 			/* Set the sfx group attr. */
5388 			sample_res = (struct mlx5_flow_dv_sample_resource *)
5389 						dev_flow->dv.sample_res;
5390 			sfx_tbl = (struct mlx5_flow_tbl_resource *)
5391 						sample_res->normal_path_tbl;
5392 			sfx_tbl_data = container_of(sfx_tbl,
5393 						struct mlx5_flow_tbl_data_entry,
5394 						tbl);
5395 			sfx_attr.group = sfx_attr.transfer ?
5396 						(sfx_tbl_data->table_id - 1) :
5397 						sfx_tbl_data->table_id;
5398 		} else {
5399 			MLX5_ASSERT(attr->transfer);
5400 			sfx_attr.group = jump_table;
5401 		}
5402 		flow_split_info->prefix_layers =
5403 				flow_get_prefix_layer_flags(dev_flow);
5404 		flow_split_info->prefix_mark = dev_flow->handle->mark;
5405 		/* The suffix group level is already scaled with the factor; set
5406 		 * MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1 to avoid scaling
5407 		 * it again in translation.
5408 		 */
5409 		flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
5410 #endif
5411 	}
5412 	/* Add the suffix subflow. */
5413 	ret = flow_create_split_meter(dev, flow, &sfx_attr,
5414 				      sfx_items ? sfx_items : items,
5415 				      sfx_actions ? sfx_actions : actions,
5416 				      flow_split_info, error);
5417 exit:
5418 	if (sfx_actions)
5419 		mlx5_free(sfx_actions);
5420 	return ret;
5421 }
5422 
5423 /**
5424  * Split the flow into a set of subflows. The splitters may be linked
5425  * in a chain, like this:
5426  * flow_create_split_outer() calls:
5427  *   flow_create_split_meter() calls:
5428  *     flow_create_split_metadata(meter_subflow_0) calls:
5429  *       flow_create_split_inner(metadata_subflow_0)
5430  *       flow_create_split_inner(metadata_subflow_1)
5431  *       flow_create_split_inner(metadata_subflow_2)
5432  *     flow_create_split_metadata(meter_subflow_1) calls:
5433  *       flow_create_split_inner(metadata_subflow_0)
5434  *       flow_create_split_inner(metadata_subflow_1)
5435  *       flow_create_split_inner(metadata_subflow_2)
5436  *
5437  * This provides a flexible way to add new levels of flow splitting.
5438  * All successfully created subflows are included in the parent flow
5439  * dev_flow list.
5440  *
5441  * @param dev
5442  *   Pointer to Ethernet device.
5443  * @param[in] flow
5444  *   Parent flow structure pointer.
5445  * @param[in] attr
5446  *   Flow rule attributes.
5447  * @param[in] items
5448  *   Pattern specification (list terminated by the END pattern item).
5449  * @param[in] actions
5450  *   Associated actions (list terminated by the END action).
5451  * @param[in] flow_split_info
5452  *   Pointer to flow split info structure.
5453  * @param[out] error
5454  *   Perform verbose error reporting if not NULL.
5455  * @return
5456  *   0 on success, negative value otherwise
5457  */
5458 static int
5459 flow_create_split_outer(struct rte_eth_dev *dev,
5460 			struct rte_flow *flow,
5461 			const struct rte_flow_attr *attr,
5462 			const struct rte_flow_item items[],
5463 			const struct rte_flow_action actions[],
5464 			struct mlx5_flow_split_info *flow_split_info,
5465 			struct rte_flow_error *error)
5466 {
5467 	int ret;
5468 
5469 	ret = flow_create_split_sample(dev, flow, attr, items,
5470 				       actions, flow_split_info, error);
5471 	MLX5_ASSERT(ret <= 0);
5472 	return ret;
5473 }
5474 
5475 static struct mlx5_flow_tunnel *
5476 flow_tunnel_from_rule(struct rte_eth_dev *dev,
5477 		      const struct rte_flow_attr *attr,
5478 		      const struct rte_flow_item items[],
5479 		      const struct rte_flow_action actions[])
5480 {
5481 	struct mlx5_flow_tunnel *tunnel;
5482 
5483 #pragma GCC diagnostic push
5484 #pragma GCC diagnostic ignored "-Wcast-qual"
5485 	if (is_flow_tunnel_match_rule(dev, attr, items, actions))
5486 		tunnel = (struct mlx5_flow_tunnel *)items[0].spec;
5487 	else if (is_flow_tunnel_steer_rule(dev, attr, items, actions))
5488 		tunnel = (struct mlx5_flow_tunnel *)actions[0].conf;
5489 	else
5490 		tunnel = NULL;
5491 #pragma GCC diagnostic pop
5492 
5493 	return tunnel;
5494 }
5495 
5496 /**
5497  * Adjust flow RSS workspace if needed.
5498  *
5499  * @param wks
5500  *   Pointer to thread flow work space.
5501  * @param rss_desc
5502  *   Pointer to RSS descriptor.
5503  * @param[in] nrssq_num
5504  *   New RSS queue number.
5505  *
5506  * @return
5507  *   0 on success, -1 otherwise and rte_errno is set.
5508  */
5509 static int
5510 flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
5511 			  struct mlx5_flow_rss_desc *rss_desc,
5512 			  uint32_t nrssq_num)
5513 {
5514 	if (likely(nrssq_num <= wks->rssq_num))
5515 		return 0;
5516 	rss_desc->queue = realloc(rss_desc->queue,
5517 			  sizeof(*rss_desc->queue) * RTE_ALIGN(nrssq_num, 2));
5518 	if (!rss_desc->queue) {
5519 		rte_errno = ENOMEM;
5520 		return -1;
5521 	}
5522 	wks->rssq_num = RTE_ALIGN(nrssq_num, 2);
5523 	return 0;
5524 }
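/*
 * Editor's note - worked example (not part of the driver): with
 * wks->rssq_num == 4 and a flow requesting nrssq_num == 5 queues, the
 * queue array is reallocated to RTE_ALIGN(5, 2) == 6 entries and
 * wks->rssq_num becomes 6, so a later request for up to 6 queues is a
 * no-op.
 */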
5525 
5526 /**
5527  * Create a flow and add it to @p list.
5528  *
5529  * @param dev
5530  *   Pointer to Ethernet device.
5531  * @param list
5532  *   Pointer to a TAILQ flow list. If this parameter NULL,
5533  *   Pointer to a TAILQ flow list. If this parameter is NULL,
5534  *   no list insertion occurs, the flow is just created and
5535  *   it is the caller's responsibility to track the
5536  *   created flow.
5537  *   Flow rule attributes.
5538  * @param[in] items
5539  *   Pattern specification (list terminated by the END pattern item).
5540  * @param[in] actions
5541  *   Associated actions (list terminated by the END action).
5542  * @param[in] external
5543  *   This flow rule is created by a request external to the PMD.
5544  * @param[out] error
5545  *   Perform verbose error reporting if not NULL.
5546  *
5547  * @return
5548  *   A flow index on success, 0 otherwise and rte_errno is set.
5549  */
5550 static uint32_t
5551 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
5552 		 const struct rte_flow_attr *attr,
5553 		 const struct rte_flow_item items[],
5554 		 const struct rte_flow_action original_actions[],
5555 		 bool external, struct rte_flow_error *error)
5556 {
5557 	struct mlx5_priv *priv = dev->data->dev_private;
5558 	struct rte_flow *flow = NULL;
5559 	struct mlx5_flow *dev_flow;
5560 	const struct rte_flow_action_rss *rss = NULL;
5561 	struct mlx5_translated_shared_action
5562 		shared_actions[MLX5_MAX_SHARED_ACTIONS];
5563 	int shared_actions_n = MLX5_MAX_SHARED_ACTIONS;
5564 	union {
5565 		struct mlx5_flow_expand_rss buf;
5566 		uint8_t buffer[2048];
5567 	} expand_buffer;
5568 	union {
5569 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
5570 		uint8_t buffer[2048];
5571 	} actions_rx;
5572 	union {
5573 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
5574 		uint8_t buffer[2048];
5575 	} actions_hairpin_tx;
5576 	union {
5577 		struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
5578 		uint8_t buffer[2048];
5579 	} items_tx;
5580 	struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
5581 	struct mlx5_flow_rss_desc *rss_desc;
5582 	const struct rte_flow_action *p_actions_rx;
5583 	uint32_t i;
5584 	uint32_t idx = 0;
5585 	int hairpin_flow;
5586 	struct rte_flow_attr attr_tx = { .priority = 0 };
5587 	const struct rte_flow_action *actions;
5588 	struct rte_flow_action *translated_actions = NULL;
5589 	struct mlx5_flow_tunnel *tunnel;
5590 	struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
5591 	struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
5592 	struct mlx5_flow_split_info flow_split_info = {
5593 		.external = !!external,
5594 		.skip_scale = 0,
5595 		.flow_idx = 0,
5596 		.prefix_mark = 0,
5597 		.prefix_layers = 0
5598 	};
5599 	int ret;
5600 
5601 	MLX5_ASSERT(wks);
5602 	rss_desc = &wks->rss_desc;
5603 	ret = flow_shared_actions_translate(dev, original_actions,
5604 					    shared_actions,
5605 					    &shared_actions_n,
5606 					    &translated_actions, error);
5607 	if (ret < 0) {
5608 		MLX5_ASSERT(translated_actions == NULL);
5609 		return 0;
5610 	}
5611 	actions = translated_actions ? translated_actions : original_actions;
5612 	p_actions_rx = actions;
5613 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
5614 	ret = flow_drv_validate(dev, attr, items, p_actions_rx,
5615 				external, hairpin_flow, error);
5616 	if (ret < 0)
5617 		goto error_before_hairpin_split;
5618 	flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx);
5619 	if (!flow) {
5620 		rte_errno = ENOMEM;
5621 		goto error_before_hairpin_split;
5622 	}
5623 	if (hairpin_flow > 0) {
5624 		if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
5625 			rte_errno = EINVAL;
5626 			goto error_before_hairpin_split;
5627 		}
5628 		flow_hairpin_split(dev, actions, actions_rx.actions,
5629 				   actions_hairpin_tx.actions, items_tx.items,
5630 				   idx);
5631 		p_actions_rx = actions_rx.actions;
5632 	}
5633 	flow_split_info.flow_idx = idx;
5634 	flow->drv_type = flow_get_drv_type(dev, attr);
5635 	MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
5636 		    flow->drv_type < MLX5_FLOW_TYPE_MAX);
5637 	memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
5638 	/* RSS Action only works on NIC RX domain */
5639 	if (attr->ingress && !attr->transfer)
5640 		rss = flow_get_rss_action(p_actions_rx);
5641 	if (rss) {
5642 		if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
5643 			return 0;
5644 		/*
5645 		 * The following information is required by
5646 		 * mlx5_flow_hashfields_adjust() in advance.
5647 		 */
5648 		rss_desc->level = rss->level;
5649 		/* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
5650 		rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types;
5651 	}
5652 	flow->dev_handles = 0;
5653 	if (rss && rss->types) {
5654 		unsigned int graph_root;
5655 
5656 		graph_root = find_graph_root(items, rss->level);
5657 		ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
5658 					   items, rss->types,
5659 					   mlx5_support_expansion, graph_root);
5660 		MLX5_ASSERT(ret > 0 &&
5661 		       (unsigned int)ret < sizeof(expand_buffer.buffer));
5662 	} else {
5663 		buf->entries = 1;
5664 		buf->entry[0].pattern = (void *)(uintptr_t)items;
5665 	}
5666 	rss_desc->shared_rss = flow_get_shared_rss_action(dev, shared_actions,
5667 						      shared_actions_n);
5668 	for (i = 0; i < buf->entries; ++i) {
5669 		/* Initialize flow split data. */
5670 		flow_split_info.prefix_layers = 0;
5671 		flow_split_info.prefix_mark = 0;
5672 		flow_split_info.skip_scale = 0;
5673 		/*
5674 		 * The splitter may create multiple dev_flows,
5675 		 * depending on configuration. In the simplest
5676 		 * case it just creates unmodified original flow.
5677 		 */
5678 		ret = flow_create_split_outer(dev, flow, attr,
5679 					      buf->entry[i].pattern,
5680 					      p_actions_rx, &flow_split_info,
5681 					      error);
5682 		if (ret < 0)
5683 			goto error;
5684 		if (is_flow_tunnel_steer_rule(dev, attr,
5685 					      buf->entry[i].pattern,
5686 					      p_actions_rx)) {
5687 			ret = flow_tunnel_add_default_miss(dev, flow, attr,
5688 							   p_actions_rx,
5689 							   idx,
5690 							   &default_miss_ctx,
5691 							   error);
5692 			if (ret < 0) {
5693 				mlx5_free(default_miss_ctx.queue);
5694 				goto error;
5695 			}
5696 		}
5697 	}
5698 	/* Create the tx flow. */
5699 	if (hairpin_flow) {
5700 		attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
5701 		attr_tx.ingress = 0;
5702 		attr_tx.egress = 1;
5703 		dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
5704 					 actions_hairpin_tx.actions,
5705 					 idx, error);
5706 		if (!dev_flow)
5707 			goto error;
5708 		dev_flow->flow = flow;
5709 		dev_flow->external = 0;
5710 		SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
5711 			      dev_flow->handle, next);
5712 		ret = flow_drv_translate(dev, dev_flow, &attr_tx,
5713 					 items_tx.items,
5714 					 actions_hairpin_tx.actions, error);
5715 		if (ret < 0)
5716 			goto error;
5717 	}
5718 	/*
5719 	 * Update the metadata register copy table. If extensive
5720 	 * metadata feature is enabled and registers are supported
5721 	 * we might create the extra rte_flow for each unique
5722 	 * MARK/FLAG action ID.
5723 	 *
5724 	 * The table is updated for ingress Flows only, because
5725 	 * the egress Flows belong to a different device and the
5726 	 * copy table should be updated in the peer NIC Rx domain.
5727 	 */
5728 	if (attr->ingress &&
5729 	    (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
5730 		ret = flow_mreg_update_copy_table(dev, flow, actions, error);
5731 		if (ret)
5732 			goto error;
5733 	}
5734 	/*
5735 	 * If the flow is external (from the application) OR the device is
5736 	 * started, OR this is an mreg discover flow, then apply immediately.
5737 	 */
5738 	if (external || dev->data->dev_started ||
5739 	    (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
5740 	     attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
5741 		ret = flow_drv_apply(dev, flow, error);
5742 		if (ret < 0)
5743 			goto error;
5744 	}
5745 	if (list) {
5746 		rte_spinlock_lock(&priv->flow_list_lock);
5747 		ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx,
5748 			     flow, next);
5749 		rte_spinlock_unlock(&priv->flow_list_lock);
5750 	}
5751 	flow_rxq_flags_set(dev, flow);
5752 	rte_free(translated_actions);
5753 	tunnel = flow_tunnel_from_rule(dev, attr, items, actions);
5754 	if (tunnel) {
5755 		flow->tunnel = 1;
5756 		flow->tunnel_id = tunnel->tunnel_id;
5757 		__atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
5758 		mlx5_free(default_miss_ctx.queue);
5759 	}
5760 	mlx5_flow_pop_thread_workspace();
5761 	return idx;
5762 error:
5763 	MLX5_ASSERT(flow);
5764 	ret = rte_errno; /* Save rte_errno before cleanup. */
5765 	flow_mreg_del_copy_action(dev, flow);
5766 	flow_drv_destroy(dev, flow);
5767 	if (rss_desc->shared_rss)
5768 		__atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
5769 			mlx5_ipool_get
5770 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
5771 			rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
5772 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx);
5773 	rte_errno = ret; /* Restore rte_errno. */
5774 	ret = rte_errno;
5775 	rte_errno = ret;
5776 	mlx5_flow_pop_thread_workspace();
5777 error_before_hairpin_split:
5778 	rte_free(translated_actions);
5779 	return 0;
5780 }
5781 
5782 /**
5783  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
5784  * incoming packets to table 1.
5785  *
5786  * Other flow rules, requested for group n, will be created in
5787  * e-switch table n+1.
5788  * A jump action to e-switch group n will actually be created to jump to group n+1.
5789  *
5790  * Used when working in switchdev mode, to utilise advantages of table 1
5791  * and above.
5792  *
5793  * @param dev
5794  *   Pointer to Ethernet device.
5795  *
5796  * @return
5797  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
5798  */
5799 struct rte_flow *
5800 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
5801 {
5802 	const struct rte_flow_attr attr = {
5803 		.group = 0,
5804 		.priority = 0,
5805 		.ingress = 1,
5806 		.egress = 0,
5807 		.transfer = 1,
5808 	};
5809 	const struct rte_flow_item pattern = {
5810 		.type = RTE_FLOW_ITEM_TYPE_END,
5811 	};
5812 	struct rte_flow_action_jump jump = {
5813 		.group = 1,
5814 	};
5815 	const struct rte_flow_action actions[] = {
5816 		{
5817 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
5818 			.conf = &jump,
5819 		},
5820 		{
5821 			.type = RTE_FLOW_ACTION_TYPE_END,
5822 		},
5823 	};
5824 	struct mlx5_priv *priv = dev->data->dev_private;
5825 	struct rte_flow_error error;
5826 
5827 	return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows,
5828 						   &attr, &pattern,
5829 						   actions, false, &error);
5830 }
5831 
5832 /**
5833  * Validate a flow supported by the NIC.
5834  *
5835  * @see rte_flow_validate()
5836  * @see rte_flow_ops
5837  */
5838 int
5839 mlx5_flow_validate(struct rte_eth_dev *dev,
5840 		   const struct rte_flow_attr *attr,
5841 		   const struct rte_flow_item items[],
5842 		   const struct rte_flow_action original_actions[],
5843 		   struct rte_flow_error *error)
5844 {
5845 	int hairpin_flow;
5846 	struct mlx5_translated_shared_action
5847 		shared_actions[MLX5_MAX_SHARED_ACTIONS];
5848 	int shared_actions_n = MLX5_MAX_SHARED_ACTIONS;
5849 	const struct rte_flow_action *actions;
5850 	struct rte_flow_action *translated_actions = NULL;
5851 	int ret = flow_shared_actions_translate(dev, original_actions,
5852 						shared_actions,
5853 						&shared_actions_n,
5854 						&translated_actions, error);
5855 
5856 	if (ret)
5857 		return ret;
5858 	actions = translated_actions ? translated_actions : original_actions;
5859 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
5860 	ret = flow_drv_validate(dev, attr, items, actions,
5861 				true, hairpin_flow, error);
5862 	rte_free(translated_actions);
5863 	return ret;
5864 }
5865 
5866 /**
5867  * Create a flow.
5868  *
5869  * @see rte_flow_create()
5870  * @see rte_flow_ops
5871  */
5872 struct rte_flow *
5873 mlx5_flow_create(struct rte_eth_dev *dev,
5874 		 const struct rte_flow_attr *attr,
5875 		 const struct rte_flow_item items[],
5876 		 const struct rte_flow_action actions[],
5877 		 struct rte_flow_error *error)
5878 {
5879 	struct mlx5_priv *priv = dev->data->dev_private;
5880 
5881 	/*
5882 	 * If the device is not started yet, creating a flow from the
5883 	 * application is not allowed. PMD default flows and traffic
5884 	 * control flows are not affected.
5885 	 */
5886 	if (unlikely(!dev->data->dev_started)) {
5887 		DRV_LOG(DEBUG, "port %u is not started when "
5888 			"inserting a flow", dev->data->port_id);
5889 		rte_flow_error_set(error, ENODEV,
5890 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5891 				   NULL,
5892 				   "port not started");
5893 		return NULL;
5894 	}
5895 
5896 	return (void *)(uintptr_t)flow_list_create(dev, &priv->flows,
5897 				  attr, items, actions, true, error);
5898 }
5899 
5900 /**
5901  * Destroy a flow in a list.
5902  *
5903  * @param dev
5904  *   Pointer to Ethernet device.
5905  * @param list
5906  *   Pointer to the indexed flow list. If this parameter is NULL,
5907  *   the flow is not removed from any list. Note that since the
5908  *   flow is added to an indexed list, the memory the indexed list
5909  *   points to may change as flows are destroyed.
5910  * @param[in] flow_idx
5911  *   Index of flow to destroy.
5912  */
5913 static void
5914 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
5915 		  uint32_t flow_idx)
5916 {
5917 	struct mlx5_priv *priv = dev->data->dev_private;
5918 	struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
5919 					       [MLX5_IPOOL_RTE_FLOW], flow_idx);
5920 
5921 	if (!flow)
5922 		return;
5923 	/*
5924 	 * Update RX queue flags only if port is started, otherwise it is
5925 	 * already clean.
5926 	 */
5927 	if (dev->data->dev_started)
5928 		flow_rxq_flags_trim(dev, flow);
5929 	flow_drv_destroy(dev, flow);
5930 	if (list) {
5931 		rte_spinlock_lock(&priv->flow_list_lock);
5932 		ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list,
5933 			     flow_idx, flow, next);
5934 		rte_spinlock_unlock(&priv->flow_list_lock);
5935 	}
5936 	if (flow->tunnel) {
5937 		struct mlx5_flow_tunnel *tunnel;
5938 
5939 		tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
5940 		RTE_VERIFY(tunnel);
5941 		if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
5942 			mlx5_flow_tunnel_free(dev, tunnel);
5943 	}
5944 	flow_mreg_del_copy_action(dev, flow);
5945 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
5946 }
5947 
5948 /**
5949  * Destroy all flows.
5950  *
5951  * @param dev
5952  *   Pointer to Ethernet device.
5953  * @param list
5954  *   Pointer to the Indexed flow list.
5955  * @param active
5956  *   If flushing is called actively (i.e. before stopping the port).
5957  */
5958 void
5959 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active)
5960 {
5961 	uint32_t num_flushed = 0;
5962 
5963 	while (*list) {
5964 		flow_list_destroy(dev, list, *list);
5965 		num_flushed++;
5966 	}
5967 	if (active) {
5968 		DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
5969 			dev->data->port_id, num_flushed);
5970 	}
5971 }
5972 
5973 /**
5974  * Stop all default actions for flows.
5975  *
5976  * @param dev
5977  *   Pointer to Ethernet device.
5978  */
5979 void
5980 mlx5_flow_stop_default(struct rte_eth_dev *dev)
5981 {
5982 	flow_mreg_del_default_copy_action(dev);
5983 	flow_rxq_flags_clear(dev);
5984 }
5985 
5986 /**
5987  * Start all default actions for flows.
5988  *
5989  * @param dev
5990  *   Pointer to Ethernet device.
5991  * @return
5992  *   0 on success, a negative errno value otherwise and rte_errno is set.
5993  */
5994 int
5995 mlx5_flow_start_default(struct rte_eth_dev *dev)
5996 {
5997 	struct rte_flow_error error;
5998 
5999 	/* Make sure default copy action (reg_c[0] -> reg_b) is created. */
6000 	return flow_mreg_add_default_copy_action(dev, &error);
6001 }
6002 
6003 /**
6004  * Release the thread-specific flow workspace data bound to the key.
6005  */
6006 void
6007 flow_release_workspace(void *data)
6008 {
6009 	struct mlx5_flow_workspace *wks = data;
6010 	struct mlx5_flow_workspace *next;
6011 
6012 	while (wks) {
6013 		next = wks->next;
6014 		free(wks->rss_desc.queue);
6015 		free(wks);
6016 		wks = next;
6017 	}
6018 }
6019 
6020 /**
6021  * Get thread specific current flow workspace.
6022  *
6023  * @return pointer to thread specific flow workspace data, NULL on error.
6024  */
6025 struct mlx5_flow_workspace*
6026 mlx5_flow_get_thread_workspace(void)
6027 {
6028 	struct mlx5_flow_workspace *data;
6029 
6030 	data = mlx5_flow_os_get_specific_workspace();
6031 	MLX5_ASSERT(data && data->inuse);
6032 	if (!data || !data->inuse)
6033 		DRV_LOG(ERR, "flow workspace not initialized.");
6034 	return data;
6035 }
6036 
6037 /**
6038  * Allocate and init new flow workspace.
6039  *
6040  * @return pointer to flow workspace data, NULL on error.
6041  */
6042 static struct mlx5_flow_workspace*
6043 flow_alloc_thread_workspace(void)
6044 {
6045 	struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
6046 
6047 	if (!data) {
6048 		DRV_LOG(ERR, "Failed to allocate flow workspace "
6049 			"memory.");
6050 		return NULL;
6051 	}
6052 	data->rss_desc.queue = calloc(1,
6053 			sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
6054 	if (!data->rss_desc.queue)
6055 		goto err;
6056 	data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
6057 	return data;
6058 err:
6059 	if (data->rss_desc.queue)
6060 		free(data->rss_desc.queue);
6061 	free(data);
6062 	return NULL;
6063 }
6064 
6065 /**
6066  * Get new thread specific flow workspace.
6067  *
6068  * If the current workspace is in use, create a new one and set it as current.
6069  *
6070  * @return pointer to thread specific flow workspace data, NULL on error.
6071  */
6072 static struct mlx5_flow_workspace*
6073 mlx5_flow_push_thread_workspace(void)
6074 {
6075 	struct mlx5_flow_workspace *curr;
6076 	struct mlx5_flow_workspace *data;
6077 
6078 	curr = mlx5_flow_os_get_specific_workspace();
6079 	if (!curr) {
6080 		data = flow_alloc_thread_workspace();
6081 		if (!data)
6082 			return NULL;
6083 	} else if (!curr->inuse) {
6084 		data = curr;
6085 	} else if (curr->next) {
6086 		data = curr->next;
6087 	} else {
6088 		data = flow_alloc_thread_workspace();
6089 		if (!data)
6090 			return NULL;
6091 		curr->next = data;
6092 		data->prev = curr;
6093 	}
6094 	data->inuse = 1;
6095 	data->flow_idx = 0;
6096 	/* Set as current workspace */
6097 	if (mlx5_flow_os_set_specific_workspace(data))
6098 		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
6099 	return data;
6100 }
6101 
6102 /**
6103  * Close current thread specific flow workspace.
6104  *
6105  * If a previous workspace is available, set it as current.
6106  *
6107  * @return pointer to thread specific flow workspace data, NULL on error.
6108  */
6109 static void
6110 mlx5_flow_pop_thread_workspace(void)
6111 {
6112 	struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
6113 
6114 	if (!data)
6115 		return;
6116 	if (!data->inuse) {
6117 		DRV_LOG(ERR, "Failed to close unused flow workspace.");
6118 		return;
6119 	}
6120 	data->inuse = 0;
6121 	if (!data->prev)
6122 		return;
6123 	if (mlx5_flow_os_set_specific_workspace(data->prev))
6124 		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
6125 }
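/*
 * Editor's note - usage sketch (not part of the driver): the push/pop
 * pair brackets a single flow operation on the calling thread, as
 * flow_list_create() does above:
 *
 *	struct mlx5_flow_workspace *wks;
 *
 *	wks = mlx5_flow_push_thread_workspace();
 *	if (!wks)
 *		return 0;
 *	... fill wks->rss_desc and create the device flows ...
 *	mlx5_flow_pop_thread_workspace();
 *
 * A nested push on the same thread reuses the wks->next chain, so the
 * nested operation gets its own workspace and the pop restores the
 * previous one.
 */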
6126 
6127 /**
6128  * Verify the flow list is empty
6129  *
6130  * @param dev
6131  *  Pointer to Ethernet device.
6132  *
6133  * @return the number of flows not released.
6134  */
6135 int
6136 mlx5_flow_verify(struct rte_eth_dev *dev)
6137 {
6138 	struct mlx5_priv *priv = dev->data->dev_private;
6139 	struct rte_flow *flow;
6140 	uint32_t idx;
6141 	int ret = 0;
6142 
6143 	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx,
6144 		      flow, next) {
6145 		DRV_LOG(DEBUG, "port %u flow %p still referenced",
6146 			dev->data->port_id, (void *)flow);
6147 		++ret;
6148 	}
6149 	return ret;
6150 }
6151 
6152 /**
6153  * Enable default hairpin egress flow.
6154  *
6155  * @param dev
6156  *   Pointer to Ethernet device.
6157  * @param queue
6158  *   The queue index.
6159  *
6160  * @return
6161  *   0 on success, a negative errno value otherwise and rte_errno is set.
6162  */
6163 int
6164 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
6165 			    uint32_t queue)
6166 {
6167 	struct mlx5_priv *priv = dev->data->dev_private;
6168 	const struct rte_flow_attr attr = {
6169 		.egress = 1,
6170 		.priority = 0,
6171 	};
6172 	struct mlx5_rte_flow_item_tx_queue queue_spec = {
6173 		.queue = queue,
6174 	};
6175 	struct mlx5_rte_flow_item_tx_queue queue_mask = {
6176 		.queue = UINT32_MAX,
6177 	};
6178 	struct rte_flow_item items[] = {
6179 		{
6180 			.type = (enum rte_flow_item_type)
6181 				MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
6182 			.spec = &queue_spec,
6183 			.last = NULL,
6184 			.mask = &queue_mask,
6185 		},
6186 		{
6187 			.type = RTE_FLOW_ITEM_TYPE_END,
6188 		},
6189 	};
6190 	struct rte_flow_action_jump jump = {
6191 		.group = MLX5_HAIRPIN_TX_TABLE,
6192 	};
6193 	struct rte_flow_action actions[2];
6194 	uint32_t flow_idx;
6195 	struct rte_flow_error error;
6196 
6197 	actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
6198 	actions[0].conf = &jump;
6199 	actions[1].type = RTE_FLOW_ACTION_TYPE_END;
6200 	flow_idx = flow_list_create(dev, &priv->ctrl_flows,
6201 				&attr, items, actions, false, &error);
6202 	if (!flow_idx) {
6203 		DRV_LOG(DEBUG,
6204 			"Failed to create ctrl flow: rte_errno(%d),"
6205 			" type(%d), message(%s)",
6206 			rte_errno, error.type,
6207 			error.message ? error.message : " (no stated reason)");
6208 		return -rte_errno;
6209 	}
6210 	return 0;
6211 }
6212 
6213 /**
6214  * Enable a control flow configured from the control plane.
6215  *
6216  * @param dev
6217  *   Pointer to Ethernet device.
6218  * @param eth_spec
6219  *   An Ethernet flow spec to apply.
6220  * @param eth_mask
6221  *   An Ethernet flow mask to apply.
6222  * @param vlan_spec
6223  *   A VLAN flow spec to apply.
6224  * @param vlan_mask
6225  *   A VLAN flow mask to apply.
6226  *
6227  * @return
6228  *   0 on success, a negative errno value otherwise and rte_errno is set.
6229  */
6230 int
6231 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
6232 		    struct rte_flow_item_eth *eth_spec,
6233 		    struct rte_flow_item_eth *eth_mask,
6234 		    struct rte_flow_item_vlan *vlan_spec,
6235 		    struct rte_flow_item_vlan *vlan_mask)
6236 {
6237 	struct mlx5_priv *priv = dev->data->dev_private;
6238 	const struct rte_flow_attr attr = {
6239 		.ingress = 1,
6240 		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
6241 	};
6242 	struct rte_flow_item items[] = {
6243 		{
6244 			.type = RTE_FLOW_ITEM_TYPE_ETH,
6245 			.spec = eth_spec,
6246 			.last = NULL,
6247 			.mask = eth_mask,
6248 		},
6249 		{
6250 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
6251 					      RTE_FLOW_ITEM_TYPE_END,
6252 			.spec = vlan_spec,
6253 			.last = NULL,
6254 			.mask = vlan_mask,
6255 		},
6256 		{
6257 			.type = RTE_FLOW_ITEM_TYPE_END,
6258 		},
6259 	};
6260 	uint16_t queue[priv->reta_idx_n];
6261 	struct rte_flow_action_rss action_rss = {
6262 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
6263 		.level = 0,
6264 		.types = priv->rss_conf.rss_hf,
6265 		.key_len = priv->rss_conf.rss_key_len,
6266 		.queue_num = priv->reta_idx_n,
6267 		.key = priv->rss_conf.rss_key,
6268 		.queue = queue,
6269 	};
6270 	struct rte_flow_action actions[] = {
6271 		{
6272 			.type = RTE_FLOW_ACTION_TYPE_RSS,
6273 			.conf = &action_rss,
6274 		},
6275 		{
6276 			.type = RTE_FLOW_ACTION_TYPE_END,
6277 		},
6278 	};
6279 	uint32_t flow_idx;
6280 	struct rte_flow_error error;
6281 	unsigned int i;
6282 
6283 	if (!priv->reta_idx_n || !priv->rxqs_n) {
6284 		return 0;
6285 	}
6286 	if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
6287 		action_rss.types = 0;
6288 	for (i = 0; i != priv->reta_idx_n; ++i)
6289 		queue[i] = (*priv->reta_idx)[i];
6290 	flow_idx = flow_list_create(dev, &priv->ctrl_flows,
6291 				&attr, items, actions, false, &error);
6292 	if (!flow_idx)
6293 		return -rte_errno;
6294 	return 0;
6295 }
6296 
6297 /**
6298  * Enable a control flow configured from the control plane.
6299  *
6300  * @param dev
6301  *   Pointer to Ethernet device.
6302  * @param eth_spec
6303  *   An Ethernet flow spec to apply.
6304  * @param eth_mask
6305  *   An Ethernet flow mask to apply.
6306  *
6307  * @return
6308  *   0 on success, a negative errno value otherwise and rte_errno is set.
6309  */
6310 int
6311 mlx5_ctrl_flow(struct rte_eth_dev *dev,
6312 	       struct rte_flow_item_eth *eth_spec,
6313 	       struct rte_flow_item_eth *eth_mask)
6314 {
6315 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
6316 }
6317 
6318 /**
6319  * Create a default miss flow rule matching LACP traffic.
6320  *
6321  * @param dev
6322  *   Pointer to Ethernet device.
6325  *
6326  * @return
6327  *   0 on success, a negative errno value otherwise and rte_errno is set.
6328  */
6329 int
6330 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
6331 {
6332 	struct mlx5_priv *priv = dev->data->dev_private;
6333 	/*
6334 	 * The LACP matching is done only by ether type, since using
6335 	 * a multicast dst mac causes the kernel to give this flow low priority.
6336 	 */
6337 	static const struct rte_flow_item_eth lacp_spec = {
6338 		.type = RTE_BE16(0x8809),
6339 	};
6340 	static const struct rte_flow_item_eth lacp_mask = {
6341 		.type = 0xffff,
6342 	};
6343 	const struct rte_flow_attr attr = {
6344 		.ingress = 1,
6345 	};
6346 	struct rte_flow_item items[] = {
6347 		{
6348 			.type = RTE_FLOW_ITEM_TYPE_ETH,
6349 			.spec = &lacp_spec,
6350 			.mask = &lacp_mask,
6351 		},
6352 		{
6353 			.type = RTE_FLOW_ITEM_TYPE_END,
6354 		},
6355 	};
6356 	struct rte_flow_action actions[] = {
6357 		{
6358 			.type = (enum rte_flow_action_type)
6359 				MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
6360 		},
6361 		{
6362 			.type = RTE_FLOW_ACTION_TYPE_END,
6363 		},
6364 	};
6365 	struct rte_flow_error error;
6366 	uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows,
6367 				&attr, items, actions, false, &error);
6368 
6369 	if (!flow_idx)
6370 		return -rte_errno;
6371 	return 0;
6372 }
6373 
6374 /**
6375  * Destroy a flow.
6376  *
6377  * @see rte_flow_destroy()
6378  * @see rte_flow_ops
6379  */
6380 int
6381 mlx5_flow_destroy(struct rte_eth_dev *dev,
6382 		  struct rte_flow *flow,
6383 		  struct rte_flow_error *error __rte_unused)
6384 {
6385 	struct mlx5_priv *priv = dev->data->dev_private;
6386 
6387 	flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow);
6388 	return 0;
6389 }
6390 
6391 /**
6392  * Destroy all flows.
6393  *
6394  * @see rte_flow_flush()
6395  * @see rte_flow_ops
6396  */
6397 int
6398 mlx5_flow_flush(struct rte_eth_dev *dev,
6399 		struct rte_flow_error *error __rte_unused)
6400 {
6401 	struct mlx5_priv *priv = dev->data->dev_private;
6402 
6403 	mlx5_flow_list_flush(dev, &priv->flows, false);
6404 	return 0;
6405 }
6406 
6407 /**
6408  * Isolated mode.
6409  *
6410  * @see rte_flow_isolate()
6411  * @see rte_flow_ops
6412  */
6413 int
6414 mlx5_flow_isolate(struct rte_eth_dev *dev,
6415 		  int enable,
6416 		  struct rte_flow_error *error)
6417 {
6418 	struct mlx5_priv *priv = dev->data->dev_private;
6419 
6420 	if (dev->data->dev_started) {
6421 		rte_flow_error_set(error, EBUSY,
6422 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6423 				   NULL,
6424 				   "port must be stopped first");
6425 		return -rte_errno;
6426 	}
6427 	priv->isolated = !!enable;
6428 	if (enable)
6429 		dev->dev_ops = &mlx5_dev_ops_isolate;
6430 	else
6431 		dev->dev_ops = &mlx5_dev_ops;
6432 
6433 	dev->rx_descriptor_status = mlx5_rx_descriptor_status;
6434 	dev->tx_descriptor_status = mlx5_tx_descriptor_status;
6435 
6436 	return 0;
6437 }
6438 
6439 /**
6440  * Query a flow.
6441  *
6442  * @see rte_flow_query()
6443  * @see rte_flow_ops
6444  */
6445 static int
6446 flow_drv_query(struct rte_eth_dev *dev,
6447 	       uint32_t flow_idx,
6448 	       const struct rte_flow_action *actions,
6449 	       void *data,
6450 	       struct rte_flow_error *error)
6451 {
6452 	struct mlx5_priv *priv = dev->data->dev_private;
6453 	const struct mlx5_flow_driver_ops *fops;
6454 	struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
6455 					       [MLX5_IPOOL_RTE_FLOW],
6456 					       flow_idx);
6457 	enum mlx5_flow_drv_type ftype;
6458 
6459 	if (!flow) {
6460 		return rte_flow_error_set(error, ENOENT,
6461 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6462 			  NULL,
6463 			  "invalid flow handle");
6464 	}
6465 	ftype = flow->drv_type;
6466 	MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
6467 	fops = flow_get_drv_ops(ftype);
6468 
6469 	return fops->query(dev, flow, actions, data, error);
6470 }
6471 
6472 /**
6473  * Query a flow.
6474  *
6475  * @see rte_flow_query()
6476  * @see rte_flow_ops
6477  */
6478 int
6479 mlx5_flow_query(struct rte_eth_dev *dev,
6480 		struct rte_flow *flow,
6481 		const struct rte_flow_action *actions,
6482 		void *data,
6483 		struct rte_flow_error *error)
6484 {
6485 	int ret;
6486 
6487 	ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
6488 			     error);
6489 	if (ret < 0)
6490 		return ret;
6491 	return 0;
6492 }
6493 
6494 /**
6495  * Manage filter operations.
6496  *
6497  * @param dev
6498  *   Pointer to Ethernet device structure.
6499  * @param filter_type
6500  *   Filter type.
6501  * @param filter_op
6502  *   Operation to perform.
6503  * @param arg
6504  *   Pointer to operation-specific structure.
6505  *
6506  * @return
6507  *   0 on success, a negative errno value otherwise and rte_errno is set.
6508  */
6509 int
6510 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
6511 		     enum rte_filter_type filter_type,
6512 		     enum rte_filter_op filter_op,
6513 		     void *arg)
6514 {
6515 	switch (filter_type) {
6516 	case RTE_ETH_FILTER_GENERIC:
6517 		if (filter_op != RTE_ETH_FILTER_GET) {
6518 			rte_errno = EINVAL;
6519 			return -rte_errno;
6520 		}
6521 		*(const void **)arg = &mlx5_flow_ops;
6522 		return 0;
6523 	default:
6524 		DRV_LOG(ERR, "port %u filter type (%d) not supported",
6525 			dev->data->port_id, filter_type);
6526 		rte_errno = ENOTSUP;
6527 		return -rte_errno;
6528 	}
6529 	return 0;
6530 }
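/*
 * Editor's note - hypothetical caller sketch (not part of the driver):
 * the ethdev rte_flow layer of this DPDK release obtains the flow ops
 * through the generic filter type, roughly as follows:
 *
 *	const struct rte_flow_ops *ops = NULL;
 *
 *	if (mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
 *				 RTE_ETH_FILTER_GET, &ops) == 0)
 *		... ops now points to mlx5_flow_ops ...
 *
 * Any other filter type is rejected with ENOTSUP and any operation other
 * than RTE_ETH_FILTER_GET with EINVAL, as implemented above.
 */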
6531 
6532 /**
6533  * Create the needed meter and suffix tables.
6534  *
6535  * @param[in] dev
6536  *   Pointer to Ethernet device.
6537  * @param[in] fm
6538  *   Pointer to the flow meter.
6539  *
6540  * @return
6541  *   Pointer to table set on success, NULL otherwise.
6542  */
6543 struct mlx5_meter_domains_infos *
6544 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
6545 			  const struct mlx5_flow_meter *fm)
6546 {
6547 	const struct mlx5_flow_driver_ops *fops;
6548 
6549 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6550 	return fops->create_mtr_tbls(dev, fm);
6551 }
6552 
6553 /**
6554  * Destroy the meter table set.
6555  *
6556  * @param[in] dev
6557  *   Pointer to Ethernet device.
6558  * @param[in] tbl
6559  *   Pointer to the meter table set.
6560  *
6561  * @return
6562  *   0 on success.
6563  */
6564 int
6565 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
6566 			   struct mlx5_meter_domains_infos *tbls)
6567 {
6568 	const struct mlx5_flow_driver_ops *fops;
6569 
6570 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6571 	return fops->destroy_mtr_tbls(dev, tbls);
6572 }
6573 
6574 /**
6575  * Create policer rules.
6576  *
6577  * @param[in] dev
6578  *   Pointer to Ethernet device.
6579  * @param[in] fm
6580  *   Pointer to flow meter structure.
6581  * @param[in] attr
6582  *   Pointer to flow attributes.
6583  *
6584  * @return
6585  *   0 on success, -1 otherwise.
6586  */
6587 int
6588 mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
6589 			       struct mlx5_flow_meter *fm,
6590 			       const struct rte_flow_attr *attr)
6591 {
6592 	const struct mlx5_flow_driver_ops *fops;
6593 
6594 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6595 	return fops->create_policer_rules(dev, fm, attr);
6596 }
6597 
6598 /**
6599  * Destroy policer rules.
6600  *
6601  * @param[in] fm
6602  *   Pointer to flow meter structure.
6603  * @param[in] attr
6604  *   Pointer to flow attributes.
6605  *
6606  * @return
6607  *   0 on success, -1 otherwise.
6608  */
6609 int
6610 mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
6611 				struct mlx5_flow_meter *fm,
6612 				const struct rte_flow_attr *attr)
6613 {
6614 	const struct mlx5_flow_driver_ops *fops;
6615 
6616 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6617 	return fops->destroy_policer_rules(dev, fm, attr);
6618 }
6619 
6620 /**
6621  * Allocate a counter.
6622  *
6623  * @param[in] dev
6624  *   Pointer to Ethernet device structure.
6625  *
6626  * @return
6627  *   Index of the allocated counter on success, 0 otherwise.
6628  */
6629 uint32_t
6630 mlx5_counter_alloc(struct rte_eth_dev *dev)
6631 {
6632 	const struct mlx5_flow_driver_ops *fops;
6633 	struct rte_flow_attr attr = { .transfer = 0 };
6634 
6635 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6636 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6637 		return fops->counter_alloc(dev);
6638 	}
6639 	DRV_LOG(ERR,
6640 		"port %u counter allocate is not supported.",
6641 		 dev->data->port_id);
6642 	return 0;
6643 }
6644 
6645 /**
6646  * Free a counter.
6647  *
6648  * @param[in] dev
6649  *   Pointer to Ethernet device structure.
6650  * @param[in] cnt
6651  *   Index of the counter to be freed.
6652  */
6653 void
6654 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
6655 {
6656 	const struct mlx5_flow_driver_ops *fops;
6657 	struct rte_flow_attr attr = { .transfer = 0 };
6658 
6659 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6660 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6661 		fops->counter_free(dev, cnt);
6662 		return;
6663 	}
6664 	DRV_LOG(ERR,
6665 		"port %u counter free is not supported.",
6666 		 dev->data->port_id);
6667 }
6668 
6669 /**
6670  * Query counter statistics.
6671  *
6672  * @param[in] dev
6673  *   Pointer to Ethernet device structure.
6674  * @param[in] cnt
6675  *   Index of the counter to query.
6676  * @param[in] clear
6677  *   Set to clear counter statistics.
6678  * @param[out] pkts
6679  *   Where to store the number of packets that hit the counter.
6680  * @param[out] bytes
6681  *   Where to store the number of bytes that hit the counter.
6682  *
6683  * @return
6684  *   0 on success, a negative errno value otherwise.
6685  */
6686 int
6687 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
6688 		   bool clear, uint64_t *pkts, uint64_t *bytes)
6689 {
6690 	const struct mlx5_flow_driver_ops *fops;
6691 	struct rte_flow_attr attr = { .transfer = 0 };
6692 
6693 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6694 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6695 		return fops->counter_query(dev, cnt, clear, pkts, bytes);
6696 	}
6697 	DRV_LOG(ERR,
6698 		"port %u counter query is not supported.",
6699 		 dev->data->port_id);
6700 	return -ENOTSUP;
6701 }
6702 
6703 /**
6704  * Allocate a new memory for the counter values wrapped by all the needed
6705  * management.
6706  *
6707  * @param[in] sh
6708  *   Pointer to mlx5_dev_ctx_shared object.
6709  *
6710  * @return
6711  *   0 on success, a negative errno value otherwise.
6712  */
6713 static int
6714 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
6715 {
6716 	struct mlx5_devx_mkey_attr mkey_attr;
6717 	struct mlx5_counter_stats_mem_mng *mem_mng;
6718 	volatile struct flow_counter_stats *raw_data;
6719 	int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
6720 	int size = (sizeof(struct flow_counter_stats) *
6721 			MLX5_COUNTERS_PER_POOL +
6722 			sizeof(struct mlx5_counter_stats_raw)) * raws_n +
6723 			sizeof(struct mlx5_counter_stats_mem_mng);
6724 	size_t pgsize = rte_mem_page_size();
6725 	uint8_t *mem;
6726 	int i;
6727 
6728 	if (pgsize == (size_t)-1) {
6729 		DRV_LOG(ERR, "Failed to get mem page size");
6730 		rte_errno = ENOMEM;
6731 		return -ENOMEM;
6732 	}
6733 	mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
6734 	if (!mem) {
6735 		rte_errno = ENOMEM;
6736 		return -ENOMEM;
6737 	}
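	/*
	 * Layout of the single allocation of @size bytes:
	 *   [ raw counter statistics | raws descriptor array | mem_mng ]
	 * The management structure lives at the very end of the buffer;
	 * only the raw statistics area at the start is registered as UMEM
	 * and exposed to the device through the mkey created below.
	 */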
6738 	mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
6739 	size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
6740 	mem_mng->umem = mlx5_os_umem_reg(sh->ctx, mem, size,
6741 						 IBV_ACCESS_LOCAL_WRITE);
6742 	if (!mem_mng->umem) {
6743 		rte_errno = errno;
6744 		mlx5_free(mem);
6745 		return -rte_errno;
6746 	}
6747 	mkey_attr.addr = (uintptr_t)mem;
6748 	mkey_attr.size = size;
6749 	mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem);
6750 	mkey_attr.pd = sh->pdn;
6751 	mkey_attr.log_entity_size = 0;
6752 	mkey_attr.pg_access = 0;
6753 	mkey_attr.klm_array = NULL;
6754 	mkey_attr.klm_num = 0;
6755 	mkey_attr.relaxed_ordering_write = sh->cmng.relaxed_ordering_write;
6756 	mkey_attr.relaxed_ordering_read = sh->cmng.relaxed_ordering_read;
6757 	mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
6758 	if (!mem_mng->dm) {
6759 		mlx5_os_umem_dereg(mem_mng->umem);
6760 		rte_errno = errno;
6761 		mlx5_free(mem);
6762 		return -rte_errno;
6763 	}
6764 	mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
6765 	raw_data = (volatile struct flow_counter_stats *)mem;
6766 	for (i = 0; i < raws_n; ++i) {
6767 		mem_mng->raws[i].mem_mng = mem_mng;
6768 		mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
6769 	}
6770 	for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
6771 		LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
6772 				 mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
6773 				 next);
6774 	LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
6775 	sh->cmng.mem_mng = mem_mng;
6776 	return 0;
6777 }
6778 
6779 /**
6780  * Set the statistic memory to the new counter pool.
6781  *
6782  * @param[in] sh
6783  *   Pointer to mlx5_dev_ctx_shared object.
6784  * @param[in] pool
6785  *   Pointer to the pool to set the statistic memory.
6786  *
6787  * @return
6788  *   0 on success, a negative errno value otherwise.
6789  */
6790 static int
6791 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
6792 			       struct mlx5_flow_counter_pool *pool)
6793 {
6794 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
6795 	/* Resize the statistic memory once it is used up. */
6796 	if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
6797 	    mlx5_flow_create_counter_stat_mem_mng(sh)) {
6798 		DRV_LOG(ERR, "Cannot resize counter stat mem.");
6799 		return -1;
6800 	}
6801 	rte_spinlock_lock(&pool->sl);
6802 	pool->raw = cmng->mem_mng->raws + pool->index %
6803 		    MLX5_CNT_CONTAINER_RESIZE;
6804 	rte_spinlock_unlock(&pool->sl);
6805 	pool->raw_hw = NULL;
6806 	return 0;
6807 }
6808 
6809 #define MLX5_POOL_QUERY_FREQ_US 1000000
6810 
6811 /**
6812  * Set the periodic procedure for triggering asynchronous batch queries for all
6813  * the counter pools.
6814  *
6815  * @param[in] sh
6816  *   Pointer to mlx5_dev_ctx_shared object.
6817  */
6818 void
6819 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
6820 {
6821 	uint32_t pools_n, us;
6822 
6823 	pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
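	/*
	 * One pool is queried per alarm invocation, so splitting the
	 * 1 second budget (MLX5_POOL_QUERY_FREQ_US) evenly means every
	 * valid pool is queried roughly once per second, e.g. 4 pools
	 * give one alarm every 250000 us.
	 */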
6824 	us = MLX5_POOL_QUERY_FREQ_US / pools_n;
6825 	DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
6826 	if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
6827 		sh->cmng.query_thread_on = 0;
6828 		DRV_LOG(ERR, "Cannot reinitialize query alarm");
6829 	} else {
6830 		sh->cmng.query_thread_on = 1;
6831 	}
6832 }
6833 
6834 /**
6835  * The periodic procedure for triggering asynchronous batch queries for all the
6836  * counter pools. This function runs in the EAL alarm (host thread) context.
6837  *
6838  * @param[in] arg
6839  *   The parameter for the alarm process.
6840  */
6841 void
6842 mlx5_flow_query_alarm(void *arg)
6843 {
6844 	struct mlx5_dev_ctx_shared *sh = arg;
6845 	int ret;
6846 	uint16_t pool_index = sh->cmng.pool_index;
6847 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
6848 	struct mlx5_flow_counter_pool *pool;
6849 	uint16_t n_valid;
6850 
6851 	if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
6852 		goto set_alarm;
6853 	rte_spinlock_lock(&cmng->pool_update_sl);
6854 	pool = cmng->pools[pool_index];
6855 	n_valid = cmng->n_valid;
6856 	rte_spinlock_unlock(&cmng->pool_update_sl);
6857 	/* Set the statistic memory to the newly created pool. */
6858 	if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool)))
6859 		goto set_alarm;
6860 	if (pool->raw_hw)
6861 		/* There is a pool query in progress. */
6862 		goto set_alarm;
6863 	pool->raw_hw =
6864 		LIST_FIRST(&sh->cmng.free_stat_raws);
6865 	if (!pool->raw_hw)
6866 		/* No free counter statistics raw memory. */
6867 		goto set_alarm;
6868 	/*
6869 	 * Identify the counters released between query trigger and query
6870 	 * handle more efficiently. A counter released in this gap period
6871 	 * should wait for a new round of query as the newly arrived packets
6872 	 * will not be taken into account.
6873 	 */
6874 	pool->query_gen++;
6875 	ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
6876 					       MLX5_COUNTERS_PER_POOL,
6877 					       NULL, NULL,
6878 					       pool->raw_hw->mem_mng->dm->id,
6879 					       (void *)(uintptr_t)
6880 					       pool->raw_hw->data,
6881 					       sh->devx_comp,
6882 					       (uint64_t)(uintptr_t)pool);
6883 	if (ret) {
6884 		DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
6885 			" %d", pool->min_dcs->id);
6886 		pool->raw_hw = NULL;
6887 		goto set_alarm;
6888 	}
6889 	LIST_REMOVE(pool->raw_hw, next);
6890 	sh->cmng.pending_queries++;
6891 	pool_index++;
6892 	if (pool_index >= n_valid)
6893 		pool_index = 0;
6894 set_alarm:
6895 	sh->cmng.pool_index = pool_index;
6896 	mlx5_set_query_alarm(sh);
6897 }
6898 
6899 /**
6900  * Check the counter pool for newly aged flows and raise the age event.
6901  *
6902  * @param[in] sh
6903  *   Pointer to mlx5_dev_ctx_shared object.
6904  * @param[in] pool
6905  *   Pointer to the current counter pool.
6906  */
6907 static void
6908 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
6909 		   struct mlx5_flow_counter_pool *pool)
6910 {
6911 	struct mlx5_priv *priv;
6912 	struct mlx5_flow_counter *cnt;
6913 	struct mlx5_age_info *age_info;
6914 	struct mlx5_age_param *age_param;
6915 	struct mlx5_counter_stats_raw *cur = pool->raw_hw;
6916 	struct mlx5_counter_stats_raw *prev = pool->raw;
6917 	const uint64_t curr_time = MLX5_CURR_TIME_SEC;
6918 	const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
6919 	uint16_t expected = AGE_CANDIDATE;
6920 	uint32_t i;
6921 
6922 	pool->time_of_last_age_check = curr_time;
6923 	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
6924 		cnt = MLX5_POOL_GET_CNT(pool, i);
6925 		age_param = MLX5_CNT_TO_AGE(cnt);
6926 		if (__atomic_load_n(&age_param->state,
6927 				    __ATOMIC_RELAXED) != AGE_CANDIDATE)
6928 			continue;
6929 		if (cur->data[i].hits != prev->data[i].hits) {
6930 			__atomic_store_n(&age_param->sec_since_last_hit, 0,
6931 					 __ATOMIC_RELAXED);
6932 			continue;
6933 		}
6934 		if (__atomic_add_fetch(&age_param->sec_since_last_hit,
6935 				       time_delta,
6936 				       __ATOMIC_RELAXED) <= age_param->timeout)
6937 			continue;
6938 		/*
6939 		 * Hold the lock first; otherwise, if the release
6940 		 * happens between setting the AGE_TMOUT state and
6941 		 * the tailq operation, the release procedure may
6942 		 * delete a non-existent tailq node.
6943 		 */
6944 		priv = rte_eth_devices[age_param->port_id].data->dev_private;
6945 		age_info = GET_PORT_AGE_INFO(priv);
6946 		rte_spinlock_lock(&age_info->aged_sl);
6947 		if (__atomic_compare_exchange_n(&age_param->state, &expected,
6948 						AGE_TMOUT, false,
6949 						__ATOMIC_RELAXED,
6950 						__ATOMIC_RELAXED)) {
6951 			TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
6952 			MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
6953 		}
6954 		rte_spinlock_unlock(&age_info->aged_sl);
6955 	}
6956 	mlx5_age_event_prepare(sh);
6957 }
6958 
6959 /**
6960  * Handler for the HW response with ready values from an asynchronous batch
6961  * query. This function runs in the host thread context.
6962  *
6963  * @param[in] sh
6964  *   The pointer to the shared device context.
6965  * @param[in] async_id
6966  *   The Devx async ID.
6967  * @param[in] status
6968  *   The status of the completion.
6969  */
6970 void
6971 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
6972 				  uint64_t async_id, int status)
6973 {
6974 	struct mlx5_flow_counter_pool *pool =
6975 		(struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
6976 	struct mlx5_counter_stats_raw *raw_to_free;
6977 	uint8_t query_gen = pool->query_gen ^ 1;
6978 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
6979 	enum mlx5_counter_type cnt_type =
6980 		pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
6981 				MLX5_COUNTER_TYPE_ORIGIN;
6982 
6983 	if (unlikely(status)) {
6984 		raw_to_free = pool->raw_hw;
6985 	} else {
6986 		raw_to_free = pool->raw;
6987 		if (pool->is_aged)
6988 			mlx5_flow_aging_check(sh, pool);
6989 		rte_spinlock_lock(&pool->sl);
6990 		pool->raw = pool->raw_hw;
6991 		rte_spinlock_unlock(&pool->sl);
6992 		/* Be sure the new raw counters data is updated in memory. */
6993 		rte_io_wmb();
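		/*
		 * Counters released before this query was triggered (the
		 * previous generation) now have their final statistics in
		 * the raw data, so they can be returned to the global free
		 * list of their counter type.
		 */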
6994 		if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
6995 			rte_spinlock_lock(&cmng->csl[cnt_type]);
6996 			TAILQ_CONCAT(&cmng->counters[cnt_type],
6997 				     &pool->counters[query_gen], next);
6998 			rte_spinlock_unlock(&cmng->csl[cnt_type]);
6999 		}
7000 	}
7001 	LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
7002 	pool->raw_hw = NULL;
7003 	sh->cmng.pending_queries--;
7004 }
7005 
7006 static int
7007 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
7008 		    const struct flow_grp_info *grp_info,
7009 		    struct rte_flow_error *error)
7010 {
7011 	if (grp_info->transfer && grp_info->external &&
7012 	    grp_info->fdb_def_rule) {
7013 		if (group == UINT32_MAX)
7014 			return rte_flow_error_set
7015 						(error, EINVAL,
7016 						 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
7017 						 NULL,
7018 						 "group index not supported");
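		/*
		 * Shift external transfer groups by one: FDB table 0 is
		 * taken by the default transfer rule, so application group
		 * N lands in table N + 1.
		 */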
7019 		*table = group + 1;
7020 	} else {
7021 		*table = group;
7022 	}
7023 	DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
7024 	return 0;
7025 }
7026 
7027 /**
7028  * Translate the rte_flow group index to HW table value.
7029  *
7030  * If tunnel offload is disabled, all group ids are converted to flow table
7031  * ids using the standard method.
7032  * If tunnel offload is enabled, a group id can be converted using either the
7033  * standard or the tunnel conversion method. The conversion method
7034  * selection depends on the flags in the `grp_info` parameter:
7035  * - Internal (grp_info.external == 0) group conversion uses the
7036  *   standard method.
7037  * - Group ids in the JUMP action are converted with the tunnel method.
7038  * - Conversion of the group id in the rule attribute depends on the rule
7039  *   type and the group id value:
7040  *   ** non-zero group attributes are converted with the tunnel method
7041  *   ** a zero group attribute in a non-tunnel rule is converted using the
7042  *      standard method - there is only one root table
7043  *   ** a zero group attribute in a tunnel steer rule is converted with the
7044  *      standard method - single root table
7045  *   ** a zero group attribute in a tunnel match rule is a special OvS
7046  *      case: that value is used for portability reasons. That group
7047  *      id is converted with the tunnel conversion method.
7048  *
7049  * @param[in] dev
7050  *   Port device
7051  * @param[in] tunnel
7052  *   PMD tunnel offload object
7053  * @param[in] group
7054  *   rte_flow group index value.
7055  * @param[out] table
7056  *   HW table value.
7057  * @param[in] grp_info
7058  *   flags used for conversion
7059  * @param[out] error
7060  *   Pointer to error structure.
7061  *
7062  * @return
7063  *   0 on success, a negative errno value otherwise and rte_errno is set.
7064  */
7065 int
7066 mlx5_flow_group_to_table(struct rte_eth_dev *dev,
7067 			 const struct mlx5_flow_tunnel *tunnel,
7068 			 uint32_t group, uint32_t *table,
7069 			 const struct flow_grp_info *grp_info,
7070 			 struct rte_flow_error *error)
7071 {
7072 	int ret;
7073 	bool standard_translation;
7074 
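	/*
	 * Spread external group ids by MLX5_FLOW_TABLE_FACTOR so that the
	 * PMD keeps spare table ids between application groups for the
	 * tables it creates internally.
	 */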
7075 	if (!grp_info->skip_scale && grp_info->external &&
7076 	    group < MLX5_MAX_TABLES_EXTERNAL)
7077 		group *= MLX5_FLOW_TABLE_FACTOR;
7078 	if (is_tunnel_offload_active(dev)) {
7079 		standard_translation = !grp_info->external ||
7080 					grp_info->std_tbl_fix;
7081 	} else {
7082 		standard_translation = true;
7083 	}
7084 	DRV_LOG(DEBUG,
7085 		"port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
7086 		dev->data->port_id, group, grp_info->transfer,
7087 		grp_info->external, grp_info->fdb_def_rule,
7088 		standard_translation ? "STANDARD" : "TUNNEL");
7089 	if (standard_translation)
7090 		ret = flow_group_to_table(dev->data->port_id, group, table,
7091 					  grp_info, error);
7092 	else
7093 		ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
7094 						      table, error);
7095 
7096 	return ret;
7097 }
7098 
7099 /**
7100  * Discover availability of metadata reg_c's.
7101  *
7102  * Iteratively use test flows to check availability.
7103  *
7104  * @param[in] dev
7105  *   Pointer to the Ethernet device structure.
7106  *
7107  * @return
7108  *   0 on success, a negative errno value otherwise and rte_errno is set.
7109  */
7110 int
7111 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
7112 {
7113 	struct mlx5_priv *priv = dev->data->dev_private;
7114 	struct mlx5_dev_config *config = &priv->config;
7115 	enum modify_reg idx;
7116 	int n = 0;
7117 
7118 	/* reg_c[0] and reg_c[1] are reserved. */
7119 	config->flow_mreg_c[n++] = REG_C_0;
7120 	config->flow_mreg_c[n++] = REG_C_1;
7121 	/* Discover availability of other reg_c's. */
7122 	for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
7123 		struct rte_flow_attr attr = {
7124 			.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
7125 			.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7126 			.ingress = 1,
7127 		};
7128 		struct rte_flow_item items[] = {
7129 			[0] = {
7130 				.type = RTE_FLOW_ITEM_TYPE_END,
7131 			},
7132 		};
7133 		struct rte_flow_action actions[] = {
7134 			[0] = {
7135 				.type = (enum rte_flow_action_type)
7136 					MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
7137 				.conf = &(struct mlx5_flow_action_copy_mreg){
7138 					.src = REG_C_1,
7139 					.dst = idx,
7140 				},
7141 			},
7142 			[1] = {
7143 				.type = RTE_FLOW_ACTION_TYPE_JUMP,
7144 				.conf = &(struct rte_flow_action_jump){
7145 					.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
7146 				},
7147 			},
7148 			[2] = {
7149 				.type = RTE_FLOW_ACTION_TYPE_END,
7150 			},
7151 		};
7152 		uint32_t flow_idx;
7153 		struct rte_flow *flow;
7154 		struct rte_flow_error error;
7155 
7156 		if (!config->dv_flow_en)
7157 			break;
7158 		/* Create internal flow, validation skips copy action. */
7159 		flow_idx = flow_list_create(dev, NULL, &attr, items,
7160 					    actions, false, &error);
7161 		flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
7162 				      flow_idx);
7163 		if (!flow)
7164 			continue;
7165 		config->flow_mreg_c[n++] = idx;
7166 		flow_list_destroy(dev, NULL, flow_idx);
7167 	}
7168 	for (; n < MLX5_MREG_C_NUM; ++n)
7169 		config->flow_mreg_c[n] = REG_NON;
7170 	return 0;
7171 }
7172 
7173 /**
7174  * Dump flow raw hw data to file
7175  *
7176  * @param[in] dev
7177  *    The pointer to Ethernet device.
7178  * @param[in] file
7179  *   A pointer to a file for output.
7180  * @param[out] error
7181  *   Perform verbose error reporting if not NULL. PMDs initialize this
7182  *   structure in case of error only.
7183  * @return
7184  *   0 on success, a nagative value otherwise.
7185  */
7186 int
7187 mlx5_flow_dev_dump(struct rte_eth_dev *dev,
7188 		   FILE *file,
7189 		   struct rte_flow_error *error __rte_unused)
7190 {
7191 	struct mlx5_priv *priv = dev->data->dev_private;
7192 	struct mlx5_dev_ctx_shared *sh = priv->sh;
7193 
7194 	if (!priv->config.dv_flow_en) {
7195 		if (fputs("device dv flow disabled\n", file) <= 0)
7196 			return -errno;
7197 		return -ENOTSUP;
7198 	}
7199 	return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
7200 				       sh->tx_domain, file);
7201 }
7202 
7203 /**
7204  * Get aged-out flows.
7205  *
7206  * @param[in] dev
7207  *   Pointer to the Ethernet device structure.
7208  * @param[in] contexts
7209  *   The address of an array of pointers to the aged-out flow contexts.
7210  * @param[in] nb_contexts
7211  *   The length of the context array.
7212  * @param[out] error
7213  *   Perform verbose error reporting if not NULL. Initialized in case of
7214  *   error only.
7215  *
7216  * @return
7217  *   The number of aged contexts on success, a negative errno value otherwise.
7218  *   If nb_contexts is 0, return the total number of aged contexts.
7219  *   If nb_contexts is not 0, return the number of aged flows reported
7220  *   in the context array.
7221  */
7222 int
7223 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
7224 			uint32_t nb_contexts, struct rte_flow_error *error)
7225 {
7226 	const struct mlx5_flow_driver_ops *fops;
7227 	struct rte_flow_attr attr = { .transfer = 0 };
7228 
7229 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7230 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7231 		return fops->get_aged_flows(dev, contexts, nb_contexts,
7232 						    error);
7233 	}
7234 	DRV_LOG(ERR,
7235 		"port %u get aged flows is not supported.",
7236 		 dev->data->port_id);
7237 	return -ENOTSUP;
7238 }
7239 
7240 /* Wrapper for driver action_validate op callback */
7241 static int
7242 flow_drv_action_validate(struct rte_eth_dev *dev,
7243 			 const struct rte_flow_shared_action_conf *conf,
7244 			 const struct rte_flow_action *action,
7245 			 const struct mlx5_flow_driver_ops *fops,
7246 			 struct rte_flow_error *error)
7247 {
7248 	static const char err_msg[] = "shared action validation unsupported";
7249 
7250 	if (!fops->action_validate) {
7251 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7252 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7253 				   NULL, err_msg);
7254 		return -rte_errno;
7255 	}
7256 	return fops->action_validate(dev, conf, action, error);
7257 }
7258 
7259 /**
7260  * Destroys the shared action by handle.
7261  *
7262  * @param dev
7263  *   Pointer to Ethernet device structure.
7264  * @param[in] action
7265  *   Handle for the shared action to be destroyed.
7266  * @param[out] error
7267  *   Perform verbose error reporting if not NULL. PMDs initialize this
7268  *   structure in case of error only.
7269  *
7270  * @return
7271  *   0 on success, a negative errno value otherwise and rte_errno is set.
7272  *
7273  * @note: wrapper for driver action_destroy op callback.
7274  */
7275 static int
7276 mlx5_shared_action_destroy(struct rte_eth_dev *dev,
7277 			   struct rte_flow_shared_action *action,
7278 			   struct rte_flow_error *error)
7279 {
7280 	static const char err_msg[] = "shared action destruction unsupported";
7281 	struct rte_flow_attr attr = { .transfer = 0 };
7282 	const struct mlx5_flow_driver_ops *fops =
7283 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
7284 
7285 	if (!fops->action_destroy) {
7286 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7287 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7288 				   NULL, err_msg);
7289 		return -rte_errno;
7290 	}
7291 	return fops->action_destroy(dev, action, error);
7292 }
7293 
7294 /* Wrapper for driver action_update op callback */
7295 static int
7296 flow_drv_action_update(struct rte_eth_dev *dev,
7297 		       struct rte_flow_shared_action *action,
7298 		       const void *action_conf,
7299 		       const struct mlx5_flow_driver_ops *fops,
7300 		       struct rte_flow_error *error)
7301 {
7302 	static const char err_msg[] = "shared action update unsupported";
7303 
7304 	if (!fops->action_update) {
7305 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7306 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7307 				   NULL, err_msg);
7308 		return -rte_errno;
7309 	}
7310 	return fops->action_update(dev, action, action_conf, error);
7311 }
7312 
7313 /* Wrapper for driver action_query op callback */
7314 static int
7315 flow_drv_action_query(struct rte_eth_dev *dev,
7316 		      const struct rte_flow_shared_action *action,
7317 		      void *data,
7318 		      const struct mlx5_flow_driver_ops *fops,
7319 		      struct rte_flow_error *error)
7320 {
7321 	static const char err_msg[] = "shared action query unsupported";
7322 
7323 	if (!fops->action_query) {
7324 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7325 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7326 				   NULL, err_msg);
7327 		return -rte_errno;
7328 	}
7329 	return fops->action_query(dev, action, data, error);
7330 }
7331 
7332 /**
7333  * Create shared action for reuse in multiple flow rules.
7334  *
7335  * @param dev
7336  *   Pointer to Ethernet device structure.
7337  * @param[in] action
7338  *   Action configuration for shared action creation.
7339  * @param[out] error
7340  *   Perform verbose error reporting if not NULL. PMDs initialize this
7341  *   structure in case of error only.
7342  * @return
7343  *   A valid handle in case of success, NULL otherwise and rte_errno is set.
7344  */
7345 static struct rte_flow_shared_action *
7346 mlx5_shared_action_create(struct rte_eth_dev *dev,
7347 			  const struct rte_flow_shared_action_conf *conf,
7348 			  const struct rte_flow_action *action,
7349 			  struct rte_flow_error *error)
7350 {
7351 	static const char err_msg[] = "shared action creation unsupported";
7352 	struct rte_flow_attr attr = { .transfer = 0 };
7353 	const struct mlx5_flow_driver_ops *fops =
7354 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
7355 
7356 	if (flow_drv_action_validate(dev, conf, action, fops, error))
7357 		return NULL;
7358 	if (!fops->action_create) {
7359 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7360 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7361 				   NULL, err_msg);
7362 		return NULL;
7363 	}
7364 	return fops->action_create(dev, conf, action, error);
7365 }
7366 
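/*
 * Example (hedged, application-side sketch): creating a shared RSS action
 * once and reusing its handle in several flow rules. The port_id and
 * queues[] array are assumptions of this sketch, not values taken from
 * this file.
 *
 *	struct rte_flow_shared_action_conf conf = { .ingress = 1 };
 *	struct rte_flow_action_rss rss = {
 *		.queue_num = 2,
 *		.queue = queues,
 *	};
 *	struct rte_flow_action action = {
 *		.type = RTE_FLOW_ACTION_TYPE_RSS,
 *		.conf = &rss,
 *	};
 *	struct rte_flow_error error;
 *	struct rte_flow_shared_action *handle =
 *		rte_flow_shared_action_create(port_id, &conf, &action, &error);
 *
 * The returned handle can then be attached to flow rules with an action of
 * type RTE_FLOW_ACTION_TYPE_SHARED whose conf points to the handle.
 */
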
7367 /**
7368  * Updates in place the shared action configuration pointed to by the *action*
7369  * handle with the configuration provided as the *action* argument.
7370  * The update of the shared action configuration affects all flow rules reusing
7371  * the action via its handle.
7372  *
7373  * @param dev
7374  *   Pointer to Ethernet device structure.
7375  * @param[in] shared_action
7376  *   Handle for the shared action to be updated.
7377  * @param[in] action
7378  *   Action specification used to modify the action pointed by handle.
7379  *   *action* should be of the same type as the action pointed to by the
7380  *   *action* handle argument, otherwise it is considered invalid.
7381  * @param[out] error
7382  *   Perform verbose error reporting if not NULL. PMDs initialize this
7383  *   structure in case of error only.
7384  *
7385  * @return
7386  *   0 on success, a negative errno value otherwise and rte_errno is set.
7387  */
7388 static int
7389 mlx5_shared_action_update(struct rte_eth_dev *dev,
7390 		struct rte_flow_shared_action *shared_action,
7391 		const struct rte_flow_action *action,
7392 		struct rte_flow_error *error)
7393 {
7394 	struct rte_flow_attr attr = { .transfer = 0 };
7395 	const struct mlx5_flow_driver_ops *fops =
7396 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
7397 	int ret;
7398 
7399 	ret = flow_drv_action_validate(dev, NULL, action, fops, error);
7400 	if (ret)
7401 		return ret;
7402 	return flow_drv_action_update(dev, shared_action, action->conf, fops,
7403 				      error);
7404 }
7405 
7406 /**
7407  * Query the shared action by handle.
7408  *
7409  * This function allows retrieving action-specific data such as counters.
7410  * Data is gathered by special action which may be present/referenced in
7411  * more than one flow rule definition.
7412  *
7413  * \see RTE_FLOW_ACTION_TYPE_COUNT
7414  *
7415  * @param dev
7416  *   Pointer to Ethernet device structure.
7417  * @param[in] action
7418  *   Handle for the shared action to query.
7419  * @param[in, out] data
7420  *   Pointer to storage for the associated query data type.
7421  * @param[out] error
7422  *   Perform verbose error reporting if not NULL. PMDs initialize this
7423  *   structure in case of error only.
7424  *
7425  * @return
7426  *   0 on success, a negative errno value otherwise and rte_errno is set.
7427  */
7428 static int
7429 mlx5_shared_action_query(struct rte_eth_dev *dev,
7430 			 const struct rte_flow_shared_action *action,
7431 			 void *data,
7432 			 struct rte_flow_error *error)
7433 {
7434 	struct rte_flow_attr attr = { .transfer = 0 };
7435 	const struct mlx5_flow_driver_ops *fops =
7436 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
7437 
7438 	return flow_drv_action_query(dev, action, data, fops, error);
7439 }
7440 
7441 /**
7442  * Destroy all shared actions.
7443  *
7444  * @param dev
7445  *   Pointer to Ethernet device.
7446  *
7447  * @return
7448  *   0 on success, a negative errno value otherwise and rte_errno is set.
7449  */
7450 int
7451 mlx5_shared_action_flush(struct rte_eth_dev *dev)
7452 {
7453 	struct rte_flow_error error;
7454 	struct mlx5_priv *priv = dev->data->dev_private;
7455 	struct mlx5_shared_action_rss *shared_rss;
7456 	int ret = 0;
7457 	uint32_t idx;
7458 
7459 	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
7460 		      priv->rss_shared_actions, idx, shared_rss, next) {
7461 		ret |= mlx5_shared_action_destroy(dev,
7462 		       (struct rte_flow_shared_action *)(uintptr_t)idx, &error);
7463 	}
7464 	return ret;
7465 }
7466 
7467 #ifndef HAVE_MLX5DV_DR
7468 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
7469 #else
7470 #define MLX5_DOMAIN_SYNC_FLOW \
7471 	(MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
7472 #endif
7473 
7474 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
7475 {
7476 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
7477 	const struct mlx5_flow_driver_ops *fops;
7478 	int ret;
7479 	struct rte_flow_attr attr = { .transfer = 0 };
7480 
7481 	fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
7482 	ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
7483 	if (ret > 0)
7484 		ret = -ret;
7485 	return ret;
7486 }
7487 
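/*
 * Example (hedged sketch): forcing cached flow rules of the NIC Rx and Tx
 * domains to be flushed to hardware. The MLX5_DOMAIN_BIT_* flags are
 * assumed to come from rte_pmd_mlx5.h; port_id is an assumption of the
 * sketch.
 *
 *	int rc = rte_pmd_mlx5_sync_flow(port_id,
 *					MLX5_DOMAIN_BIT_NIC_RX |
 *					MLX5_DOMAIN_BIT_NIC_TX);
 */
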
7488 /**
7489  * Tunnel offload functionality is defined for the DV environment only.
7490  */
7491 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
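/*
 * The 32-bit MARK value set by tunnel offload miss rules packs an 8-bit
 * application-reserved field, a 15-bit PMD flow table id and a transfer
 * (FDB) flag; the remaining 8 bits are unused. tunnel_mark_decode() below
 * recovers the flow table from this value.
 */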
7492 __extension__
7493 union tunnel_offload_mark {
7494 	uint32_t val;
7495 	struct {
7496 		uint32_t app_reserve:8;
7497 		uint32_t table_id:15;
7498 		uint32_t transfer:1;
7499 		uint32_t _unused_:8;
7500 	};
7501 };
7502 
7503 static bool
7504 mlx5_access_tunnel_offload_db
7505 	(struct rte_eth_dev *dev,
7506 	 bool (*match)(struct rte_eth_dev *,
7507 		       struct mlx5_flow_tunnel *, const void *),
7508 	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
7509 	 void (*miss)(struct rte_eth_dev *, void *),
7510 	 void *ctx, bool lock_op);
7511 
7512 static int
7513 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
7514 			     struct rte_flow *flow,
7515 			     const struct rte_flow_attr *attr,
7516 			     const struct rte_flow_action *app_actions,
7517 			     uint32_t flow_idx,
7518 			     struct tunnel_default_miss_ctx *ctx,
7519 			     struct rte_flow_error *error)
7520 {
7521 	struct mlx5_priv *priv = dev->data->dev_private;
7522 	struct mlx5_flow *dev_flow;
7523 	struct rte_flow_attr miss_attr = *attr;
7524 	const struct mlx5_flow_tunnel *tunnel = app_actions[0].conf;
7525 	const struct rte_flow_item miss_items[2] = {
7526 		{
7527 			.type = RTE_FLOW_ITEM_TYPE_ETH,
7528 			.spec = NULL,
7529 			.last = NULL,
7530 			.mask = NULL
7531 		},
7532 		{
7533 			.type = RTE_FLOW_ITEM_TYPE_END,
7534 			.spec = NULL,
7535 			.last = NULL,
7536 			.mask = NULL
7537 		}
7538 	};
7539 	union tunnel_offload_mark mark_id;
7540 	struct rte_flow_action_mark miss_mark;
7541 	struct rte_flow_action miss_actions[3] = {
7542 		[0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
7543 		[2] = { .type = RTE_FLOW_ACTION_TYPE_END,  .conf = NULL }
7544 	};
7545 	const struct rte_flow_action_jump *jump_data;
7546 	uint32_t i, flow_table = 0; /* prevent compilation warning */
7547 	struct flow_grp_info grp_info = {
7548 		.external = 1,
7549 		.transfer = attr->transfer,
7550 		.fdb_def_rule = !!priv->fdb_def_rule,
7551 		.std_tbl_fix = 0,
7552 	};
7553 	int ret;
7554 
7555 	if (!attr->transfer) {
7556 		uint32_t q_size;
7557 
7558 		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
7559 		q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
7560 		ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
7561 					 0, SOCKET_ID_ANY);
7562 		if (!ctx->queue)
7563 			return rte_flow_error_set
7564 				(error, ENOMEM,
7565 				RTE_FLOW_ERROR_TYPE_ACTION_CONF,
7566 				NULL, "invalid default miss RSS");
7567 		ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT;
7568 		ctx->action_rss.level = 0;
7569 		ctx->action_rss.types = priv->rss_conf.rss_hf;
7570 		ctx->action_rss.key_len = priv->rss_conf.rss_key_len;
7571 		ctx->action_rss.queue_num = priv->reta_idx_n;
7572 		ctx->action_rss.key = priv->rss_conf.rss_key;
7573 		ctx->action_rss.queue = ctx->queue;
7574 		if (!priv->reta_idx_n || !priv->rxqs_n)
7575 			return rte_flow_error_set
7576 				(error, EINVAL,
7577 				RTE_FLOW_ERROR_TYPE_ACTION_CONF,
7578 				NULL, "invalid port configuration");
7579 		if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
7580 			ctx->action_rss.types = 0;
7581 		for (i = 0; i != priv->reta_idx_n; ++i)
7582 			ctx->queue[i] = (*priv->reta_idx)[i];
7583 	} else {
7584 		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
7585 		ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
7586 	}
7587 	miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
7588 	for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
7589 	jump_data = app_actions->conf;
7590 	miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
7591 	miss_attr.group = jump_data->group;
7592 	ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
7593 				       &flow_table, &grp_info, error);
7594 	if (ret)
7595 		return rte_flow_error_set(error, EINVAL,
7596 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
7597 					  NULL, "invalid tunnel id");
7598 	mark_id.app_reserve = 0;
7599 	mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
7600 	mark_id.transfer = !!attr->transfer;
7601 	mark_id._unused_ = 0;
7602 	miss_mark.id = mark_id.val;
7603 	dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
7604 				    miss_items, miss_actions, flow_idx, error);
7605 	if (!dev_flow)
7606 		return -rte_errno;
7607 	dev_flow->flow = flow;
7608 	dev_flow->external = true;
7609 	dev_flow->tunnel = tunnel;
7610 	/* Subflow object was created, we must include one in the list. */
7611 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
7612 		      dev_flow->handle, next);
7613 	DRV_LOG(DEBUG,
7614 		"port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
7615 		dev->data->port_id, tunnel->app_tunnel.type,
7616 		tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
7617 	ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
7618 				  miss_actions, error);
7619 	if (!ret)
7620 		ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
7621 						  error);
7622 
7623 	return ret;
7624 }
7625 
7626 static const struct mlx5_flow_tbl_data_entry  *
7627 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
7628 {
7629 	struct mlx5_priv *priv = dev->data->dev_private;
7630 	struct mlx5_dev_ctx_shared *sh = priv->sh;
7631 	struct mlx5_hlist_entry *he;
7632 	union tunnel_offload_mark mbits = { .val = mark };
7633 	union mlx5_flow_tbl_key table_key = {
7634 		{
7635 			.table_id = tunnel_id_to_flow_tbl(mbits.table_id),
7636 			.dummy = 0,
7637 			.domain = !!mbits.transfer,
7638 			.direction = 0,
7639 		}
7640 	};
7641 	he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, NULL);
7642 	return he ?
7643 	       container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL;
7644 }
7645 
7646 static void
7647 mlx5_flow_tunnel_grp2tbl_remove_cb(struct mlx5_hlist *list,
7648 				   struct mlx5_hlist_entry *entry)
7649 {
7650 	struct mlx5_dev_ctx_shared *sh = list->ctx;
7651 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
7652 
7653 	mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
7654 			tunnel_flow_tbl_to_id(tte->flow_table));
7655 	mlx5_free(tte);
7656 }
7657 
7658 static int
7659 mlx5_flow_tunnel_grp2tbl_match_cb(struct mlx5_hlist *list __rte_unused,
7660 				  struct mlx5_hlist_entry *entry,
7661 				  uint64_t key, void *cb_ctx __rte_unused)
7662 {
7663 	union tunnel_tbl_key tbl = {
7664 		.val = key,
7665 	};
7666 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
7667 
7668 	return tbl.tunnel_id != tte->tunnel_id || tbl.group != tte->group;
7669 }
7670 
7671 static struct mlx5_hlist_entry *
7672 mlx5_flow_tunnel_grp2tbl_create_cb(struct mlx5_hlist *list, uint64_t key,
7673 				   void *ctx __rte_unused)
7674 {
7675 	struct mlx5_dev_ctx_shared *sh = list->ctx;
7676 	struct tunnel_tbl_entry *tte;
7677 	union tunnel_tbl_key tbl = {
7678 		.val = key,
7679 	};
7680 
7681 	tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
7682 			  sizeof(*tte), 0,
7683 			  SOCKET_ID_ANY);
7684 	if (!tte)
7685 		goto err;
7686 	mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
7687 			  &tte->flow_table);
7688 	if (tte->flow_table >= MLX5_MAX_TABLES) {
7689 		DRV_LOG(ERR, "Tunnel TBL ID %d exceed max limit.",
7690 			tte->flow_table);
7691 		mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
7692 				tte->flow_table);
7693 		goto err;
7694 	} else if (!tte->flow_table) {
7695 		goto err;
7696 	}
7697 	tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
7698 	tte->tunnel_id = tbl.tunnel_id;
7699 	tte->group = tbl.group;
7700 	return &tte->hash;
7701 err:
7702 	if (tte)
7703 		mlx5_free(tte);
7704 	return NULL;
7705 }
7706 
7707 static uint32_t
7708 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
7709 				const struct mlx5_flow_tunnel *tunnel,
7710 				uint32_t group, uint32_t *table,
7711 				struct rte_flow_error *error)
7712 {
7713 	struct mlx5_hlist_entry *he;
7714 	struct tunnel_tbl_entry *tte;
7715 	union tunnel_tbl_key key = {
7716 		.tunnel_id = tunnel ? tunnel->tunnel_id : 0,
7717 		.group = group
7718 	};
7719 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
7720 	struct mlx5_hlist *group_hash;
7721 
7722 	group_hash = tunnel ? tunnel->groups : thub->groups;
7723 	he = mlx5_hlist_register(group_hash, key.val, NULL);
7724 	if (!he)
7725 		return rte_flow_error_set(error, EINVAL,
7726 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
7727 					  NULL,
7728 					  "tunnel group index not supported");
7729 	tte = container_of(he, typeof(*tte), hash);
7730 	*table = tte->flow_table;
7731 	DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
7732 		dev->data->port_id, key.tunnel_id, group, *table);
7733 	return 0;
7734 }
7735 
7736 static void
7737 mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
7738 		      struct mlx5_flow_tunnel *tunnel)
7739 {
7740 	struct mlx5_priv *priv = dev->data->dev_private;
7741 	struct mlx5_indexed_pool *ipool;
7742 
7743 	DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
7744 		dev->data->port_id, tunnel->tunnel_id);
7745 	LIST_REMOVE(tunnel, chain);
7746 	mlx5_hlist_destroy(tunnel->groups);
7747 	ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
7748 	mlx5_ipool_free(ipool, tunnel->tunnel_id);
7749 }
7750 
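/*
 * Walk the per-port tunnel list and dispatch to the match/hit/miss
 * callbacks. With lock_op == true the hit/miss callbacks are invoked while
 * the hub spinlock is still held (a callback may temporarily drop it
 * itself); with lock_op == false the lock is released before the callbacks
 * run.
 */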
7751 static bool
7752 mlx5_access_tunnel_offload_db
7753 	(struct rte_eth_dev *dev,
7754 	 bool (*match)(struct rte_eth_dev *,
7755 		       struct mlx5_flow_tunnel *, const void *),
7756 	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
7757 	 void (*miss)(struct rte_eth_dev *, void *),
7758 	 void *ctx, bool lock_op)
7759 {
7760 	bool verdict = false;
7761 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
7762 	struct mlx5_flow_tunnel *tunnel;
7763 
7764 	rte_spinlock_lock(&thub->sl);
7765 	LIST_FOREACH(tunnel, &thub->tunnels, chain) {
7766 		verdict = match(dev, tunnel, (const void *)ctx);
7767 		if (verdict)
7768 			break;
7769 	}
7770 	if (!lock_op)
7771 		rte_spinlock_unlock(&thub->sl);
7772 	if (verdict && hit)
7773 		hit(dev, tunnel, ctx);
7774 	if (!verdict && miss)
7775 		miss(dev, ctx);
7776 	if (lock_op)
7777 		rte_spinlock_unlock(&thub->sl);
7778 
7779 	return verdict;
7780 }
7781 
7782 struct tunnel_db_find_tunnel_id_ctx {
7783 	uint32_t tunnel_id;
7784 	struct mlx5_flow_tunnel *tunnel;
7785 };
7786 
7787 static bool
7788 find_tunnel_id_match(struct rte_eth_dev *dev,
7789 		     struct mlx5_flow_tunnel *tunnel, const void *x)
7790 {
7791 	const struct tunnel_db_find_tunnel_id_ctx *ctx = x;
7792 
7793 	RTE_SET_USED(dev);
7794 	return tunnel->tunnel_id == ctx->tunnel_id;
7795 }
7796 
7797 static void
7798 find_tunnel_id_hit(struct rte_eth_dev *dev,
7799 		   struct mlx5_flow_tunnel *tunnel, void *x)
7800 {
7801 	struct tunnel_db_find_tunnel_id_ctx *ctx = x;
7802 	RTE_SET_USED(dev);
7803 	ctx->tunnel = tunnel;
7804 }
7805 
7806 static struct mlx5_flow_tunnel *
7807 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
7808 {
7809 	struct tunnel_db_find_tunnel_id_ctx ctx = {
7810 		.tunnel_id = id,
7811 	};
7812 
7813 	mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
7814 				      find_tunnel_id_hit, NULL, &ctx, true);
7815 
7816 	return ctx.tunnel;
7817 }
7818 
7819 static struct mlx5_flow_tunnel *
7820 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
7821 			  const struct rte_flow_tunnel *app_tunnel)
7822 {
7823 	struct mlx5_priv *priv = dev->data->dev_private;
7824 	struct mlx5_indexed_pool *ipool;
7825 	struct mlx5_flow_tunnel *tunnel;
7826 	uint32_t id;
7827 
7828 	ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
7829 	tunnel = mlx5_ipool_zmalloc(ipool, &id);
7830 	if (!tunnel)
7831 		return NULL;
7832 	if (id >= MLX5_MAX_TUNNELS) {
7833 		mlx5_ipool_free(ipool, id);
7834 		DRV_LOG(ERR, "Tunnel ID %d exceed max limit.", id);
7835 		return NULL;
7836 	}
7837 	tunnel->groups = mlx5_hlist_create("tunnel groups", 1024, 0, 0,
7838 					   mlx5_flow_tunnel_grp2tbl_create_cb,
7839 					   mlx5_flow_tunnel_grp2tbl_match_cb,
7840 					   mlx5_flow_tunnel_grp2tbl_remove_cb);
7841 	if (!tunnel->groups) {
7842 		mlx5_ipool_free(ipool, id);
7843 		return NULL;
7844 	}
7845 	tunnel->groups->ctx = priv->sh;
7846 	/* initiate new PMD tunnel */
7847 	memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
7848 	tunnel->tunnel_id = id;
7849 	tunnel->action.type = (typeof(tunnel->action.type))
7850 			      MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
7851 	tunnel->action.conf = tunnel;
7852 	tunnel->item.type = (typeof(tunnel->item.type))
7853 			    MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
7854 	tunnel->item.spec = tunnel;
7855 	tunnel->item.last = NULL;
7856 	tunnel->item.mask = NULL;
7857 
7858 	DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
7859 		dev->data->port_id, tunnel->tunnel_id);
7860 
7861 	return tunnel;
7862 }
7863 
7864 struct tunnel_db_get_tunnel_ctx {
7865 	const struct rte_flow_tunnel *app_tunnel;
7866 	struct mlx5_flow_tunnel *tunnel;
7867 };
7868 
7869 static bool get_tunnel_match(struct rte_eth_dev *dev,
7870 			     struct mlx5_flow_tunnel *tunnel, const void *x)
7871 {
7872 	const struct tunnel_db_get_tunnel_ctx *ctx = x;
7873 
7874 	RTE_SET_USED(dev);
7875 	return !memcmp(ctx->app_tunnel, &tunnel->app_tunnel,
7876 		       sizeof(*ctx->app_tunnel));
7877 }
7878 
7879 static void get_tunnel_hit(struct rte_eth_dev *dev,
7880 			   struct mlx5_flow_tunnel *tunnel, void *x)
7881 {
7882 	/* called under tunnel spinlock protection */
7883 	struct tunnel_db_get_tunnel_ctx *ctx = x;
7884 
7885 	RTE_SET_USED(dev);
7886 	tunnel->refctn++;
7887 	ctx->tunnel = tunnel;
7888 }
7889 
7890 static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
7891 {
7892 	/* called under tunnel spinlock protection */
7893 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
7894 	struct tunnel_db_get_tunnel_ctx *ctx = x;
7895 
7896 	rte_spinlock_unlock(&thub->sl);
7897 	ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
	rte_spinlock_lock(&thub->sl);
	if (ctx->tunnel) {
		ctx->tunnel->refctn = 1;
		LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
	}
7902 }
7903 
7904 
7905 static int
7906 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
7907 		     const struct rte_flow_tunnel *app_tunnel,
7908 		     struct mlx5_flow_tunnel **tunnel)
7909 {
7910 	struct tunnel_db_get_tunnel_ctx ctx = {
7911 		.app_tunnel = app_tunnel,
7912 	};
7913 
7914 	mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
7915 				      get_tunnel_miss, &ctx, true);
7916 	*tunnel = ctx.tunnel;
7917 	return ctx.tunnel ? 0 : -ENOMEM;
7918 }
7919 
7920 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
7921 {
7922 	struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
7923 
7924 	if (!thub)
7925 		return;
7926 	if (!LIST_EMPTY(&thub->tunnels))
7927 		DRV_LOG(WARNING, "port %u tunnels present\n", port_id);
7928 	mlx5_hlist_destroy(thub->groups);
7929 	mlx5_free(thub);
7930 }
7931 
7932 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
7933 {
7934 	int err;
7935 	struct mlx5_flow_tunnel_hub *thub;
7936 
7937 	thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
7938 			   0, SOCKET_ID_ANY);
7939 	if (!thub)
7940 		return -ENOMEM;
7941 	LIST_INIT(&thub->tunnels);
7942 	rte_spinlock_init(&thub->sl);
7943 	thub->groups = mlx5_hlist_create("flow groups",
7944 					 rte_align32pow2(MLX5_MAX_TABLES), 0,
7945 					 0, mlx5_flow_tunnel_grp2tbl_create_cb,
7946 					 mlx5_flow_tunnel_grp2tbl_match_cb,
7947 					 mlx5_flow_tunnel_grp2tbl_remove_cb);
7948 	if (!thub->groups) {
7949 		err = -rte_errno;
7950 		goto err;
7951 	}
7952 	thub->groups->ctx = sh;
7953 	sh->tunnel_hub = thub;
7954 
7955 	return 0;
7956 
7957 err:
7958 	if (thub->groups)
7959 		mlx5_hlist_destroy(thub->groups);
7960 	if (thub)
7961 		mlx5_free(thub);
7962 	return err;
7963 }
7964 
7965 static inline bool
7966 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
7967 			  struct rte_flow_tunnel *tunnel,
7968 			  const char **err_msg)
7969 {
7970 	*err_msg = NULL;
7971 	if (!is_tunnel_offload_active(dev)) {
7972 		*err_msg = "tunnel offload was not activated";
7973 		goto out;
7974 	} else if (!tunnel) {
7975 		*err_msg = "no application tunnel";
7976 		goto out;
7977 	}
7978 
7979 	switch (tunnel->type) {
7980 	default:
7981 		*err_msg = "unsupported tunnel type";
7982 		goto out;
7983 	case RTE_FLOW_ITEM_TYPE_VXLAN:
7984 		break;
7985 	}
7986 
7987 out:
7988 	return !*err_msg;
7989 }
7990 
7991 static int
7992 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
7993 		    struct rte_flow_tunnel *app_tunnel,
7994 		    struct rte_flow_action **actions,
7995 		    uint32_t *num_of_actions,
7996 		    struct rte_flow_error *error)
7997 {
7998 	int ret;
7999 	struct mlx5_flow_tunnel *tunnel;
8000 	const char *err_msg = NULL;
8001 	bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, &err_msg);
8002 
8003 	if (!verdict)
8004 		return rte_flow_error_set(error, EINVAL,
8005 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
8006 					  err_msg);
8007 	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
8008 	if (ret < 0) {
8009 		return rte_flow_error_set(error, ret,
8010 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
8011 					  "failed to initialize pmd tunnel");
8012 	}
8013 	*actions = &tunnel->action;
8014 	*num_of_actions = 1;
8015 	return 0;
8016 }
8017 
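/*
 * Example (hedged, application-side sketch): obtaining the PMD tunnel-set
 * action for a VXLAN tunnel before building the tunnel rule. The port_id
 * is an assumption of the sketch; tunnel offload must be enabled on the
 * port (dv_xmeta_en=3 devarg) for the call to succeed.
 *
 *	struct rte_flow_tunnel tunnel = { .type = RTE_FLOW_ITEM_TYPE_VXLAN };
 *	struct rte_flow_action *pmd_actions;
 *	uint32_t num_pmd_actions;
 *	struct rte_flow_error error;
 *
 *	if (rte_flow_tunnel_decap_set(port_id, &tunnel, &pmd_actions,
 *				      &num_pmd_actions, &error) == 0) {
 *		... prepend pmd_actions to the application action list,
 *		    create the rule, then release the PMD actions with
 *		    rte_flow_tunnel_action_decap_release() when they are
 *		    no longer needed ...
 *	}
 */
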
8018 static int
8019 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
8020 		       struct rte_flow_tunnel *app_tunnel,
8021 		       struct rte_flow_item **items,
8022 		       uint32_t *num_of_items,
8023 		       struct rte_flow_error *error)
8024 {
8025 	int ret;
8026 	struct mlx5_flow_tunnel *tunnel;
8027 	const char *err_msg = NULL;
8028 	bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, &err_msg);
8029 
8030 	if (!verdict)
8031 		return rte_flow_error_set(error, EINVAL,
8032 					  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
8033 					  err_msg);
8034 	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
8035 	if (ret < 0) {
8036 		return rte_flow_error_set(error, ret,
8037 					  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
8038 					  "failed to initialize pmd tunnel");
8039 	}
8040 	*items = &tunnel->item;
8041 	*num_of_items = 1;
8042 	return 0;
8043 }
8044 
8045 struct tunnel_db_element_release_ctx {
8046 	struct rte_flow_item *items;
8047 	struct rte_flow_action *actions;
8048 	uint32_t num_elements;
8049 	struct rte_flow_error *error;
8050 	int ret;
8051 };
8052 
8053 static bool
8054 tunnel_element_release_match(struct rte_eth_dev *dev,
8055 			     struct mlx5_flow_tunnel *tunnel, const void *x)
8056 {
8057 	const struct tunnel_db_element_release_ctx *ctx = x;
8058 
8059 	RTE_SET_USED(dev);
8060 	if (ctx->num_elements != 1)
8061 		return false;
8062 	else if (ctx->items)
8063 		return ctx->items == &tunnel->item;
8064 	else if (ctx->actions)
8065 		return ctx->actions == &tunnel->action;
8066 
8067 	return false;
8068 }
8069 
8070 static void
8071 tunnel_element_release_hit(struct rte_eth_dev *dev,
8072 			   struct mlx5_flow_tunnel *tunnel, void *x)
8073 {
8074 	struct tunnel_db_element_release_ctx *ctx = x;
8075 	ctx->ret = 0;
8076 	if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
8077 		mlx5_flow_tunnel_free(dev, tunnel);
8078 }
8079 
8080 static void
8081 tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
8082 {
8083 	struct tunnel_db_element_release_ctx *ctx = x;
8084 	RTE_SET_USED(dev);
8085 	ctx->ret = rte_flow_error_set(ctx->error, EINVAL,
8086 				      RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
8087 				      "invalid argument");
8088 }
8089 
8090 static int
8091 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
8092 		       struct rte_flow_item *pmd_items,
8093 		       uint32_t num_items, struct rte_flow_error *err)
8094 {
8095 	struct tunnel_db_element_release_ctx ctx = {
8096 		.items = pmd_items,
8097 		.actions = NULL,
8098 		.num_elements = num_items,
8099 		.error = err,
8100 	};
8101 
8102 	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
8103 				      tunnel_element_release_hit,
8104 				      tunnel_element_release_miss, &ctx, false);
8105 
8106 	return ctx.ret;
8107 }
8108 
8109 static int
8110 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
8111 			 struct rte_flow_action *pmd_actions,
8112 			 uint32_t num_actions, struct rte_flow_error *err)
8113 {
8114 	struct tunnel_db_element_release_ctx ctx = {
8115 		.items = NULL,
8116 		.actions = pmd_actions,
8117 		.num_elements = num_actions,
8118 		.error = err,
8119 	};
8120 
8121 	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
8122 				      tunnel_element_release_hit,
8123 				      tunnel_element_release_miss, &ctx, false);
8124 
8125 	return ctx.ret;
8126 }
8127 
8128 static int
8129 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
8130 				  struct rte_mbuf *m,
8131 				  struct rte_flow_restore_info *info,
8132 				  struct rte_flow_error *err)
8133 {
8134 	uint64_t ol_flags = m->ol_flags;
8135 	const struct mlx5_flow_tbl_data_entry *tble;
8136 	const uint64_t mask = PKT_RX_FDIR | PKT_RX_FDIR_ID;
8137 
8138 	if (!is_tunnel_offload_active(dev)) {
8139 		info->flags = 0;
8140 		return 0;
8141 	}
8142 
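	/*
	 * Tunnel offload miss rules mark matching packets; the hardware
	 * delivers that mark in the mbuf FDIR id, so both FDIR flags must
	 * be set for hash.fdir.hi to carry a valid tunnel table mark.
	 */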
8143 	if ((ol_flags & mask) != mask)
8144 		goto err;
8145 	tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
8146 	if (!tble) {
8147 		DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
8148 			dev->data->port_id, m->hash.fdir.hi);
8149 		goto err;
8150 	}
8151 	MLX5_ASSERT(tble->tunnel);
8152 	memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
8153 	info->group_id = tble->group_id;
8154 	info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
8155 		      RTE_FLOW_RESTORE_INFO_GROUP_ID |
8156 		      RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
8157 
8158 	return 0;
8159 
8160 err:
8161 	return rte_flow_error_set(err, EINVAL,
8162 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
8163 				  "failed to get restore info");
8164 }
8165 
8166 #else /* HAVE_IBV_FLOW_DV_SUPPORT */
8167 static int
8168 mlx5_flow_tunnel_decap_set(__rte_unused struct rte_eth_dev *dev,
8169 			   __rte_unused struct rte_flow_tunnel *app_tunnel,
8170 			   __rte_unused struct rte_flow_action **actions,
8171 			   __rte_unused uint32_t *num_of_actions,
8172 			   __rte_unused struct rte_flow_error *error)
8173 {
8174 	return -ENOTSUP;
8175 }
8176 
8177 static int
8178 mlx5_flow_tunnel_match(__rte_unused struct rte_eth_dev *dev,
8179 		       __rte_unused struct rte_flow_tunnel *app_tunnel,
8180 		       __rte_unused struct rte_flow_item **items,
8181 		       __rte_unused uint32_t *num_of_items,
8182 		       __rte_unused struct rte_flow_error *error)
8183 {
8184 	return -ENOTSUP;
8185 }
8186 
8187 static int
8188 mlx5_flow_tunnel_item_release(__rte_unused struct rte_eth_dev *dev,
8189 			      __rte_unused struct rte_flow_item *pmd_items,
8190 			      __rte_unused uint32_t num_items,
8191 			      __rte_unused struct rte_flow_error *err)
8192 {
8193 	return -ENOTSUP;
8194 }
8195 
8196 static int
8197 mlx5_flow_tunnel_action_release(__rte_unused struct rte_eth_dev *dev,
8198 				__rte_unused struct rte_flow_action *pmd_action,
8199 				__rte_unused uint32_t num_actions,
8200 				__rte_unused struct rte_flow_error *err)
8201 {
8202 	return -ENOTSUP;
8203 }
8204 
8205 static int
8206 mlx5_flow_tunnel_get_restore_info(__rte_unused struct rte_eth_dev *dev,
8207 				  __rte_unused struct rte_mbuf *m,
8208 				  __rte_unused struct rte_flow_restore_info *i,
8209 				  __rte_unused struct rte_flow_error *err)
8210 {
8211 	return -ENOTSUP;
8212 }
8213 
8214 static int
8215 flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev,
8216 			     __rte_unused struct rte_flow *flow,
8217 			     __rte_unused const struct rte_flow_attr *attr,
8218 			     __rte_unused const struct rte_flow_action *actions,
8219 			     __rte_unused uint32_t flow_idx,
8220 			     __rte_unused struct tunnel_default_miss_ctx *ctx,
8221 			     __rte_unused struct rte_flow_error *error)
8222 {
8223 	return -ENOTSUP;
8224 }
8225 
8226 static struct mlx5_flow_tunnel *
8227 mlx5_find_tunnel_id(__rte_unused struct rte_eth_dev *dev,
8228 		    __rte_unused uint32_t id)
8229 {
8230 	return NULL;
8231 }
8232 
8233 static void
8234 mlx5_flow_tunnel_free(__rte_unused struct rte_eth_dev *dev,
8235 		      __rte_unused struct mlx5_flow_tunnel *tunnel)
8236 {
8237 }
8238 
8239 static uint32_t
8240 tunnel_flow_group_to_flow_table(__rte_unused struct rte_eth_dev *dev,
8241 				__rte_unused const struct mlx5_flow_tunnel *t,
8242 				__rte_unused uint32_t group,
8243 				__rte_unused uint32_t *table,
8244 				struct rte_flow_error *error)
8245 {
8246 	return rte_flow_error_set(error, ENOTSUP,
8247 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
8248 				  "tunnel offload requires DV support");
8249 }
8250 
8251 void
8252 mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh,
8253 			__rte_unused  uint16_t port_id)
8254 {
8255 }
8256 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
8257 
8258