xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision cd346367f898d619edf53f13628d6e539dbcab40)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <netinet/in.h>
7 #include <sys/queue.h>
8 #include <stdalign.h>
9 #include <stdint.h>
10 #include <string.h>
11 #include <stdbool.h>
12 
13 /* Verbs header. */
14 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
15 #ifdef PEDANTIC
16 #pragma GCC diagnostic ignored "-Wpedantic"
17 #endif
18 #include <infiniband/verbs.h>
19 #ifdef PEDANTIC
20 #pragma GCC diagnostic error "-Wpedantic"
21 #endif
22 
23 #include <rte_common.h>
24 #include <rte_ether.h>
25 #include <rte_ethdev_driver.h>
26 #include <rte_flow.h>
27 #include <rte_cycles.h>
28 #include <rte_flow_driver.h>
29 #include <rte_malloc.h>
30 #include <rte_ip.h>
31 
32 #include <mlx5_glue.h>
33 #include <mlx5_devx_cmds.h>
34 #include <mlx5_prm.h>
35 
36 #include "mlx5_defs.h"
37 #include "mlx5.h"
38 #include "mlx5_flow.h"
39 #include "mlx5_rxtx.h"
40 
41 /** Device flow drivers. */
42 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
43 extern const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops;
44 #endif
45 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
46 
47 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
48 
49 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
50 	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
51 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
52 	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
53 #endif
54 	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
55 	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
56 };
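/*
 * A minimal dispatch sketch (illustrative, simplified): once the flow engine
 * type is resolved, the table above selects the backend callbacks, e.g.
 *
 *   const struct mlx5_flow_driver_ops *ops =
 *           flow_drv_ops[MLX5_FLOW_TYPE_VERBS];
 *
 * and validation/translation/apply requests are then dispatched through ops.
 */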
57 
58 enum mlx5_expansion {
59 	MLX5_EXPANSION_ROOT,
60 	MLX5_EXPANSION_ROOT_OUTER,
61 	MLX5_EXPANSION_ROOT_ETH_VLAN,
62 	MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
63 	MLX5_EXPANSION_OUTER_ETH,
64 	MLX5_EXPANSION_OUTER_ETH_VLAN,
65 	MLX5_EXPANSION_OUTER_VLAN,
66 	MLX5_EXPANSION_OUTER_IPV4,
67 	MLX5_EXPANSION_OUTER_IPV4_UDP,
68 	MLX5_EXPANSION_OUTER_IPV4_TCP,
69 	MLX5_EXPANSION_OUTER_IPV6,
70 	MLX5_EXPANSION_OUTER_IPV6_UDP,
71 	MLX5_EXPANSION_OUTER_IPV6_TCP,
72 	MLX5_EXPANSION_VXLAN,
73 	MLX5_EXPANSION_VXLAN_GPE,
74 	MLX5_EXPANSION_GRE,
75 	MLX5_EXPANSION_MPLS,
76 	MLX5_EXPANSION_ETH,
77 	MLX5_EXPANSION_ETH_VLAN,
78 	MLX5_EXPANSION_VLAN,
79 	MLX5_EXPANSION_IPV4,
80 	MLX5_EXPANSION_IPV4_UDP,
81 	MLX5_EXPANSION_IPV4_TCP,
82 	MLX5_EXPANSION_IPV6,
83 	MLX5_EXPANSION_IPV6_UDP,
84 	MLX5_EXPANSION_IPV6_TCP,
85 };
86 
87 /** Supported expansion of items. */
88 static const struct rte_flow_expand_node mlx5_support_expansion[] = {
89 	[MLX5_EXPANSION_ROOT] = {
90 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
91 						 MLX5_EXPANSION_IPV4,
92 						 MLX5_EXPANSION_IPV6),
93 		.type = RTE_FLOW_ITEM_TYPE_END,
94 	},
95 	[MLX5_EXPANSION_ROOT_OUTER] = {
96 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
97 						 MLX5_EXPANSION_OUTER_IPV4,
98 						 MLX5_EXPANSION_OUTER_IPV6),
99 		.type = RTE_FLOW_ITEM_TYPE_END,
100 	},
101 	[MLX5_EXPANSION_ROOT_ETH_VLAN] = {
102 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
103 		.type = RTE_FLOW_ITEM_TYPE_END,
104 	},
105 	[MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
106 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN),
107 		.type = RTE_FLOW_ITEM_TYPE_END,
108 	},
109 	[MLX5_EXPANSION_OUTER_ETH] = {
110 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
111 						 MLX5_EXPANSION_OUTER_IPV6,
112 						 MLX5_EXPANSION_MPLS),
113 		.type = RTE_FLOW_ITEM_TYPE_ETH,
114 		.rss_types = 0,
115 	},
116 	[MLX5_EXPANSION_OUTER_ETH_VLAN] = {
117 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
118 		.type = RTE_FLOW_ITEM_TYPE_ETH,
119 		.rss_types = 0,
120 	},
121 	[MLX5_EXPANSION_OUTER_VLAN] = {
122 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
123 						 MLX5_EXPANSION_OUTER_IPV6),
124 		.type = RTE_FLOW_ITEM_TYPE_VLAN,
125 	},
126 	[MLX5_EXPANSION_OUTER_IPV4] = {
127 		.next = RTE_FLOW_EXPAND_RSS_NEXT
128 			(MLX5_EXPANSION_OUTER_IPV4_UDP,
129 			 MLX5_EXPANSION_OUTER_IPV4_TCP,
130 			 MLX5_EXPANSION_GRE,
131 			 MLX5_EXPANSION_IPV4,
132 			 MLX5_EXPANSION_IPV6),
133 		.type = RTE_FLOW_ITEM_TYPE_IPV4,
134 		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
135 			ETH_RSS_NONFRAG_IPV4_OTHER,
136 	},
137 	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
138 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
139 						 MLX5_EXPANSION_VXLAN_GPE),
140 		.type = RTE_FLOW_ITEM_TYPE_UDP,
141 		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
142 	},
143 	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
144 		.type = RTE_FLOW_ITEM_TYPE_TCP,
145 		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
146 	},
147 	[MLX5_EXPANSION_OUTER_IPV6] = {
148 		.next = RTE_FLOW_EXPAND_RSS_NEXT
149 			(MLX5_EXPANSION_OUTER_IPV6_UDP,
150 			 MLX5_EXPANSION_OUTER_IPV6_TCP,
151 			 MLX5_EXPANSION_IPV4,
152 			 MLX5_EXPANSION_IPV6),
153 		.type = RTE_FLOW_ITEM_TYPE_IPV6,
154 		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
155 			ETH_RSS_NONFRAG_IPV6_OTHER,
156 	},
157 	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
158 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
159 						 MLX5_EXPANSION_VXLAN_GPE),
160 		.type = RTE_FLOW_ITEM_TYPE_UDP,
161 		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
162 	},
163 	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
164 		.type = RTE_FLOW_ITEM_TYPE_TCP,
165 		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
166 	},
167 	[MLX5_EXPANSION_VXLAN] = {
168 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
169 						 MLX5_EXPANSION_IPV4,
170 						 MLX5_EXPANSION_IPV6),
171 		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
172 	},
173 	[MLX5_EXPANSION_VXLAN_GPE] = {
174 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
175 						 MLX5_EXPANSION_IPV4,
176 						 MLX5_EXPANSION_IPV6),
177 		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
178 	},
179 	[MLX5_EXPANSION_GRE] = {
180 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
181 		.type = RTE_FLOW_ITEM_TYPE_GRE,
182 	},
183 	[MLX5_EXPANSION_MPLS] = {
184 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
185 						 MLX5_EXPANSION_IPV6),
186 		.type = RTE_FLOW_ITEM_TYPE_MPLS,
187 	},
188 	[MLX5_EXPANSION_ETH] = {
189 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
190 						 MLX5_EXPANSION_IPV6),
191 		.type = RTE_FLOW_ITEM_TYPE_ETH,
192 	},
193 	[MLX5_EXPANSION_ETH_VLAN] = {
194 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
195 		.type = RTE_FLOW_ITEM_TYPE_ETH,
196 	},
197 	[MLX5_EXPANSION_VLAN] = {
198 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
199 						 MLX5_EXPANSION_IPV6),
200 		.type = RTE_FLOW_ITEM_TYPE_VLAN,
201 	},
202 	[MLX5_EXPANSION_IPV4] = {
203 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
204 						 MLX5_EXPANSION_IPV4_TCP),
205 		.type = RTE_FLOW_ITEM_TYPE_IPV4,
206 		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
207 			ETH_RSS_NONFRAG_IPV4_OTHER,
208 	},
209 	[MLX5_EXPANSION_IPV4_UDP] = {
210 		.type = RTE_FLOW_ITEM_TYPE_UDP,
211 		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
212 	},
213 	[MLX5_EXPANSION_IPV4_TCP] = {
214 		.type = RTE_FLOW_ITEM_TYPE_TCP,
215 		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
216 	},
217 	[MLX5_EXPANSION_IPV6] = {
218 		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
219 						 MLX5_EXPANSION_IPV6_TCP),
220 		.type = RTE_FLOW_ITEM_TYPE_IPV6,
221 		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
222 			ETH_RSS_NONFRAG_IPV6_OTHER,
223 	},
224 	[MLX5_EXPANSION_IPV6_UDP] = {
225 		.type = RTE_FLOW_ITEM_TYPE_UDP,
226 		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
227 	},
228 	[MLX5_EXPANSION_IPV6_TCP] = {
229 		.type = RTE_FLOW_ITEM_TYPE_TCP,
230 		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
231 	},
232 };
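/*
 * Expansion sketch (illustrative, simplified): using the table above,
 * rte_flow_expand_rss() turns a pattern such as "eth / ipv4 / end" combined
 * with rss.types = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP into the set
 *
 *   eth / ipv4
 *   eth / ipv4 / udp
 *   eth / ipv4 / tcp
 *
 * so that one device flow per matching hash type can be created.
 */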
233 
234 static const struct rte_flow_ops mlx5_flow_ops = {
235 	.validate = mlx5_flow_validate,
236 	.create = mlx5_flow_create,
237 	.destroy = mlx5_flow_destroy,
238 	.flush = mlx5_flow_flush,
239 	.isolate = mlx5_flow_isolate,
240 	.query = mlx5_flow_query,
241 	.dev_dump = mlx5_flow_dev_dump,
242 	.get_aged_flows = mlx5_flow_get_aged_flows,
243 };
244 
245 /* Convert FDIR request to Generic flow. */
246 struct mlx5_fdir {
247 	struct rte_flow_attr attr;
248 	struct rte_flow_item items[4];
249 	struct rte_flow_item_eth l2;
250 	struct rte_flow_item_eth l2_mask;
251 	union {
252 		struct rte_flow_item_ipv4 ipv4;
253 		struct rte_flow_item_ipv6 ipv6;
254 	} l3;
255 	union {
256 		struct rte_flow_item_ipv4 ipv4;
257 		struct rte_flow_item_ipv6 ipv6;
258 	} l3_mask;
259 	union {
260 		struct rte_flow_item_udp udp;
261 		struct rte_flow_item_tcp tcp;
262 	} l4;
263 	union {
264 		struct rte_flow_item_udp udp;
265 		struct rte_flow_item_tcp tcp;
266 	} l4_mask;
267 	struct rte_flow_action actions[2];
268 	struct rte_flow_action_queue queue;
269 };
270 
271 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
272 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
273 	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
274 };
275 
276 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
277 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
278 	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
279 	{ 9, 10, 11 }, { 12, 13, 14 },
280 };
281 
282 /* Tunnel information. */
283 struct mlx5_flow_tunnel_info {
284 	uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
285 	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
286 };
287 
288 static struct mlx5_flow_tunnel_info tunnels_info[] = {
289 	{
290 		.tunnel = MLX5_FLOW_LAYER_VXLAN,
291 		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
292 	},
293 	{
294 		.tunnel = MLX5_FLOW_LAYER_GENEVE,
295 		.ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
296 	},
297 	{
298 		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
299 		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
300 	},
301 	{
302 		.tunnel = MLX5_FLOW_LAYER_GRE,
303 		.ptype = RTE_PTYPE_TUNNEL_GRE,
304 	},
305 	{
306 		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
307 		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
308 	},
309 	{
310 		.tunnel = MLX5_FLOW_LAYER_MPLS,
311 		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
312 	},
313 	{
314 		.tunnel = MLX5_FLOW_LAYER_NVGRE,
315 		.ptype = RTE_PTYPE_TUNNEL_NVGRE,
316 	},
317 	{
318 		.tunnel = MLX5_FLOW_LAYER_IPIP,
319 		.ptype = RTE_PTYPE_TUNNEL_IP,
320 	},
321 	{
322 		.tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
323 		.ptype = RTE_PTYPE_TUNNEL_IP,
324 	},
325 	{
326 		.tunnel = MLX5_FLOW_LAYER_GTP,
327 		.ptype = RTE_PTYPE_TUNNEL_GTPU,
328 	},
329 };
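/*
 * Example mapping (illustrative): a device flow whose layers include
 * MLX5_FLOW_LAYER_VXLAN makes flow_rxq_tunnel_ptype_update() report
 * RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP on the Rx queue, provided no
 * other tunnel type is installed on the same queue.
 */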
330 
331 /**
332  * Translate tag ID to register.
333  *
334  * @param[in] dev
335  *   Pointer to the Ethernet device structure.
336  * @param[in] feature
337  *   The feature that requests the register.
338  * @param[in] id
339  *   The requested register ID.
340  * @param[out] error
341  *   Error description in case of any.
342  *
343  * @return
344  *   The requested register on success, a negative errno
345  *   value otherwise and rte_errno is set.
346  */
347 int
348 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
349 		     enum mlx5_feature_name feature,
350 		     uint32_t id,
351 		     struct rte_flow_error *error)
352 {
353 	struct mlx5_priv *priv = dev->data->dev_private;
354 	struct mlx5_dev_config *config = &priv->config;
355 	enum modify_reg start_reg;
356 	bool skip_mtr_reg = false;
357 
358 	switch (feature) {
359 	case MLX5_HAIRPIN_RX:
360 		return REG_B;
361 	case MLX5_HAIRPIN_TX:
362 		return REG_A;
363 	case MLX5_METADATA_RX:
364 		switch (config->dv_xmeta_en) {
365 		case MLX5_XMETA_MODE_LEGACY:
366 			return REG_B;
367 		case MLX5_XMETA_MODE_META16:
368 			return REG_C_0;
369 		case MLX5_XMETA_MODE_META32:
370 			return REG_C_1;
371 		}
372 		break;
373 	case MLX5_METADATA_TX:
374 		return REG_A;
375 	case MLX5_METADATA_FDB:
376 		switch (config->dv_xmeta_en) {
377 		case MLX5_XMETA_MODE_LEGACY:
378 			return REG_NONE;
379 		case MLX5_XMETA_MODE_META16:
380 			return REG_C_0;
381 		case MLX5_XMETA_MODE_META32:
382 			return REG_C_1;
383 		}
384 		break;
385 	case MLX5_FLOW_MARK:
386 		switch (config->dv_xmeta_en) {
387 		case MLX5_XMETA_MODE_LEGACY:
388 			return REG_NONE;
389 		case MLX5_XMETA_MODE_META16:
390 			return REG_C_1;
391 		case MLX5_XMETA_MODE_META32:
392 			return REG_C_0;
393 		}
394 		break;
395 	case MLX5_MTR_SFX:
396 		/*
397 		 * If meter color and flow match share one register, flow match
398 		 * should use the meter color register for match.
399 		 */
400 		if (priv->mtr_reg_share)
401 			return priv->mtr_color_reg;
402 		else
403 			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
404 			       REG_C_3;
405 	case MLX5_MTR_COLOR:
406 		MLX5_ASSERT(priv->mtr_color_reg != REG_NONE);
407 		return priv->mtr_color_reg;
408 	case MLX5_COPY_MARK:
409 		/*
410 		 * The COPY_MARK metadata register is used only in the meter
411 		 * suffix sub-flow, so it is safe to share the same register.
412 		 */
413 		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
414 	case MLX5_APP_TAG:
415 		/*
416 		 * If the meter is enabled, it engages registers for both color
417 		 * match and flow match. If meter color match does not use
418 		 * REG_C_2, the REG_C_x used by meter color match must be
419 		 * skipped.
420 		 * If the meter is disabled, all available registers can be used.
421 		 */
422 		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
423 			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
424 		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
425 		if (id > (REG_C_7 - start_reg))
426 			return rte_flow_error_set(error, EINVAL,
427 						  RTE_FLOW_ERROR_TYPE_ITEM,
428 						  NULL, "invalid tag id");
429 		if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NONE)
430 			return rte_flow_error_set(error, ENOTSUP,
431 						  RTE_FLOW_ERROR_TYPE_ITEM,
432 						  NULL, "unsupported tag id");
433 		/*
434 		 * This case means the meter is using a REG_C_x greater than 2.
435 		 * Take care not to conflict with meter color REG_C_x.
436 		 * If the available index REG_C_y >= REG_C_x, skip the
437 		 * color register.
438 		 */
439 		if (skip_mtr_reg && config->flow_mreg_c
440 		    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
441 			if (id >= (REG_C_7 - start_reg))
442 				return rte_flow_error_set(error, EINVAL,
443 						       RTE_FLOW_ERROR_TYPE_ITEM,
444 							NULL, "invalid tag id");
445 			if (config->flow_mreg_c
446 			    [id + 1 + start_reg - REG_C_0] != REG_NONE)
447 				return config->flow_mreg_c
448 					       [id + 1 + start_reg - REG_C_0];
449 			return rte_flow_error_set(error, ENOTSUP,
450 						  RTE_FLOW_ERROR_TYPE_ITEM,
451 						  NULL, "unsupported tag id");
452 		}
453 		return config->flow_mreg_c[id + start_reg - REG_C_0];
454 	}
455 	MLX5_ASSERT(false);
456 	return rte_flow_error_set(error, EINVAL,
457 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
458 				  NULL, "invalid feature name");
459 }
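/*
 * Usage sketch (illustrative; assumes dv_xmeta_en == MLX5_XMETA_MODE_META16):
 *
 *   struct rte_flow_error err;
 *   int reg = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, &err);
 *
 * Here reg resolves to REG_C_0; a negative return means the requested
 * feature/id combination is unavailable and rte_errno is set.
 */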
460 
461 /**
462  * Check extensive flow metadata register support.
463  *
464  * @param dev
465  *   Pointer to rte_eth_dev structure.
466  *
467  * @return
468  *   True if device supports extensive flow metadata register, otherwise false.
469  */
470 bool
471 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
472 {
473 	struct mlx5_priv *priv = dev->data->dev_private;
474 	struct mlx5_dev_config *config = &priv->config;
475 
476 	/*
477 	 * Having an available reg_c can be regarded as support for the
478 	 * extensive flow metadata registers, which implies:
479 	 * - metadata register copy action by modify header,
480 	 * - 16 modify header actions are supported,
481 	 * - reg_c's are preserved across different domains (FDB and NIC) on
482 	 *   packet loopback by flow lookup miss.
483 	 */
484 	return config->flow_mreg_c[2] != REG_NONE;
485 }
486 
487 /**
488  * Discover the maximum number of priorities available.
489  *
490  * @param[in] dev
491  *   Pointer to the Ethernet device structure.
492  *
493  * @return
494  *   Number of supported flow priorities on success, a negative errno
495  *   value otherwise and rte_errno is set.
496  */
497 int
498 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
499 {
500 	struct mlx5_priv *priv = dev->data->dev_private;
501 	struct {
502 		struct ibv_flow_attr attr;
503 		struct ibv_flow_spec_eth eth;
504 		struct ibv_flow_spec_action_drop drop;
505 	} flow_attr = {
506 		.attr = {
507 			.num_of_specs = 2,
508 			.port = (uint8_t)priv->dev_port,
509 		},
510 		.eth = {
511 			.type = IBV_FLOW_SPEC_ETH,
512 			.size = sizeof(struct ibv_flow_spec_eth),
513 		},
514 		.drop = {
515 			.size = sizeof(struct ibv_flow_spec_action_drop),
516 			.type = IBV_FLOW_SPEC_ACTION_DROP,
517 		},
518 	};
519 	struct ibv_flow *flow;
520 	struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
521 	uint16_t vprio[] = { 8, 16 };
522 	int i;
523 	int priority = 0;
524 
525 	if (!drop) {
526 		rte_errno = ENOTSUP;
527 		return -rte_errno;
528 	}
529 	for (i = 0; i != RTE_DIM(vprio); i++) {
530 		flow_attr.attr.priority = vprio[i] - 1;
531 		flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
532 		if (!flow)
533 			break;
534 		claim_zero(mlx5_glue->destroy_flow(flow));
535 		priority = vprio[i];
536 	}
537 	mlx5_hrxq_drop_release(dev);
538 	switch (priority) {
539 	case 8:
540 		priority = RTE_DIM(priority_map_3);
541 		break;
542 	case 16:
543 		priority = RTE_DIM(priority_map_5);
544 		break;
545 	default:
546 		rte_errno = ENOTSUP;
547 		DRV_LOG(ERR,
548 			"port %u verbs maximum priority: %d expected 8/16",
549 			dev->data->port_id, priority);
550 		return -rte_errno;
551 	}
552 	DRV_LOG(INFO, "port %u flow maximum priority: %d",
553 		dev->data->port_id, priority);
554 	return priority;
555 }
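/*
 * Outcome sketch (illustrative): on a device exposing 16 Verbs priorities the
 * probe succeeds up to vprio 16 and the function returns
 * RTE_DIM(priority_map_5) == 5; with only 8 Verbs priorities it returns
 * RTE_DIM(priority_map_3) == 3.
 */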
556 
557 /**
558  * Adjust flow priority based on the highest layer and the requested priority.
559  *
560  * @param[in] dev
561  *   Pointer to the Ethernet device structure.
562  * @param[in] priority
563  *   The rule base priority.
564  * @param[in] subpriority
565  *   The priority based on the items.
566  *
567  * @return
568  *   The new priority.
569  */
570 uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
571 				   uint32_t subpriority)
572 {
573 	uint32_t res = 0;
574 	struct mlx5_priv *priv = dev->data->dev_private;
575 
576 	switch (priv->config.flow_prio) {
577 	case RTE_DIM(priority_map_3):
578 		res = priority_map_3[priority][subpriority];
579 		break;
580 	case RTE_DIM(priority_map_5):
581 		res = priority_map_5[priority][subpriority];
582 		break;
583 	}
584 	return  res;
585 }
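/*
 * Worked example (illustrative): with flow_prio == RTE_DIM(priority_map_5),
 * a rule with base priority 1 and item sub-priority 2 is placed at Verbs
 * priority priority_map_5[1][2] == 5.
 */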
586 
587 /**
588  * Verify the @p item specifications (spec, last, mask) are compatible with the
589  * NIC capabilities.
590  *
591  * @param[in] item
592  *   Item specification.
593  * @param[in] mask
594  *   @p item->mask or flow default bit-masks.
595  * @param[in] nic_mask
596  *   Bit-masks covering supported fields by the NIC to compare with user mask.
597  * @param[in] size
598  *   Bit-masks size in bytes.
599  * @param[out] error
600  *   Pointer to error structure.
601  *
602  * @return
603  *   0 on success, a negative errno value otherwise and rte_errno is set.
604  */
605 int
606 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
607 			  const uint8_t *mask,
608 			  const uint8_t *nic_mask,
609 			  unsigned int size,
610 			  struct rte_flow_error *error)
611 {
612 	unsigned int i;
613 
614 	MLX5_ASSERT(nic_mask);
615 	for (i = 0; i < size; ++i)
616 		if ((nic_mask[i] | mask[i]) != nic_mask[i])
617 			return rte_flow_error_set(error, ENOTSUP,
618 						  RTE_FLOW_ERROR_TYPE_ITEM,
619 						  item,
620 						  "mask enables non supported"
621 						  " bits");
622 	if (!item->spec && (item->mask || item->last))
623 		return rte_flow_error_set(error, EINVAL,
624 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
625 					  "mask/last without a spec is not"
626 					  " supported");
627 	if (item->spec && item->last) {
628 		uint8_t spec[size];
629 		uint8_t last[size];
630 		unsigned int i;
631 		int ret;
632 
633 		for (i = 0; i < size; ++i) {
634 			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
635 			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
636 		}
637 		ret = memcmp(spec, last, size);
638 		if (ret != 0)
639 			return rte_flow_error_set(error, EINVAL,
640 						  RTE_FLOW_ERROR_TYPE_ITEM,
641 						  item,
642 						  "range is not valid");
643 	}
644 	return 0;
645 }
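/*
 * Check sketch (illustrative): an Ethernet item whose user mask sets only
 * dst.addr_bytes stays within the NIC mask, so the per-byte test
 * (nic_mask[i] | mask[i]) != nic_mask[i] never fires and the item is
 * accepted; a mask enabling bits the NIC mask does not cover is rejected
 * with ENOTSUP instead.
 */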
646 
647 /**
648  * Adjust the hash fields according to the RSS descriptor information.
649  *
650  * @param[in] rss_desc
651  *   Pointer to the mlx5_flow RSS descriptor.
652  * @param[in] tunnel
653  *   1 when the hash field is for a tunnel item.
654  * @param[in] layer_types
655  *   ETH_RSS_* types.
656  * @param[in] hash_fields
657  *   Item hash fields.
658  *
659  * @return
660  *   The hash fields that should be used.
661  */
662 uint64_t
663 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
664 			    int tunnel __rte_unused, uint64_t layer_types,
665 			    uint64_t hash_fields)
666 {
667 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
668 	int rss_request_inner = rss_desc->level >= 2;
669 
670 	/* Check RSS hash level for tunnel. */
671 	if (tunnel && rss_request_inner)
672 		hash_fields |= IBV_RX_HASH_INNER;
673 	else if (tunnel || rss_request_inner)
674 		return 0;
675 #endif
676 	/* Check if requested layer matches RSS hash fields. */
677 	if (!(rss_desc->types & layer_types))
678 		return 0;
679 	return hash_fields;
680 }
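/*
 * Adjustment sketch (illustrative): for an inner (tunnel) IPv4/UDP hash with
 * rss_desc->level == 2 and rss_desc->types containing
 * ETH_RSS_NONFRAG_IPV4_UDP, the Verbs hash fields gain IBV_RX_HASH_INNER;
 * if the requested RSS types do not cover the layer, 0 is returned and the
 * hash type is skipped.
 */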
681 
682 /**
683  * Look up and set the ptype in the Rx queue data. Only a single ptype can
684  * be used; if several tunnel rules are used on this queue, the tunnel
685  * ptype will be cleared.
686  *
687  * @param rxq_ctrl
688  *   Rx queue to update.
689  */
690 static void
691 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
692 {
693 	unsigned int i;
694 	uint32_t tunnel_ptype = 0;
695 
696 	/* Look up for the ptype to use. */
697 	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
698 		if (!rxq_ctrl->flow_tunnels_n[i])
699 			continue;
700 		if (!tunnel_ptype) {
701 			tunnel_ptype = tunnels_info[i].ptype;
702 		} else {
703 			tunnel_ptype = 0;
704 			break;
705 		}
706 	}
707 	rxq_ctrl->rxq.tunnel = tunnel_ptype;
708 }
709 
710 /**
711  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
712  * flow.
713  *
714  * @param[in] dev
715  *   Pointer to the Ethernet device structure.
716  * @param[in] dev_handle
717  *   Pointer to device flow handle structure.
718  */
719 static void
720 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
721 		       struct mlx5_flow_handle *dev_handle)
722 {
723 	struct mlx5_priv *priv = dev->data->dev_private;
724 	const int mark = dev_handle->mark;
725 	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
726 	struct mlx5_hrxq *hrxq;
727 	unsigned int i;
728 
729 	if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
730 		return;
731 	hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
732 			      dev_handle->rix_hrxq);
733 	if (!hrxq)
734 		return;
735 	for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
736 		int idx = hrxq->ind_table->queues[i];
737 		struct mlx5_rxq_ctrl *rxq_ctrl =
738 			container_of((*priv->rxqs)[idx],
739 				     struct mlx5_rxq_ctrl, rxq);
740 
741 		/*
742 		 * To support metadata register copy on Tx loopback,
743 		 * this must always be enabled (metadata may arrive
744 		 * from another port, not only from local flows).
745 		 */
746 		if (priv->config.dv_flow_en &&
747 		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
748 		    mlx5_flow_ext_mreg_supported(dev)) {
749 			rxq_ctrl->rxq.mark = 1;
750 			rxq_ctrl->flow_mark_n = 1;
751 		} else if (mark) {
752 			rxq_ctrl->rxq.mark = 1;
753 			rxq_ctrl->flow_mark_n++;
754 		}
755 		if (tunnel) {
756 			unsigned int j;
757 
758 			/* Increase the counter matching the flow. */
759 			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
760 				if ((tunnels_info[j].tunnel &
761 				     dev_handle->layers) ==
762 				    tunnels_info[j].tunnel) {
763 					rxq_ctrl->flow_tunnels_n[j]++;
764 					break;
765 				}
766 			}
767 			flow_rxq_tunnel_ptype_update(rxq_ctrl);
768 		}
769 	}
770 }
771 
772 /**
773  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
774  *
775  * @param[in] dev
776  *   Pointer to the Ethernet device structure.
777  * @param[in] flow
778  *   Pointer to flow structure.
779  */
780 static void
781 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
782 {
783 	struct mlx5_priv *priv = dev->data->dev_private;
784 	uint32_t handle_idx;
785 	struct mlx5_flow_handle *dev_handle;
786 
787 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
788 		       handle_idx, dev_handle, next)
789 		flow_drv_rxq_flags_set(dev, dev_handle);
790 }
791 
792 /**
793  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
794  * device flow if no other flow uses it with the same kind of request.
795  *
796  * @param dev
797  *   Pointer to Ethernet device.
798  * @param[in] dev_handle
799  *   Pointer to the device flow handle structure.
800  */
801 static void
802 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
803 			struct mlx5_flow_handle *dev_handle)
804 {
805 	struct mlx5_priv *priv = dev->data->dev_private;
806 	const int mark = dev_handle->mark;
807 	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
808 	struct mlx5_hrxq *hrxq;
809 	unsigned int i;
810 
811 	if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
812 		return;
813 	hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
814 			      dev_handle->rix_hrxq);
815 	if (!hrxq)
816 		return;
817 	MLX5_ASSERT(dev->data->dev_started);
818 	for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
819 		int idx = hrxq->ind_table->queues[i];
820 		struct mlx5_rxq_ctrl *rxq_ctrl =
821 			container_of((*priv->rxqs)[idx],
822 				     struct mlx5_rxq_ctrl, rxq);
823 
824 		if (priv->config.dv_flow_en &&
825 		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
826 		    mlx5_flow_ext_mreg_supported(dev)) {
827 			rxq_ctrl->rxq.mark = 1;
828 			rxq_ctrl->flow_mark_n = 1;
829 		} else if (mark) {
830 			rxq_ctrl->flow_mark_n--;
831 			rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
832 		}
833 		if (tunnel) {
834 			unsigned int j;
835 
836 			/* Decrease the counter matching the flow. */
837 			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
838 				if ((tunnels_info[j].tunnel &
839 				     dev_handle->layers) ==
840 				    tunnels_info[j].tunnel) {
841 					rxq_ctrl->flow_tunnels_n[j]--;
842 					break;
843 				}
844 			}
845 			flow_rxq_tunnel_ptype_update(rxq_ctrl);
846 		}
847 	}
848 }
849 
850 /**
851  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
852  * @p flow if no other flow uses it with the same kind of request.
853  *
854  * @param dev
855  *   Pointer to Ethernet device.
856  * @param[in] flow
857  *   Pointer to the flow.
858  */
859 static void
860 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
861 {
862 	struct mlx5_priv *priv = dev->data->dev_private;
863 	uint32_t handle_idx;
864 	struct mlx5_flow_handle *dev_handle;
865 
866 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
867 		       handle_idx, dev_handle, next)
868 		flow_drv_rxq_flags_trim(dev, dev_handle);
869 }
870 
871 /**
872  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
873  *
874  * @param dev
875  *   Pointer to Ethernet device.
876  */
877 static void
878 flow_rxq_flags_clear(struct rte_eth_dev *dev)
879 {
880 	struct mlx5_priv *priv = dev->data->dev_private;
881 	unsigned int i;
882 
883 	for (i = 0; i != priv->rxqs_n; ++i) {
884 		struct mlx5_rxq_ctrl *rxq_ctrl;
885 		unsigned int j;
886 
887 		if (!(*priv->rxqs)[i])
888 			continue;
889 		rxq_ctrl = container_of((*priv->rxqs)[i],
890 					struct mlx5_rxq_ctrl, rxq);
891 		rxq_ctrl->flow_mark_n = 0;
892 		rxq_ctrl->rxq.mark = 0;
893 		for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
894 			rxq_ctrl->flow_tunnels_n[j] = 0;
895 		rxq_ctrl->rxq.tunnel = 0;
896 	}
897 }
898 
899 /**
900  * Set the Rx queue dynamic metadata (mask and offset) for a flow
901  *
902  * @param[in] dev
903  *   Pointer to the Ethernet device structure.
904  */
905 void
906 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
907 {
908 	struct mlx5_priv *priv = dev->data->dev_private;
909 	struct mlx5_rxq_data *data;
910 	unsigned int i;
911 
912 	for (i = 0; i != priv->rxqs_n; ++i) {
913 		if (!(*priv->rxqs)[i])
914 			continue;
915 		data = (*priv->rxqs)[i];
916 		if (!rte_flow_dynf_metadata_avail()) {
917 			data->dynf_meta = 0;
918 			data->flow_meta_mask = 0;
919 			data->flow_meta_offset = -1;
920 		} else {
921 			data->dynf_meta = 1;
922 			data->flow_meta_mask = rte_flow_dynf_metadata_mask;
923 			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
924 		}
925 	}
926 }
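/*
 * Application-side sketch (illustrative, not part of this driver): the
 * dynamic metadata field/flag consulted above only becomes available after
 * the application registers it, typically before the port is started, e.g.
 *
 *   if (rte_flow_dynf_metadata_register() < 0)
 *           rte_panic("cannot register metadata dynfield");
 */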
927 
928 /*
929  * Return a pointer to the desired action in the list of actions.
930  *
931  * @param[in] actions
932  *   The list of actions to search the action in.
933  * @param[in] action
934  *   The action to find.
935  *
936  * @return
937  *   Pointer to the action in the list, if found. NULL otherwise.
938  */
939 const struct rte_flow_action *
940 mlx5_flow_find_action(const struct rte_flow_action *actions,
941 		      enum rte_flow_action_type action)
942 {
943 	if (actions == NULL)
944 		return NULL;
945 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
946 		if (actions->type == action)
947 			return actions;
948 	return NULL;
949 }
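/*
 * Usage sketch (illustrative): picking the RSS configuration out of an action
 * list before translating the flow:
 *
 *   const struct rte_flow_action *act =
 *           mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
 *   const struct rte_flow_action_rss *rss = act ? act->conf : NULL;
 */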
950 
951 /*
952  * Validate the flag action.
953  *
954  * @param[in] action_flags
955  *   Bit-fields that holds the actions detected until now.
956  * @param[in] attr
957  *   Attributes of flow that includes this action.
958  * @param[out] error
959  *   Pointer to error structure.
960  *
961  * @return
962  *   0 on success, a negative errno value otherwise and rte_errno is set.
963  */
964 int
965 mlx5_flow_validate_action_flag(uint64_t action_flags,
966 			       const struct rte_flow_attr *attr,
967 			       struct rte_flow_error *error)
968 {
969 	if (action_flags & MLX5_FLOW_ACTION_MARK)
970 		return rte_flow_error_set(error, EINVAL,
971 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
972 					  "can't mark and flag in same flow");
973 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
974 		return rte_flow_error_set(error, EINVAL,
975 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
976 					  "can't have 2 flag"
977 					  " actions in same flow");
978 	if (attr->egress)
979 		return rte_flow_error_set(error, ENOTSUP,
980 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
981 					  "flag action not supported for "
982 					  "egress");
983 	return 0;
984 }
985 
986 /*
987  * Validate the mark action.
988  *
989  * @param[in] action
990  *   Pointer to the queue action.
991  * @param[in] action_flags
992  *   Bit-fields that holds the actions detected until now.
993  * @param[in] attr
994  *   Attributes of flow that includes this action.
995  * @param[out] error
996  *   Pointer to error structure.
997  *
998  * @return
999  *   0 on success, a negative errno value otherwise and rte_errno is set.
1000  */
1001 int
1002 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1003 			       uint64_t action_flags,
1004 			       const struct rte_flow_attr *attr,
1005 			       struct rte_flow_error *error)
1006 {
1007 	const struct rte_flow_action_mark *mark = action->conf;
1008 
1009 	if (!mark)
1010 		return rte_flow_error_set(error, EINVAL,
1011 					  RTE_FLOW_ERROR_TYPE_ACTION,
1012 					  action,
1013 					  "configuration cannot be null");
1014 	if (mark->id >= MLX5_FLOW_MARK_MAX)
1015 		return rte_flow_error_set(error, EINVAL,
1016 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1017 					  &mark->id,
1018 					  "mark id must be in 0 <= id < "
1019 					  RTE_STR(MLX5_FLOW_MARK_MAX));
1020 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
1021 		return rte_flow_error_set(error, EINVAL,
1022 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1023 					  "can't flag and mark in same flow");
1024 	if (action_flags & MLX5_FLOW_ACTION_MARK)
1025 		return rte_flow_error_set(error, EINVAL,
1026 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1027 					  "can't have 2 mark actions in same"
1028 					  " flow");
1029 	if (attr->egress)
1030 		return rte_flow_error_set(error, ENOTSUP,
1031 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1032 					  "mark action not supported for "
1033 					  "egress");
1034 	return 0;
1035 }
1036 
1037 /*
1038  * Validate the drop action.
1039  *
1040  * @param[in] action_flags
1041  *   Bit-fields that holds the actions detected until now.
1042  * @param[in] attr
1043  *   Attributes of flow that includes this action.
1044  * @param[out] error
1045  *   Pointer to error structure.
1046  *
1047  * @return
1048  *   0 on success, a negative errno value otherwise and rte_errno is set.
1049  */
1050 int
1051 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1052 			       const struct rte_flow_attr *attr,
1053 			       struct rte_flow_error *error)
1054 {
1055 	if (attr->egress)
1056 		return rte_flow_error_set(error, ENOTSUP,
1057 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1058 					  "drop action not supported for "
1059 					  "egress");
1060 	return 0;
1061 }
1062 
1063 /*
1064  * Validate the queue action.
1065  *
1066  * @param[in] action
1067  *   Pointer to the queue action.
1068  * @param[in] action_flags
1069  *   Bit-fields that holds the actions detected until now.
1070  * @param[in] dev
1071  *   Pointer to the Ethernet device structure.
1072  * @param[in] attr
1073  *   Attributes of flow that includes this action.
1074  * @param[out] error
1075  *   Pointer to error structure.
1076  *
1077  * @return
1078  *   0 on success, a negative errno value otherwise and rte_errno is set.
1079  */
1080 int
1081 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1082 				uint64_t action_flags,
1083 				struct rte_eth_dev *dev,
1084 				const struct rte_flow_attr *attr,
1085 				struct rte_flow_error *error)
1086 {
1087 	struct mlx5_priv *priv = dev->data->dev_private;
1088 	const struct rte_flow_action_queue *queue = action->conf;
1089 
1090 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1091 		return rte_flow_error_set(error, EINVAL,
1092 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1093 					  "can't have 2 fate actions in"
1094 					  " same flow");
1095 	if (!priv->rxqs_n)
1096 		return rte_flow_error_set(error, EINVAL,
1097 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1098 					  NULL, "No Rx queues configured");
1099 	if (queue->index >= priv->rxqs_n)
1100 		return rte_flow_error_set(error, EINVAL,
1101 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1102 					  &queue->index,
1103 					  "queue index out of range");
1104 	if (!(*priv->rxqs)[queue->index])
1105 		return rte_flow_error_set(error, EINVAL,
1106 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1107 					  &queue->index,
1108 					  "queue is not configured");
1109 	if (attr->egress)
1110 		return rte_flow_error_set(error, ENOTSUP,
1111 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1112 					  "queue action not supported for "
1113 					  "egress");
1114 	return 0;
1115 }
1116 
1117 /*
1118  * Validate the rss action.
1119  *
1120  * @param[in] action
1121  *   Pointer to the queue action.
1122  * @param[in] action_flags
1123  *   Bit-fields that holds the actions detected until now.
1124  * @param[in] dev
1125  *   Pointer to the Ethernet device structure.
1126  * @param[in] attr
1127  *   Attributes of flow that includes this action.
1128  * @param[in] item_flags
1129  *   Items that were detected.
1130  * @param[out] error
1131  *   Pointer to error structure.
1132  *
1133  * @return
1134  *   0 on success, a negative errno value otherwise and rte_errno is set.
1135  */
1136 int
1137 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1138 			      uint64_t action_flags,
1139 			      struct rte_eth_dev *dev,
1140 			      const struct rte_flow_attr *attr,
1141 			      uint64_t item_flags,
1142 			      struct rte_flow_error *error)
1143 {
1144 	struct mlx5_priv *priv = dev->data->dev_private;
1145 	const struct rte_flow_action_rss *rss = action->conf;
1146 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1147 	unsigned int i;
1148 
1149 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1150 		return rte_flow_error_set(error, EINVAL,
1151 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1152 					  "can't have 2 fate actions"
1153 					  " in same flow");
1154 	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1155 	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1156 		return rte_flow_error_set(error, ENOTSUP,
1157 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1158 					  &rss->func,
1159 					  "RSS hash function not supported");
1160 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1161 	if (rss->level > 2)
1162 #else
1163 	if (rss->level > 1)
1164 #endif
1165 		return rte_flow_error_set(error, ENOTSUP,
1166 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1167 					  &rss->level,
1168 					  "tunnel RSS is not supported");
1169 	/* allow RSS key_len 0 in case of NULL (default) RSS key. */
1170 	if (rss->key_len == 0 && rss->key != NULL)
1171 		return rte_flow_error_set(error, ENOTSUP,
1172 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1173 					  &rss->key_len,
1174 					  "RSS hash key length 0");
1175 	if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1176 		return rte_flow_error_set(error, ENOTSUP,
1177 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1178 					  &rss->key_len,
1179 					  "RSS hash key too small");
1180 	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1181 		return rte_flow_error_set(error, ENOTSUP,
1182 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1183 					  &rss->key_len,
1184 					  "RSS hash key too large");
1185 	if (rss->queue_num > priv->config.ind_table_max_size)
1186 		return rte_flow_error_set(error, ENOTSUP,
1187 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1188 					  &rss->queue_num,
1189 					  "number of queues too large");
1190 	if (rss->types & MLX5_RSS_HF_MASK)
1191 		return rte_flow_error_set(error, ENOTSUP,
1192 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1193 					  &rss->types,
1194 					  "some RSS protocols are not"
1195 					  " supported");
1196 	if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) &&
1197 	    !(rss->types & ETH_RSS_IP))
1198 		return rte_flow_error_set(error, EINVAL,
1199 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1200 					  "L3 partial RSS requested but L3 RSS"
1201 					  " type not specified");
1202 	if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) &&
1203 	    !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP)))
1204 		return rte_flow_error_set(error, EINVAL,
1205 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1206 					  "L4 partial RSS requested but L4 RSS"
1207 					  " type not specified");
1208 	if (!priv->rxqs_n)
1209 		return rte_flow_error_set(error, EINVAL,
1210 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1211 					  NULL, "No Rx queues configured");
1212 	if (!rss->queue_num)
1213 		return rte_flow_error_set(error, EINVAL,
1214 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1215 					  NULL, "No queues configured");
1216 	for (i = 0; i != rss->queue_num; ++i) {
1217 		if (rss->queue[i] >= priv->rxqs_n)
1218 			return rte_flow_error_set
1219 				(error, EINVAL,
1220 				 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1221 				 &rss->queue[i], "queue index out of range");
1222 		if (!(*priv->rxqs)[rss->queue[i]])
1223 			return rte_flow_error_set
1224 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1225 				 &rss->queue[i], "queue is not configured");
1226 	}
1227 	if (attr->egress)
1228 		return rte_flow_error_set(error, ENOTSUP,
1229 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1230 					  "rss action not supported for "
1231 					  "egress");
1232 	if (rss->level > 1 && !tunnel)
1233 		return rte_flow_error_set(error, EINVAL,
1234 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1235 					  "inner RSS is not supported for "
1236 					  "non-tunnel flows");
1237 	return 0;
1238 }
1239 
1240 /*
1241  * Validate the default miss action.
1242  *
1243  * @param[in] action_flags
1244  *   Bit-fields that holds the actions detected until now.
1245  * @param[out] error
1246  *   Pointer to error structure.
1247  *
1248  * @return
1249  *   0 on success, a negative errno value otherwise and rte_errno is set.
1250  */
1251 int
1252 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
1253 				const struct rte_flow_attr *attr,
1254 				struct rte_flow_error *error)
1255 {
1256 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1257 		return rte_flow_error_set(error, EINVAL,
1258 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1259 					  "can't have 2 fate actions in"
1260 					  " same flow");
1261 	if (attr->egress)
1262 		return rte_flow_error_set(error, ENOTSUP,
1263 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1264 					  "default miss action not supported "
1265 					  "for egress");
1266 	if (attr->group)
1267 		return rte_flow_error_set(error, ENOTSUP,
1268 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
1269 					  "only group 0 is supported");
1270 	if (attr->transfer)
1271 		return rte_flow_error_set(error, ENOTSUP,
1272 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1273 					  NULL, "transfer is not supported");
1274 	return 0;
1275 }
1276 
1277 /*
1278  * Validate the count action.
1279  *
1280  * @param[in] dev
1281  *   Pointer to the Ethernet device structure.
1282  * @param[in] attr
1283  *   Attributes of flow that includes this action.
1284  * @param[out] error
1285  *   Pointer to error structure.
1286  *
1287  * @return
1288  *   0 on success, a negative errno value otherwise and rte_errno is set.
1289  */
1290 int
1291 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
1292 				const struct rte_flow_attr *attr,
1293 				struct rte_flow_error *error)
1294 {
1295 	if (attr->egress)
1296 		return rte_flow_error_set(error, ENOTSUP,
1297 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1298 					  "count action not supported for "
1299 					  "egress");
1300 	return 0;
1301 }
1302 
1303 /**
1304  * Verify the @p attributes will be correctly understood by the NIC and store
1305  * them in the @p flow if everything is correct.
1306  *
1307  * @param[in] dev
1308  *   Pointer to the Ethernet device structure.
1309  * @param[in] attributes
1310  *   Pointer to flow attributes
1311  * @param[out] error
1312  *   Pointer to error structure.
1313  *
1314  * @return
1315  *   0 on success, a negative errno value otherwise and rte_errno is set.
1316  */
1317 int
1318 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
1319 			      const struct rte_flow_attr *attributes,
1320 			      struct rte_flow_error *error)
1321 {
1322 	struct mlx5_priv *priv = dev->data->dev_private;
1323 	uint32_t priority_max = priv->config.flow_prio - 1;
1324 
1325 	if (attributes->group)
1326 		return rte_flow_error_set(error, ENOTSUP,
1327 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
1328 					  NULL, "groups is not supported");
1329 	if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
1330 	    attributes->priority >= priority_max)
1331 		return rte_flow_error_set(error, ENOTSUP,
1332 					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1333 					  NULL, "priority out of range");
1334 	if (attributes->egress)
1335 		return rte_flow_error_set(error, ENOTSUP,
1336 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1337 					  "egress is not supported");
1338 	if (attributes->transfer && !priv->config.dv_esw_en)
1339 		return rte_flow_error_set(error, ENOTSUP,
1340 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1341 					  NULL, "transfer is not supported");
1342 	if (!attributes->ingress)
1343 		return rte_flow_error_set(error, EINVAL,
1344 					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1345 					  NULL,
1346 					  "ingress attribute is mandatory");
1347 	return 0;
1348 }
1349 
1350 /**
1351  * Validate ICMP6 item.
1352  *
1353  * @param[in] item
1354  *   Item specification.
1355  * @param[in] item_flags
1356  *   Bit-fields that holds the items detected until now.
1357  * @param[out] error
1358  *   Pointer to error structure.
1359  *
1360  * @return
1361  *   0 on success, a negative errno value otherwise and rte_errno is set.
1362  */
1363 int
1364 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
1365 			       uint64_t item_flags,
1366 			       uint8_t target_protocol,
1367 			       struct rte_flow_error *error)
1368 {
1369 	const struct rte_flow_item_icmp6 *mask = item->mask;
1370 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1371 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1372 				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1373 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1374 				      MLX5_FLOW_LAYER_OUTER_L4;
1375 	int ret;
1376 
1377 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
1378 		return rte_flow_error_set(error, EINVAL,
1379 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1380 					  "protocol filtering not compatible"
1381 					  " with ICMP6 layer");
1382 	if (!(item_flags & l3m))
1383 		return rte_flow_error_set(error, EINVAL,
1384 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1385 					  "IPv6 is mandatory to filter on"
1386 					  " ICMP6");
1387 	if (item_flags & l4m)
1388 		return rte_flow_error_set(error, EINVAL,
1389 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1390 					  "multiple L4 layers not supported");
1391 	if (!mask)
1392 		mask = &rte_flow_item_icmp6_mask;
1393 	ret = mlx5_flow_item_acceptable
1394 		(item, (const uint8_t *)mask,
1395 		 (const uint8_t *)&rte_flow_item_icmp6_mask,
1396 		 sizeof(struct rte_flow_item_icmp6), error);
1397 	if (ret < 0)
1398 		return ret;
1399 	return 0;
1400 }
1401 
1402 /**
1403  * Validate ICMP item.
1404  *
1405  * @param[in] item
1406  *   Item specification.
1407  * @param[in] item_flags
1408  *   Bit-fields that holds the items detected until now.
1409  * @param[out] error
1410  *   Pointer to error structure.
1411  *
1412  * @return
1413  *   0 on success, a negative errno value otherwise and rte_errno is set.
1414  */
1415 int
1416 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
1417 			     uint64_t item_flags,
1418 			     uint8_t target_protocol,
1419 			     struct rte_flow_error *error)
1420 {
1421 	const struct rte_flow_item_icmp *mask = item->mask;
1422 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1423 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1424 				      MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1425 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1426 				      MLX5_FLOW_LAYER_OUTER_L4;
1427 	int ret;
1428 
1429 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
1430 		return rte_flow_error_set(error, EINVAL,
1431 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1432 					  "protocol filtering not compatible"
1433 					  " with ICMP layer");
1434 	if (!(item_flags & l3m))
1435 		return rte_flow_error_set(error, EINVAL,
1436 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1437 					  "IPv4 is mandatory to filter"
1438 					  " on ICMP");
1439 	if (item_flags & l4m)
1440 		return rte_flow_error_set(error, EINVAL,
1441 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1442 					  "multiple L4 layers not supported");
1443 	if (!mask)
1444 		mask = &rte_flow_item_icmp_mask;
1445 	ret = mlx5_flow_item_acceptable
1446 		(item, (const uint8_t *)mask,
1447 		 (const uint8_t *)&rte_flow_item_icmp_mask,
1448 		 sizeof(struct rte_flow_item_icmp), error);
1449 	if (ret < 0)
1450 		return ret;
1451 	return 0;
1452 }
1453 
1454 /**
1455  * Validate Ethernet item.
1456  *
1457  * @param[in] item
1458  *   Item specification.
1459  * @param[in] item_flags
1460  *   Bit-fields that holds the items detected until now.
1461  * @param[out] error
1462  *   Pointer to error structure.
1463  *
1464  * @return
1465  *   0 on success, a negative errno value otherwise and rte_errno is set.
1466  */
1467 int
1468 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
1469 			    uint64_t item_flags,
1470 			    struct rte_flow_error *error)
1471 {
1472 	const struct rte_flow_item_eth *mask = item->mask;
1473 	const struct rte_flow_item_eth nic_mask = {
1474 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1475 		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1476 		.type = RTE_BE16(0xffff),
1477 	};
1478 	int ret;
1479 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1480 	const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2	:
1481 				       MLX5_FLOW_LAYER_OUTER_L2;
1482 
1483 	if (item_flags & ethm)
1484 		return rte_flow_error_set(error, ENOTSUP,
1485 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1486 					  "multiple L2 layers not supported");
1487 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
1488 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
1489 		return rte_flow_error_set(error, EINVAL,
1490 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1491 					  "L2 layer should not follow "
1492 					  "L3 layers");
1493 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
1494 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
1495 		return rte_flow_error_set(error, EINVAL,
1496 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1497 					  "L2 layer should not follow VLAN");
1498 	if (!mask)
1499 		mask = &rte_flow_item_eth_mask;
1500 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1501 					(const uint8_t *)&nic_mask,
1502 					sizeof(struct rte_flow_item_eth),
1503 					error);
1504 	return ret;
1505 }
1506 
1507 /**
1508  * Validate VLAN item.
1509  *
1510  * @param[in] item
1511  *   Item specification.
1512  * @param[in] item_flags
1513  *   Bit-fields that holds the items detected until now.
1514  * @param[in] dev
1515  *   Ethernet device flow is being created on.
1516  * @param[out] error
1517  *   Pointer to error structure.
1518  *
1519  * @return
1520  *   0 on success, a negative errno value otherwise and rte_errno is set.
1521  */
1522 int
1523 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
1524 			     uint64_t item_flags,
1525 			     struct rte_eth_dev *dev,
1526 			     struct rte_flow_error *error)
1527 {
1528 	const struct rte_flow_item_vlan *spec = item->spec;
1529 	const struct rte_flow_item_vlan *mask = item->mask;
1530 	const struct rte_flow_item_vlan nic_mask = {
1531 		.tci = RTE_BE16(UINT16_MAX),
1532 		.inner_type = RTE_BE16(UINT16_MAX),
1533 	};
1534 	uint16_t vlan_tag = 0;
1535 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1536 	int ret;
1537 	const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
1538 					MLX5_FLOW_LAYER_INNER_L4) :
1539 				       (MLX5_FLOW_LAYER_OUTER_L3 |
1540 					MLX5_FLOW_LAYER_OUTER_L4);
1541 	const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1542 					MLX5_FLOW_LAYER_OUTER_VLAN;
1543 
1544 	if (item_flags & vlanm)
1545 		return rte_flow_error_set(error, EINVAL,
1546 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1547 					  "multiple VLAN layers not supported");
1548 	else if ((item_flags & l34m) != 0)
1549 		return rte_flow_error_set(error, EINVAL,
1550 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1551 					  "VLAN cannot follow L3/L4 layer");
1552 	if (!mask)
1553 		mask = &rte_flow_item_vlan_mask;
1554 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1555 					(const uint8_t *)&nic_mask,
1556 					sizeof(struct rte_flow_item_vlan),
1557 					error);
1558 	if (ret)
1559 		return ret;
1560 	if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
1561 		struct mlx5_priv *priv = dev->data->dev_private;
1562 
1563 		if (priv->vmwa_context) {
1564 			/*
1565 			 * Non-NULL context means we have a virtual machine
1566 			 * with SR-IOV enabled, so a VLAN interface must be
1567 			 * created to make the hypervisor set up the E-Switch
1568 			 * vport context correctly. We avoid creating multiple
1569 			 * VLAN interfaces, so we cannot support a VLAN tag mask.
1570 			 */
1571 			return rte_flow_error_set(error, EINVAL,
1572 						  RTE_FLOW_ERROR_TYPE_ITEM,
1573 						  item,
1574 						  "VLAN tag mask is not"
1575 						  " supported in virtual"
1576 						  " environment");
1577 		}
1578 	}
1579 	if (spec) {
1580 		vlan_tag = spec->tci;
1581 		vlan_tag &= mask->tci;
1582 	}
1583 	/*
1584 	 * From verbs perspective an empty VLAN is equivalent
1585 	 * to a packet without VLAN layer.
1586 	 */
1587 	if (!vlan_tag)
1588 		return rte_flow_error_set(error, EINVAL,
1589 					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1590 					  item->spec,
1591 					  "VLAN cannot be empty");
1592 	return 0;
1593 }
1594 
1595 /**
1596  * Validate IPV4 item.
1597  *
1598  * @param[in] item
1599  *   Item specification.
1600  * @param[in] item_flags
1601  *   Bit-fields that holds the items detected until now.
1602  * @param[in] acc_mask
1603  *   Acceptable mask, if NULL default internal default mask
1604  *   will be used to check whether item fields are supported.
1605  * @param[out] error
1606  *   Pointer to error structure.
1607  *
1608  * @return
1609  *   0 on success, a negative errno value otherwise and rte_errno is set.
1610  */
1611 int
1612 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
1613 			     uint64_t item_flags,
1614 			     uint64_t last_item,
1615 			     uint16_t ether_type,
1616 			     const struct rte_flow_item_ipv4 *acc_mask,
1617 			     struct rte_flow_error *error)
1618 {
1619 	const struct rte_flow_item_ipv4 *mask = item->mask;
1620 	const struct rte_flow_item_ipv4 *spec = item->spec;
1621 	const struct rte_flow_item_ipv4 nic_mask = {
1622 		.hdr = {
1623 			.src_addr = RTE_BE32(0xffffffff),
1624 			.dst_addr = RTE_BE32(0xffffffff),
1625 			.type_of_service = 0xff,
1626 			.next_proto_id = 0xff,
1627 		},
1628 	};
1629 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1630 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1631 				      MLX5_FLOW_LAYER_OUTER_L3;
1632 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1633 				      MLX5_FLOW_LAYER_OUTER_L4;
1634 	int ret;
1635 	uint8_t next_proto = 0xFF;
1636 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1637 				  MLX5_FLOW_LAYER_OUTER_VLAN |
1638 				  MLX5_FLOW_LAYER_INNER_VLAN);
1639 
1640 	if ((last_item & l2_vlan) && ether_type &&
1641 	    ether_type != RTE_ETHER_TYPE_IPV4)
1642 		return rte_flow_error_set(error, EINVAL,
1643 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1644 					  "IPv4 cannot follow L2/VLAN layer "
1645 					  "which ether type is not IPv4");
1646 	if (item_flags & MLX5_FLOW_LAYER_IPIP) {
1647 		if (mask && spec)
1648 			next_proto = mask->hdr.next_proto_id &
1649 				     spec->hdr.next_proto_id;
1650 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1651 			return rte_flow_error_set(error, EINVAL,
1652 						  RTE_FLOW_ERROR_TYPE_ITEM,
1653 						  item,
1654 						  "multiple tunnel "
1655 						  "not supported");
1656 	}
1657 	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
1658 		return rte_flow_error_set(error, EINVAL,
1659 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1660 					  "wrong tunnel type - IPv6 specified "
1661 					  "but IPv4 item provided");
1662 	if (item_flags & l3m)
1663 		return rte_flow_error_set(error, ENOTSUP,
1664 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1665 					  "multiple L3 layers not supported");
1666 	else if (item_flags & l4m)
1667 		return rte_flow_error_set(error, EINVAL,
1668 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1669 					  "L3 cannot follow an L4 layer.");
1670 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
1671 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
1672 		return rte_flow_error_set(error, EINVAL,
1673 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1674 					  "L3 cannot follow an NVGRE layer.");
1675 	if (!mask)
1676 		mask = &rte_flow_item_ipv4_mask;
1677 	else if (mask->hdr.next_proto_id != 0 &&
1678 		 mask->hdr.next_proto_id != 0xff)
1679 		return rte_flow_error_set(error, EINVAL,
1680 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1681 					  "partial mask is not supported"
1682 					  " for protocol");
1683 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1684 					acc_mask ? (const uint8_t *)acc_mask
1685 						 : (const uint8_t *)&nic_mask,
1686 					sizeof(struct rte_flow_item_ipv4),
1687 					error);
1688 	if (ret < 0)
1689 		return ret;
1690 	return 0;
1691 }
1692 
1693 /**
1694  * Validate IPV6 item.
1695  *
1696  * @param[in] item
1697  *   Item specification.
1698  * @param[in] item_flags
1699  *   Bit-fields that holds the items detected until now.
1700  * @param[in] acc_mask
1701  *   Acceptable mask, if NULL default internal default mask
1702  *   will be used to check whether item fields are supported.
1703  * @param[out] error
1704  *   Pointer to error structure.
1705  *
1706  * @return
1707  *   0 on success, a negative errno value otherwise and rte_errno is set.
1708  */
1709 int
1710 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
1711 			     uint64_t item_flags,
1712 			     uint64_t last_item,
1713 			     uint16_t ether_type,
1714 			     const struct rte_flow_item_ipv6 *acc_mask,
1715 			     struct rte_flow_error *error)
1716 {
1717 	const struct rte_flow_item_ipv6 *mask = item->mask;
1718 	const struct rte_flow_item_ipv6 *spec = item->spec;
1719 	const struct rte_flow_item_ipv6 nic_mask = {
1720 		.hdr = {
1721 			.src_addr =
1722 				"\xff\xff\xff\xff\xff\xff\xff\xff"
1723 				"\xff\xff\xff\xff\xff\xff\xff\xff",
1724 			.dst_addr =
1725 				"\xff\xff\xff\xff\xff\xff\xff\xff"
1726 				"\xff\xff\xff\xff\xff\xff\xff\xff",
1727 			.vtc_flow = RTE_BE32(0xffffffff),
1728 			.proto = 0xff,
1729 		},
1730 	};
1731 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1732 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1733 				      MLX5_FLOW_LAYER_OUTER_L3;
1734 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1735 				      MLX5_FLOW_LAYER_OUTER_L4;
1736 	int ret;
1737 	uint8_t next_proto = 0xFF;
1738 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1739 				  MLX5_FLOW_LAYER_OUTER_VLAN |
1740 				  MLX5_FLOW_LAYER_INNER_VLAN);
1741 
1742 	if ((last_item & l2_vlan) && ether_type &&
1743 	    ether_type != RTE_ETHER_TYPE_IPV6)
1744 		return rte_flow_error_set(error, EINVAL,
1745 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1746 					  "IPv6 cannot follow L2/VLAN layer "
1747 					  "whose ether type is not IPv6");
1748 	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) {
1749 		if (mask && spec)
1750 			next_proto = mask->hdr.proto & spec->hdr.proto;
1751 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1752 			return rte_flow_error_set(error, EINVAL,
1753 						  RTE_FLOW_ERROR_TYPE_ITEM,
1754 						  item,
1755 						  "multiple tunnel "
1756 						  "not supported");
1757 	}
1758 	if (item_flags & MLX5_FLOW_LAYER_IPIP)
1759 		return rte_flow_error_set(error, EINVAL,
1760 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1761 					  "wrong tunnel type - IPv4 specified "
1762 					  "but IPv6 item provided");
1763 	if (item_flags & l3m)
1764 		return rte_flow_error_set(error, ENOTSUP,
1765 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1766 					  "multiple L3 layers not supported");
1767 	else if (item_flags & l4m)
1768 		return rte_flow_error_set(error, EINVAL,
1769 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1770 					  "L3 cannot follow an L4 layer.");
1771 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
1772 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
1773 		return rte_flow_error_set(error, EINVAL,
1774 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1775 					  "L3 cannot follow an NVGRE layer.");
1776 	if (!mask)
1777 		mask = &rte_flow_item_ipv6_mask;
1778 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1779 					acc_mask ? (const uint8_t *)acc_mask
1780 						 : (const uint8_t *)&nic_mask,
1781 					sizeof(struct rte_flow_item_ipv6),
1782 					error);
1783 	if (ret < 0)
1784 		return ret;
1785 	return 0;
1786 }
1787 
1788 /**
1789  * Validate UDP item.
1790  *
1791  * @param[in] item
1792  *   Item specification.
1793  * @param[in] item_flags
1794  *   Bit-fields that hold the items detected until now.
1795  * @param[in] target_protocol
1796  *   The next protocol in the previous item.
1799  * @param[out] error
1800  *   Pointer to error structure.
1801  *
1802  * @return
1803  *   0 on success, a negative errno value otherwise and rte_errno is set.
1804  */
1805 int
1806 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
1807 			    uint64_t item_flags,
1808 			    uint8_t target_protocol,
1809 			    struct rte_flow_error *error)
1810 {
1811 	const struct rte_flow_item_udp *mask = item->mask;
1812 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1813 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1814 				      MLX5_FLOW_LAYER_OUTER_L3;
1815 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1816 				      MLX5_FLOW_LAYER_OUTER_L4;
1817 	int ret;
1818 
1819 	if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
1820 		return rte_flow_error_set(error, EINVAL,
1821 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1822 					  "protocol filtering not compatible"
1823 					  " with UDP layer");
1824 	if (!(item_flags & l3m))
1825 		return rte_flow_error_set(error, EINVAL,
1826 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1827 					  "L3 is mandatory to filter on L4");
1828 	if (item_flags & l4m)
1829 		return rte_flow_error_set(error, EINVAL,
1830 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1831 					  "multiple L4 layers not supported");
1832 	if (!mask)
1833 		mask = &rte_flow_item_udp_mask;
1834 	ret = mlx5_flow_item_acceptable
1835 		(item, (const uint8_t *)mask,
1836 		 (const uint8_t *)&rte_flow_item_udp_mask,
1837 		 sizeof(struct rte_flow_item_udp), error);
1838 	if (ret < 0)
1839 		return ret;
1840 	return 0;
1841 }
1842 
1843 /**
1844  * Validate TCP item.
1845  *
1846  * @param[in] item
1847  *   Item specification.
1848  * @param[in] item_flags
1849  *   Bit-fields that hold the items detected until now.
1850  * @param[in] target_protocol
1851  *   The next protocol in the previous item.
1852  * @param[out] error
1853  *   Pointer to error structure.
1854  *
1855  * @return
1856  *   0 on success, a negative errno value otherwise and rte_errno is set.
1857  */
1858 int
1859 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
1860 			    uint64_t item_flags,
1861 			    uint8_t target_protocol,
1862 			    const struct rte_flow_item_tcp *flow_mask,
1863 			    struct rte_flow_error *error)
1864 {
1865 	const struct rte_flow_item_tcp *mask = item->mask;
1866 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1867 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1868 				      MLX5_FLOW_LAYER_OUTER_L3;
1869 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1870 				      MLX5_FLOW_LAYER_OUTER_L4;
1871 	int ret;
1872 
1873 	MLX5_ASSERT(flow_mask);
1874 	if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
1875 		return rte_flow_error_set(error, EINVAL,
1876 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1877 					  "protocol filtering not compatible"
1878 					  " with TCP layer");
1879 	if (!(item_flags & l3m))
1880 		return rte_flow_error_set(error, EINVAL,
1881 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1882 					  "L3 is mandatory to filter on L4");
1883 	if (item_flags & l4m)
1884 		return rte_flow_error_set(error, EINVAL,
1885 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1886 					  "multiple L4 layers not supported");
1887 	if (!mask)
1888 		mask = &rte_flow_item_tcp_mask;
1889 	ret = mlx5_flow_item_acceptable
1890 		(item, (const uint8_t *)mask,
1891 		 (const uint8_t *)flow_mask,
1892 		 sizeof(struct rte_flow_item_tcp), error);
1893 	if (ret < 0)
1894 		return ret;
1895 	return 0;
1896 }
1897 
1898 /**
1899  * Validate VXLAN item.
1900  *
1901  * @param[in] item
1902  *   Item specification.
1903  * @param[in] item_flags
1904  *   Bit-fields that hold the items detected until now.
1907  * @param[out] error
1908  *   Pointer to error structure.
1909  *
1910  * @return
1911  *   0 on success, a negative errno value otherwise and rte_errno is set.
1912  */
1913 int
1914 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
1915 			      uint64_t item_flags,
1916 			      struct rte_flow_error *error)
1917 {
1918 	const struct rte_flow_item_vxlan *spec = item->spec;
1919 	const struct rte_flow_item_vxlan *mask = item->mask;
1920 	int ret;
1921 	union vni {
1922 		uint32_t vlan_id;
1923 		uint8_t vni[4];
1924 	} id = { .vlan_id = 0, };
1925 
1926 
1927 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1928 		return rte_flow_error_set(error, ENOTSUP,
1929 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1930 					  "multiple tunnel layers not"
1931 					  " supported");
1932 	/*
1933 	 * Verify only UDPv4 is present as defined in
1934 	 * https://tools.ietf.org/html/rfc7348
1935 	 */
1936 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1937 		return rte_flow_error_set(error, EINVAL,
1938 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1939 					  "no outer UDP layer found");
1940 	if (!mask)
1941 		mask = &rte_flow_item_vxlan_mask;
1942 	ret = mlx5_flow_item_acceptable
1943 		(item, (const uint8_t *)mask,
1944 		 (const uint8_t *)&rte_flow_item_vxlan_mask,
1945 		 sizeof(struct rte_flow_item_vxlan),
1946 		 error);
1947 	if (ret < 0)
1948 		return ret;
1949 	if (spec) {
1950 		memcpy(&id.vni[1], spec->vni, 3);
1951 		memcpy(&id.vni[1], mask->vni, 3);
1952 	}
1953 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
1954 		return rte_flow_error_set(error, ENOTSUP,
1955 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1956 					  "VXLAN tunnel must be fully defined");
1957 	return 0;
1958 }
1959 
1960 /**
1961  * Validate VXLAN_GPE item.
1962  *
1963  * @param[in] item
1964  *   Item specification.
1965  * @param[in] item_flags
1966  *   Bit-fields that hold the items detected until now.
1967  * @param[in] dev
1968  *   Pointer to the rte_eth_dev structure.
1971  * @param[out] error
1972  *   Pointer to error structure.
1973  *
1974  * @return
1975  *   0 on success, a negative errno value otherwise and rte_errno is set.
1976  */
1977 int
1978 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
1979 				  uint64_t item_flags,
1980 				  struct rte_eth_dev *dev,
1981 				  struct rte_flow_error *error)
1982 {
1983 	struct mlx5_priv *priv = dev->data->dev_private;
1984 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1985 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1986 	int ret;
1987 	union vni {
1988 		uint32_t vlan_id;
1989 		uint8_t vni[4];
1990 	} id = { .vlan_id = 0, };
1991 
1992 	if (!priv->config.l3_vxlan_en)
1993 		return rte_flow_error_set(error, ENOTSUP,
1994 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1995 					  "L3 VXLAN is not enabled by device"
1996 					  " parameter and/or not configured in"
1997 					  " firmware");
1998 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1999 		return rte_flow_error_set(error, ENOTSUP,
2000 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2001 					  "multiple tunnel layers not"
2002 					  " supported");
2003 	/*
2004 	 * Verify only UDPv4 is present as defined in
2005 	 * https://tools.ietf.org/html/rfc7348
2006 	 */
2007 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2008 		return rte_flow_error_set(error, EINVAL,
2009 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2010 					  "no outer UDP layer found");
2011 	if (!mask)
2012 		mask = &rte_flow_item_vxlan_gpe_mask;
2013 	ret = mlx5_flow_item_acceptable
2014 		(item, (const uint8_t *)mask,
2015 		 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2016 		 sizeof(struct rte_flow_item_vxlan_gpe),
2017 		 error);
2018 	if (ret < 0)
2019 		return ret;
2020 	if (spec) {
2021 		if (spec->protocol)
2022 			return rte_flow_error_set(error, ENOTSUP,
2023 						  RTE_FLOW_ERROR_TYPE_ITEM,
2024 						  item,
2025 						  "VxLAN-GPE protocol"
2026 						  " not supported");
2027 		memcpy(&id.vni[1], spec->vni, 3);
2028 		memcpy(&id.vni[1], mask->vni, 3);
2029 	}
2030 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2031 		return rte_flow_error_set(error, ENOTSUP,
2032 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2033 					  "VXLAN-GPE tunnel must be fully"
2034 					  " defined");
2035 	return 0;
2036 }
2037 /**
2038  * Validate GRE Key item.
2039  *
2040  * @param[in] item
2041  *   Item specification.
2042  * @param[in] item_flags
2043  *   Bit flags to mark detected items.
2044  * @param[in] gre_item
2045  *   Pointer to the GRE item.
2046  * @param[out] error
2047  *   Pointer to error structure.
2048  *
2049  * @return
2050  *   0 on success, a negative errno value otherwise and rte_errno is set.
2051  */
2052 int
2053 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2054 				uint64_t item_flags,
2055 				const struct rte_flow_item *gre_item,
2056 				struct rte_flow_error *error)
2057 {
2058 	const rte_be32_t *mask = item->mask;
2059 	int ret = 0;
2060 	rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2061 	const struct rte_flow_item_gre *gre_spec;
2062 	const struct rte_flow_item_gre *gre_mask;
2063 
2064 	if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2065 		return rte_flow_error_set(error, ENOTSUP,
2066 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2067 					  "Multiple GRE key not supported");
2068 	if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2069 		return rte_flow_error_set(error, ENOTSUP,
2070 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2071 					  "No preceding GRE header");
2072 	if (item_flags & MLX5_FLOW_LAYER_INNER)
2073 		return rte_flow_error_set(error, ENOTSUP,
2074 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2075 					  "GRE key following a wrong item");
2076 	gre_mask = gre_item->mask;
2077 	if (!gre_mask)
2078 		gre_mask = &rte_flow_item_gre_mask;
2079 	gre_spec = gre_item->spec;
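	/*
	 * RTE_BE16(0x2000) is the GRE 'K' (Key Present) flag in c_rsvd0_ver:
	 * a pattern that matches on this bit must also require it to be set,
	 * otherwise matching a GRE key is meaningless.
	 */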
2080 	if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2081 			 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2082 		return rte_flow_error_set(error, EINVAL,
2083 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2084 					  "Key bit must be on");
2085 
2086 	if (!mask)
2087 		mask = &gre_key_default_mask;
2088 	ret = mlx5_flow_item_acceptable
2089 		(item, (const uint8_t *)mask,
2090 		 (const uint8_t *)&gre_key_default_mask,
2091 		 sizeof(rte_be32_t), error);
2092 	return ret;
2093 }
2094 
2095 /**
2096  * Validate GRE item.
2097  *
2098  * @param[in] item
2099  *   Item specification.
2100  * @param[in] item_flags
2101  *   Bit flags to mark detected items.
2102  * @param[in] target_protocol
2103  *   The next protocol in the previous item.
2104  * @param[out] error
2105  *   Pointer to error structure.
2106  *
2107  * @return
2108  *   0 on success, a negative errno value otherwise and rte_errno is set.
2109  */
2110 int
2111 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2112 			    uint64_t item_flags,
2113 			    uint8_t target_protocol,
2114 			    struct rte_flow_error *error)
2115 {
2116 	const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2117 	const struct rte_flow_item_gre *mask = item->mask;
2118 	int ret;
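	/*
	 * 0xB000 covers the GRE C (0x8000), K (0x2000) and S (0x1000) flag
	 * bits, i.e. the acceptable fields besides the protocol.
	 */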
2119 	const struct rte_flow_item_gre nic_mask = {
2120 		.c_rsvd0_ver = RTE_BE16(0xB000),
2121 		.protocol = RTE_BE16(UINT16_MAX),
2122 	};
2123 
2124 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2125 		return rte_flow_error_set(error, EINVAL,
2126 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2127 					  "protocol filtering not compatible"
2128 					  " with this GRE layer");
2129 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2130 		return rte_flow_error_set(error, ENOTSUP,
2131 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2132 					  "multiple tunnel layers not"
2133 					  " supported");
2134 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2135 		return rte_flow_error_set(error, ENOTSUP,
2136 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2137 					  "L3 Layer is missing");
2138 	if (!mask)
2139 		mask = &rte_flow_item_gre_mask;
2140 	ret = mlx5_flow_item_acceptable
2141 		(item, (const uint8_t *)mask,
2142 		 (const uint8_t *)&nic_mask,
2143 		 sizeof(struct rte_flow_item_gre), error);
2144 	if (ret < 0)
2145 		return ret;
2146 #ifndef HAVE_MLX5DV_DR
2147 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2148 	if (spec && (spec->protocol & mask->protocol))
2149 		return rte_flow_error_set(error, ENOTSUP,
2150 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2151 					  "without MPLS support the"
2152 					  " specification cannot be used for"
2153 					  " filtering");
2154 #endif
2155 #endif
2156 	return 0;
2157 }
2158 
2159 /**
2160  * Validate Geneve item.
2161  *
2162  * @param[in] item
2163  *   Item specification.
2164  * @param[in] item_flags
2165  *   Bit-fields that hold the items detected until now.
2166  * @param[in] dev
2167  *   Pointer to the rte_eth_dev structure.
2168  * @param[out] error
2169  *   Pointer to error structure.
2170  *
2171  * @return
2172  *   0 on success, a negative errno value otherwise and rte_errno is set.
2173  */
2175 int
2176 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2177 			       uint64_t item_flags,
2178 			       struct rte_eth_dev *dev,
2179 			       struct rte_flow_error *error)
2180 {
2181 	struct mlx5_priv *priv = dev->data->dev_private;
2182 	const struct rte_flow_item_geneve *spec = item->spec;
2183 	const struct rte_flow_item_geneve *mask = item->mask;
2184 	int ret;
2185 	uint16_t gbhdr;
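	/*
	 * The maximum acceptable Geneve options length depends on the
	 * geneve_max_opt_len HCA capability.
	 */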
2186 	uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2187 			  MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2188 	const struct rte_flow_item_geneve nic_mask = {
2189 		.ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2190 		.vni = "\xff\xff\xff",
2191 		.protocol = RTE_BE16(UINT16_MAX),
2192 	};
2193 
2194 	if (!priv->config.hca_attr.tunnel_stateless_geneve_rx)
2195 		return rte_flow_error_set(error, ENOTSUP,
2196 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2197 					  "L3 Geneve is not enabled by device"
2198 					  " parameter and/or not configured in"
2199 					  " firmware");
2200 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2201 		return rte_flow_error_set(error, ENOTSUP,
2202 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2203 					  "multiple tunnel layers not"
2204 					  " supported");
2205 	/*
2206 	 * Verify an outer UDP header is present as required by the
2207 	 * Geneve encapsulation.
2208 	 */
2209 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2210 		return rte_flow_error_set(error, EINVAL,
2211 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2212 					  "no outer UDP layer found");
2213 	if (!mask)
2214 		mask = &rte_flow_item_geneve_mask;
2215 	ret = mlx5_flow_item_acceptable
2216 				  (item, (const uint8_t *)mask,
2217 				   (const uint8_t *)&nic_mask,
2218 				   sizeof(struct rte_flow_item_geneve), error);
2219 	if (ret)
2220 		return ret;
2221 	if (spec) {
2222 		gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2223 		if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2224 		     MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2225 		     MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2226 			return rte_flow_error_set(error, ENOTSUP,
2227 						  RTE_FLOW_ERROR_TYPE_ITEM,
2228 						  item,
2229 						  "Geneve protocol unsupported"
2230 						  " fields are being used");
2231 		if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2232 			return rte_flow_error_set
2233 					(error, ENOTSUP,
2234 					 RTE_FLOW_ERROR_TYPE_ITEM,
2235 					 item,
2236 					 "Unsupported Geneve options length");
2237 	}
2238 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2239 		return rte_flow_error_set
2240 				    (error, ENOTSUP,
2241 				     RTE_FLOW_ERROR_TYPE_ITEM, item,
2242 				     "Geneve tunnel must be fully defined");
2243 	return 0;
2244 }
2245 
2246 /**
2247  * Validate MPLS item.
2248  *
2249  * @param[in] dev
2250  *   Pointer to the rte_eth_dev structure.
2251  * @param[in] item
2252  *   Item specification.
2253  * @param[in] item_flags
2254  *   Bit-fields that hold the items detected until now.
2255  * @param[in] prev_layer
2256  *   The protocol layer indicated in previous item.
2257  * @param[out] error
2258  *   Pointer to error structure.
2259  *
2260  * @return
2261  *   0 on success, a negative errno value otherwise and rte_errno is set.
2262  */
2263 int
2264 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
2265 			     const struct rte_flow_item *item __rte_unused,
2266 			     uint64_t item_flags __rte_unused,
2267 			     uint64_t prev_layer __rte_unused,
2268 			     struct rte_flow_error *error)
2269 {
2270 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
2271 	const struct rte_flow_item_mpls *mask = item->mask;
2272 	struct mlx5_priv *priv = dev->data->dev_private;
2273 	int ret;
2274 
2275 	if (!priv->config.mpls_en)
2276 		return rte_flow_error_set(error, ENOTSUP,
2277 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2278 					  "MPLS not supported or"
2279 					  " disabled in firmware"
2280 					  " configuration.");
2281 	/* MPLS over IP, UDP, GRE is allowed */
2282 	if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 |
2283 			    MLX5_FLOW_LAYER_OUTER_L4_UDP |
2284 			    MLX5_FLOW_LAYER_GRE)))
2285 		return rte_flow_error_set(error, EINVAL,
2286 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2287 					  "protocol filtering not compatible"
2288 					  " with MPLS layer");
2289 	/* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
2290 	if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
2291 	    !(item_flags & MLX5_FLOW_LAYER_GRE))
2292 		return rte_flow_error_set(error, ENOTSUP,
2293 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2294 					  "multiple tunnel layers not"
2295 					  " supported");
2296 	if (!mask)
2297 		mask = &rte_flow_item_mpls_mask;
2298 	ret = mlx5_flow_item_acceptable
2299 		(item, (const uint8_t *)mask,
2300 		 (const uint8_t *)&rte_flow_item_mpls_mask,
2301 		 sizeof(struct rte_flow_item_mpls), error);
2302 	if (ret < 0)
2303 		return ret;
2304 	return 0;
2305 #else
2306 	return rte_flow_error_set(error, ENOTSUP,
2307 				  RTE_FLOW_ERROR_TYPE_ITEM, item,
2308 				  "MPLS is not supported by Verbs, please"
2309 				  " update.");
2310 #endif
2311 }
2312 
2313 /**
2314  * Validate NVGRE item.
2315  *
2316  * @param[in] item
2317  *   Item specification.
2318  * @param[in] item_flags
2319  *   Bit flags to mark detected items.
2320  * @param[in] target_protocol
2321  *   The next protocol in the previous item.
2322  * @param[out] error
2323  *   Pointer to error structure.
2324  *
2325  * @return
2326  *   0 on success, a negative errno value otherwise and rte_errno is set.
2327  */
2328 int
2329 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
2330 			      uint64_t item_flags,
2331 			      uint8_t target_protocol,
2332 			      struct rte_flow_error *error)
2333 {
2334 	const struct rte_flow_item_nvgre *mask = item->mask;
2335 	int ret;
2336 
2337 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2338 		return rte_flow_error_set(error, EINVAL,
2339 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2340 					  "protocol filtering not compatible"
2341 					  " with this GRE layer");
2342 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2343 		return rte_flow_error_set(error, ENOTSUP,
2344 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2345 					  "multiple tunnel layers not"
2346 					  " supported");
2347 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2348 		return rte_flow_error_set(error, ENOTSUP,
2349 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2350 					  "L3 Layer is missing");
2351 	if (!mask)
2352 		mask = &rte_flow_item_nvgre_mask;
2353 	ret = mlx5_flow_item_acceptable
2354 		(item, (const uint8_t *)mask,
2355 		 (const uint8_t *)&rte_flow_item_nvgre_mask,
2356 		 sizeof(struct rte_flow_item_nvgre), error);
2357 	if (ret < 0)
2358 		return ret;
2359 	return 0;
2360 }
2361 
2362 /* Allocate unique ID for the split Q/RSS subflows. */
2363 static uint32_t
2364 flow_qrss_get_id(struct rte_eth_dev *dev)
2365 {
2366 	struct mlx5_priv *priv = dev->data->dev_private;
2367 	uint32_t qrss_id, ret;
2368 
2369 	ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id);
2370 	if (ret)
2371 		return 0;
2372 	MLX5_ASSERT(qrss_id);
2373 	return qrss_id;
2374 }
2375 
2376 /* Free unique ID for the split Q/RSS subflows. */
2377 static void
2378 flow_qrss_free_id(struct rte_eth_dev *dev, uint32_t qrss_id)
2379 {
2380 	struct mlx5_priv *priv = dev->data->dev_private;
2381 
2382 	if (qrss_id)
2383 		mlx5_flow_id_release(priv->qrss_id_pool, qrss_id);
2384 }
2385 
2386 /**
2387  * Release resource related QUEUE/RSS action split.
2388  *
2389  * @param dev
2390  *   Pointer to Ethernet device.
2391  * @param flow
2392  *   Flow to release id's from.
2393  */
2394 static void
2395 flow_mreg_split_qrss_release(struct rte_eth_dev *dev,
2396 			     struct rte_flow *flow)
2397 {
2398 	struct mlx5_priv *priv = dev->data->dev_private;
2399 	uint32_t handle_idx;
2400 	struct mlx5_flow_handle *dev_handle;
2401 
2402 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
2403 		       handle_idx, dev_handle, next)
2404 		if (dev_handle->split_flow_id)
2405 			flow_qrss_free_id(dev, dev_handle->split_flow_id);
2406 }
2407 
2408 static int
2409 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
2410 		   const struct rte_flow_attr *attr __rte_unused,
2411 		   const struct rte_flow_item items[] __rte_unused,
2412 		   const struct rte_flow_action actions[] __rte_unused,
2413 		   bool external __rte_unused,
2414 		   int hairpin __rte_unused,
2415 		   struct rte_flow_error *error)
2416 {
2417 	return rte_flow_error_set(error, ENOTSUP,
2418 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2419 }
2420 
2421 static struct mlx5_flow *
2422 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
2423 		  const struct rte_flow_attr *attr __rte_unused,
2424 		  const struct rte_flow_item items[] __rte_unused,
2425 		  const struct rte_flow_action actions[] __rte_unused,
2426 		  struct rte_flow_error *error)
2427 {
2428 	rte_flow_error_set(error, ENOTSUP,
2429 			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2430 	return NULL;
2431 }
2432 
2433 static int
2434 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
2435 		    struct mlx5_flow *dev_flow __rte_unused,
2436 		    const struct rte_flow_attr *attr __rte_unused,
2437 		    const struct rte_flow_item items[] __rte_unused,
2438 		    const struct rte_flow_action actions[] __rte_unused,
2439 		    struct rte_flow_error *error)
2440 {
2441 	return rte_flow_error_set(error, ENOTSUP,
2442 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2443 }
2444 
2445 static int
2446 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
2447 		struct rte_flow *flow __rte_unused,
2448 		struct rte_flow_error *error)
2449 {
2450 	return rte_flow_error_set(error, ENOTSUP,
2451 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2452 }
2453 
2454 static void
2455 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
2456 		 struct rte_flow *flow __rte_unused)
2457 {
2458 }
2459 
2460 static void
2461 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
2462 		  struct rte_flow *flow __rte_unused)
2463 {
2464 }
2465 
2466 static int
2467 flow_null_query(struct rte_eth_dev *dev __rte_unused,
2468 		struct rte_flow *flow __rte_unused,
2469 		const struct rte_flow_action *actions __rte_unused,
2470 		void *data __rte_unused,
2471 		struct rte_flow_error *error)
2472 {
2473 	return rte_flow_error_set(error, ENOTSUP,
2474 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2475 }
2476 
2477 /* Void driver to protect from null pointer reference. */
2478 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
2479 	.validate = flow_null_validate,
2480 	.prepare = flow_null_prepare,
2481 	.translate = flow_null_translate,
2482 	.apply = flow_null_apply,
2483 	.remove = flow_null_remove,
2484 	.destroy = flow_null_destroy,
2485 	.query = flow_null_query,
2486 };
2487 
2488 /**
2489  * Select flow driver type according to flow attributes and device
2490  * configuration.
2491  *
2492  * @param[in] dev
2493  *   Pointer to the dev structure.
2494  * @param[in] attr
2495  *   Pointer to the flow attributes.
2496  *
2497  * @return
2498  *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
2499  */
2500 static enum mlx5_flow_drv_type
2501 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
2502 {
2503 	struct mlx5_priv *priv = dev->data->dev_private;
2504 	enum mlx5_flow_drv_type type = MLX5_FLOW_TYPE_MAX;
2505 
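	/*
	 * Transfer (E-Switch) rules are handled by the DV engine only;
	 * non-transfer rules use DV or Verbs according to dv_flow_en.
	 */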
2506 	if (attr->transfer && priv->config.dv_esw_en)
2507 		type = MLX5_FLOW_TYPE_DV;
2508 	if (!attr->transfer)
2509 		type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
2510 						 MLX5_FLOW_TYPE_VERBS;
2511 	return type;
2512 }
2513 
2514 #define flow_get_drv_ops(type) flow_drv_ops[type]
2515 
2516 /**
2517  * Flow driver validation API. This abstracts calling driver specific functions.
2518  * The type of flow driver is determined according to flow attributes.
2519  *
2520  * @param[in] dev
2521  *   Pointer to the dev structure.
2522  * @param[in] attr
2523  *   Pointer to the flow attributes.
2524  * @param[in] items
2525  *   Pointer to the list of items.
2526  * @param[in] actions
2527  *   Pointer to the list of actions.
2528  * @param[in] external
2529  *   This flow rule is created by a request external to the PMD.
2530  * @param[in] hairpin
2531  *   Number of hairpin TX actions, 0 means classic flow.
2532  * @param[out] error
2533  *   Pointer to the error structure.
2534  *
2535  * @return
2536  *   0 on success, a negative errno value otherwise and rte_errno is set.
2537  */
2538 static inline int
2539 flow_drv_validate(struct rte_eth_dev *dev,
2540 		  const struct rte_flow_attr *attr,
2541 		  const struct rte_flow_item items[],
2542 		  const struct rte_flow_action actions[],
2543 		  bool external, int hairpin, struct rte_flow_error *error)
2544 {
2545 	const struct mlx5_flow_driver_ops *fops;
2546 	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
2547 
2548 	fops = flow_get_drv_ops(type);
2549 	return fops->validate(dev, attr, items, actions, external,
2550 			      hairpin, error);
2551 }
2552 
2553 /**
2554  * Flow driver preparation API. This abstracts calling driver specific
2555  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
2556  * calculates the size of memory required for device flow, allocates the memory,
2557  * initializes the device flow and returns the pointer.
2558  *
2559  * @note
2560  *   This function initializes the device flow structure, such as dv or verbs
2561  *   in struct mlx5_flow. However, it is the caller's responsibility to
2562  *   initialize the rest. For example, adding the returned device flow to the
2563  *   flow->dev_flow list and setting the backward reference to the flow should
2564  *   be done outside of this function. The layers field is not filled either.
2565  *
2566  * @param[in] dev
2567  *   Pointer to the dev structure.
2568  * @param[in] attr
2569  *   Pointer to the flow attributes.
2570  * @param[in] items
2571  *   Pointer to the list of items.
2572  * @param[in] actions
2573  *   Pointer to the list of actions.
2574  * @param[in] flow_idx
2575  *   The memory pool index of the flow.
2576  * @param[out] error
2577  *   Pointer to the error structure.
2578  *
2579  * @return
2580  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
2581  */
2582 static inline struct mlx5_flow *
2583 flow_drv_prepare(struct rte_eth_dev *dev,
2584 		 const struct rte_flow *flow,
2585 		 const struct rte_flow_attr *attr,
2586 		 const struct rte_flow_item items[],
2587 		 const struct rte_flow_action actions[],
2588 		 uint32_t flow_idx,
2589 		 struct rte_flow_error *error)
2590 {
2591 	const struct mlx5_flow_driver_ops *fops;
2592 	enum mlx5_flow_drv_type type = flow->drv_type;
2593 	struct mlx5_flow *mlx5_flow = NULL;
2594 
2595 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2596 	fops = flow_get_drv_ops(type);
2597 	mlx5_flow = fops->prepare(dev, attr, items, actions, error);
2598 	if (mlx5_flow)
2599 		mlx5_flow->flow_idx = flow_idx;
2600 	return mlx5_flow;
2601 }
2602 
2603 /**
2604  * Flow driver translation API. This abstracts calling driver specific
2605  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
2606  * translates a generic flow into a driver flow. flow_drv_prepare() must
2607  * precede.
2608  *
2609  * @note
2610  *   dev_flow->layers could be filled as a result of parsing during translation
2611  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
2612  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
2613  *   flow->actions could be overwritten even though all the expanded dev_flows
2614  *   have the same actions.
2615  *
2616  * @param[in] dev
2617  *   Pointer to the rte dev structure.
2618  * @param[in, out] dev_flow
2619  *   Pointer to the mlx5 flow.
2620  * @param[in] attr
2621  *   Pointer to the flow attributes.
2622  * @param[in] items
2623  *   Pointer to the list of items.
2624  * @param[in] actions
2625  *   Pointer to the list of actions.
2626  * @param[out] error
2627  *   Pointer to the error structure.
2628  *
2629  * @return
2630  *   0 on success, a negative errno value otherwise and rte_errno is set.
2631  */
2632 static inline int
2633 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
2634 		   const struct rte_flow_attr *attr,
2635 		   const struct rte_flow_item items[],
2636 		   const struct rte_flow_action actions[],
2637 		   struct rte_flow_error *error)
2638 {
2639 	const struct mlx5_flow_driver_ops *fops;
2640 	enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
2641 
2642 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2643 	fops = flow_get_drv_ops(type);
2644 	return fops->translate(dev, dev_flow, attr, items, actions, error);
2645 }
2646 
2647 /**
2648  * Flow driver apply API. This abstracts calling driver specific functions.
2649  * Parent flow (rte_flow) should have driver type (drv_type). It applies
2650  * translated driver flows on to device. flow_drv_translate() must precede.
2651  *
2652  * @param[in] dev
2653  *   Pointer to Ethernet device structure.
2654  * @param[in, out] flow
2655  *   Pointer to flow structure.
2656  * @param[out] error
2657  *   Pointer to error structure.
2658  *
2659  * @return
2660  *   0 on success, a negative errno value otherwise and rte_errno is set.
2661  */
2662 static inline int
2663 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2664 	       struct rte_flow_error *error)
2665 {
2666 	const struct mlx5_flow_driver_ops *fops;
2667 	enum mlx5_flow_drv_type type = flow->drv_type;
2668 
2669 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2670 	fops = flow_get_drv_ops(type);
2671 	return fops->apply(dev, flow, error);
2672 }
2673 
2674 /**
2675  * Flow driver remove API. This abstracts calling driver specific functions.
2676  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
2677  * on device. All the resources of the flow should be freed by calling
2678  * flow_drv_destroy().
2679  *
2680  * @param[in] dev
2681  *   Pointer to Ethernet device.
2682  * @param[in, out] flow
2683  *   Pointer to flow structure.
2684  */
2685 static inline void
2686 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2687 {
2688 	const struct mlx5_flow_driver_ops *fops;
2689 	enum mlx5_flow_drv_type type = flow->drv_type;
2690 
2691 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2692 	fops = flow_get_drv_ops(type);
2693 	fops->remove(dev, flow);
2694 }
2695 
2696 /**
2697  * Flow driver destroy API. This abstracts calling driver specific functions.
2698  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
2699  * on device and releases resources of the flow.
2700  *
2701  * @param[in] dev
2702  *   Pointer to Ethernet device.
2703  * @param[in, out] flow
2704  *   Pointer to flow structure.
2705  */
2706 static inline void
2707 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2708 {
2709 	const struct mlx5_flow_driver_ops *fops;
2710 	enum mlx5_flow_drv_type type = flow->drv_type;
2711 
2712 	flow_mreg_split_qrss_release(dev, flow);
2713 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2714 	fops = flow_get_drv_ops(type);
2715 	fops->destroy(dev, flow);
2716 }
2717 
2718 /**
2719  * Get RSS action from the action list.
2720  *
2721  * @param[in] actions
2722  *   Pointer to the list of actions.
2723  *
2724  * @return
2725  *   Pointer to the RSS action if it exists, NULL otherwise.
2726  */
2727 static const struct rte_flow_action_rss*
2728 flow_get_rss_action(const struct rte_flow_action actions[])
2729 {
2730 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2731 		switch (actions->type) {
2732 		case RTE_FLOW_ACTION_TYPE_RSS:
2733 			return (const struct rte_flow_action_rss *)
2734 			       actions->conf;
2735 		default:
2736 			break;
2737 		}
2738 	}
2739 	return NULL;
2740 }
2741 
2742 static unsigned int
2743 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
2744 {
2745 	const struct rte_flow_item *item;
2746 	unsigned int has_vlan = 0;
2747 
2748 	for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
2749 		if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
2750 			has_vlan = 1;
2751 			break;
2752 		}
2753 	}
2754 	if (has_vlan)
2755 		return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
2756 				       MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
2757 	return rss_level < 2 ? MLX5_EXPANSION_ROOT :
2758 			       MLX5_EXPANSION_ROOT_OUTER;
2759 }
2760 
2761 /**
2762  *  Get layer flags from the prefix flow.
2763  *
2764  *  Some flows may be split into several subflows, the prefix subflow gets the
2765  *  match items and the suffix subflow gets the actions.
2766  *  Some actions need the user-defined match item flags to get the details for
2767  *  the action.
2768  *  This function helps the suffix flow to get the item layer flags from the
2769  *  prefix subflow.
2770  *
2771  * @param[in] dev_flow
2772  *   Pointer to the created prefix subflow.
2773  *
2774  * @return
2775  *   The layers get from prefix subflow.
2776  */
2777 static inline uint64_t
2778 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
2779 {
2780 	uint64_t layers = 0;
2781 
2782 	/*
2783 	 * The layers bits could be cached in a local variable, but usually the
2784 	 * compiler performs that optimization on its own.
2785 	 * If there is no decap action, use the layers directly.
2786 	 */
2787 	if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
2788 		return dev_flow->handle->layers;
2789 	/* Convert L3 layers with decap action. */
2790 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
2791 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2792 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
2793 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2794 	/* Convert L4 layers with decap action.  */
2795 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
2796 		layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
2797 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
2798 		layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
2799 	return layers;
2800 }
2801 
2802 /**
2803  * Get metadata split action information.
2804  *
2805  * @param[in] actions
2806  *   Pointer to the list of actions.
2807  * @param[out] qrss
2808  *   Pointer to the return pointer.
2812  * @param[out] encap_idx
2813  *   Pointer to the index of the encap action if exists, otherwise the last
2814  *   action index.
2815  *
2816  * @return
2817  *   Total number of actions.
2818  */
2819 static int
2820 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
2821 				       const struct rte_flow_action **qrss,
2822 				       int *encap_idx)
2823 {
2824 	const struct rte_flow_action_raw_encap *raw_encap;
2825 	int actions_n = 0;
2826 	int raw_decap_idx = -1;
2827 
2828 	*encap_idx = -1;
2829 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2830 		switch (actions->type) {
2831 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
2832 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
2833 			*encap_idx = actions_n;
2834 			break;
2835 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
2836 			raw_decap_idx = actions_n;
2837 			break;
2838 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
2839 			raw_encap = actions->conf;
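			/*
			 * Only a RAW_ENCAP big enough to carry a tunnel
			 * header counts as an encap; when paired with a
			 * preceding RAW_DECAP, the split point is the decap.
			 */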
2840 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
2841 				*encap_idx = raw_decap_idx != -1 ?
2842 						      raw_decap_idx : actions_n;
2843 			break;
2844 		case RTE_FLOW_ACTION_TYPE_QUEUE:
2845 		case RTE_FLOW_ACTION_TYPE_RSS:
2846 			*qrss = actions;
2847 			break;
2848 		default:
2849 			break;
2850 		}
2851 		actions_n++;
2852 	}
2853 	if (*encap_idx == -1)
2854 		*encap_idx = actions_n;
2855 	/* Count RTE_FLOW_ACTION_TYPE_END. */
2856 	return actions_n + 1;
2857 }
2858 
2859 /**
2860  * Check meter action from the action list.
2861  *
2862  * @param[in] actions
2863  *   Pointer to the list of actions.
2864  * @param[out] mtr
2865  *   Pointer to the meter exist flag.
2866  *
2867  * @return
2868  *   Total number of actions.
2869  */
2870 static int
2871 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr)
2872 {
2873 	int actions_n = 0;
2874 
2875 	MLX5_ASSERT(mtr);
2876 	*mtr = 0;
2877 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2878 		switch (actions->type) {
2879 		case RTE_FLOW_ACTION_TYPE_METER:
2880 			*mtr = 1;
2881 			break;
2882 		default:
2883 			break;
2884 		}
2885 		actions_n++;
2886 	}
2887 	/* Count RTE_FLOW_ACTION_TYPE_END. */
2888 	return actions_n + 1;
2889 }
2890 
2891 /**
2892  * Check if the flow should be split due to hairpin.
2893  * The reason for the split is that in current HW we can't
2894  * support encap on Rx, so if a flow has encap we move it
2895  * to Tx.
2896  *
2897  * @param dev
2898  *   Pointer to Ethernet device.
2899  * @param[in] attr
2900  *   Flow rule attributes.
2901  * @param[in] actions
2902  *   Associated actions (list terminated by the END action).
2903  *
2904  * @return
2905  *   > 0 the number of actions and the flow should be split,
2906  *   0 when no split required.
2907  */
2908 static int
2909 flow_check_hairpin_split(struct rte_eth_dev *dev,
2910 			 const struct rte_flow_attr *attr,
2911 			 const struct rte_flow_action actions[])
2912 {
2913 	int queue_action = 0;
2914 	int action_n = 0;
2915 	int encap = 0;
2916 	const struct rte_flow_action_queue *queue;
2917 	const struct rte_flow_action_rss *rss;
2918 	const struct rte_flow_action_raw_encap *raw_encap;
2919 
2920 	if (!attr->ingress)
2921 		return 0;
2922 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2923 		switch (actions->type) {
2924 		case RTE_FLOW_ACTION_TYPE_QUEUE:
2925 			queue = actions->conf;
2926 			if (queue == NULL)
2927 				return 0;
2928 			if (mlx5_rxq_get_type(dev, queue->index) !=
2929 			    MLX5_RXQ_TYPE_HAIRPIN)
2930 				return 0;
2931 			queue_action = 1;
2932 			action_n++;
2933 			break;
2934 		case RTE_FLOW_ACTION_TYPE_RSS:
2935 			rss = actions->conf;
2936 			if (rss == NULL || rss->queue_num == 0)
2937 				return 0;
2938 			if (mlx5_rxq_get_type(dev, rss->queue[0]) !=
2939 			    MLX5_RXQ_TYPE_HAIRPIN)
2940 				return 0;
2941 			queue_action = 1;
2942 			action_n++;
2943 			break;
2944 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
2945 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
2946 			encap = 1;
2947 			action_n++;
2948 			break;
2949 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
2950 			raw_encap = actions->conf;
2951 			if (raw_encap->size >
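			/*
			 * A raw encap larger than an L2 + IPv4 header is
			 * assumed to add a full tunnel header, which cannot
			 * be done on the Rx side of a hairpin queue.
			 */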
2952 			    (sizeof(struct rte_flow_item_eth) +
2953 			     sizeof(struct rte_flow_item_ipv4)))
2954 				encap = 1;
2955 			action_n++;
2956 			break;
2957 		default:
2958 			action_n++;
2959 			break;
2960 		}
2961 	}
2962 	if (encap == 1 && queue_action)
2963 		return action_n;
2964 	return 0;
2965 }
2966 
2967 /* Declare flow create/destroy prototype in advance. */
2968 static uint32_t
2969 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
2970 		 const struct rte_flow_attr *attr,
2971 		 const struct rte_flow_item items[],
2972 		 const struct rte_flow_action actions[],
2973 		 bool external, struct rte_flow_error *error);
2974 
2975 static void
2976 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
2977 		  uint32_t flow_idx);
2978 
2979 /**
2980  * Add a flow of copying flow metadata registers in RX_CP_TBL.
2981  *
2982  * As mark_id is unique, if there's already a registered flow for the mark_id,
2983  * return by increasing the reference counter of the resource. Otherwise, create
2984  * the resource (mcp_res) and flow.
2985  *
2986  * Flow looks like,
2987  *   - If ingress port is ANY and reg_c[1] is mark_id,
2988  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
2989  *
2990  * For default flow (zero mark_id), flow is like,
2991  *   - If ingress port is ANY,
2992  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
2993  *
2994  * @param dev
2995  *   Pointer to Ethernet device.
2996  * @param mark_id
2997  *   ID of MARK action, zero means default flow for META.
2998  * @param[out] error
2999  *   Perform verbose error reporting if not NULL.
3000  *
3001  * @return
3002  *   Associated resource on success, NULL otherwise and rte_errno is set.
3003  */
3004 static struct mlx5_flow_mreg_copy_resource *
3005 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
3006 			  struct rte_flow_error *error)
3007 {
3008 	struct mlx5_priv *priv = dev->data->dev_private;
3009 	struct rte_flow_attr attr = {
3010 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
3011 		.ingress = 1,
3012 	};
3013 	struct mlx5_rte_flow_item_tag tag_spec = {
3014 		.data = mark_id,
3015 	};
3016 	struct rte_flow_item items[] = {
3017 		[1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
3018 	};
3019 	struct rte_flow_action_mark ftag = {
3020 		.id = mark_id,
3021 	};
3022 	struct mlx5_flow_action_copy_mreg cp_mreg = {
3023 		.dst = REG_B,
3024 		.src = 0,
3025 	};
3026 	struct rte_flow_action_jump jump = {
3027 		.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
3028 	};
3029 	struct rte_flow_action actions[] = {
3030 		[3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
3031 	};
3032 	struct mlx5_flow_mreg_copy_resource *mcp_res;
3033 	uint32_t idx = 0;
3034 	int ret;
3035 
3036 	/* Fill the register fields in the flow. */
3037 	ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
3038 	if (ret < 0)
3039 		return NULL;
3040 	tag_spec.id = ret;
3041 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
3042 	if (ret < 0)
3043 		return NULL;
3044 	cp_mreg.src = ret;
3045 	/* Check if already registered. */
3046 	MLX5_ASSERT(priv->mreg_cp_tbl);
3047 	mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id);
3048 	if (mcp_res) {
3049 		/* For non-default rule. */
3050 		if (mark_id != MLX5_DEFAULT_COPY_ID)
3051 			mcp_res->refcnt++;
3052 		MLX5_ASSERT(mark_id != MLX5_DEFAULT_COPY_ID ||
3053 			    mcp_res->refcnt == 1);
3054 		return mcp_res;
3055 	}
3056 	/* Provide the full width of FLAG specific value. */
3057 	if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
3058 		tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
3059 	/* Build a new flow. */
3060 	if (mark_id != MLX5_DEFAULT_COPY_ID) {
3061 		items[0] = (struct rte_flow_item){
3062 			.type = (enum rte_flow_item_type)
3063 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
3064 			.spec = &tag_spec,
3065 		};
3066 		items[1] = (struct rte_flow_item){
3067 			.type = RTE_FLOW_ITEM_TYPE_END,
3068 		};
3069 		actions[0] = (struct rte_flow_action){
3070 			.type = (enum rte_flow_action_type)
3071 				MLX5_RTE_FLOW_ACTION_TYPE_MARK,
3072 			.conf = &ftag,
3073 		};
3074 		actions[1] = (struct rte_flow_action){
3075 			.type = (enum rte_flow_action_type)
3076 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3077 			.conf = &cp_mreg,
3078 		};
3079 		actions[2] = (struct rte_flow_action){
3080 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
3081 			.conf = &jump,
3082 		};
3083 		actions[3] = (struct rte_flow_action){
3084 			.type = RTE_FLOW_ACTION_TYPE_END,
3085 		};
3086 	} else {
3087 		/* Default rule, wildcard match. */
3088 		attr.priority = MLX5_FLOW_PRIO_RSVD;
3089 		items[0] = (struct rte_flow_item){
3090 			.type = RTE_FLOW_ITEM_TYPE_END,
3091 		};
3092 		actions[0] = (struct rte_flow_action){
3093 			.type = (enum rte_flow_action_type)
3094 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3095 			.conf = &cp_mreg,
3096 		};
3097 		actions[1] = (struct rte_flow_action){
3098 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
3099 			.conf = &jump,
3100 		};
3101 		actions[2] = (struct rte_flow_action){
3102 			.type = RTE_FLOW_ACTION_TYPE_END,
3103 		};
3104 	}
3105 	/* Build a new entry. */
3106 	mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
3107 	if (!mcp_res) {
3108 		rte_errno = ENOMEM;
3109 		return NULL;
3110 	}
3111 	mcp_res->idx = idx;
3112 	/*
3113 	 * The copy Flows are not included in any list. These
3114 	 * ones are referenced from other Flows and cannot
3115 	 * be applied, removed, or deleted in arbitrary order
3116 	 * by list traversing.
3117 	 */
3118 	mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items,
3119 					 actions, false, error);
3120 	if (!mcp_res->rix_flow)
3121 		goto error;
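	/*
	 * Take the initial reference. For the default copy flow the counter
	 * stays at 1; extra users do not increment it (see the lookup above).
	 */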
3122 	mcp_res->refcnt++;
3123 	mcp_res->hlist_ent.key = mark_id;
3124 	ret = mlx5_hlist_insert(priv->mreg_cp_tbl,
3125 				&mcp_res->hlist_ent);
3126 	MLX5_ASSERT(!ret);
3127 	if (ret)
3128 		goto error;
3129 	return mcp_res;
3130 error:
3131 	if (mcp_res->rix_flow)
3132 		flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3133 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3134 	return NULL;
3135 }
3136 
3137 /**
3138  * Release flow in RX_CP_TBL.
3139  *
3140  * @param dev
3141  *   Pointer to Ethernet device.
3142  * @param flow
3143  *   Parent flow for which copying is provided.
3144  */
3145 static void
3146 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
3147 			  struct rte_flow *flow)
3148 {
3149 	struct mlx5_flow_mreg_copy_resource *mcp_res;
3150 	struct mlx5_priv *priv = dev->data->dev_private;
3151 
3152 	if (!flow->rix_mreg_copy)
3153 		return;
3154 	mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
3155 				 flow->rix_mreg_copy);
3156 	if (!mcp_res || !priv->mreg_cp_tbl)
3157 		return;
3158 	if (flow->copy_applied) {
3159 		MLX5_ASSERT(mcp_res->appcnt);
3160 		flow->copy_applied = 0;
3161 		--mcp_res->appcnt;
3162 		if (!mcp_res->appcnt) {
3163 			struct rte_flow *mcp_flow = mlx5_ipool_get
3164 					(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
3165 					mcp_res->rix_flow);
3166 
3167 			if (mcp_flow)
3168 				flow_drv_remove(dev, mcp_flow);
3169 		}
3170 	}
3171 	/*
3172 	 * We do not check availability of metadata registers here,
3173 	 * because copy resources are not allocated in this case.
3174 	 */
3175 	if (--mcp_res->refcnt)
3176 		return;
3177 	MLX5_ASSERT(mcp_res->rix_flow);
3178 	flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3179 	mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
3180 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3181 	flow->rix_mreg_copy = 0;
3182 }
3183 
3184 /**
3185  * Start flow in RX_CP_TBL.
3186  *
3187  * @param dev
3188  *   Pointer to Ethernet device.
3189  * @param flow
3190  *   Parent flow for which copying is provided.
3191  *
3192  * @return
3193  *   0 on success, a negative errno value otherwise and rte_errno is set.
3194  */
3195 static int
3196 flow_mreg_start_copy_action(struct rte_eth_dev *dev,
3197 			    struct rte_flow *flow)
3198 {
3199 	struct mlx5_flow_mreg_copy_resource *mcp_res;
3200 	struct mlx5_priv *priv = dev->data->dev_private;
3201 	int ret;
3202 
3203 	if (!flow->rix_mreg_copy || flow->copy_applied)
3204 		return 0;
3205 	mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
3206 				 flow->rix_mreg_copy);
3207 	if (!mcp_res)
3208 		return 0;
3209 	if (!mcp_res->appcnt) {
3210 		struct rte_flow *mcp_flow = mlx5_ipool_get
3211 				(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
3212 				mcp_res->rix_flow);
3213 
3214 		if (mcp_flow) {
3215 			ret = flow_drv_apply(dev, mcp_flow, NULL);
3216 			if (ret)
3217 				return ret;
3218 		}
3219 	}
3220 	++mcp_res->appcnt;
3221 	flow->copy_applied = 1;
3222 	return 0;
3223 }
3224 
3225 /**
3226  * Stop flow in RX_CP_TBL.
3227  *
3228  * @param dev
3229  *   Pointer to Ethernet device.
3230  * @param flow
3231  *   Parent flow for which copying is provided.
3232  */
3233 static void
3234 flow_mreg_stop_copy_action(struct rte_eth_dev *dev,
3235 			   struct rte_flow *flow)
3236 {
3237 	struct mlx5_flow_mreg_copy_resource *mcp_res;
3238 	struct mlx5_priv *priv = dev->data->dev_private;
3239 
3240 	if (!flow->rix_mreg_copy || !flow->copy_applied)
3241 		return;
3242 	mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
3243 				 flow->rix_mreg_copy);
3244 	if (!mcp_res)
3245 		return;
3246 	MLX5_ASSERT(mcp_res->appcnt);
3247 	--mcp_res->appcnt;
3248 	flow->copy_applied = 0;
3249 	if (!mcp_res->appcnt) {
3250 		struct rte_flow *mcp_flow = mlx5_ipool_get
3251 				(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
3252 				mcp_res->rix_flow);
3253 
3254 		if (mcp_flow)
3255 			flow_drv_remove(dev, mcp_flow);
3256 	}
3257 }
3258 
3259 /**
3260  * Remove the default copy action from RX_CP_TBL.
3261  *
3262  * @param dev
3263  *   Pointer to Ethernet device.
3264  */
3265 static void
3266 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
3267 {
3268 	struct mlx5_flow_mreg_copy_resource *mcp_res;
3269 	struct mlx5_priv *priv = dev->data->dev_private;
3270 
3271 	/* Check if default flow is registered. */
3272 	if (!priv->mreg_cp_tbl)
3273 		return;
3274 	mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl,
3275 					    MLX5_DEFAULT_COPY_ID);
3276 	if (!mcp_res)
3277 		return;
3278 	MLX5_ASSERT(mcp_res->rix_flow);
3279 	flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3280 	mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
3281 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3282 }
3283 
3284 /**
3285  * Add the default copy action in RX_CP_TBL.
3286  *
3287  * @param dev
3288  *   Pointer to Ethernet device.
3289  * @param[out] error
3290  *   Perform verbose error reporting if not NULL.
3291  *
3292  * @return
3293  *   0 for success, negative value otherwise and rte_errno is set.
3294  */
3295 static int
3296 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
3297 				  struct rte_flow_error *error)
3298 {
3299 	struct mlx5_priv *priv = dev->data->dev_private;
3300 	struct mlx5_flow_mreg_copy_resource *mcp_res;
3301 
3302 	/* Check whether extensive metadata feature is engaged. */
3303 	if (!priv->config.dv_flow_en ||
3304 	    priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3305 	    !mlx5_flow_ext_mreg_supported(dev) ||
3306 	    !priv->sh->dv_regc0_mask)
3307 		return 0;
3308 	mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error);
3309 	if (!mcp_res)
3310 		return -rte_errno;
3311 	return 0;
3312 }
3313 
3314 /**
3315  * Add a flow of copying flow metadata registers in RX_CP_TBL.
3316  *
3317  * All the flow having Q/RSS action should be split by
3318  * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
3319  * performs the following,
3320  *   - CQE->flow_tag := reg_c[1] (MARK)
3321  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
3322  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
3323  * but there should be a flow for each MARK ID set by the MARK action.
3324  *
3325  * For the aforementioned reason, if there's a MARK action in flow's action
3326  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
3327  * the MARK ID to CQE's flow_tag like,
3328  *   - If reg_c[1] is mark_id,
3329  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3330  *
3331  * For SET_META action which stores value in reg_c[0], as the destination is
3332  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
3333  * MARK ID means the default flow. The default flow looks like,
3334  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
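 *
 * For example (illustration only), a flow with actions
 * "mark id 0x10 / queue index 0" needs a companion RX_CP_TBL flow matching
 * reg_c[1] == 0x10 that sets flow_tag := 0x10, reg_b := reg_c[0] and jumps
 * to RX_ACT_TBL.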
3335  *
3336  * @param dev
3337  *   Pointer to Ethernet device.
3338  * @param flow
3339  *   Pointer to flow structure.
3340  * @param[in] actions
3341  *   Pointer to the list of actions.
3342  * @param[out] error
3343  *   Perform verbose error reporting if not NULL.
3344  *
3345  * @return
3346  *   0 on success, negative value otherwise and rte_errno is set.
3347  */
3348 static int
3349 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
3350 			    struct rte_flow *flow,
3351 			    const struct rte_flow_action *actions,
3352 			    struct rte_flow_error *error)
3353 {
3354 	struct mlx5_priv *priv = dev->data->dev_private;
3355 	struct mlx5_dev_config *config = &priv->config;
3356 	struct mlx5_flow_mreg_copy_resource *mcp_res;
3357 	const struct rte_flow_action_mark *mark;
3358 
3359 	/* Check whether extensive metadata feature is engaged. */
3360 	if (!config->dv_flow_en ||
3361 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3362 	    !mlx5_flow_ext_mreg_supported(dev) ||
3363 	    !priv->sh->dv_regc0_mask)
3364 		return 0;
3365 	/* Find MARK action. */
3366 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3367 		switch (actions->type) {
3368 		case RTE_FLOW_ACTION_TYPE_FLAG:
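			/*
			 * FLAG is handled as a MARK carrying the default
			 * (reserved) mark value.
			 */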
3369 			mcp_res = flow_mreg_add_copy_action
3370 				(dev, MLX5_FLOW_MARK_DEFAULT, error);
3371 			if (!mcp_res)
3372 				return -rte_errno;
3373 			flow->rix_mreg_copy = mcp_res->idx;
3374 			if (dev->data->dev_started) {
3375 				mcp_res->appcnt++;
3376 				flow->copy_applied = 1;
3377 			}
3378 			return 0;
3379 		case RTE_FLOW_ACTION_TYPE_MARK:
3380 			mark = (const struct rte_flow_action_mark *)
3381 				actions->conf;
3382 			mcp_res =
3383 				flow_mreg_add_copy_action(dev, mark->id, error);
3384 			if (!mcp_res)
3385 				return -rte_errno;
3386 			flow->rix_mreg_copy = mcp_res->idx;
3387 			if (dev->data->dev_started) {
3388 				mcp_res->appcnt++;
3389 				flow->copy_applied = 1;
3390 			}
3391 			return 0;
3392 		default:
3393 			break;
3394 		}
3395 	}
3396 	return 0;
3397 }
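
/*
 * Example: for a flow whose actions contain "mark id 0x1234 / queue", a
 * companion flow is added to RX_CP_TBL that matches reg_c[1] == 0x1234,
 * sets flow_tag := 0x1234, copies reg_b := reg_c[0] and jumps to
 * RX_ACT_TBL, as described above for flow_mreg_update_copy_table().
 */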
3398 
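/*
 * Upper bounds for the on-stack action/item arrays used when splitting
 * flows (see flow_list_create() below).
 */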
3399 #define MLX5_MAX_SPLIT_ACTIONS 24
3400 #define MLX5_MAX_SPLIT_ITEMS 24
3401 
3402 /**
3403  * Split the hairpin flow.
3404  * Since the HW can't support encap on Rx, the encap action is moved to Tx.
3405  * If a count action follows the encap then the count action is
3406  * moved as well; in that case the counter also measures
3407  * the outer (encapsulation) bytes.
3408  *
3409  * @param dev
3410  *   Pointer to Ethernet device.
3411  * @param[in] actions
3412  *   Associated actions (list terminated by the END action).
3413  * @param[out] actions_rx
3414  *   Rx flow actions.
3415  * @param[out] actions_tx
3416  *   Tx flow actions.
3417  * @param[out] pattern_tx
3418  *   The pattern items for the Tx flow.
3419  * @param[out] flow_id
3420  *   The flow ID connected to this flow.
3421  *
3422  * @return
3423  *   0 on success.
3424  */
3425 static int
3426 flow_hairpin_split(struct rte_eth_dev *dev,
3427 		   const struct rte_flow_action actions[],
3428 		   struct rte_flow_action actions_rx[],
3429 		   struct rte_flow_action actions_tx[],
3430 		   struct rte_flow_item pattern_tx[],
3431 		   uint32_t *flow_id)
3432 {
3433 	struct mlx5_priv *priv = dev->data->dev_private;
3434 	const struct rte_flow_action_raw_encap *raw_encap;
3435 	const struct rte_flow_action_raw_decap *raw_decap;
3436 	struct mlx5_rte_flow_action_set_tag *set_tag;
3437 	struct rte_flow_action *tag_action;
3438 	struct mlx5_rte_flow_item_tag *tag_item;
3439 	struct rte_flow_item *item;
3440 	char *addr;
3441 	int encap = 0;
3442 
3443 	mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id);
3444 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3445 		switch (actions->type) {
3446 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3447 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3448 			rte_memcpy(actions_tx, actions,
3449 			       sizeof(struct rte_flow_action));
3450 			actions_tx++;
3451 			break;
3452 		case RTE_FLOW_ACTION_TYPE_COUNT:
3453 			if (encap) {
3454 				rte_memcpy(actions_tx, actions,
3455 					   sizeof(struct rte_flow_action));
3456 				actions_tx++;
3457 			} else {
3458 				rte_memcpy(actions_rx, actions,
3459 					   sizeof(struct rte_flow_action));
3460 				actions_rx++;
3461 			}
3462 			break;
3463 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3464 			raw_encap = actions->conf;
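			/*
			 * Encapsulation data larger than an L2 + L3 header
			 * (the Ethernet and IPv4 item sizes serve as the
			 * decision threshold) implies a tunnel encap, which
			 * the HW cannot do on Rx, so it is moved to the Tx
			 * flow; smaller data stays on Rx. The RAW_DECAP case
			 * below applies the inverse check.
			 */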
3465 			if (raw_encap->size >
3466 			    (sizeof(struct rte_flow_item_eth) +
3467 			     sizeof(struct rte_flow_item_ipv4))) {
3468 				memcpy(actions_tx, actions,
3469 				       sizeof(struct rte_flow_action));
3470 				actions_tx++;
3471 				encap = 1;
3472 			} else {
3473 				rte_memcpy(actions_rx, actions,
3474 					   sizeof(struct rte_flow_action));
3475 				actions_rx++;
3476 			}
3477 			break;
3478 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3479 			raw_decap = actions->conf;
3480 			if (raw_decap->size <
3481 			    (sizeof(struct rte_flow_item_eth) +
3482 			     sizeof(struct rte_flow_item_ipv4))) {
3483 				memcpy(actions_tx, actions,
3484 				       sizeof(struct rte_flow_action));
3485 				actions_tx++;
3486 			} else {
3487 				rte_memcpy(actions_rx, actions,
3488 					   sizeof(struct rte_flow_action));
3489 				actions_rx++;
3490 			}
3491 			break;
3492 		default:
3493 			rte_memcpy(actions_rx, actions,
3494 				   sizeof(struct rte_flow_action));
3495 			actions_rx++;
3496 			break;
3497 		}
3498 	}
3499 	/* Add set meta action and end action for the Rx flow. */
3500 	tag_action = actions_rx;
3501 	tag_action->type = (enum rte_flow_action_type)
3502 			   MLX5_RTE_FLOW_ACTION_TYPE_TAG;
3503 	actions_rx++;
3504 	rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
3505 	actions_rx++;
3506 	set_tag = (void *)actions_rx;
3507 	set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL);
3508 	MLX5_ASSERT(set_tag->id > REG_NONE);
3509 	set_tag->data = *flow_id;
3510 	tag_action->conf = set_tag;
3511 	/* Create Tx item list. */
3512 	rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
3513 	addr = (void *)&pattern_tx[2];
3514 	item = pattern_tx;
3515 	item->type = (enum rte_flow_item_type)
3516 		     MLX5_RTE_FLOW_ITEM_TYPE_TAG;
3517 	tag_item = (void *)addr;
3518 	tag_item->data = *flow_id;
3519 	tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
3520 	MLX5_ASSERT(tag_item->id > REG_NONE);
3521 	item->spec = tag_item;
3522 	addr += sizeof(struct mlx5_rte_flow_item_tag);
3523 	tag_item = (void *)addr;
3524 	tag_item->data = UINT32_MAX;
3525 	tag_item->id = UINT16_MAX;
3526 	item->mask = tag_item;
3527 	addr += sizeof(struct mlx5_rte_flow_item_tag);
3528 	item->last = NULL;
3529 	item++;
3530 	item->type = RTE_FLOW_ITEM_TYPE_END;
3531 	return 0;
3532 }
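
/*
 * Illustration of the split above, assuming the original actions are
 * "raw_encap (tunnel sized) / count / queue / end":
 *   - Rx flow actions: queue / set_tag(reg, flow_id) / end,
 *   - Tx flow actions: raw_encap / count / end,
 *   - Tx flow pattern: tag(reg == flow_id) / end.
 */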
3533 
3534 /**
3535  * The last stage of the splitting chain; it just creates the subflow
3536  * without any modification.
3537  *
3538  * @param[in] dev
3539  *   Pointer to Ethernet device.
3540  * @param[in] flow
3541  *   Parent flow structure pointer.
3542  * @param[in, out] sub_flow
3543  *   Pointer to return the created subflow, may be NULL.
3544  * @param[in] prefix_layers
3545  *   Prefix subflow layers, may be 0.
3546  * @param[in] attr
3547  *   Flow rule attributes.
3548  * @param[in] items
3549  *   Pattern specification (list terminated by the END pattern item).
3550  * @param[in] actions
3551  *   Associated actions (list terminated by the END action).
3552  * @param[in] external
3553  *   This flow rule is created by a request external to the PMD.
3554  * @param[in] flow_idx
3555  *   The memory pool index of the flow.
3556  * @param[out] error
3557  *   Perform verbose error reporting if not NULL.
3558  * @return
3559  *   0 on success, negative value otherwise
3560  */
3561 static int
3562 flow_create_split_inner(struct rte_eth_dev *dev,
3563 			struct rte_flow *flow,
3564 			struct mlx5_flow **sub_flow,
3565 			uint64_t prefix_layers,
3566 			const struct rte_flow_attr *attr,
3567 			const struct rte_flow_item items[],
3568 			const struct rte_flow_action actions[],
3569 			bool external, uint32_t flow_idx,
3570 			struct rte_flow_error *error)
3571 {
3572 	struct mlx5_flow *dev_flow;
3573 
3574 	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
3575 		flow_idx, error);
3576 	if (!dev_flow)
3577 		return -rte_errno;
3578 	dev_flow->flow = flow;
3579 	dev_flow->external = external;
3580 	/* Subflow object was created, we must include one in the list. */
3581 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
3582 		      dev_flow->handle, next);
3583 	/*
3584 	 * If dev_flow is one of the suffix flows, some of its actions may
3585 	 * need the item layer flags inherited from the prefix flow.
3586 	 */
3587 	if (prefix_layers)
3588 		dev_flow->handle->layers = prefix_layers;
3589 	if (sub_flow)
3590 		*sub_flow = dev_flow;
3591 	return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
3592 }
3593 
3594 /**
3595  * Split the meter flow.
3596  *
3597  * As the meter flow is split into three subflows, the actions other
3598  * than the meter action only make sense when the meter accepts the
3599  * packet. If the packet is to be dropped, no additional actions
3600  * should be taken.
3601  *
3602  * One special kind of action, which decapsulates the L3 tunnel
3603  * header, is placed in the prefix subflow so as not to take the
3604  * L3 tunnel header into account.
3605  *
3606  * @param dev
3607  *   Pointer to Ethernet device.
3608  * @param[in] items
3609  *   Pattern specification (list terminated by the END pattern item).
3610  * @param[out] sfx_items
3611  *   Suffix flow match items (list terminated by the END pattern item).
3612  * @param[in] actions
3613  *   Associated actions (list terminated by the END action).
3614  * @param[out] actions_sfx
3615  *   Suffix flow actions.
3616  * @param[out] actions_pre
3617  *   Prefix flow actions.
3622  *
3623  * @return
3624  *   The allocated tag ID on success, 0 otherwise.
3625  */
3626 static int
3627 flow_meter_split_prep(struct rte_eth_dev *dev,
3628 		 const struct rte_flow_item items[],
3629 		 struct rte_flow_item sfx_items[],
3630 		 const struct rte_flow_action actions[],
3631 		 struct rte_flow_action actions_sfx[],
3632 		 struct rte_flow_action actions_pre[])
3633 {
3634 	struct rte_flow_action *tag_action = NULL;
3635 	struct rte_flow_item *tag_item;
3636 	struct mlx5_rte_flow_action_set_tag *set_tag;
3637 	struct rte_flow_error error;
3638 	const struct rte_flow_action_raw_encap *raw_encap;
3639 	const struct rte_flow_action_raw_decap *raw_decap;
3640 	struct mlx5_rte_flow_item_tag *tag_spec;
3641 	struct mlx5_rte_flow_item_tag *tag_mask;
3642 	uint32_t tag_id;
3643 	bool copy_vlan = false;
3644 
3645 	/* Prepare the actions for prefix and suffix flow. */
3646 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3647 		struct rte_flow_action **action_cur = NULL;
3648 
3649 		switch (actions->type) {
3650 		case RTE_FLOW_ACTION_TYPE_METER:
3651 			/* Add the extra tag action first. */
3652 			tag_action = actions_pre;
3653 			tag_action->type = (enum rte_flow_action_type)
3654 					   MLX5_RTE_FLOW_ACTION_TYPE_TAG;
3655 			actions_pre++;
3656 			action_cur = &actions_pre;
3657 			break;
3658 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
3659 		case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
3660 			action_cur = &actions_pre;
3661 			break;
3662 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3663 			raw_encap = actions->conf;
3664 			if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
3665 				action_cur = &actions_pre;
3666 			break;
3667 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3668 			raw_decap = actions->conf;
3669 			if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
3670 				action_cur = &actions_pre;
3671 			break;
3672 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3673 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3674 			copy_vlan = true;
3675 			break;
3676 		default:
3677 			break;
3678 		}
3679 		if (!action_cur)
3680 			action_cur = &actions_sfx;
3681 		memcpy(*action_cur, actions, sizeof(struct rte_flow_action));
3682 		(*action_cur)++;
3683 	}
3684 	/* Add end action to the actions. */
3685 	actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
3686 	actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
3687 	actions_pre++;
3688 	/* Set the tag. */
3689 	set_tag = (void *)actions_pre;
3690 	set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
3691 	/*
3692 	 * Get the id from the qrss_pool to make qrss share the id with meter.
3693 	 */
3694 	tag_id = flow_qrss_get_id(dev);
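	/*
	 * The low bits of the register are reserved for the meter color
	 * (MLX5_MTR_COLOR_BITS), so the flow ID is stored above them; the
	 * suffix flow matches it with the 0xffffff00 mask below.
	 */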
3695 	set_tag->data = tag_id << MLX5_MTR_COLOR_BITS;
3696 	MLX5_ASSERT(tag_action);
3697 	tag_action->conf = set_tag;
3698 	/* Prepare the suffix subflow items. */
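	/*
	 * Reserve the first suffix item slot for the tag match; it is
	 * filled in after the original items are copied below.
	 */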
3699 	tag_item = sfx_items++;
3700 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
3701 		int item_type = items->type;
3702 
3703 		switch (item_type) {
3704 		case RTE_FLOW_ITEM_TYPE_PORT_ID:
3705 			memcpy(sfx_items, items, sizeof(*sfx_items));
3706 			sfx_items++;
3707 			break;
3708 		case RTE_FLOW_ITEM_TYPE_VLAN:
3709 			if (copy_vlan) {
3710 				memcpy(sfx_items, items, sizeof(*sfx_items));
3711 				/*
3712 				 * Convert to an internal match item; it is used
3713 				 * for VLAN push and set VID.
3714 				 */
3715 				sfx_items->type = (enum rte_flow_item_type)
3716 						  MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
3717 				sfx_items++;
3718 			}
3719 			break;
3720 		default:
3721 			break;
3722 		}
3723 	}
3724 	sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
3725 	sfx_items++;
3726 	tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
3727 	tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS;
3728 	tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
3729 	tag_mask = tag_spec + 1;
3730 	tag_mask->data = 0xffffff00;
3731 	tag_item->type = (enum rte_flow_item_type)
3732 			 MLX5_RTE_FLOW_ITEM_TYPE_TAG;
3733 	tag_item->spec = tag_spec;
3734 	tag_item->last = NULL;
3735 	tag_item->mask = tag_mask;
3736 	return tag_id;
3737 }
3738 
3739 /**
3740  * Split action list having QUEUE/RSS for metadata register copy.
3741  *
3742  * Once Q/RSS action is detected in user's action list, the flow action
3743  * should be split in order to copy metadata registers, which will happen in
3744  * RX_CP_TBL like,
3745  *   - CQE->flow_tag := reg_c[1] (MARK)
3746  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
3747  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
3748  * This is because the last action of each flow must be a terminal action
3749  * (QUEUE, RSS or DROP).
3750  *
3751  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
3752  * stored and kept in the mlx5_flow structure per each sub_flow.
3753  *
3754  * The Q/RSS action is replaced with,
3755  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
3756  * And the following JUMP action is added at the end,
3757  *   - JUMP, to RX_CP_TBL.
3758  *
3759  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL by
3760  * the flow_create_split_metadata() routine. The flow will look like,
3761  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
3762  *
3763  * @param dev
3764  *   Pointer to Ethernet device.
3765  * @param[out] split_actions
3766  *   Pointer to store split actions to jump to CP_TBL.
3767  * @param[in] actions
3768  *   Pointer to the list of original flow actions.
3769  * @param[in] qrss
3770  *   Pointer to the Q/RSS action.
3771  * @param[in] actions_n
3772  *   Number of original actions.
3773  * @param[out] error
3774  *   Perform verbose error reporting if not NULL.
3775  *
3776  * @return
3777  *   non-zero unique flow_id on success, otherwise 0 and
3778  *   error/rte_errno are set.
3779  */
3780 static uint32_t
3781 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
3782 			  struct rte_flow_action *split_actions,
3783 			  const struct rte_flow_action *actions,
3784 			  const struct rte_flow_action *qrss,
3785 			  int actions_n, struct rte_flow_error *error)
3786 {
3787 	struct mlx5_rte_flow_action_set_tag *set_tag;
3788 	struct rte_flow_action_jump *jump;
3789 	const int qrss_idx = qrss - actions;
3790 	uint32_t flow_id = 0;
3791 	int ret = 0;
3792 
3793 	/*
3794 	 * Given actions will be split
3795 	 * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
3796 	 * - Add jump to mreg CP_TBL.
3797 	 * As a result, there will be one more action.
3798 	 */
3799 	++actions_n;
3800 	memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
3801 	set_tag = (void *)(split_actions + actions_n);
3802 	/*
3803 	 * If the tag action is not set to VOID (i.e. this is not the meter
3804 	 * suffix flow), add the tag action, since the meter suffix flow
3805 	 * already has the tag added.
3806 	 */
3807 	if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) {
3808 		/*
3809 		 * Allocate the new subflow ID. This one is unique within
3810 		 * device and not shared with representors. Otherwise,
3811 		 * we would have to resolve multi-thread access synch
3812 		 * issue. Each flow on the shared device is appended
3813 		 * with source vport identifier, so the resulting
3814 		 * flows will be unique in the shared (by master and
3815 		 * representors) domain even if they have coinciding
3816 		 * IDs.
3817 		 */
3818 		flow_id = flow_qrss_get_id(dev);
3819 		if (!flow_id)
3820 			return rte_flow_error_set(error, ENOMEM,
3821 						  RTE_FLOW_ERROR_TYPE_ACTION,
3822 						  NULL, "can't allocate id "
3823 						  "for split Q/RSS subflow");
3824 		/* Internal SET_TAG action to set flow ID. */
3825 		*set_tag = (struct mlx5_rte_flow_action_set_tag){
3826 			.data = flow_id,
3827 		};
3828 		ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
3829 		if (ret < 0)
3830 			return ret;
3831 		set_tag->id = ret;
3832 		/* Construct new actions array. */
3833 		/* Replace QUEUE/RSS action. */
3834 		split_actions[qrss_idx] = (struct rte_flow_action){
3835 			.type = (enum rte_flow_action_type)
3836 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
3837 			.conf = set_tag,
3838 		};
3839 	}
3840 	/* JUMP action to jump to mreg copy table (CP_TBL). */
3841 	jump = (void *)(set_tag + 1);
3842 	*jump = (struct rte_flow_action_jump){
3843 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
3844 	};
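	/*
	 * After the increment of actions_n above, the original END action
	 * sits at index actions_n - 2; it is overwritten with the JUMP and
	 * a new END action terminates the list.
	 */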
3845 	split_actions[actions_n - 2] = (struct rte_flow_action){
3846 		.type = RTE_FLOW_ACTION_TYPE_JUMP,
3847 		.conf = jump,
3848 	};
3849 	split_actions[actions_n - 1] = (struct rte_flow_action){
3850 		.type = RTE_FLOW_ACTION_TYPE_END,
3851 	};
3852 	return flow_id;
3853 }
3854 
3855 /**
3856  * Extend the given action list for Tx metadata copy.
3857  *
3858  * Copy the given action list to the ext_actions and add flow metadata register
3859  * copy action in order to copy reg_a set by WQE to reg_c[0].
3860  *
3861  * @param[out] ext_actions
3862  *   Pointer to the extended action list.
3863  * @param[in] actions
3864  *   Pointer to the list of actions.
3865  * @param[in] actions_n
3866  *   Number of actions in the list.
3867  * @param[out] error
3868  *   Perform verbose error reporting if not NULL.
3869  * @param[in] encap_idx
3870  *   The encap action index.
3871  *
3872  * @return
3873  *   0 on success, negative value otherwise
3874  */
3875 static int
3876 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
3877 		       struct rte_flow_action *ext_actions,
3878 		       const struct rte_flow_action *actions,
3879 		       int actions_n, struct rte_flow_error *error,
3880 		       int encap_idx)
3881 {
3882 	struct mlx5_flow_action_copy_mreg *cp_mreg =
3883 		(struct mlx5_flow_action_copy_mreg *)
3884 			(ext_actions + actions_n + 1);
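	/*
	 * The copy-mreg configuration is carved out of the same allocation,
	 * right after the room reserved for actions_n + 1 actions.
	 */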
3885 	int ret;
3886 
3887 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
3888 	if (ret < 0)
3889 		return ret;
3890 	cp_mreg->dst = ret;
3891 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
3892 	if (ret < 0)
3893 		return ret;
3894 	cp_mreg->src = ret;
3895 	if (encap_idx != 0)
3896 		memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
3897 	if (encap_idx == actions_n - 1) {
3898 		ext_actions[actions_n - 1] = (struct rte_flow_action){
3899 			.type = (enum rte_flow_action_type)
3900 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3901 			.conf = cp_mreg,
3902 		};
3903 		ext_actions[actions_n] = (struct rte_flow_action){
3904 			.type = RTE_FLOW_ACTION_TYPE_END,
3905 		};
3906 	} else {
3907 		ext_actions[encap_idx] = (struct rte_flow_action){
3908 			.type = (enum rte_flow_action_type)
3909 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3910 			.conf = cp_mreg,
3911 		};
3912 		memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
3913 				sizeof(*ext_actions) * (actions_n - encap_idx));
3914 	}
3915 	return 0;
3916 }
3917 
3918 /**
3919  * The splitting for metadata feature.
3920  *
3921  * - Q/RSS action on NIC Rx should be split in order to pass by
3922  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
3923  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
3924  *
3925  * - All the actions on NIC Tx should have a mreg copy action to
3926  *   copy reg_a from WQE to reg_c[0].
3927  *
3928  * @param dev
3929  *   Pointer to Ethernet device.
3930  * @param[in] flow
3931  *   Parent flow structure pointer.
3932  * @param[in] prefix_layers
3933  *   Prefix flow layer flags.
3934  * @param[in] attr
3935  *   Flow rule attributes.
3936  * @param[in] items
3937  *   Pattern specification (list terminated by the END pattern item).
3938  * @param[in] actions
3939  *   Associated actions (list terminated by the END action).
3940  * @param[in] external
3941  *   This flow rule is created by a request external to the PMD.
3942  * @param[in] flow_idx
3943  *   The memory pool index of the flow.
3944  * @param[out] error
3945  *   Perform verbose error reporting if not NULL.
3946  * @return
3947  *   0 on success, negative value otherwise
3948  */
3949 static int
3950 flow_create_split_metadata(struct rte_eth_dev *dev,
3951 			   struct rte_flow *flow,
3952 			   uint64_t prefix_layers,
3953 			   const struct rte_flow_attr *attr,
3954 			   const struct rte_flow_item items[],
3955 			   const struct rte_flow_action actions[],
3956 			   bool external, uint32_t flow_idx,
3957 			   struct rte_flow_error *error)
3958 {
3959 	struct mlx5_priv *priv = dev->data->dev_private;
3960 	struct mlx5_dev_config *config = &priv->config;
3961 	const struct rte_flow_action *qrss = NULL;
3962 	struct rte_flow_action *ext_actions = NULL;
3963 	struct mlx5_flow *dev_flow = NULL;
3964 	uint32_t qrss_id = 0;
3965 	int mtr_sfx = 0;
3966 	size_t act_size;
3967 	int actions_n;
3968 	int encap_idx;
3969 	int ret;
3970 
3971 	/* Check whether extensive metadata feature is engaged. */
3972 	if (!config->dv_flow_en ||
3973 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3974 	    !mlx5_flow_ext_mreg_supported(dev))
3975 		return flow_create_split_inner(dev, flow, NULL, prefix_layers,
3976 					       attr, items, actions, external,
3977 					       flow_idx, error);
3978 	actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
3979 							   &encap_idx);
3980 	if (qrss) {
3981 		/* Exclude hairpin flows from splitting. */
3982 		if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
3983 			const struct rte_flow_action_queue *queue;
3984 
3985 			queue = qrss->conf;
3986 			if (mlx5_rxq_get_type(dev, queue->index) ==
3987 			    MLX5_RXQ_TYPE_HAIRPIN)
3988 				qrss = NULL;
3989 		} else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
3990 			const struct rte_flow_action_rss *rss;
3991 
3992 			rss = qrss->conf;
3993 			if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
3994 			    MLX5_RXQ_TYPE_HAIRPIN)
3995 				qrss = NULL;
3996 		}
3997 	}
3998 	if (qrss) {
3999 		/* Check if it is in meter suffix table. */
4000 		mtr_sfx = attr->group == (attr->transfer ?
4001 			  (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
4002 			  MLX5_FLOW_TABLE_LEVEL_SUFFIX);
4003 		/*
4004 		 * Q/RSS action on NIC Rx should be split in order to pass by
4005 		 * the mreg copy table (RX_CP_TBL) and then it jumps to the
4006 		 * action table (RX_ACT_TBL) which has the split Q/RSS action.
4007 		 */
4008 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
4009 			   sizeof(struct rte_flow_action_set_tag) +
4010 			   sizeof(struct rte_flow_action_jump);
4011 		ext_actions = rte_zmalloc(__func__, act_size, 0);
4012 		if (!ext_actions)
4013 			return rte_flow_error_set(error, ENOMEM,
4014 						  RTE_FLOW_ERROR_TYPE_ACTION,
4015 						  NULL, "no memory to split "
4016 						  "metadata flow");
4017 		/*
4018 		 * If this is the meter suffix flow, the tag already exists.
4019 		 * Set the tag action to VOID.
4020 		 */
4021 		if (mtr_sfx)
4022 			ext_actions[qrss - actions].type =
4023 						RTE_FLOW_ACTION_TYPE_VOID;
4024 		else
4025 			ext_actions[qrss - actions].type =
4026 						(enum rte_flow_action_type)
4027 						MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4028 		/*
4029 		 * Create the new action list with the Q/RSS action removed
4030 		 * and a set tag plus a jump to the register copy table
4031 		 * (RX_CP_TBL) appended. The unique tag ID must be preallocated
4032 		 * here because it is needed for the set tag action.
4033 		 */
4034 		qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
4035 						    qrss, actions_n, error);
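		/*
		 * For the meter suffix flow the Q/RSS slot was set to VOID
		 * above, so flow_mreg_split_qrss_prep() returns 0 without
		 * allocating a new ID; that is not an error in this case.
		 */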
4036 		if (!mtr_sfx && !qrss_id) {
4037 			ret = -rte_errno;
4038 			goto exit;
4039 		}
4040 	} else if (attr->egress && !attr->transfer) {
4041 		/*
4042 		 * All the actions on NIC Tx should have a metadata register
4043 		 * copy action to copy reg_a from the WQE to reg_c[meta].
4044 		 */
4045 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
4046 			   sizeof(struct mlx5_flow_action_copy_mreg);
4047 		ext_actions = rte_zmalloc(__func__, act_size, 0);
4048 		if (!ext_actions)
4049 			return rte_flow_error_set(error, ENOMEM,
4050 						  RTE_FLOW_ERROR_TYPE_ACTION,
4051 						  NULL, "no memory to split "
4052 						  "metadata flow");
4053 		/* Create the action list appended with copy register. */
4054 		ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
4055 					     actions_n, error, encap_idx);
4056 		if (ret < 0)
4057 			goto exit;
4058 	}
4059 	/* Add the unmodified original or prefix subflow. */
4060 	ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, attr,
4061 				      items, ext_actions ? ext_actions :
4062 				      actions, external, flow_idx, error);
4063 	if (ret < 0)
4064 		goto exit;
4065 	MLX5_ASSERT(dev_flow);
4066 	if (qrss) {
4067 		const struct rte_flow_attr q_attr = {
4068 			.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4069 			.ingress = 1,
4070 		};
4071 		/* Internal PMD action to set register. */
4072 		struct mlx5_rte_flow_item_tag q_tag_spec = {
4073 			.data = qrss_id,
4074 			.id = 0,
4075 		};
4076 		struct rte_flow_item q_items[] = {
4077 			{
4078 				.type = (enum rte_flow_item_type)
4079 					MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4080 				.spec = &q_tag_spec,
4081 				.last = NULL,
4082 				.mask = NULL,
4083 			},
4084 			{
4085 				.type = RTE_FLOW_ITEM_TYPE_END,
4086 			},
4087 		};
4088 		struct rte_flow_action q_actions[] = {
4089 			{
4090 				.type = qrss->type,
4091 				.conf = qrss->conf,
4092 			},
4093 			{
4094 				.type = RTE_FLOW_ACTION_TYPE_END,
4095 			},
4096 		};
4097 		uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
4098 
4099 		/*
4100 		 * Configure the tag item only if there is no meter subflow.
4101 		 * Since tag is already marked in the meter suffix subflow
4102 		 * we can just use the meter suffix items as is.
4103 		 */
4104 		if (qrss_id) {
4105 			/* Not meter subflow. */
4106 			MLX5_ASSERT(!mtr_sfx);
4107 			/*
4108 			 * Store the unique ID in the prefix flow because it is
4109 			 * destroyed after the suffix flow; the ID is freed only
4110 			 * once no actual flow uses it anymore, at which point
4111 			 * identifier reallocation becomes possible (for example,
4112 			 * for other flows in other threads).
4113 			 */
4114 			dev_flow->handle->split_flow_id = qrss_id;
4115 			ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
4116 						   error);
4117 			if (ret < 0)
4118 				goto exit;
4119 			q_tag_spec.id = ret;
4120 		}
4121 		dev_flow = NULL;
4122 		/* Add suffix subflow to execute Q/RSS. */
4123 		ret = flow_create_split_inner(dev, flow, &dev_flow, layers,
4124 					      &q_attr, mtr_sfx ? items :
4125 					      q_items, q_actions,
4126 					      external, flow_idx, error);
4127 		if (ret < 0)
4128 			goto exit;
4129 		/* Clear qrss_id: it must only be freed on failure (see the exit path). */
4130 		qrss_id = 0;
4131 		MLX5_ASSERT(dev_flow);
4132 	}
4133 
4134 exit:
4135 	/*
4136 	 * We do not destroy the partially created sub_flows in case of error.
4137 	 * These ones are included into parent flow list and will be destroyed
4138 	 * by flow_drv_destroy.
4139 	 */
4140 	flow_qrss_free_id(dev, qrss_id);
4141 	rte_free(ext_actions);
4142 	return ret;
4143 }
4144 
4145 /**
4146  * The splitting for meter feature.
4147  *
4148  * - The meter flow will be split into two flows, a prefix and a
4149  *   suffix flow. The packets only make sense if they pass the prefix
4150  *   meter action.
4151  *
4152  * - Reg_C_5 is used for the packet to match between the prefix and
4153  *   suffix flows.
4154  *
4155  * @param dev
4156  *   Pointer to Ethernet device.
4157  * @param[in] flow
4158  *   Parent flow structure pointer.
4159  * @param[in] attr
4160  *   Flow rule attributes.
4161  * @param[in] items
4162  *   Pattern specification (list terminated by the END pattern item).
4163  * @param[in] actions
4164  *   Associated actions (list terminated by the END action).
4165  * @param[in] external
4166  *   This flow rule is created by a request external to the PMD.
4167  * @param[in] flow_idx
4168  *   The memory pool index of the flow.
4169  * @param[out] error
4170  *   Perform verbose error reporting if not NULL.
4171  * @return
4172  *   0 on success, negative value otherwise
4173  */
4174 static int
4175 flow_create_split_meter(struct rte_eth_dev *dev,
4176 			   struct rte_flow *flow,
4177 			   const struct rte_flow_attr *attr,
4178 			   const struct rte_flow_item items[],
4179 			   const struct rte_flow_action actions[],
4180 			   bool external, uint32_t flow_idx,
4181 			   struct rte_flow_error *error)
4182 {
4183 	struct mlx5_priv *priv = dev->data->dev_private;
4184 	struct rte_flow_action *sfx_actions = NULL;
4185 	struct rte_flow_action *pre_actions = NULL;
4186 	struct rte_flow_item *sfx_items = NULL;
4187 	struct mlx5_flow *dev_flow = NULL;
4188 	struct rte_flow_attr sfx_attr = *attr;
4189 	uint32_t mtr = 0;
4190 	uint32_t mtr_tag_id = 0;
4191 	size_t act_size;
4192 	size_t item_size;
4193 	int actions_n = 0;
4194 	int ret;
4195 
4196 	if (priv->mtr_en)
4197 		actions_n = flow_check_meter_action(actions, &mtr);
4198 	if (mtr) {
4199 		/* The five prefix actions: meter, decap, encap, tag, end. */
4200 		act_size = sizeof(struct rte_flow_action) * (actions_n + 5) +
4201 			   sizeof(struct mlx5_rte_flow_action_set_tag);
4202 		/* tag, vlan, port id, end. */
4203 #define METER_SUFFIX_ITEM 4
4204 		item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
4205 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
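		/*
		 * A single allocation holds the suffix actions, the prefix
		 * actions and the suffix items; they are carved out of it
		 * below.
		 */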
4206 		sfx_actions = rte_zmalloc(__func__, (act_size + item_size), 0);
4207 		if (!sfx_actions)
4208 			return rte_flow_error_set(error, ENOMEM,
4209 						  RTE_FLOW_ERROR_TYPE_ACTION,
4210 						  NULL, "no memory to split "
4211 						  "meter flow");
4212 		sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
4213 			     act_size);
4214 		pre_actions = sfx_actions + actions_n;
4215 		mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items,
4216 						   actions, sfx_actions,
4217 						   pre_actions);
4218 		if (!mtr_tag_id) {
4219 			ret = -rte_errno;
4220 			goto exit;
4221 		}
4222 		/* Add the prefix subflow. */
4223 		ret = flow_create_split_inner(dev, flow, &dev_flow, 0, attr,
4224 					      items, pre_actions, external,
4225 					      flow_idx, error);
4226 		if (ret) {
4227 			ret = -rte_errno;
4228 			goto exit;
4229 		}
4230 		dev_flow->handle->split_flow_id = mtr_tag_id;
4231 		/* Set the suffix group attribute. */
4232 		sfx_attr.group = sfx_attr.transfer ?
4233 				(MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
4234 				 MLX5_FLOW_TABLE_LEVEL_SUFFIX;
4235 	}
4236 	/* Add the suffix subflow, or the original flow when no meter is used. */
4237 	ret = flow_create_split_metadata(dev, flow, dev_flow ?
4238 					 flow_get_prefix_layer_flags(dev_flow) :
4239 					 0, &sfx_attr,
4240 					 sfx_items ? sfx_items : items,
4241 					 sfx_actions ? sfx_actions : actions,
4242 					 external, flow_idx, error);
4243 exit:
4244 	if (sfx_actions)
4245 		rte_free(sfx_actions);
4246 	return ret;
4247 }
4248 
4249 /**
4250  * Split the flow to subflow set. The splitters might be linked
4251  * in the chain, like this:
4252  * flow_create_split_outer() calls:
4253  *   flow_create_split_meter() calls:
4254  *     flow_create_split_metadata(meter_subflow_0) calls:
4255  *       flow_create_split_inner(metadata_subflow_0)
4256  *       flow_create_split_inner(metadata_subflow_1)
4257  *       flow_create_split_inner(metadata_subflow_2)
4258  *     flow_create_split_metadata(meter_subflow_1) calls:
4259  *       flow_create_split_inner(metadata_subflow_0)
4260  *       flow_create_split_inner(metadata_subflow_1)
4261  *       flow_create_split_inner(metadata_subflow_2)
4262  *
4263  * This provides a flexible way to add new levels of flow splitting.
4264  * All of the successfully created subflows are included in the
4265  * parent flow dev_flow list.
4266  *
4267  * @param dev
4268  *   Pointer to Ethernet device.
4269  * @param[in] flow
4270  *   Parent flow structure pointer.
4271  * @param[in] attr
4272  *   Flow rule attributes.
4273  * @param[in] items
4274  *   Pattern specification (list terminated by the END pattern item).
4275  * @param[in] actions
4276  *   Associated actions (list terminated by the END action).
4277  * @param[in] external
4278  *   This flow rule is created by a request external to the PMD.
4279  * @param[in] flow_idx
4280  *   The memory pool index of the flow.
4281  * @param[out] error
4282  *   Perform verbose error reporting if not NULL.
4283  * @return
4284  *   0 on success, negative value otherwise
4285  */
4286 static int
4287 flow_create_split_outer(struct rte_eth_dev *dev,
4288 			struct rte_flow *flow,
4289 			const struct rte_flow_attr *attr,
4290 			const struct rte_flow_item items[],
4291 			const struct rte_flow_action actions[],
4292 			bool external, uint32_t flow_idx,
4293 			struct rte_flow_error *error)
4294 {
4295 	int ret;
4296 
4297 	ret = flow_create_split_meter(dev, flow, attr, items,
4298 					 actions, external, flow_idx, error);
4299 	MLX5_ASSERT(ret <= 0);
4300 	return ret;
4301 }
4302 
4303 /**
4304  * Create a flow and add it to @p list.
4305  *
4306  * @param dev
4307  *   Pointer to Ethernet device.
4308  * @param list
4309  *   Pointer to an indexed flow list. If this parameter is NULL,
4310  *   no list insertion is done; the flow is just created and it is
4311  *   the caller's responsibility to track the
4312  *   created flow.
4313  * @param[in] attr
4314  *   Flow rule attributes.
4315  * @param[in] items
4316  *   Pattern specification (list terminated by the END pattern item).
4317  * @param[in] actions
4318  *   Associated actions (list terminated by the END action).
4319  * @param[in] external
4320  *   This flow rule is created by a request external to the PMD.
4321  * @param[out] error
4322  *   Perform verbose error reporting if not NULL.
4323  *
4324  * @return
4325  *   A flow index on success, 0 otherwise and rte_errno is set.
4326  */
4327 static uint32_t
4328 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
4329 		 const struct rte_flow_attr *attr,
4330 		 const struct rte_flow_item items[],
4331 		 const struct rte_flow_action actions[],
4332 		 bool external, struct rte_flow_error *error)
4333 {
4334 	struct mlx5_priv *priv = dev->data->dev_private;
4335 	struct rte_flow *flow = NULL;
4336 	struct mlx5_flow *dev_flow;
4337 	const struct rte_flow_action_rss *rss;
4338 	union {
4339 		struct rte_flow_expand_rss buf;
4340 		uint8_t buffer[2048];
4341 	} expand_buffer;
4342 	union {
4343 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
4344 		uint8_t buffer[2048];
4345 	} actions_rx;
4346 	union {
4347 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
4348 		uint8_t buffer[2048];
4349 	} actions_hairpin_tx;
4350 	union {
4351 		struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
4352 		uint8_t buffer[2048];
4353 	} items_tx;
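	/*
	 * The unions above are on-stack scratch buffers: one for the RSS
	 * expansion output and three for the Rx/Tx action and Tx pattern
	 * arrays produced by flow_hairpin_split().
	 */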
4354 	struct rte_flow_expand_rss *buf = &expand_buffer.buf;
4355 	struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
4356 					      priv->rss_desc)[!!priv->flow_idx];
4357 	const struct rte_flow_action *p_actions_rx = actions;
4358 	uint32_t i;
4359 	uint32_t idx = 0;
4360 	int hairpin_flow;
4361 	uint32_t hairpin_id = 0;
4362 	struct rte_flow_attr attr_tx = { .priority = 0 };
4363 	int ret;
4364 
4365 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
4366 	ret = flow_drv_validate(dev, attr, items, p_actions_rx,
4367 				external, hairpin_flow, error);
4368 	if (ret < 0)
4369 		return 0;
4370 	if (hairpin_flow > 0) {
4371 		if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
4372 			rte_errno = EINVAL;
4373 			return 0;
4374 		}
4375 		flow_hairpin_split(dev, actions, actions_rx.actions,
4376 				   actions_hairpin_tx.actions, items_tx.items,
4377 				   &hairpin_id);
4378 		p_actions_rx = actions_rx.actions;
4379 	}
4380 	flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx);
4381 	if (!flow) {
4382 		rte_errno = ENOMEM;
4383 		goto error_before_flow;
4384 	}
4385 	flow->drv_type = flow_get_drv_type(dev, attr);
4386 	if (hairpin_id != 0)
4387 		flow->hairpin_flow_id = hairpin_id;
4388 	MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
4389 		    flow->drv_type < MLX5_FLOW_TYPE_MAX);
4390 	memset(rss_desc, 0, sizeof(*rss_desc));
4391 	rss = flow_get_rss_action(p_actions_rx);
4392 	if (rss) {
4393 		/*
4394 		 * The following information is required by
4395 		 * mlx5_flow_hashfields_adjust() in advance.
4396 		 */
4397 		rss_desc->level = rss->level;
4398 		/* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
4399 		rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types;
4400 	}
4401 	flow->dev_handles = 0;
4402 	if (rss && rss->types) {
4403 		unsigned int graph_root;
4404 
4405 		graph_root = find_graph_root(items, rss->level);
4406 		ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
4407 					  items, rss->types,
4408 					  mlx5_support_expansion,
4409 					  graph_root);
4410 		MLX5_ASSERT(ret > 0 &&
4411 		       (unsigned int)ret < sizeof(expand_buffer.buffer));
4412 	} else {
4413 		buf->entries = 1;
4414 		buf->entry[0].pattern = (void *)(uintptr_t)items;
4415 	}
4416 	/*
4417 	 * Record the start index when there is a nested call. All sub-flows
4418 	 * need to be translated before another call is made.
4419 	 * A ping-pong buffer is not used here in order to save memory.
4420 	 */
4421 	if (priv->flow_idx) {
4422 		MLX5_ASSERT(!priv->flow_nested_idx);
4423 		priv->flow_nested_idx = priv->flow_idx;
4424 	}
4425 	for (i = 0; i < buf->entries; ++i) {
4426 		/*
4427 		 * The splitter may create multiple dev_flows,
4428 		 * depending on configuration. In the simplest
4429 		 * case it just creates unmodified original flow.
4430 		 */
4431 		ret = flow_create_split_outer(dev, flow, attr,
4432 					      buf->entry[i].pattern,
4433 					      p_actions_rx, external, idx,
4434 					      error);
4435 		if (ret < 0)
4436 			goto error;
4437 	}
4438 	/* Create the tx flow. */
4439 	if (hairpin_flow) {
4440 		attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
4441 		attr_tx.ingress = 0;
4442 		attr_tx.egress = 1;
4443 		dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
4444 					 actions_hairpin_tx.actions,
4445 					 idx, error);
4446 		if (!dev_flow)
4447 			goto error;
4448 		dev_flow->flow = flow;
4449 		dev_flow->external = 0;
4450 		SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
4451 			      dev_flow->handle, next);
4452 		ret = flow_drv_translate(dev, dev_flow, &attr_tx,
4453 					 items_tx.items,
4454 					 actions_hairpin_tx.actions, error);
4455 		if (ret < 0)
4456 			goto error;
4457 	}
4458 	/*
4459 	 * Update the metadata register copy table. If extensive
4460 	 * metadata feature is enabled and registers are supported
4461 	 * we might create the extra rte_flow for each unique
4462 	 * MARK/FLAG action ID.
4463 	 *
4464 	 * The table is updated for ingress Flows only, because
4465 	 * the egress Flows belong to the different device and
4466 	 * copy table should be updated in peer NIC Rx domain.
4467 	 */
4468 	if (attr->ingress &&
4469 	    (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
4470 		ret = flow_mreg_update_copy_table(dev, flow, actions, error);
4471 		if (ret)
4472 			goto error;
4473 	}
4474 	/*
4475 	 * If the flow is external (from application) OR device is started, then
4476 	 * the flow will be applied immediately.
4477 	 */
4478 	if (external || dev->data->dev_started) {
4479 		ret = flow_drv_apply(dev, flow, error);
4480 		if (ret < 0)
4481 			goto error;
4482 	}
4483 	if (list)
4484 		ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx,
4485 			     flow, next);
4486 	flow_rxq_flags_set(dev, flow);
4487 	/* Nested flow creation index recovery. */
4488 	priv->flow_idx = priv->flow_nested_idx;
4489 	if (priv->flow_nested_idx)
4490 		priv->flow_nested_idx = 0;
4491 	return idx;
4492 error:
4493 	MLX5_ASSERT(flow);
4494 	ret = rte_errno; /* Save rte_errno before cleanup. */
4495 	flow_mreg_del_copy_action(dev, flow);
4496 	flow_drv_destroy(dev, flow);
4497 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx);
4498 	rte_errno = ret; /* Restore rte_errno. */
4499 error_before_flow:
4500 	ret = rte_errno;
4501 	if (hairpin_id)
4502 		mlx5_flow_id_release(priv->sh->flow_id_pool,
4503 				     hairpin_id);
4504 	rte_errno = ret;
4505 	priv->flow_idx = priv->flow_nested_idx;
4506 	if (priv->flow_nested_idx)
4507 		priv->flow_nested_idx = 0;
4508 	return 0;
4509 }
4510 
4511 /**
4512  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
4513  * incoming packets to table 1.
4514  *
4515  * Other flow rules, requested for group n, will be created in
4516  * e-switch table n+1.
4517  * A jump action to e-switch group n will be translated to a jump to table n+1.
4518  *
4519  * Used when working in switchdev mode, to utilise advantages of table 1
4520  * and above.
4521  *
4522  * @param dev
4523  *   Pointer to Ethernet device.
4524  *
4525  * @return
4526  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
4527  */
4528 struct rte_flow *
4529 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
4530 {
4531 	const struct rte_flow_attr attr = {
4532 		.group = 0,
4533 		.priority = 0,
4534 		.ingress = 1,
4535 		.egress = 0,
4536 		.transfer = 1,
4537 	};
4538 	const struct rte_flow_item pattern = {
4539 		.type = RTE_FLOW_ITEM_TYPE_END,
4540 	};
4541 	struct rte_flow_action_jump jump = {
4542 		.group = 1,
4543 	};
4544 	const struct rte_flow_action actions[] = {
4545 		{
4546 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
4547 			.conf = &jump,
4548 		},
4549 		{
4550 			.type = RTE_FLOW_ACTION_TYPE_END,
4551 		},
4552 	};
4553 	struct mlx5_priv *priv = dev->data->dev_private;
4554 	struct rte_flow_error error;
4555 
4556 	return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows,
4557 						   &attr, &pattern,
4558 						   actions, false, &error);
4559 }
4560 
4561 /**
4562  * Validate a flow supported by the NIC.
4563  *
4564  * @see rte_flow_validate()
4565  * @see rte_flow_ops
4566  */
4567 int
4568 mlx5_flow_validate(struct rte_eth_dev *dev,
4569 		   const struct rte_flow_attr *attr,
4570 		   const struct rte_flow_item items[],
4571 		   const struct rte_flow_action actions[],
4572 		   struct rte_flow_error *error)
4573 {
4574 	int hairpin_flow;
4575 
4576 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
4577 	return flow_drv_validate(dev, attr, items, actions,
4578 				true, hairpin_flow, error);
4579 }
4580 
4581 /**
4582  * Create a flow.
4583  *
4584  * @see rte_flow_create()
4585  * @see rte_flow_ops
4586  */
4587 struct rte_flow *
4588 mlx5_flow_create(struct rte_eth_dev *dev,
4589 		 const struct rte_flow_attr *attr,
4590 		 const struct rte_flow_item items[],
4591 		 const struct rte_flow_action actions[],
4592 		 struct rte_flow_error *error)
4593 {
4594 	struct mlx5_priv *priv = dev->data->dev_private;
4595 
4596 	/*
4597 	 * If the device is not started yet, the application is not allowed
4598 	 * to create a flow. PMD default flows and traffic control flows
4599 	 * are not affected.
4600 	 */
4601 	if (unlikely(!dev->data->dev_started)) {
4602 		DRV_LOG(DEBUG, "port %u is not started when "
4603 			"inserting a flow", dev->data->port_id);
4604 		rte_flow_error_set(error, ENODEV,
4605 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
4606 				   NULL,
4607 				   "port not started");
4608 		return NULL;
4609 	}
4610 	return (void *)(uintptr_t)flow_list_create(dev, &priv->flows,
4611 				  attr, items, actions, true, error);
4612 }
4613 
4614 /**
4615  * Destroy a flow in a list.
4616  *
4617  * @param dev
4618  *   Pointer to Ethernet device.
4619  * @param list
4620  *   Pointer to the indexed flow list. If this parameter is NULL,
4621  *   there is no flow removal from the list. Note that since the
4622  *   flow is added to the indexed list, the memory the indexed
4623  *   list points to may change as flows are destroyed.
4624  * @param[in] flow_idx
4625  *   Index of flow to destroy.
4626  */
4627 static void
4628 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
4629 		  uint32_t flow_idx)
4630 {
4631 	struct mlx5_priv *priv = dev->data->dev_private;
4632 	struct mlx5_fdir_flow *priv_fdir_flow = NULL;
4633 	struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
4634 					       [MLX5_IPOOL_RTE_FLOW], flow_idx);
4635 
4636 	if (!flow)
4637 		return;
4638 	/*
4639 	 * Update RX queue flags only if port is started, otherwise it is
4640 	 * already clean.
4641 	 */
4642 	if (dev->data->dev_started)
4643 		flow_rxq_flags_trim(dev, flow);
4644 	if (flow->hairpin_flow_id)
4645 		mlx5_flow_id_release(priv->sh->flow_id_pool,
4646 				     flow->hairpin_flow_id);
4647 	flow_drv_destroy(dev, flow);
4648 	if (list)
4649 		ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list,
4650 			     flow_idx, flow, next);
4651 	flow_mreg_del_copy_action(dev, flow);
4652 	if (flow->fdir) {
4653 		LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
4654 			if (priv_fdir_flow->rix_flow == flow_idx)
4655 				break;
4656 		}
4657 		if (priv_fdir_flow) {
4658 			LIST_REMOVE(priv_fdir_flow, next);
4659 			rte_free(priv_fdir_flow->fdir);
4660 			rte_free(priv_fdir_flow);
4661 		}
4662 	}
4663 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
4664 }
4665 
4666 /**
4667  * Destroy all flows.
4668  *
4669  * @param dev
4670  *   Pointer to Ethernet device.
4671  * @param list
4672  *   Pointer to the Indexed flow list.
4673  * @param active
4674  *   If flushing is called actively.
4675  */
4676 void
4677 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active)
4678 {
4679 	uint32_t num_flushed = 0;
4680 
4681 	while (*list) {
4682 		flow_list_destroy(dev, list, *list);
4683 		num_flushed++;
4684 	}
4685 	if (active) {
4686 		DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
4687 			dev->data->port_id, num_flushed);
4688 	}
4689 }
4690 
4691 /**
4692  * Remove all flows.
4693  *
4694  * @param dev
4695  *   Pointer to Ethernet device.
4696  * @param list
4697  *   Pointer to the Indexed flow list.
4698  */
4699 void
4700 mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list)
4701 {
4702 	struct mlx5_priv *priv = dev->data->dev_private;
4703 	struct rte_flow *flow = NULL;
4704 	uint32_t idx;
4705 
4706 	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx,
4707 		      flow, next) {
4708 		flow_drv_remove(dev, flow);
4709 		flow_mreg_stop_copy_action(dev, flow);
4710 	}
4711 	flow_mreg_del_default_copy_action(dev);
4712 	flow_rxq_flags_clear(dev);
4713 }
4714 
4715 /**
4716  * Add all flows.
4717  *
4718  * @param dev
4719  *   Pointer to Ethernet device.
4720  * @param list
4721  *   Pointer to the Indexed flow list.
4722  *
4723  * @return
4724  *   0 on success, a negative errno value otherwise and rte_errno is set.
4725  */
4726 int
4727 mlx5_flow_start(struct rte_eth_dev *dev, uint32_t *list)
4728 {
4729 	struct mlx5_priv *priv = dev->data->dev_private;
4730 	struct rte_flow *flow = NULL;
4731 	struct rte_flow_error error;
4732 	uint32_t idx;
4733 	int ret = 0;
4734 
4735 	/* Make sure default copy action (reg_c[0] -> reg_b) is created. */
4736 	ret = flow_mreg_add_default_copy_action(dev, &error);
4737 	if (ret < 0)
4738 		return -rte_errno;
4739 	/* Apply Flows created by application. */
4740 	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx,
4741 		      flow, next) {
4742 		ret = flow_mreg_start_copy_action(dev, flow);
4743 		if (ret < 0)
4744 			goto error;
4745 		ret = flow_drv_apply(dev, flow, &error);
4746 		if (ret < 0)
4747 			goto error;
4748 		flow_rxq_flags_set(dev, flow);
4749 	}
4750 	return 0;
4751 error:
4752 	ret = rte_errno; /* Save rte_errno before cleanup. */
4753 	mlx5_flow_stop(dev, list);
4754 	rte_errno = ret; /* Restore rte_errno. */
4755 	return -rte_errno;
4756 }
4757 
4758 /**
4759  * Stop all default actions for flows.
4760  *
4761  * @param dev
4762  *   Pointer to Ethernet device.
4763  */
4764 void
4765 mlx5_flow_stop_default(struct rte_eth_dev *dev)
4766 {
4767 	flow_mreg_del_default_copy_action(dev);
4768 	flow_rxq_flags_clear(dev);
4769 }
4770 
4771 /**
4772  * Start all default actions for flows.
4773  *
4774  * @param dev
4775  *   Pointer to Ethernet device.
4776  * @return
4777  *   0 on success, a negative errno value otherwise and rte_errno is set.
4778  */
4779 int
4780 mlx5_flow_start_default(struct rte_eth_dev *dev)
4781 {
4782 	struct rte_flow_error error;
4783 
4784 	/* Make sure default copy action (reg_c[0] -> reg_b) is created. */
4785 	return flow_mreg_add_default_copy_action(dev, &error);
4786 }
4787 
4788 /**
4789  * Allocate intermediate resources for flow creation.
4790  *
4791  * @param dev
4792  *   Pointer to Ethernet device.
4793  */
4794 void
4795 mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev)
4796 {
4797 	struct mlx5_priv *priv = dev->data->dev_private;
4798 
4799 	if (!priv->inter_flows) {
4800 		priv->inter_flows = rte_calloc(__func__, 1,
4801 				    MLX5_NUM_MAX_DEV_FLOWS *
4802 				    sizeof(struct mlx5_flow) +
4803 				    (sizeof(struct mlx5_flow_rss_desc) +
4804 				    sizeof(uint16_t) * UINT16_MAX) * 2, 0);
4805 		if (!priv->inter_flows) {
4806 			DRV_LOG(ERR, "can't allocate intermediate memory.");
4807 			return;
4808 		}
4809 	}
4810 	priv->rss_desc = &((struct mlx5_flow *)priv->inter_flows)
4811 			 [MLX5_NUM_MAX_DEV_FLOWS];
4812 	/* Reset the index. */
4813 	priv->flow_idx = 0;
4814 	priv->flow_nested_idx = 0;
4815 }
4816 
4817 /**
4818  * Free intermediate resources for flows.
4819  *
4820  * @param dev
4821  *   Pointer to Ethernet device.
4822  */
4823 void
4824 mlx5_flow_free_intermediate(struct rte_eth_dev *dev)
4825 {
4826 	struct mlx5_priv *priv = dev->data->dev_private;
4827 
4828 	rte_free(priv->inter_flows);
4829 	priv->inter_flows = NULL;
4830 }
4831 
4832 /**
4833  * Verify the flow list is empty
4834  *
4835  * @param dev
4836  *  Pointer to Ethernet device.
4837  *
4838  * @return the number of flows not released.
4839  */
4840 int
4841 mlx5_flow_verify(struct rte_eth_dev *dev)
4842 {
4843 	struct mlx5_priv *priv = dev->data->dev_private;
4844 	struct rte_flow *flow;
4845 	uint32_t idx;
4846 	int ret = 0;
4847 
4848 	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx,
4849 		      flow, next) {
4850 		DRV_LOG(DEBUG, "port %u flow %p still referenced",
4851 			dev->data->port_id, (void *)flow);
4852 		++ret;
4853 	}
4854 	return ret;
4855 }
4856 
4857 /**
4858  * Enable default hairpin egress flow.
4859  *
4860  * @param dev
4861  *   Pointer to Ethernet device.
4862  * @param queue
4863  *   The queue index.
4864  *
4865  * @return
4866  *   0 on success, a negative errno value otherwise and rte_errno is set.
4867  */
4868 int
4869 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
4870 			    uint32_t queue)
4871 {
4872 	struct mlx5_priv *priv = dev->data->dev_private;
4873 	const struct rte_flow_attr attr = {
4874 		.egress = 1,
4875 		.priority = 0,
4876 	};
4877 	struct mlx5_rte_flow_item_tx_queue queue_spec = {
4878 		.queue = queue,
4879 	};
4880 	struct mlx5_rte_flow_item_tx_queue queue_mask = {
4881 		.queue = UINT32_MAX,
4882 	};
4883 	struct rte_flow_item items[] = {
4884 		{
4885 			.type = (enum rte_flow_item_type)
4886 				MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
4887 			.spec = &queue_spec,
4888 			.last = NULL,
4889 			.mask = &queue_mask,
4890 		},
4891 		{
4892 			.type = RTE_FLOW_ITEM_TYPE_END,
4893 		},
4894 	};
4895 	struct rte_flow_action_jump jump = {
4896 		.group = MLX5_HAIRPIN_TX_TABLE,
4897 	};
4898 	struct rte_flow_action actions[2];
4899 	uint32_t flow_idx;
4900 	struct rte_flow_error error;
4901 
4902 	actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
4903 	actions[0].conf = &jump;
4904 	actions[1].type = RTE_FLOW_ACTION_TYPE_END;
4905 	flow_idx = flow_list_create(dev, &priv->ctrl_flows,
4906 				&attr, items, actions, false, &error);
4907 	if (!flow_idx) {
4908 		DRV_LOG(DEBUG,
4909 			"Failed to create ctrl flow: rte_errno(%d),"
4910 			" type(%d), message(%s)",
4911 			rte_errno, error.type,
4912 			error.message ? error.message : " (no stated reason)");
4913 		return -rte_errno;
4914 	}
4915 	return 0;
4916 }
4917 
4918 /**
4919  * Enable a control flow configured from the control plane.
4920  *
4921  * @param dev
4922  *   Pointer to Ethernet device.
4923  * @param eth_spec
4924  *   An Ethernet flow spec to apply.
4925  * @param eth_mask
4926  *   An Ethernet flow mask to apply.
4927  * @param vlan_spec
4928  *   A VLAN flow spec to apply.
4929  * @param vlan_mask
4930  *   A VLAN flow mask to apply.
4931  *
4932  * @return
4933  *   0 on success, a negative errno value otherwise and rte_errno is set.
4934  */
4935 int
4936 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
4937 		    struct rte_flow_item_eth *eth_spec,
4938 		    struct rte_flow_item_eth *eth_mask,
4939 		    struct rte_flow_item_vlan *vlan_spec,
4940 		    struct rte_flow_item_vlan *vlan_mask)
4941 {
4942 	struct mlx5_priv *priv = dev->data->dev_private;
4943 	const struct rte_flow_attr attr = {
4944 		.ingress = 1,
4945 		.priority = MLX5_FLOW_PRIO_RSVD,
4946 	};
4947 	struct rte_flow_item items[] = {
4948 		{
4949 			.type = RTE_FLOW_ITEM_TYPE_ETH,
4950 			.spec = eth_spec,
4951 			.last = NULL,
4952 			.mask = eth_mask,
4953 		},
4954 		{
4955 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
4956 					      RTE_FLOW_ITEM_TYPE_END,
4957 			.spec = vlan_spec,
4958 			.last = NULL,
4959 			.mask = vlan_mask,
4960 		},
4961 		{
4962 			.type = RTE_FLOW_ITEM_TYPE_END,
4963 		},
4964 	};
4965 	uint16_t queue[priv->reta_idx_n];
4966 	struct rte_flow_action_rss action_rss = {
4967 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
4968 		.level = 0,
4969 		.types = priv->rss_conf.rss_hf,
4970 		.key_len = priv->rss_conf.rss_key_len,
4971 		.queue_num = priv->reta_idx_n,
4972 		.key = priv->rss_conf.rss_key,
4973 		.queue = queue,
4974 	};
4975 	struct rte_flow_action actions[] = {
4976 		{
4977 			.type = RTE_FLOW_ACTION_TYPE_RSS,
4978 			.conf = &action_rss,
4979 		},
4980 		{
4981 			.type = RTE_FLOW_ACTION_TYPE_END,
4982 		},
4983 	};
4984 	uint32_t flow_idx;
4985 	struct rte_flow_error error;
4986 	unsigned int i;
4987 
4988 	if (!priv->reta_idx_n || !priv->rxqs_n) {
4989 		return 0;
4990 	}
4991 	if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
4992 		action_rss.types = 0;
4993 	for (i = 0; i != priv->reta_idx_n; ++i)
4994 		queue[i] = (*priv->reta_idx)[i];
4995 	flow_idx = flow_list_create(dev, &priv->ctrl_flows,
4996 				&attr, items, actions, false, &error);
4997 	if (!flow_idx)
4998 		return -rte_errno;
4999 	return 0;
5000 }
5001 
5002 /**
5003  * Enable a flow control configured from the control plane.
5004  *
5005  * @param dev
5006  *   Pointer to Ethernet device.
5007  * @param eth_spec
5008  *   An Ethernet flow spec to apply.
5009  * @param eth_mask
5010  *   An Ethernet flow mask to apply.
5011  *
5012  * @return
5013  *   0 on success, a negative errno value otherwise and rte_errno is set.
5014  */
5015 int
5016 mlx5_ctrl_flow(struct rte_eth_dev *dev,
5017 	       struct rte_flow_item_eth *eth_spec,
5018 	       struct rte_flow_item_eth *eth_mask)
5019 {
5020 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
5021 }
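
/*
 * mlx5_ctrl_flow() is typically used by the PMD itself (e.g. from
 * mlx5_traffic_enable()) to install unicast/broadcast MAC control flows;
 * it is a thin wrapper around mlx5_ctrl_flow_vlan() without VLAN matching.
 */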
5022 
5023 /**
5024  * Create the default miss flow rule matching LACP traffic.
5025  *
5026  * @param dev
5027  *   Pointer to Ethernet device.
5030  *
5031  * @return
5032  *   0 on success, a negative errno value otherwise and rte_errno is set.
5033  */
5034 int
5035 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
5036 {
5037 	struct mlx5_priv *priv = dev->data->dev_private;
5038 	/*
5039 	 * The LACP matching is done using only the ether type, since using
5040 	 * a multicast destination MAC causes the kernel to give this flow low priority.
5041 	 */
5042 	static const struct rte_flow_item_eth lacp_spec = {
5043 		.type = RTE_BE16(0x8809),
5044 	};
5045 	static const struct rte_flow_item_eth lacp_mask = {
5046 		.type = 0xffff,
5047 	};
5048 	const struct rte_flow_attr attr = {
5049 		.ingress = 1,
5050 	};
5051 	struct rte_flow_item items[] = {
5052 		{
5053 			.type = RTE_FLOW_ITEM_TYPE_ETH,
5054 			.spec = &lacp_spec,
5055 			.mask = &lacp_mask,
5056 		},
5057 		{
5058 			.type = RTE_FLOW_ITEM_TYPE_END,
5059 		},
5060 	};
5061 	struct rte_flow_action actions[] = {
5062 		{
5063 			.type = (enum rte_flow_action_type)
5064 				MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
5065 		},
5066 		{
5067 			.type = RTE_FLOW_ACTION_TYPE_END,
5068 		},
5069 	};
5070 	struct rte_flow_error error;
5071 	uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows,
5072 				&attr, items, actions, false, &error);
5073 
5074 	if (!flow_idx)
5075 		return -rte_errno;
5076 	return 0;
5077 }
5078 
5079 /**
5080  * Destroy a flow.
5081  *
5082  * @see rte_flow_destroy()
5083  * @see rte_flow_ops
5084  */
5085 int
5086 mlx5_flow_destroy(struct rte_eth_dev *dev,
5087 		  struct rte_flow *flow,
5088 		  struct rte_flow_error *error __rte_unused)
5089 {
5090 	struct mlx5_priv *priv = dev->data->dev_private;
5091 
5092 	flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow);
5093 	return 0;
5094 }
5095 
5096 /**
5097  * Destroy all flows.
5098  *
5099  * @see rte_flow_flush()
5100  * @see rte_flow_ops
5101  */
5102 int
5103 mlx5_flow_flush(struct rte_eth_dev *dev,
5104 		struct rte_flow_error *error __rte_unused)
5105 {
5106 	struct mlx5_priv *priv = dev->data->dev_private;
5107 
5108 	mlx5_flow_list_flush(dev, &priv->flows, false);
5109 	return 0;
5110 }
5111 
5112 /**
5113  * Isolated mode.
5114  *
5115  * @see rte_flow_isolate()
5116  * @see rte_flow_ops
5117  */
5118 int
5119 mlx5_flow_isolate(struct rte_eth_dev *dev,
5120 		  int enable,
5121 		  struct rte_flow_error *error)
5122 {
5123 	struct mlx5_priv *priv = dev->data->dev_private;
5124 
5125 	if (dev->data->dev_started) {
5126 		rte_flow_error_set(error, EBUSY,
5127 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5128 				   NULL,
5129 				   "port must be stopped first");
5130 		return -rte_errno;
5131 	}
5132 	priv->isolated = !!enable;
5133 	if (enable)
5134 		dev->dev_ops = &mlx5_os_dev_ops_isolate;
5135 	else
5136 		dev->dev_ops = &mlx5_os_dev_ops;
5137 	return 0;
5138 }
5139 
5140 /**
5141  * Query a flow.
5142  *
5143  * @see rte_flow_query()
5144  * @see rte_flow_ops
5145  */
5146 static int
5147 flow_drv_query(struct rte_eth_dev *dev,
5148 	       uint32_t flow_idx,
5149 	       const struct rte_flow_action *actions,
5150 	       void *data,
5151 	       struct rte_flow_error *error)
5152 {
5153 	struct mlx5_priv *priv = dev->data->dev_private;
5154 	const struct mlx5_flow_driver_ops *fops;
5155 	struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
5156 					       [MLX5_IPOOL_RTE_FLOW],
5157 					       flow_idx);
5158 	enum mlx5_flow_drv_type ftype;
5159 
5160 	if (!flow) {
5161 		return rte_flow_error_set(error, ENOENT,
5162 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5163 			  NULL,
5164 			  "invalid flow handle");
5165 	}
5166 	ftype = flow->drv_type;
5167 	MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
5168 	fops = flow_get_drv_ops(ftype);
5169 
5170 	return fops->query(dev, flow, actions, data, error);
5171 }
5172 
5173 /**
5174  * Query a flow.
5175  *
5176  * @see rte_flow_query()
5177  * @see rte_flow_ops
5178  */
5179 int
5180 mlx5_flow_query(struct rte_eth_dev *dev,
5181 		struct rte_flow *flow,
5182 		const struct rte_flow_action *actions,
5183 		void *data,
5184 		struct rte_flow_error *error)
5185 {
5186 	int ret;
5187 
5188 	ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
5189 			     error);
5190 	if (ret < 0)
5191 		return ret;
5192 	return 0;
5193 }
5194 
5195 /**
5196  * Convert a flow director filter to a generic flow.
5197  *
5198  * @param dev
5199  *   Pointer to Ethernet device.
5200  * @param fdir_filter
5201  *   Flow director filter to add.
5202  * @param attributes
5203  *   Generic flow parameters structure.
5204  *
5205  * @return
5206  *   0 on success, a negative errno value otherwise and rte_errno is set.
5207  */
5208 static int
5209 flow_fdir_filter_convert(struct rte_eth_dev *dev,
5210 			 const struct rte_eth_fdir_filter *fdir_filter,
5211 			 struct mlx5_fdir *attributes)
5212 {
5213 	struct mlx5_priv *priv = dev->data->dev_private;
5214 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
5215 	const struct rte_eth_fdir_masks *mask =
5216 		&dev->data->dev_conf.fdir_conf.mask;
5217 
5218 	/* Validate queue number. */
5219 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
5220 		DRV_LOG(ERR, "port %u invalid queue number %d",
5221 			dev->data->port_id, fdir_filter->action.rx_queue);
5222 		rte_errno = EINVAL;
5223 		return -rte_errno;
5224 	}
5225 	attributes->attr.ingress = 1;
5226 	attributes->items[0] = (struct rte_flow_item) {
5227 		.type = RTE_FLOW_ITEM_TYPE_ETH,
5228 		.spec = &attributes->l2,
5229 		.mask = &attributes->l2_mask,
5230 	};
5231 	switch (fdir_filter->action.behavior) {
5232 	case RTE_ETH_FDIR_ACCEPT:
5233 		attributes->actions[0] = (struct rte_flow_action){
5234 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
5235 			.conf = &attributes->queue,
5236 		};
5237 		break;
5238 	case RTE_ETH_FDIR_REJECT:
5239 		attributes->actions[0] = (struct rte_flow_action){
5240 			.type = RTE_FLOW_ACTION_TYPE_DROP,
5241 		};
5242 		break;
5243 	default:
5244 		DRV_LOG(ERR, "port %u invalid behavior %d",
5245 			dev->data->port_id,
5246 			fdir_filter->action.behavior);
5247 		rte_errno = ENOTSUP;
5248 		return -rte_errno;
5249 	}
5250 	attributes->queue.index = fdir_filter->action.rx_queue;
5251 	/* Handle L3. */
5252 	switch (fdir_filter->input.flow_type) {
5253 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
5254 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
5255 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
5256 		attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){
5257 			.src_addr = input->flow.ip4_flow.src_ip,
5258 			.dst_addr = input->flow.ip4_flow.dst_ip,
5259 			.time_to_live = input->flow.ip4_flow.ttl,
5260 			.type_of_service = input->flow.ip4_flow.tos,
5261 		};
5262 		attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){
5263 			.src_addr = mask->ipv4_mask.src_ip,
5264 			.dst_addr = mask->ipv4_mask.dst_ip,
5265 			.time_to_live = mask->ipv4_mask.ttl,
5266 			.type_of_service = mask->ipv4_mask.tos,
5267 			.next_proto_id = mask->ipv4_mask.proto,
5268 		};
5269 		attributes->items[1] = (struct rte_flow_item){
5270 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
5271 			.spec = &attributes->l3,
5272 			.mask = &attributes->l3_mask,
5273 		};
5274 		break;
5275 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
5276 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
5277 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
5278 		attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){
5279 			.hop_limits = input->flow.ipv6_flow.hop_limits,
5280 			.proto = input->flow.ipv6_flow.proto,
5281 		};
5282 
5283 		memcpy(attributes->l3.ipv6.hdr.src_addr,
5284 		       input->flow.ipv6_flow.src_ip,
5285 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
5286 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
5287 		       input->flow.ipv6_flow.dst_ip,
5288 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
5289 		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
5290 		       mask->ipv6_mask.src_ip,
5291 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
5292 		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
5293 		       mask->ipv6_mask.dst_ip,
5294 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
5295 		attributes->items[1] = (struct rte_flow_item){
5296 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
5297 			.spec = &attributes->l3,
5298 			.mask = &attributes->l3_mask,
5299 		};
5300 		break;
5301 	default:
5302 		DRV_LOG(ERR, "port %u invalid flow type %d",
5303 			dev->data->port_id, fdir_filter->input.flow_type);
5304 		rte_errno = ENOTSUP;
5305 		return -rte_errno;
5306 	}
5307 	/* Handle L4. */
5308 	switch (fdir_filter->input.flow_type) {
5309 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
5310 		attributes->l4.udp.hdr = (struct rte_udp_hdr){
5311 			.src_port = input->flow.udp4_flow.src_port,
5312 			.dst_port = input->flow.udp4_flow.dst_port,
5313 		};
5314 		attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
5315 			.src_port = mask->src_port_mask,
5316 			.dst_port = mask->dst_port_mask,
5317 		};
5318 		attributes->items[2] = (struct rte_flow_item){
5319 			.type = RTE_FLOW_ITEM_TYPE_UDP,
5320 			.spec = &attributes->l4,
5321 			.mask = &attributes->l4_mask,
5322 		};
5323 		break;
5324 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
5325 		attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
5326 			.src_port = input->flow.tcp4_flow.src_port,
5327 			.dst_port = input->flow.tcp4_flow.dst_port,
5328 		};
5329 		attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
5330 			.src_port = mask->src_port_mask,
5331 			.dst_port = mask->dst_port_mask,
5332 		};
5333 		attributes->items[2] = (struct rte_flow_item){
5334 			.type = RTE_FLOW_ITEM_TYPE_TCP,
5335 			.spec = &attributes->l4,
5336 			.mask = &attributes->l4_mask,
5337 		};
5338 		break;
5339 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
5340 		attributes->l4.udp.hdr = (struct rte_udp_hdr){
5341 			.src_port = input->flow.udp6_flow.src_port,
5342 			.dst_port = input->flow.udp6_flow.dst_port,
5343 		};
5344 		attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
5345 			.src_port = mask->src_port_mask,
5346 			.dst_port = mask->dst_port_mask,
5347 		};
5348 		attributes->items[2] = (struct rte_flow_item){
5349 			.type = RTE_FLOW_ITEM_TYPE_UDP,
5350 			.spec = &attributes->l4,
5351 			.mask = &attributes->l4_mask,
5352 		};
5353 		break;
5354 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
5355 		attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
5356 			.src_port = input->flow.tcp6_flow.src_port,
5357 			.dst_port = input->flow.tcp6_flow.dst_port,
5358 		};
5359 		attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
5360 			.src_port = mask->src_port_mask,
5361 			.dst_port = mask->dst_port_mask,
5362 		};
5363 		attributes->items[2] = (struct rte_flow_item){
5364 			.type = RTE_FLOW_ITEM_TYPE_TCP,
5365 			.spec = &attributes->l4,
5366 			.mask = &attributes->l4_mask,
5367 		};
5368 		break;
5369 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
5370 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
5371 		break;
5372 	default:
5373 		DRV_LOG(ERR, "port %u invalid flow type %d",
5374 			dev->data->port_id, fdir_filter->input.flow_type);
5375 		rte_errno = ENOTSUP;
5376 		return -rte_errno;
5377 	}
5378 	return 0;
5379 }
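
/*
 * For reference, the conversion above lays out the pattern as items[0] = ETH,
 * items[1] = IPV4/IPV6 and, for the UDP/TCP flow types, items[2] = UDP/TCP.
 * Callers pass a zeroed mlx5_fdir structure, so any unused entries remain
 * RTE_FLOW_ITEM_TYPE_END (0) and implicitly terminate the pattern.
 */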
5380 
5381 #define FLOW_FDIR_CMP(f1, f2, fld) \
5382 	memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld))
5383 
5384 /**
5385  * Compare two FDIR flows. If their items and actions are identical, the two
5386  * flows are regarded as the same.
5387  *
5390  * @param f1
5391  *   FDIR flow to compare.
5392  * @param f2
5393  *   FDIR flow to compare.
5394  *
5395  * @return
5396  *   Zero on match, 1 otherwise.
5397  */
5398 static int
5399 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2)
5400 {
5401 	if (FLOW_FDIR_CMP(f1, f2, attr) ||
5402 	    FLOW_FDIR_CMP(f1, f2, l2) ||
5403 	    FLOW_FDIR_CMP(f1, f2, l2_mask) ||
5404 	    FLOW_FDIR_CMP(f1, f2, l3) ||
5405 	    FLOW_FDIR_CMP(f1, f2, l3_mask) ||
5406 	    FLOW_FDIR_CMP(f1, f2, l4) ||
5407 	    FLOW_FDIR_CMP(f1, f2, l4_mask) ||
5408 	    FLOW_FDIR_CMP(f1, f2, actions[0].type))
5409 		return 1;
5410 	if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE &&
5411 	    FLOW_FDIR_CMP(f1, f2, queue))
5412 		return 1;
5413 	return 0;
5414 }
5415 
5416 /**
5417  * Search device flow list to find out a matched FDIR flow.
5418  * Search the device flow list to find a matching FDIR flow.
5419  * @param dev
5420  *   Pointer to Ethernet device.
5421  * @param fdir_flow
5422  *   FDIR flow to lookup.
5423  *
5424  * @return
5425  *   Index of flow if found, 0 otherwise.
5426  */
5427 static uint32_t
5428 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow)
5429 {
5430 	struct mlx5_priv *priv = dev->data->dev_private;
5431 	uint32_t flow_idx = 0;
5432 	struct mlx5_fdir_flow *priv_fdir_flow = NULL;
5433 
5434 	MLX5_ASSERT(fdir_flow);
5435 	LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
5436 		if (!flow_fdir_cmp(priv_fdir_flow->fdir, fdir_flow)) {
5437 			DRV_LOG(DEBUG, "port %u found FDIR flow %u",
5438 				dev->data->port_id, flow_idx);
5439 			flow_idx = priv_fdir_flow->rix_flow;
5440 			break;
5441 		}
5442 	}
5443 	return flow_idx;
5444 }
5445 
5446 /**
5447  * Add a new flow director filter and store it in the list.
5448  *
5449  * @param dev
5450  *   Pointer to Ethernet device.
5451  * @param fdir_filter
5452  *   Flow director filter to add.
5453  *
5454  * @return
5455  *   0 on success, a negative errno value otherwise and rte_errno is set.
5456  */
5457 static int
5458 flow_fdir_filter_add(struct rte_eth_dev *dev,
5459 		     const struct rte_eth_fdir_filter *fdir_filter)
5460 {
5461 	struct mlx5_priv *priv = dev->data->dev_private;
5462 	struct mlx5_fdir *fdir_flow;
5463 	struct rte_flow *flow;
5464 	struct mlx5_fdir_flow *priv_fdir_flow = NULL;
5465 	uint32_t flow_idx;
5466 	int ret;
5467 
5468 	fdir_flow = rte_zmalloc(__func__, sizeof(*fdir_flow), 0);
5469 	if (!fdir_flow) {
5470 		rte_errno = ENOMEM;
5471 		return -rte_errno;
5472 	}
5473 	ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow);
5474 	if (ret)
5475 		goto error;
5476 	flow_idx = flow_fdir_filter_lookup(dev, fdir_flow);
5477 	if (flow_idx) {
5478 		rte_errno = EEXIST;
5479 		goto error;
5480 	}
5481 	priv_fdir_flow = rte_zmalloc(__func__, sizeof(struct mlx5_fdir_flow),
5482 				     0);
5483 	if (!priv_fdir_flow) {
5484 		rte_errno = ENOMEM;
5485 		goto error;
5486 	}
5487 	flow_idx = flow_list_create(dev, &priv->flows, &fdir_flow->attr,
5488 				    fdir_flow->items, fdir_flow->actions, true,
5489 				    NULL);
5490 	flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
5491 	if (!flow)
5492 		goto error;
5493 	flow->fdir = 1;
5494 	priv_fdir_flow->fdir = fdir_flow;
5495 	priv_fdir_flow->rix_flow = flow_idx;
5496 	LIST_INSERT_HEAD(&priv->fdir_flows, priv_fdir_flow, next);
5497 	DRV_LOG(DEBUG, "port %u created FDIR flow %p",
5498 		dev->data->port_id, (void *)flow);
5499 	return 0;
5500 error:
5501 	rte_free(priv_fdir_flow);
5502 	rte_free(fdir_flow);
5503 	return -rte_errno;
5504 }
5505 
5506 /**
5507  * Delete a specific filter.
5508  *
5509  * @param dev
5510  *   Pointer to Ethernet device.
5511  * @param fdir_filter
5512  *   Filter to be deleted.
5513  *
5514  * @return
5515  *   0 on success, a negative errno value otherwise and rte_errno is set.
5516  */
5517 static int
5518 flow_fdir_filter_delete(struct rte_eth_dev *dev,
5519 			const struct rte_eth_fdir_filter *fdir_filter)
5520 {
5521 	struct mlx5_priv *priv = dev->data->dev_private;
5522 	uint32_t flow_idx;
5523 	struct mlx5_fdir fdir_flow = {
5524 		.attr.group = 0,
5525 	};
5526 	struct mlx5_fdir_flow *priv_fdir_flow = NULL;
5527 	int ret;
5528 
5529 	ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow);
5530 	if (ret)
5531 		return -rte_errno;
5532 	LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
5533 		/* Find the FDIR flow in the private list. */
5534 		if (!flow_fdir_cmp(priv_fdir_flow->fdir, &fdir_flow))
5535 			break;
5536 	}
5537 	if (!priv_fdir_flow)
5538 		return 0;
5539 	LIST_REMOVE(priv_fdir_flow, next);
5540 	flow_idx = priv_fdir_flow->rix_flow;
5541 	flow_list_destroy(dev, &priv->flows, flow_idx);
5542 	rte_free(priv_fdir_flow->fdir);
5543 	rte_free(priv_fdir_flow);
5544 	DRV_LOG(DEBUG, "port %u deleted FDIR flow %u",
5545 		dev->data->port_id, flow_idx);
5546 	return 0;
5547 }
5548 
5549 /**
5550  * Update the queue for a specific filter.
5551  *
5552  * @param dev
5553  *   Pointer to Ethernet device.
5554  * @param fdir_filter
5555  *   Filter to be updated.
5556  *
5557  * @return
5558  *   0 on success, a negative errno value otherwise and rte_errno is set.
5559  */
5560 static int
5561 flow_fdir_filter_update(struct rte_eth_dev *dev,
5562 			const struct rte_eth_fdir_filter *fdir_filter)
5563 {
5564 	int ret;
5565 
5566 	ret = flow_fdir_filter_delete(dev, fdir_filter);
5567 	if (ret)
5568 		return ret;
5569 	return flow_fdir_filter_add(dev, fdir_filter);
5570 }
5571 
5572 /**
5573  * Flush all filters.
5574  *
5575  * @param dev
5576  *   Pointer to Ethernet device.
5577  */
5578 static void
5579 flow_fdir_filter_flush(struct rte_eth_dev *dev)
5580 {
5581 	struct mlx5_priv *priv = dev->data->dev_private;
5582 	struct mlx5_fdir_flow *priv_fdir_flow = NULL;
5583 
5584 	while (!LIST_EMPTY(&priv->fdir_flows)) {
5585 		priv_fdir_flow = LIST_FIRST(&priv->fdir_flows);
5586 		LIST_REMOVE(priv_fdir_flow, next);
5587 		flow_list_destroy(dev, &priv->flows, priv_fdir_flow->rix_flow);
5588 		rte_free(priv_fdir_flow->fdir);
5589 		rte_free(priv_fdir_flow);
5590 	}
5591 }
5592 
5593 /**
5594  * Get flow director information.
5595  *
5596  * @param dev
5597  *   Pointer to Ethernet device.
5598  * @param[out] fdir_info
5599  *   Resulting flow director information.
5600  */
5601 static void
5602 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
5603 {
5604 	struct rte_eth_fdir_masks *mask =
5605 		&dev->data->dev_conf.fdir_conf.mask;
5606 
5607 	fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
5608 	fdir_info->guarant_spc = 0;
5609 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
5610 	fdir_info->max_flexpayload = 0;
5611 	fdir_info->flow_types_mask[0] = 0;
5612 	fdir_info->flex_payload_unit = 0;
5613 	fdir_info->max_flex_payload_segment_num = 0;
5614 	fdir_info->flex_payload_limit = 0;
5615 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
5616 }
5617 
5618 /**
5619  * Deal with flow director operations.
5620  *
5621  * @param dev
5622  *   Pointer to Ethernet device.
5623  * @param filter_op
5624  *   Operation to perform.
5625  * @param arg
5626  *   Pointer to operation-specific structure.
5627  *
5628  * @return
5629  *   0 on success, a negative errno value otherwise and rte_errno is set.
5630  */
5631 static int
5632 flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
5633 		    void *arg)
5634 {
5635 	enum rte_fdir_mode fdir_mode =
5636 		dev->data->dev_conf.fdir_conf.mode;
5637 
5638 	if (filter_op == RTE_ETH_FILTER_NOP)
5639 		return 0;
5640 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
5641 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
5642 		DRV_LOG(ERR, "port %u flow director mode %d not supported",
5643 			dev->data->port_id, fdir_mode);
5644 		rte_errno = EINVAL;
5645 		return -rte_errno;
5646 	}
5647 	switch (filter_op) {
5648 	case RTE_ETH_FILTER_ADD:
5649 		return flow_fdir_filter_add(dev, arg);
5650 	case RTE_ETH_FILTER_UPDATE:
5651 		return flow_fdir_filter_update(dev, arg);
5652 	case RTE_ETH_FILTER_DELETE:
5653 		return flow_fdir_filter_delete(dev, arg);
5654 	case RTE_ETH_FILTER_FLUSH:
5655 		flow_fdir_filter_flush(dev);
5656 		break;
5657 	case RTE_ETH_FILTER_INFO:
5658 		flow_fdir_info_get(dev, arg);
5659 		break;
5660 	default:
5661 		DRV_LOG(DEBUG, "port %u unknown operation %u",
5662 			dev->data->port_id, filter_op);
5663 		rte_errno = EINVAL;
5664 		return -rte_errno;
5665 	}
5666 	return 0;
5667 }
5668 
5669 /**
5670  * Manage filter operations.
5671  *
5672  * @param dev
5673  *   Pointer to Ethernet device structure.
5674  * @param filter_type
5675  *   Filter type.
5676  * @param filter_op
5677  *   Operation to perform.
5678  * @param arg
5679  *   Pointer to operation-specific structure.
5680  *
5681  * @return
5682  *   0 on success, a negative errno value otherwise and rte_errno is set.
5683  */
5684 int
5685 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
5686 		     enum rte_filter_type filter_type,
5687 		     enum rte_filter_op filter_op,
5688 		     void *arg)
5689 {
5690 	switch (filter_type) {
5691 	case RTE_ETH_FILTER_GENERIC:
5692 		if (filter_op != RTE_ETH_FILTER_GET) {
5693 			rte_errno = EINVAL;
5694 			return -rte_errno;
5695 		}
5696 		*(const void **)arg = &mlx5_flow_ops;
5697 		return 0;
5698 	case RTE_ETH_FILTER_FDIR:
5699 		return flow_fdir_ctrl_func(dev, filter_op, arg);
5700 	default:
5701 		DRV_LOG(ERR, "port %u filter type (%d) not supported",
5702 			dev->data->port_id, filter_type);
5703 		rte_errno = ENOTSUP;
5704 		return -rte_errno;
5705 	}
5706 	return 0;
5707 }
5708 
5709 /**
5710  * Create the needed meter and suffix tables.
5711  *
5712  * @param[in] dev
5713  *   Pointer to Ethernet device.
5714  * @param[in] fm
5715  *   Pointer to the flow meter.
5716  *
5717  * @return
5718  *   Pointer to table set on success, NULL otherwise.
5719  */
5720 struct mlx5_meter_domains_infos *
5721 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
5722 			  const struct mlx5_flow_meter *fm)
5723 {
5724 	const struct mlx5_flow_driver_ops *fops;
5725 
5726 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5727 	return fops->create_mtr_tbls(dev, fm);
5728 }
5729 
5730 /**
5731  * Destroy the meter table set.
5732  *
5733  * @param[in] dev
5734  *   Pointer to Ethernet device.
5735  * @param[in] tbl
5736  *   Pointer to the meter table set.
5737  *
5738  * @return
5739  *   0 on success.
5740  */
5741 int
5742 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
5743 			   struct mlx5_meter_domains_infos *tbls)
5744 {
5745 	const struct mlx5_flow_driver_ops *fops;
5746 
5747 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5748 	return fops->destroy_mtr_tbls(dev, tbls);
5749 }
5750 
5751 /**
5752  * Create policer rules.
5753  *
5754  * @param[in] dev
5755  *   Pointer to Ethernet device.
5756  * @param[in] fm
5757  *   Pointer to flow meter structure.
5758  * @param[in] attr
5759  *   Pointer to flow attributes.
5760  *
5761  * @return
5762  *   0 on success, -1 otherwise.
5763  */
5764 int
5765 mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
5766 			       struct mlx5_flow_meter *fm,
5767 			       const struct rte_flow_attr *attr)
5768 {
5769 	const struct mlx5_flow_driver_ops *fops;
5770 
5771 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5772 	return fops->create_policer_rules(dev, fm, attr);
5773 }
5774 
5775 /**
5776  * Destroy policer rules.
5777  *
5778  * @param[in] fm
5779  *   Pointer to flow meter structure.
5780  * @param[in] attr
5781  *   Pointer to flow attributes.
5782  *
5783  * @return
5784  *   0 on success, -1 otherwise.
5785  */
5786 int
5787 mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
5788 				struct mlx5_flow_meter *fm,
5789 				const struct rte_flow_attr *attr)
5790 {
5791 	const struct mlx5_flow_driver_ops *fops;
5792 
5793 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5794 	return fops->destroy_policer_rules(dev, fm, attr);
5795 }
5796 
5797 /**
5798  * Allocate a counter.
5799  *
5800  * @param[in] dev
5801  *   Pointer to Ethernet device structure.
5802  *
5803  * @return
5804  *   Index to the allocated counter on success, 0 otherwise.
5805  */
5806 uint32_t
5807 mlx5_counter_alloc(struct rte_eth_dev *dev)
5808 {
5809 	const struct mlx5_flow_driver_ops *fops;
5810 	struct rte_flow_attr attr = { .transfer = 0 };
5811 
5812 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
5813 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5814 		return fops->counter_alloc(dev);
5815 	}
5816 	DRV_LOG(ERR,
5817 		"port %u counter allocation is not supported.",
5818 		 dev->data->port_id);
5819 	return 0;
5820 }
5821 
5822 /**
5823  * Free a counter.
5824  *
5825  * @param[in] dev
5826  *   Pointer to Ethernet device structure.
5827  * @param[in] cnt
5828  *   Index to the counter to be freed.
5829  */
5830 void
5831 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
5832 {
5833 	const struct mlx5_flow_driver_ops *fops;
5834 	struct rte_flow_attr attr = { .transfer = 0 };
5835 
5836 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
5837 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5838 		fops->counter_free(dev, cnt);
5839 		return;
5840 	}
5841 	DRV_LOG(ERR,
5842 		"port %u counter free is not supported.",
5843 		 dev->data->port_id);
5844 }
5845 
5846 /**
5847  * Query counter statistics.
5848  *
5849  * @param[in] dev
5850  *   Pointer to Ethernet device structure.
5851  * @param[in] cnt
5852  *   Index to counter to query.
5853  * @param[in] clear
5854  *   Set to clear counter statistics.
5855  * @param[out] pkts
5856  *   Where to save the number of packets that hit the counter.
5857  * @param[out] bytes
5858  *   Where to save the number of bytes that hit the counter.
5859  *
5860  * @return
5861  *   0 on success, a negative errno value otherwise.
5862  */
5863 int
5864 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
5865 		   bool clear, uint64_t *pkts, uint64_t *bytes)
5866 {
5867 	const struct mlx5_flow_driver_ops *fops;
5868 	struct rte_flow_attr attr = { .transfer = 0 };
5869 
5870 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
5871 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5872 		return fops->counter_query(dev, cnt, clear, pkts, bytes);
5873 	}
5874 	DRV_LOG(ERR,
5875 		"port %u counter query is not supported.",
5876 		 dev->data->port_id);
5877 	return -ENOTSUP;
5878 }
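
/*
 * Minimal usage sketch for the counter helpers above (illustrative only,
 * error handling trimmed):
 *
 *	uint64_t pkts, bytes;
 *	uint32_t cnt = mlx5_counter_alloc(dev);
 *
 *	if (cnt && !mlx5_counter_query(dev, cnt, false, &pkts, &bytes))
 *		use(pkts, bytes); // "use" is a placeholder, not a real API
 *	mlx5_counter_free(dev, cnt);
 */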
5879 
5880 #define MLX5_POOL_QUERY_FREQ_US 1000000
5881 
5882 /**
5883  * Get the number of all valid pools.
5884  *
5885  * @param[in] sh
5886  *   Pointer to mlx5_dev_ctx_shared object.
5887  *
5888  * @return
5889  *   The number of all valid pools.
5890  */
5891 static uint32_t
5892 mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh)
5893 {
5894 	int i;
5895 	uint32_t pools_n = 0;
5896 
5897 	for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i)
5898 		pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid);
5899 	return pools_n;
5900 }
5901 
5902 /**
5903  * Set the periodic procedure for triggering asynchronous batch queries for all
5904  * the counter pools.
5905  *
5906  * @param[in] sh
5907  *   Pointer to mlx5_dev_ctx_shared object.
5908  */
5909 void
5910 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
5911 {
5912 	uint32_t pools_n, us;
5913 
5914 	pools_n = mlx5_get_all_valid_pool_count(sh);
5915 	us = MLX5_POOL_QUERY_FREQ_US / pools_n;
5916 	DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
5917 	if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
5918 		sh->cmng.query_thread_on = 0;
5919 		DRV_LOG(ERR, "Cannot reinitialize query alarm");
5920 	} else {
5921 		sh->cmng.query_thread_on = 1;
5922 	}
5923 }
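
/*
 * Worked example: with MLX5_POOL_QUERY_FREQ_US = 1000000 and, say, four
 * valid pools (hypothetical count, for illustration), the alarm fires every
 * 250000 us; since each firing queries a single pool, a full sweep over all
 * pools takes roughly one second.
 */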
5924 
5925 /**
5926  * The periodic procedure for triggering asynchronous batch queries for all the
5927  * counter pools. This function is expected to be called from the host thread.
5928  *
5929  * @param[in] arg
5930  *   The parameter for the alarm process.
5931  */
5932 void
5933 mlx5_flow_query_alarm(void *arg)
5934 {
5935 	struct mlx5_dev_ctx_shared *sh = arg;
5936 	struct mlx5_devx_obj *dcs;
5937 	uint16_t offset;
5938 	int ret;
5939 	uint8_t batch = sh->cmng.batch;
5940 	uint8_t age = sh->cmng.age;
5941 	uint16_t pool_index = sh->cmng.pool_index;
5942 	struct mlx5_pools_container *cont;
5943 	struct mlx5_flow_counter_pool *pool;
5944 	int cont_loop = MLX5_CCONT_TYPE_MAX;
5945 
5946 	if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
5947 		goto set_alarm;
5948 next_container:
5949 	cont = MLX5_CNT_CONTAINER(sh, batch, age);
5950 	rte_spinlock_lock(&cont->resize_sl);
5951 	if (!cont->pools) {
5952 		rte_spinlock_unlock(&cont->resize_sl);
5953 		/* Check if all the containers are empty. */
5954 		if (unlikely(--cont_loop == 0))
5955 			goto set_alarm;
5956 		batch ^= 0x1;
5957 		pool_index = 0;
5958 		if (batch == 0 && pool_index == 0) {
5959 			age ^= 0x1;
5960 			sh->cmng.batch = batch;
5961 			sh->cmng.age = age;
5962 		}
5963 		goto next_container;
5964 	}
5965 	pool = cont->pools[pool_index];
5966 	rte_spinlock_unlock(&cont->resize_sl);
5967 	if (pool->raw_hw)
5968 		/* There is a pool query in progress. */
5969 		goto set_alarm;
5970 	pool->raw_hw =
5971 		LIST_FIRST(&sh->cmng.free_stat_raws);
5972 	if (!pool->raw_hw)
5973 		/* No free counter statistics raw memory. */
5974 		goto set_alarm;
5975 	dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read
5976 							      (&pool->a64_dcs);
5977 	offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL;
5978 	/*
5979 	 * Identify the counters released between query trigger and query
5980 	 * handle more efficiently. A counter released in this gap period
5981 	 * should wait for a new round of query as the newly arrived packets
5982 	 * will not be taken into account.
5983 	 */
5984 	pool->query_gen++;
5985 	ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
5986 					       offset, NULL, NULL,
5987 					       pool->raw_hw->mem_mng->dm->id,
5988 					       (void *)(uintptr_t)
5989 					       (pool->raw_hw->data + offset),
5990 					       sh->devx_comp,
5991 					       (uint64_t)(uintptr_t)pool);
5992 	if (ret) {
5993 		DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
5994 			" %d", pool->min_dcs->id);
5995 		pool->raw_hw = NULL;
5996 		goto set_alarm;
5997 	}
5998 	pool->raw_hw->min_dcs_id = dcs->id;
5999 	LIST_REMOVE(pool->raw_hw, next);
6000 	sh->cmng.pending_queries++;
6001 	pool_index++;
6002 	if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
6003 		batch ^= 0x1;
6004 		pool_index = 0;
6005 		if (batch == 0 && pool_index == 0)
6006 			age ^= 0x1;
6007 	}
6008 set_alarm:
6009 	sh->cmng.batch = batch;
6010 	sh->cmng.pool_index = pool_index;
6011 	sh->cmng.age = age;
6012 	mlx5_set_query_alarm(sh);
6013 }
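
/*
 * Iteration summary (descriptive note): each alarm invocation queries at
 * most one pool and advances pool_index; once a container is exhausted the
 * batch bit toggles, and when batch wraps back to 0 the age bit toggles as
 * well, so the containers selected by (batch, age) are swept in turn before
 * the cycle repeats.
 */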
6014 
6015 /**
6016  * Check for newly aged flows in the counter pool and raise the aging event.
6017  *
6018  * @param[in] sh
6019  *   Pointer to mlx5_dev_ctx_shared object.
6020  * @param[in] pool
6021  *   Pointer to Current counter pool.
6022  */
6023 static void
6024 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
6025 		   struct mlx5_flow_counter_pool *pool)
6026 {
6027 	struct mlx5_priv *priv;
6028 	struct mlx5_flow_counter *cnt;
6029 	struct mlx5_age_info *age_info;
6030 	struct mlx5_age_param *age_param;
6031 	struct mlx5_counter_stats_raw *cur = pool->raw_hw;
6032 	struct mlx5_counter_stats_raw *prev = pool->raw;
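	/*
	 * Descriptive note: the timestamps below are kept in roughly 0.1 s
	 * units (TSC cycles divided by a tenth of the TSC frequency) and
	 * truncated to 16 bits, the same units used for age_param->timeout
	 * and age_param->expire.
	 */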
6033 	uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10);
6034 	uint32_t i;
6035 
6036 	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
6037 		cnt = MLX5_POOL_GET_CNT(pool, i);
6038 		age_param = MLX5_CNT_TO_AGE(cnt);
6039 		if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE)
6040 			continue;
6041 		if (cur->data[i].hits != prev->data[i].hits) {
6042 			age_param->expire = curr + age_param->timeout;
6043 			continue;
6044 		}
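		/*
		 * Wrap-safe expiry check (descriptive note): in 16-bit
		 * modular arithmetic a difference of UINT16_MAX / 2 or
		 * more means the expire time is still in the future, so
		 * the counter has not aged out yet.
		 */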
6045 		if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2))
6046 			continue;
6047 		/*
6048 		 * Hold the lock first; otherwise, if the release
6049 		 * happens between the AGE_TMOUT state change and
6050 		 * the tailq operation, the release procedure may
6051 		 * delete a non-existent tailq node.
6052 		 */
6053 		priv = rte_eth_devices[age_param->port_id].data->dev_private;
6054 		age_info = GET_PORT_AGE_INFO(priv);
6055 		rte_spinlock_lock(&age_info->aged_sl);
6056 		/* If the cmpset fails, the release has already happened. */
6057 		if (rte_atomic16_cmpset((volatile uint16_t *)
6058 					&age_param->state,
6059 					AGE_CANDIDATE,
6060 					AGE_TMOUT) ==
6061 					AGE_CANDIDATE) {
6062 			TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
6063 			MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
6064 		}
6065 		rte_spinlock_unlock(&age_info->aged_sl);
6066 	}
6067 	for (i = 0; i < sh->max_port; i++) {
6068 		age_info = &sh->port[i].age_info;
6069 		if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW))
6070 			continue;
6071 		if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER))
6072 			_rte_eth_dev_callback_process
6073 				(&rte_eth_devices[sh->port[i].devx_ih_port_id],
6074 				RTE_ETH_EVENT_FLOW_AGED, NULL);
6075 		age_info->flags = 0;
6076 	}
6077 }
6078 
6079 /**
6080  * Handler for the HW response carrying ready values from an asynchronous batch
6081  * query. This function is expected to be called from the host thread.
6082  *
6083  * @param[in] sh
6084  *   The pointer to the shared device context.
6085  * @param[in] async_id
6086  *   The Devx async ID.
6087  * @param[in] status
6088  *   The status of the completion.
6089  */
6090 void
6091 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
6092 				  uint64_t async_id, int status)
6093 {
6094 	struct mlx5_flow_counter_pool *pool =
6095 		(struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
6096 	struct mlx5_counter_stats_raw *raw_to_free;
6097 	uint8_t age = !!IS_AGE_POOL(pool);
6098 	uint8_t query_gen = pool->query_gen ^ 1;
6099 	struct mlx5_pools_container *cont =
6100 		MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool), age);
6101 
6102 	if (unlikely(status)) {
6103 		raw_to_free = pool->raw_hw;
6104 	} else {
6105 		raw_to_free = pool->raw;
6106 		if (IS_AGE_POOL(pool))
6107 			mlx5_flow_aging_check(sh, pool);
6108 		rte_spinlock_lock(&pool->sl);
6109 		pool->raw = pool->raw_hw;
6110 		rte_spinlock_unlock(&pool->sl);
6111 		/* Be sure the new raw counters data is updated in memory. */
6112 		rte_cio_wmb();
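		/*
		 * Descriptive note: counters released before this query was
		 * triggered were parked in the previous generation list
		 * (query_gen ^ 1); their statistics are now final, so hand
		 * them back to the container's free-counter list.
		 */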
6113 		if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
6114 			rte_spinlock_lock(&cont->csl);
6115 			TAILQ_CONCAT(&cont->counters,
6116 				     &pool->counters[query_gen], next);
6117 			rte_spinlock_unlock(&cont->csl);
6118 		}
6119 	}
6120 	LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
6121 	pool->raw_hw = NULL;
6122 	sh->cmng.pending_queries--;
6123 }
6124 
6125 /**
6126  * Translate the rte_flow group index to HW table value.
6127  *
6128  * @param[in] attributes
6129  *   Pointer to flow attributes
6130  * @param[in] external
6131  *   Set when the flow rule is created by a request external to the PMD.
6132  * @param[in] group
6133  *   rte_flow group index value.
6134  * @param[in] fdb_def_rule
6135  *   Whether the FDB jump-to-table-1 default rule is configured.
6136  * @param[out] table
6137  *   HW table value.
6138  * @param[out] error
6139  *   Pointer to error structure.
6140  *
6141  * @return
6142  *   0 on success, a negative errno value otherwise and rte_errno is set.
6143  */
6144 int
6145 mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external,
6146 			 uint32_t group, bool fdb_def_rule, uint32_t *table,
6147 			 struct rte_flow_error *error)
6148 {
6149 	if (attributes->transfer && external && fdb_def_rule) {
6150 		if (group == UINT32_MAX)
6151 			return rte_flow_error_set
6152 						(error, EINVAL,
6153 						 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
6154 						 NULL,
6155 						 "group index not supported");
6156 		*table = group + 1;
6157 	} else {
6158 		*table = group;
6159 	}
6160 	return 0;
6161 }
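
/*
 * Worked example: with attributes->transfer, external and fdb_def_rule all
 * set, rte_flow group 0 maps to HW table 1 and group N to table N + 1
 * (table 0 is taken by the FDB default jump rule); in all other cases the
 * HW table value equals the group index.
 */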
6162 
6163 /**
6164  * Discover availability of metadata reg_c's.
6165  *
6166  * Iteratively use test flows to check availability.
6167  *
6168  * @param[in] dev
6169  *   Pointer to the Ethernet device structure.
6170  *
6171  * @return
6172  *   0 on success, a negative errno value otherwise and rte_errno is set.
6173  */
6174 int
6175 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
6176 {
6177 	struct mlx5_priv *priv = dev->data->dev_private;
6178 	struct mlx5_dev_config *config = &priv->config;
6179 	enum modify_reg idx;
6180 	int n = 0;
6181 
6182 	/* reg_c[0] and reg_c[1] are reserved. */
6183 	config->flow_mreg_c[n++] = REG_C_0;
6184 	config->flow_mreg_c[n++] = REG_C_1;
6185 	/* Discover availability of other reg_c's. */
6186 	for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
6187 		struct rte_flow_attr attr = {
6188 			.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
6189 			.priority = MLX5_FLOW_PRIO_RSVD,
6190 			.ingress = 1,
6191 		};
6192 		struct rte_flow_item items[] = {
6193 			[0] = {
6194 				.type = RTE_FLOW_ITEM_TYPE_END,
6195 			},
6196 		};
6197 		struct rte_flow_action actions[] = {
6198 			[0] = {
6199 				.type = (enum rte_flow_action_type)
6200 					MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
6201 				.conf = &(struct mlx5_flow_action_copy_mreg){
6202 					.src = REG_C_1,
6203 					.dst = idx,
6204 				},
6205 			},
6206 			[1] = {
6207 				.type = RTE_FLOW_ACTION_TYPE_JUMP,
6208 				.conf = &(struct rte_flow_action_jump){
6209 					.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
6210 				},
6211 			},
6212 			[2] = {
6213 				.type = RTE_FLOW_ACTION_TYPE_END,
6214 			},
6215 		};
6216 		uint32_t flow_idx;
6217 		struct rte_flow *flow;
6218 		struct rte_flow_error error;
6219 
6220 		if (!config->dv_flow_en)
6221 			break;
6222 		/* Create internal flow, validation skips copy action. */
6223 		flow_idx = flow_list_create(dev, NULL, &attr, items,
6224 					    actions, false, &error);
6225 		flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
6226 				      flow_idx);
6227 		if (!flow)
6228 			continue;
6229 		if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL))
6230 			config->flow_mreg_c[n++] = idx;
6231 		flow_list_destroy(dev, NULL, flow_idx);
6232 	}
6233 	for (; n < MLX5_MREG_C_NUM; ++n)
6234 		config->flow_mreg_c[n] = REG_NONE;
6235 	return 0;
6236 }
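
/*
 * After this probe, config->flow_mreg_c[] starts with REG_C_0 and REG_C_1,
 * continues with every register in REG_C_2..REG_C_7 that the test copy flow
 * could use, and is padded with REG_NONE for the remaining slots.
 */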
6237 
6238 /**
6239  * Dump raw HW flow data to a file.
6240  *
6241  * @param[in] dev
6242  *    The pointer to Ethernet device.
6243  * @param[in] file
6244  *   A pointer to a file for output.
6245  * @param[out] error
6246  *   Perform verbose error reporting if not NULL. PMDs initialize this
6247  *   structure in case of error only.
6248  * @return
6249  *   0 on success, a negative value otherwise.
6250  */
6251 int
6252 mlx5_flow_dev_dump(struct rte_eth_dev *dev,
6253 		   FILE *file,
6254 		   struct rte_flow_error *error __rte_unused)
6255 {
6256 	struct mlx5_priv *priv = dev->data->dev_private;
6257 	struct mlx5_dev_ctx_shared *sh = priv->sh;
6258 
6259 	return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
6260 				       sh->tx_domain, file);
6261 }
6262 
6263 /**
6264  * Get aged-out flows.
6265  *
6266  * @param[in] dev
6267  *   Pointer to the Ethernet device structure.
6268  * @param[in] contexts
6269  *   The address of an array of pointers to the aged-out flow contexts.
6270  * @param[in] nb_contexts
6271  *   The length of the context array.
6272  * @param[out] error
6273  *   Perform verbose error reporting if not NULL. Initialized in case of
6274  *   error only.
6275  *
6276  * @return
6277  *   The number of aged-out flows on success, a negative errno value otherwise.
6278  *   If nb_contexts is 0, return the total number of aged-out contexts.
6279  *   If nb_contexts is not 0, return the number of aged-out flows reported
6280  *   in the context array.
6281  */
6282 int
6283 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
6284 			uint32_t nb_contexts, struct rte_flow_error *error)
6285 {
6286 	const struct mlx5_flow_driver_ops *fops;
6287 	struct rte_flow_attr attr = { .transfer = 0 };
6288 
6289 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6290 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6291 		return fops->get_aged_flows(dev, contexts, nb_contexts,
6292 						    error);
6293 	}
6294 	DRV_LOG(ERR,
6295 		"port %u get aged flows is not supported.",
6296 		 dev->data->port_id);
6297 	return -ENOTSUP;
6298 }
6299