1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9 
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19 
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28 
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33 
34 /* Flow priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 1
36 
37 /* Internet Protocol versions. */
38 #define MLX5_IPV4 4
39 #define MLX5_IPV6 6
40 #define MLX5_GRE 47
41 
42 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
43 struct ibv_flow_spec_counter_action {
44 	int dummy;
45 };
46 #endif
47 
48 /* Dev ops structure defined in mlx5.c */
49 extern const struct eth_dev_ops mlx5_dev_ops;
50 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
51 
52 /** Structure given to the conversion functions. */
53 struct mlx5_flow_data {
54 	struct rte_eth_dev *dev; /**< Ethernet device. */
55 	struct mlx5_flow_parse *parser; /**< Parser context. */
56 	struct rte_flow_error *error; /**< Error context. */
57 };
58 
59 static int
60 mlx5_flow_create_eth(const struct rte_flow_item *item,
61 		     const void *default_mask,
62 		     struct mlx5_flow_data *data);
63 
64 static int
65 mlx5_flow_create_vlan(const struct rte_flow_item *item,
66 		      const void *default_mask,
67 		      struct mlx5_flow_data *data);
68 
69 static int
70 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
71 		      const void *default_mask,
72 		      struct mlx5_flow_data *data);
73 
74 static int
75 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
76 		      const void *default_mask,
77 		      struct mlx5_flow_data *data);
78 
79 static int
80 mlx5_flow_create_udp(const struct rte_flow_item *item,
81 		     const void *default_mask,
82 		     struct mlx5_flow_data *data);
83 
84 static int
85 mlx5_flow_create_tcp(const struct rte_flow_item *item,
86 		     const void *default_mask,
87 		     struct mlx5_flow_data *data);
88 
89 static int
90 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
91 		       const void *default_mask,
92 		       struct mlx5_flow_data *data);
93 
94 static int
95 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
96 			   const void *default_mask,
97 			   struct mlx5_flow_data *data);
98 
99 static int
100 mlx5_flow_create_gre(const struct rte_flow_item *item,
101 		     const void *default_mask,
102 		     struct mlx5_flow_data *data);
103 
104 static int
105 mlx5_flow_create_mpls(const struct rte_flow_item *item,
106 		      const void *default_mask,
107 		      struct mlx5_flow_data *data);
108 
109 struct mlx5_flow_parse;
110 
111 static void
112 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
113 		      unsigned int size);
114 
115 static int
116 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
117 
118 static int
119 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
120 
121 /* Hash RX queue types. */
122 enum hash_rxq_type {
123 	HASH_RXQ_TCPV4,
124 	HASH_RXQ_UDPV4,
125 	HASH_RXQ_IPV4,
126 	HASH_RXQ_TCPV6,
127 	HASH_RXQ_UDPV6,
128 	HASH_RXQ_IPV6,
129 	HASH_RXQ_ETH,
130 	HASH_RXQ_TUNNEL,
131 };
132 
133 /* Initialization data for hash RX queue. */
134 struct hash_rxq_init {
135 	uint64_t hash_fields; /* Fields that participate in the hash. */
136 	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
137 	unsigned int flow_priority; /* Flow priority to use. */
138 	unsigned int ip_version; /* Internet protocol. */
139 };
140 
141 /* Initialization data for hash RX queues. */
142 const struct hash_rxq_init hash_rxq_init[] = {
143 	[HASH_RXQ_TCPV4] = {
144 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
145 				IBV_RX_HASH_DST_IPV4 |
146 				IBV_RX_HASH_SRC_PORT_TCP |
147 				IBV_RX_HASH_DST_PORT_TCP),
148 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
149 		.flow_priority = 0,
150 		.ip_version = MLX5_IPV4,
151 	},
152 	[HASH_RXQ_UDPV4] = {
153 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
154 				IBV_RX_HASH_DST_IPV4 |
155 				IBV_RX_HASH_SRC_PORT_UDP |
156 				IBV_RX_HASH_DST_PORT_UDP),
157 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
158 		.flow_priority = 0,
159 		.ip_version = MLX5_IPV4,
160 	},
161 	[HASH_RXQ_IPV4] = {
162 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
163 				IBV_RX_HASH_DST_IPV4),
164 		.dpdk_rss_hf = (ETH_RSS_IPV4 |
165 				ETH_RSS_FRAG_IPV4),
166 		.flow_priority = 1,
167 		.ip_version = MLX5_IPV4,
168 	},
169 	[HASH_RXQ_TCPV6] = {
170 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
171 				IBV_RX_HASH_DST_IPV6 |
172 				IBV_RX_HASH_SRC_PORT_TCP |
173 				IBV_RX_HASH_DST_PORT_TCP),
174 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
175 		.flow_priority = 0,
176 		.ip_version = MLX5_IPV6,
177 	},
178 	[HASH_RXQ_UDPV6] = {
179 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
180 				IBV_RX_HASH_DST_IPV6 |
181 				IBV_RX_HASH_SRC_PORT_UDP |
182 				IBV_RX_HASH_DST_PORT_UDP),
183 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
184 		.flow_priority = 0,
185 		.ip_version = MLX5_IPV6,
186 	},
187 	[HASH_RXQ_IPV6] = {
188 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
189 				IBV_RX_HASH_DST_IPV6),
190 		.dpdk_rss_hf = (ETH_RSS_IPV6 |
191 				ETH_RSS_FRAG_IPV6),
192 		.flow_priority = 1,
193 		.ip_version = MLX5_IPV6,
194 	},
195 	[HASH_RXQ_ETH] = {
196 		.hash_fields = 0,
197 		.dpdk_rss_hf = 0,
198 		.flow_priority = 2,
199 	},
200 };
201 
202 /* Number of entries in hash_rxq_init[]. */
203 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
204 
205 /** Structure for holding counter stats. */
206 struct mlx5_flow_counter_stats {
207 	uint64_t hits; /**< Number of packets matched by the rule. */
208 	uint64_t bytes; /**< Number of bytes matched by the rule. */
209 };
210 
211 /** Structure for Drop queue. */
212 struct mlx5_hrxq_drop {
213 	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
214 	struct ibv_qp *qp; /**< Verbs queue pair. */
215 	struct ibv_wq *wq; /**< Verbs work queue. */
216 	struct ibv_cq *cq; /**< Verbs completion queue. */
217 };
218 
219 /* Flow structures. */
220 struct mlx5_flow {
221 	uint64_t hash_fields; /**< Fields that participate in the hash. */
222 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
223 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
224 	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
225 };
226 
227 /* Drop flow structure. */
228 struct mlx5_flow_drop {
229 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
230 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
231 };
232 
233 struct rte_flow {
234 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
235 	uint32_t mark:1; /**< Set if the flow is marked. */
236 	uint32_t drop:1; /**< Drop queue. */
237 	struct rte_flow_action_rss rss_conf; /**< RSS configuration */
238 	uint16_t (*queues)[]; /**< Queues indexes to use. */
239 	uint8_t rss_key[40]; /**< copy of the RSS key. */
240 	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
241 	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
242 	struct mlx5_flow_counter_stats counter_stats; /**< Counter stats. */
243 	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
244 	/**< Flow with Rx queue. */
245 };
246 
247 /** Static initializer for items. */
248 #define ITEMS(...) \
249 	(const enum rte_flow_item_type []){ \
250 		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
251 	}
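/*
 * Illustrative expansion (not part of the build): ITEMS() turns its
 * arguments into an END-terminated anonymous array, e.g.
 *
 *   ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6)
 *
 * expands to
 *
 *   (const enum rte_flow_item_type []){
 *           RTE_FLOW_ITEM_TYPE_IPV4,
 *           RTE_FLOW_ITEM_TYPE_IPV6,
 *           RTE_FLOW_ITEM_TYPE_END,
 *   }
 */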
252 
253 #define IS_TUNNEL(type) ( \
254 	(type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
255 	(type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
256 	(type) == RTE_FLOW_ITEM_TYPE_GRE || \
257 	(type) == RTE_FLOW_ITEM_TYPE_MPLS)
258 
259 const uint32_t flow_ptype[] = {
260 	[RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
261 	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
262 	[RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
263 	[RTE_FLOW_ITEM_TYPE_MPLS] = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
264 };
265 
266 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
267 
268 const uint32_t ptype_ext[] = {
269 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
270 					      RTE_PTYPE_L4_UDP,
271 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)]	= RTE_PTYPE_TUNNEL_VXLAN_GPE |
272 						  RTE_PTYPE_L4_UDP,
273 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
274 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)] =
275 		RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
276 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)] =
277 		RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
278 };
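/*
 * Worked example (assuming the standard RTE_PTYPE_TUNNEL_* values from
 * rte_mbuf_ptype.h, where the tunnel type occupies bits 12-15 and
 * RTE_PTYPE_TUNNEL_MASK is 0x0000f000): RTE_PTYPE_TUNNEL_VXLAN is
 * 0x00003000, so PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN) == 0x3000 >> 12 == 3,
 * the ptype_ext[] slot holding RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP.
 */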
279 
280 /** Structure to generate a simple graph of layers supported by the NIC. */
281 struct mlx5_flow_items {
282 	/** List of possible actions for these items. */
283 	const enum rte_flow_action_type *const actions;
284 	/** Bit-masks corresponding to the possibilities for the item. */
285 	const void *mask;
286 	/**
287 	 * Default bit-masks to use when item->mask is not provided. When
288 	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
289 	 * used instead.
290 	 */
291 	const void *default_mask;
292 	/** Bit-masks size in bytes. */
293 	const unsigned int mask_sz;
294 	/**
295 	 * Conversion function from rte_flow to NIC specific flow.
296 	 *
297 	 * @param item
298 	 *   rte_flow item to convert.
299 	 * @param default_mask
300 	 *   Default bit-masks to use when item->mask is not provided.
301 	 * @param data
302 	 *   Internal structure to store the conversion.
303 	 *
304 	 * @return
305 	 *   0 on success, a negative errno value otherwise and rte_errno is
306 	 *   set.
307 	 */
308 	int (*convert)(const struct rte_flow_item *item,
309 		       const void *default_mask,
310 		       struct mlx5_flow_data *data);
311 	/** Size in bytes of the destination structure. */
312 	const unsigned int dst_sz;
313 	/** List of possible following items.  */
314 	const enum rte_flow_item_type *const items;
315 };
316 
317 /** Valid actions for this PMD. */
318 static const enum rte_flow_action_type valid_actions[] = {
319 	RTE_FLOW_ACTION_TYPE_DROP,
320 	RTE_FLOW_ACTION_TYPE_QUEUE,
321 	RTE_FLOW_ACTION_TYPE_MARK,
322 	RTE_FLOW_ACTION_TYPE_FLAG,
323 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
324 	RTE_FLOW_ACTION_TYPE_COUNT,
325 #endif
326 	RTE_FLOW_ACTION_TYPE_END,
327 };
328 
329 /** Graph of supported items and associated actions. */
330 static const struct mlx5_flow_items mlx5_flow_items[] = {
331 	[RTE_FLOW_ITEM_TYPE_END] = {
332 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
333 			       RTE_FLOW_ITEM_TYPE_VXLAN,
334 			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
335 			       RTE_FLOW_ITEM_TYPE_GRE),
336 	},
337 	[RTE_FLOW_ITEM_TYPE_ETH] = {
338 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
339 			       RTE_FLOW_ITEM_TYPE_IPV4,
340 			       RTE_FLOW_ITEM_TYPE_IPV6),
341 		.actions = valid_actions,
342 		.mask = &(const struct rte_flow_item_eth){
343 			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
344 			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
345 			.type = -1,
346 		},
347 		.default_mask = &rte_flow_item_eth_mask,
348 		.mask_sz = sizeof(struct rte_flow_item_eth),
349 		.convert = mlx5_flow_create_eth,
350 		.dst_sz = sizeof(struct ibv_flow_spec_eth),
351 	},
352 	[RTE_FLOW_ITEM_TYPE_VLAN] = {
353 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
354 			       RTE_FLOW_ITEM_TYPE_IPV6),
355 		.actions = valid_actions,
356 		.mask = &(const struct rte_flow_item_vlan){
357 			.tci = -1,
358 			.inner_type = -1,
359 		},
360 		.default_mask = &rte_flow_item_vlan_mask,
361 		.mask_sz = sizeof(struct rte_flow_item_vlan),
362 		.convert = mlx5_flow_create_vlan,
363 		.dst_sz = 0,
364 	},
365 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
366 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
367 			       RTE_FLOW_ITEM_TYPE_TCP,
368 			       RTE_FLOW_ITEM_TYPE_GRE),
369 		.actions = valid_actions,
370 		.mask = &(const struct rte_flow_item_ipv4){
371 			.hdr = {
372 				.src_addr = -1,
373 				.dst_addr = -1,
374 				.type_of_service = -1,
375 				.next_proto_id = -1,
376 			},
377 		},
378 		.default_mask = &rte_flow_item_ipv4_mask,
379 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
380 		.convert = mlx5_flow_create_ipv4,
381 		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
382 	},
383 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
384 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
385 			       RTE_FLOW_ITEM_TYPE_TCP,
386 			       RTE_FLOW_ITEM_TYPE_GRE),
387 		.actions = valid_actions,
388 		.mask = &(const struct rte_flow_item_ipv6){
389 			.hdr = {
390 				.src_addr = {
391 					0xff, 0xff, 0xff, 0xff,
392 					0xff, 0xff, 0xff, 0xff,
393 					0xff, 0xff, 0xff, 0xff,
394 					0xff, 0xff, 0xff, 0xff,
395 				},
396 				.dst_addr = {
397 					0xff, 0xff, 0xff, 0xff,
398 					0xff, 0xff, 0xff, 0xff,
399 					0xff, 0xff, 0xff, 0xff,
400 					0xff, 0xff, 0xff, 0xff,
401 				},
402 				.vtc_flow = -1,
403 				.proto = -1,
404 				.hop_limits = -1,
405 			},
406 		},
407 		.default_mask = &rte_flow_item_ipv6_mask,
408 		.mask_sz = sizeof(struct rte_flow_item_ipv6),
409 		.convert = mlx5_flow_create_ipv6,
410 		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
411 	},
412 	[RTE_FLOW_ITEM_TYPE_UDP] = {
413 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
414 			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
415 			       RTE_FLOW_ITEM_TYPE_MPLS),
416 		.actions = valid_actions,
417 		.mask = &(const struct rte_flow_item_udp){
418 			.hdr = {
419 				.src_port = -1,
420 				.dst_port = -1,
421 			},
422 		},
423 		.default_mask = &rte_flow_item_udp_mask,
424 		.mask_sz = sizeof(struct rte_flow_item_udp),
425 		.convert = mlx5_flow_create_udp,
426 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
427 	},
428 	[RTE_FLOW_ITEM_TYPE_TCP] = {
429 		.actions = valid_actions,
430 		.mask = &(const struct rte_flow_item_tcp){
431 			.hdr = {
432 				.src_port = -1,
433 				.dst_port = -1,
434 			},
435 		},
436 		.default_mask = &rte_flow_item_tcp_mask,
437 		.mask_sz = sizeof(struct rte_flow_item_tcp),
438 		.convert = mlx5_flow_create_tcp,
439 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
440 	},
441 	[RTE_FLOW_ITEM_TYPE_GRE] = {
442 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
443 			       RTE_FLOW_ITEM_TYPE_IPV4,
444 			       RTE_FLOW_ITEM_TYPE_IPV6,
445 			       RTE_FLOW_ITEM_TYPE_MPLS),
446 		.actions = valid_actions,
447 		.mask = &(const struct rte_flow_item_gre){
448 			.protocol = -1,
449 		},
450 		.default_mask = &rte_flow_item_gre_mask,
451 		.mask_sz = sizeof(struct rte_flow_item_gre),
452 		.convert = mlx5_flow_create_gre,
453 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
454 		.dst_sz = sizeof(struct ibv_flow_spec_gre),
455 #else
456 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
457 #endif
458 	},
459 	[RTE_FLOW_ITEM_TYPE_MPLS] = {
460 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
461 			       RTE_FLOW_ITEM_TYPE_IPV4,
462 			       RTE_FLOW_ITEM_TYPE_IPV6),
463 		.actions = valid_actions,
464 		.mask = &(const struct rte_flow_item_mpls){
465 			.label_tc_s = "\xff\xff\xf0",
466 		},
467 		.default_mask = &rte_flow_item_mpls_mask,
468 		.mask_sz = sizeof(struct rte_flow_item_mpls),
469 		.convert = mlx5_flow_create_mpls,
470 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
471 		.dst_sz = sizeof(struct ibv_flow_spec_mpls),
472 #endif
473 	},
474 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
475 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
476 			       RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
477 			       RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
478 		.actions = valid_actions,
479 		.mask = &(const struct rte_flow_item_vxlan){
480 			.vni = "\xff\xff\xff",
481 		},
482 		.default_mask = &rte_flow_item_vxlan_mask,
483 		.mask_sz = sizeof(struct rte_flow_item_vxlan),
484 		.convert = mlx5_flow_create_vxlan,
485 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
486 	},
487 	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = {
488 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
489 			       RTE_FLOW_ITEM_TYPE_IPV4,
490 			       RTE_FLOW_ITEM_TYPE_IPV6),
491 		.actions = valid_actions,
492 		.mask = &(const struct rte_flow_item_vxlan_gpe){
493 			.vni = "\xff\xff\xff",
494 		},
495 		.default_mask = &rte_flow_item_vxlan_gpe_mask,
496 		.mask_sz = sizeof(struct rte_flow_item_vxlan_gpe),
497 		.convert = mlx5_flow_create_vxlan_gpe,
498 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
499 	},
500 };
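/*
 * Illustrative pattern accepted by the graph above (sketch only, not part
 * of the driver): an outer eth / ipv4 / udp / vxlan stack followed by an
 * inner eth / ipv4. The VXLAN item carries a non-zero VNI since a zero VNI
 * is refused by mlx5_flow_create_vxlan().
 *
 *   const struct rte_flow_item_vxlan vxlan_spec = { .vni = "\x00\x00\x64" };
 *   const struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *           { .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &vxlan_spec },
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 */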
501 
502 /** Structure to pass to the conversion function. */
503 struct mlx5_flow_parse {
504 	uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
505 	uint32_t create:1;
506 	/**< Whether resources should remain after a validate. */
507 	uint32_t drop:1; /**< Target is a drop queue. */
508 	uint32_t mark:1; /**< Mark is present in the flow. */
509 	uint32_t count:1; /**< Count is present in the flow. */
510 	uint32_t mark_id; /**< Mark identifier. */
511 	struct rte_flow_action_rss rss_conf; /**< RSS configuration */
512 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
513 	uint8_t rss_key[40]; /**< copy of the RSS key. */
514 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
515 	enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
516 	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
517 	struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
518 	struct {
519 		struct ibv_flow_attr *ibv_attr;
520 		/**< Pointer to Verbs attributes. */
521 		unsigned int offset;
522 		/**< Current position or total size of the attribute. */
523 		uint64_t hash_fields; /**< Verbs hash fields. */
524 	} queue[RTE_DIM(hash_rxq_init)];
525 };
526 
527 static const struct rte_flow_ops mlx5_flow_ops = {
528 	.validate = mlx5_flow_validate,
529 	.create = mlx5_flow_create,
530 	.destroy = mlx5_flow_destroy,
531 	.flush = mlx5_flow_flush,
532 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
533 	.query = mlx5_flow_query,
534 #else
535 	.query = NULL,
536 #endif
537 	.isolate = mlx5_flow_isolate,
538 };
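/*
 * Usage sketch (application side, illustrative only; variable names are
 * placeholders): these callbacks are reached through the generic rte_flow
 * API, e.g.
 *
 *   struct rte_flow_error err;
 *   struct rte_flow *flow = NULL;
 *
 *   if (!rte_flow_validate(port_id, &attr, pattern, actions, &err))
 *           flow = rte_flow_create(port_id, &attr, pattern, actions, &err);
 *
 * which is dispatched through the structure above to mlx5_flow_validate()
 * and mlx5_flow_create().
 */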
539 
540 /* Intermediate structure to convert an FDIR request into a generic flow. */
541 struct mlx5_fdir {
542 	struct rte_flow_attr attr;
543 	struct rte_flow_action actions[2];
544 	struct rte_flow_item items[4];
545 	struct rte_flow_item_eth l2;
546 	struct rte_flow_item_eth l2_mask;
547 	union {
548 		struct rte_flow_item_ipv4 ipv4;
549 		struct rte_flow_item_ipv6 ipv6;
550 	} l3;
551 	union {
552 		struct rte_flow_item_ipv4 ipv4;
553 		struct rte_flow_item_ipv6 ipv6;
554 	} l3_mask;
555 	union {
556 		struct rte_flow_item_udp udp;
557 		struct rte_flow_item_tcp tcp;
558 	} l4;
559 	union {
560 		struct rte_flow_item_udp udp;
561 		struct rte_flow_item_tcp tcp;
562 	} l4_mask;
563 	struct rte_flow_action_queue queue;
564 };
565 
566 /* Verbs specification header. */
567 struct ibv_spec_header {
568 	enum ibv_flow_spec_type type;
569 	uint16_t size;
570 };
571 
572 /**
573  * Check whether an item is fully supported by the NIC matching capability.
574  *
575  * @param item[in]
576  *   Item specification.
577  * @param mask[in]
578  *   Bit-masks covering supported fields to compare with spec, last and mask in
579  *   \item.
580  * @param size
581  *   Bit-mask size in bytes.
582  *
583  * @return
584  *   0 on success, a negative errno value otherwise and rte_errno is set.
585  */
586 static int
587 mlx5_flow_item_validate(const struct rte_flow_item *item,
588 			const uint8_t *mask, unsigned int size)
589 {
590 	unsigned int i;
591 	const uint8_t *spec = item->spec;
592 	const uint8_t *last = item->last;
593 	const uint8_t *m = item->mask ? item->mask : mask;
594 
595 	if (!spec && (item->mask || last))
596 		goto error;
597 	if (!spec)
598 		return 0;
599 	/*
600 	 * Single-pass check to make sure that:
601 	 * - item->mask is supported, no bits are set outside mask.
602 	 * - Both masked item->spec and item->last are equal (no range
603 	 *   supported).
604 	 */
605 	for (i = 0; i < size; i++) {
606 		if (!m[i])
607 			continue;
608 		if ((m[i] | mask[i]) != mask[i])
609 			goto error;
610 		if (last && ((spec[i] & m[i]) != (last[i] & m[i])))
611 			goto error;
612 	}
613 	return 0;
614 error:
615 	rte_errno = ENOTSUP;
616 	return -rte_errno;
617 }
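/*
 * Example (sketch): with the TCP entry of mlx5_flow_items[] above, whose
 * supported mask only covers hdr.src_port and hdr.dst_port, an item whose
 * mask also sets hdr.tcp_flags, e.g.
 *
 *   const struct rte_flow_item_tcp tcp_mask = {
 *           .hdr = { .tcp_flags = 0xff },
 *   };
 *
 * is rejected with ENOTSUP because bits are set outside the supported
 * mask, while a ports-only mask is accepted.
 */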
618 
619 /**
620  * Validate flow rule attributes.
621  *
622  * @param[in] attr
623  *   Flow rule attributes.
624  * @param[out] error
625  *   Perform verbose error reporting if not NULL.
626  *
627  * @return
628  *   0 on success, a negative errno value otherwise and rte_errno is set.
629  */
630 static int
631 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
632 			     struct rte_flow_error *error)
633 {
634 	if (attr->group) {
635 		rte_flow_error_set(error, ENOTSUP,
636 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
637 				   NULL,
638 				   "groups are not supported");
639 		return -rte_errno;
640 	}
641 	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
642 		rte_flow_error_set(error, ENOTSUP,
643 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
644 				   NULL,
645 				   "priorities are not supported");
646 		return -rte_errno;
647 	}
648 	if (attr->egress) {
649 		rte_flow_error_set(error, ENOTSUP,
650 				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
651 				   NULL,
652 				   "egress is not supported");
653 		return -rte_errno;
654 	}
655 	if (attr->transfer) {
656 		rte_flow_error_set(error, ENOTSUP,
657 				   RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
658 				   NULL,
659 				   "transfer is not supported");
660 		return -rte_errno;
661 	}
662 	if (!attr->ingress) {
663 		rte_flow_error_set(error, ENOTSUP,
664 				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
665 				   NULL,
666 				   "only ingress is supported");
667 		return -rte_errno;
668 	}
669 	return 0;
670 }
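/*
 * Examples (sketch): only ingress rules in group 0, without transfer, and
 * with priority 0 or MLX5_CTRL_FLOW_PRIORITY are accepted, e.g.
 *
 *   (struct rte_flow_attr){ .ingress = 1 }                 accepted
 *   (struct rte_flow_attr){ .ingress = 1, .group = 1 }     ENOTSUP
 *   (struct rte_flow_attr){ .egress = 1 }                  ENOTSUP
 */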
671 
672 /**
673  * Extract the flow actions into the parser.
674  *
675  * @param dev
676  *   Pointer to Ethernet device.
677  * @param[in] actions
678  *   Associated actions (list terminated by the END action).
679  * @param[out] error
680  *   Perform verbose error reporting if not NULL.
681  * @param[in, out] parser
682  *   Internal parser structure.
683  *
684  * @return
685  *   0 on success, a negative errno value otherwise and rte_errno is set.
686  */
687 static int
688 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
689 			  const struct rte_flow_action actions[],
690 			  struct rte_flow_error *error,
691 			  struct mlx5_flow_parse *parser)
692 {
693 	enum { FATE = 1, MARK = 2, COUNT = 4, };
694 	uint32_t overlap = 0;
695 	struct priv *priv = dev->data->dev_private;
696 
697 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
698 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
699 			continue;
700 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
701 			if (overlap & FATE)
702 				goto exit_action_overlap;
703 			overlap |= FATE;
704 			parser->drop = 1;
705 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
706 			const struct rte_flow_action_queue *queue =
707 				(const struct rte_flow_action_queue *)
708 				actions->conf;
709 
710 			if (overlap & FATE)
711 				goto exit_action_overlap;
712 			overlap |= FATE;
713 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
714 				goto exit_action_not_supported;
715 			parser->queues[0] = queue->index;
716 			parser->rss_conf = (struct rte_flow_action_rss){
717 				.queue_num = 1,
718 				.queue = parser->queues,
719 			};
720 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
721 			const struct rte_flow_action_rss *rss =
722 				(const struct rte_flow_action_rss *)
723 				actions->conf;
724 			const uint8_t *rss_key;
725 			uint32_t rss_key_len;
726 			uint16_t n;
727 
728 			if (overlap & FATE)
729 				goto exit_action_overlap;
730 			overlap |= FATE;
731 			if (rss->func &&
732 			    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
733 				rte_flow_error_set(error, EINVAL,
734 						   RTE_FLOW_ERROR_TYPE_ACTION,
735 						   actions,
736 						   "the only supported RSS hash"
737 						   " function is Toeplitz");
738 				return -rte_errno;
739 			}
740 #ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
741 			if (parser->rss_conf.level > 1) {
742 				rte_flow_error_set(error, EINVAL,
743 						   RTE_FLOW_ERROR_TYPE_ACTION,
744 						   actions,
745 						   "a nonzero RSS encapsulation"
746 						   " level is not supported");
747 				return -rte_errno;
748 			}
749 #endif
750 			if (parser->rss_conf.level > 2) {
751 				rte_flow_error_set(error, EINVAL,
752 						   RTE_FLOW_ERROR_TYPE_ACTION,
753 						   actions,
754 						   "RSS encapsulation level"
755 						   " > 1 is not supported");
756 				return -rte_errno;
757 			}
758 			if (rss->types & MLX5_RSS_HF_MASK) {
759 				rte_flow_error_set(error, EINVAL,
760 						   RTE_FLOW_ERROR_TYPE_ACTION,
761 						   actions,
762 						   "unsupported RSS type"
763 						   " requested");
764 				return -rte_errno;
765 			}
766 			if (rss->key_len) {
767 				rss_key_len = rss->key_len;
768 				rss_key = rss->key;
769 			} else {
770 				rss_key_len = rss_hash_default_key_len;
771 				rss_key = rss_hash_default_key;
772 			}
773 			if (rss_key_len != RTE_DIM(parser->rss_key)) {
774 				rte_flow_error_set(error, EINVAL,
775 						   RTE_FLOW_ERROR_TYPE_ACTION,
776 						   actions,
777 						   "RSS hash key must be"
778 						   " exactly 40 bytes long");
779 				return -rte_errno;
780 			}
781 			if (!rss->queue_num) {
782 				rte_flow_error_set(error, EINVAL,
783 						   RTE_FLOW_ERROR_TYPE_ACTION,
784 						   actions,
785 						   "no valid queues");
786 				return -rte_errno;
787 			}
788 			if (rss->queue_num > RTE_DIM(parser->queues)) {
789 				rte_flow_error_set(error, EINVAL,
790 						   RTE_FLOW_ERROR_TYPE_ACTION,
791 						   actions,
792 						   "too many queues for RSS"
793 						   " context");
794 				return -rte_errno;
795 			}
796 			for (n = 0; n < rss->queue_num; ++n) {
797 				if (rss->queue[n] >= priv->rxqs_n) {
798 					rte_flow_error_set(error, EINVAL,
799 						   RTE_FLOW_ERROR_TYPE_ACTION,
800 						   actions,
801 						   "queue id > number of"
802 						   " queues");
803 					return -rte_errno;
804 				}
805 			}
806 			parser->rss_conf = (struct rte_flow_action_rss){
807 				.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
808 				.level = rss->level,
809 				.types = rss->types,
810 				.key_len = rss_key_len,
811 				.queue_num = rss->queue_num,
812 				.key = memcpy(parser->rss_key, rss_key,
813 					      sizeof(*rss_key) * rss_key_len),
814 				.queue = memcpy(parser->queues, rss->queue,
815 						sizeof(*rss->queue) *
816 						rss->queue_num),
817 			};
818 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
819 			const struct rte_flow_action_mark *mark =
820 				(const struct rte_flow_action_mark *)
821 				actions->conf;
822 
823 			if (overlap & MARK)
824 				goto exit_action_overlap;
825 			overlap |= MARK;
826 			if (!mark) {
827 				rte_flow_error_set(error, EINVAL,
828 						   RTE_FLOW_ERROR_TYPE_ACTION,
829 						   actions,
830 						   "mark must be defined");
831 				return -rte_errno;
832 			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
833 				rte_flow_error_set(error, ENOTSUP,
834 						   RTE_FLOW_ERROR_TYPE_ACTION,
835 						   actions,
836 						   "mark must be between 0"
837 						   " and 16777199");
838 				return -rte_errno;
839 			}
840 			parser->mark = 1;
841 			parser->mark_id = mark->id;
842 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
843 			if (overlap & MARK)
844 				goto exit_action_overlap;
845 			overlap |= MARK;
846 			parser->mark = 1;
847 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
848 			   priv->config.flow_counter_en) {
849 			if (overlap & COUNT)
850 				goto exit_action_overlap;
851 			overlap |= COUNT;
852 			parser->count = 1;
853 		} else {
854 			goto exit_action_not_supported;
855 		}
856 	}
857 	/* When fate is unknown, drop traffic. */
858 	if (!(overlap & FATE))
859 		parser->drop = 1;
860 	if (parser->drop && parser->mark)
861 		parser->mark = 0;
862 	if (!parser->rss_conf.queue_num && !parser->drop) {
863 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
864 				   NULL, "no valid action");
865 		return -rte_errno;
866 	}
867 	return 0;
868 exit_action_not_supported:
869 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
870 			   actions, "action not supported");
871 	return -rte_errno;
872 exit_action_overlap:
873 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
874 			   actions, "overlapping actions are not supported");
875 	return -rte_errno;
876 }
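/*
 * Examples (sketch): at most one fate action (DROP, QUEUE or RSS), plus
 * optionally MARK or FLAG and COUNT, e.g.
 *
 *   QUEUE(n) + MARK(id)   accepted, parser->queues[0] = n
 *   QUEUE(n) + DROP       rejected, overlapping fate actions
 *   MARK(id) alone        accepted, but with no fate action the traffic is
 *                         dropped and the mark is discarded
 */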
877 
878 /**
879  * Validate items and compute the size of the Verbs specifications.
880  *
881  * @param[in] items
882  *   Pattern specification (list terminated by the END pattern item).
883  * @param[out] error
884  *   Perform verbose error reporting if not NULL.
885  * @param[in, out] parser
886  *   Internal parser structure.
887  *
888  * @return
889  *   0 on success, a negative errno value otherwise and rte_errno is set.
890  */
891 static int
892 mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
893 				 const struct rte_flow_item items[],
894 				 struct rte_flow_error *error,
895 				 struct mlx5_flow_parse *parser)
896 {
897 	struct priv *priv = dev->data->dev_private;
898 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
899 	unsigned int i;
900 	unsigned int last_voids = 0;
901 	int ret = 0;
902 
903 	/* Initialise the offsets to start after the Verbs attribute. */
904 	for (i = 0; i != hash_rxq_init_n; ++i)
905 		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
906 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
907 		const struct mlx5_flow_items *token = NULL;
908 		unsigned int n;
909 
910 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID) {
911 			last_voids++;
912 			continue;
913 		}
914 		for (i = 0;
915 		     cur_item->items &&
916 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
917 		     ++i) {
918 			if (cur_item->items[i] == items->type) {
919 				token = &mlx5_flow_items[items->type];
920 				break;
921 			}
922 		}
923 		if (!token) {
924 			ret = -ENOTSUP;
925 			goto exit_item_not_supported;
926 		}
927 		cur_item = token;
928 		ret = mlx5_flow_item_validate(items,
929 					      (const uint8_t *)cur_item->mask,
930 					      cur_item->mask_sz);
931 		if (ret)
932 			goto exit_item_not_supported;
933 		if (IS_TUNNEL(items->type)) {
934 			if (parser->tunnel &&
935 			    !((items - last_voids - 1)->type ==
936 			      RTE_FLOW_ITEM_TYPE_GRE && items->type ==
937 			      RTE_FLOW_ITEM_TYPE_MPLS)) {
938 				rte_flow_error_set(error, ENOTSUP,
939 						   RTE_FLOW_ERROR_TYPE_ITEM,
940 						   items,
941 						   "Cannot recognize multiple"
942 						   " tunnel encapsulations.");
943 				return -rte_errno;
944 			}
945 			if (items->type == RTE_FLOW_ITEM_TYPE_MPLS &&
946 			    !priv->config.mpls_en) {
947 				rte_flow_error_set(error, ENOTSUP,
948 						   RTE_FLOW_ERROR_TYPE_ITEM,
949 						   items,
950 						   "MPLS not supported or"
951 						   " disabled in firmware"
952 						   " configuration.");
953 				return -rte_errno;
954 			}
955 			if (!priv->config.tunnel_en &&
956 			    parser->rss_conf.level > 1) {
957 				rte_flow_error_set(error, ENOTSUP,
958 					RTE_FLOW_ERROR_TYPE_ITEM,
959 					items,
960 					"RSS on tunnel is not supported");
961 				return -rte_errno;
962 			}
963 			parser->inner = IBV_FLOW_SPEC_INNER;
964 			parser->tunnel = flow_ptype[items->type];
965 		}
966 		if (parser->drop) {
967 			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
968 		} else {
969 			for (n = 0; n != hash_rxq_init_n; ++n)
970 				parser->queue[n].offset += cur_item->dst_sz;
971 		}
972 		last_voids = 0;
973 	}
974 	if (parser->drop) {
975 		parser->queue[HASH_RXQ_ETH].offset +=
976 			sizeof(struct ibv_flow_spec_action_drop);
977 	}
978 	if (parser->mark) {
979 		for (i = 0; i != hash_rxq_init_n; ++i)
980 			parser->queue[i].offset +=
981 				sizeof(struct ibv_flow_spec_action_tag);
982 	}
983 	if (parser->count) {
984 		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
985 
986 		for (i = 0; i != hash_rxq_init_n; ++i)
987 			parser->queue[i].offset += size;
988 	}
989 	return 0;
990 exit_item_not_supported:
991 	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
992 				  items, "item not supported");
993 }
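/*
 * Sizing sketch: for an eth / ipv4 / udp pattern with a drop action, only
 * parser->queue[HASH_RXQ_ETH].offset is accumulated:
 *
 *   sizeof(struct ibv_flow_attr)
 *   + sizeof(struct ibv_flow_spec_eth)
 *   + sizeof(struct ibv_flow_spec_ipv4_ext)
 *   + sizeof(struct ibv_flow_spec_tcp_udp)
 *   + sizeof(struct ibv_flow_spec_action_drop)
 *
 * and is later handed to mlx5_flow_convert_allocate().
 */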
994 
995 /**
996  * Allocate memory space to store verbs flow attributes.
997  *
998  * @param[in] size
999  *   Amount of bytes to allocate.
1000  * @param[out] error
1001  *   Perform verbose error reporting if not NULL.
1002  *
1003  * @return
1004  *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
1005  */
1006 static struct ibv_flow_attr *
1007 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
1008 {
1009 	struct ibv_flow_attr *ibv_attr;
1010 
1011 	ibv_attr = rte_calloc(__func__, 1, size, 0);
1012 	if (!ibv_attr) {
1013 		rte_flow_error_set(error, ENOMEM,
1014 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1015 				   NULL,
1016 				   "cannot allocate verbs spec attributes");
1017 		return NULL;
1018 	}
1019 	return ibv_attr;
1020 }
1021 
1022 /**
1023  * Give inner packet matching a higher priority than non-inner (outer)
1024  * matching.
1025  *
1026  * @param dev
1027  *   Pointer to Ethernet device.
1028  * @param[in, out] parser
1029  *   Internal parser structure.
1030  * @param attr
1031  *   User flow attribute.
1032  */
1033 static void
1034 mlx5_flow_update_priority(struct rte_eth_dev *dev,
1035 			  struct mlx5_flow_parse *parser,
1036 			  const struct rte_flow_attr *attr)
1037 {
1038 	struct priv *priv = dev->data->dev_private;
1039 	unsigned int i;
1040 	uint16_t priority;
1041 
1042 	/*			8 priorities	>= 16 priorities
1043 	 * Control flow:	4-7		8-15
1044 	 * User normal flow:	1-3		4-7
1045 	 * User tunnel flow:	0-2		0-3
1046 	 */
1047 	priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
1048 	if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1049 		priority /= 2;
1050 	/*
1051 	 * Lower non-tunnel flow Verbs priority 1 if only support 8 Verbs
1052 	 * priorities, lower 4 otherwise.
1053 	 */
1054 	if (!parser->inner) {
1055 		if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1056 			priority += 1;
1057 		else
1058 			priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
1059 	}
1060 	if (parser->drop) {
1061 		parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
1062 				hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1063 		return;
1064 	}
1065 	for (i = 0; i != hash_rxq_init_n; ++i) {
1066 		if (!parser->queue[i].ibv_attr)
1067 			continue;
1068 		parser->queue[i].ibv_attr->priority = priority +
1069 				hash_rxq_init[i].flow_priority;
1070 	}
1071 }
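/*
 * Worked example (sketch): when more than MLX5_VERBS_FLOW_PRIO_8 Verbs
 * priorities are supported (the ">= 16 priorities" column above), a
 * non-tunnel TCPv4 rule with attr->priority == 0 gets
 *
 *   priority = 0 * MLX5_VERBS_FLOW_PRIO_8
 *            + MLX5_VERBS_FLOW_PRIO_8 / 2                    (non-tunnel)
 *            + hash_rxq_init[HASH_RXQ_TCPV4].flow_priority   (= 0)
 *            = 4
 *
 * i.e. within the "User normal flow: 4-7" range of the table above.
 */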
1072 
1073 /**
1074  * Finalise verbs flow attributes.
1075  *
1076  * @param[in, out] parser
1077  *   Internal parser structure.
1078  */
1079 static void
1080 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
1081 {
1082 	unsigned int i;
1083 	uint32_t inner = parser->inner;
1084 
1085 	/* Don't create extra flows for outer RSS. */
1086 	if (parser->tunnel && parser->rss_conf.level < 2)
1087 		return;
1088 	/*
1089 	 * Fill missing layers in verbs specifications, or compute the correct
1090 	 * offset to allocate the memory space for the attributes and
1091 	 * specifications.
1092 	 */
1093 	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1094 		union {
1095 			struct ibv_flow_spec_ipv4_ext ipv4;
1096 			struct ibv_flow_spec_ipv6 ipv6;
1097 			struct ibv_flow_spec_tcp_udp udp_tcp;
1098 			struct ibv_flow_spec_eth eth;
1099 		} specs;
1100 		void *dst;
1101 		uint16_t size;
1102 
1103 		if (i == parser->layer)
1104 			continue;
1105 		if (parser->layer == HASH_RXQ_ETH ||
1106 		    parser->layer == HASH_RXQ_TUNNEL) {
1107 			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1108 				size = sizeof(struct ibv_flow_spec_ipv4_ext);
1109 				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1110 					.type = inner | IBV_FLOW_SPEC_IPV4_EXT,
1111 					.size = size,
1112 				};
1113 			} else {
1114 				size = sizeof(struct ibv_flow_spec_ipv6);
1115 				specs.ipv6 = (struct ibv_flow_spec_ipv6){
1116 					.type = inner | IBV_FLOW_SPEC_IPV6,
1117 					.size = size,
1118 				};
1119 			}
1120 			if (parser->queue[i].ibv_attr) {
1121 				dst = (void *)((uintptr_t)
1122 					       parser->queue[i].ibv_attr +
1123 					       parser->queue[i].offset);
1124 				memcpy(dst, &specs, size);
1125 				++parser->queue[i].ibv_attr->num_of_specs;
1126 			}
1127 			parser->queue[i].offset += size;
1128 		}
1129 		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1130 		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1131 			size = sizeof(struct ibv_flow_spec_tcp_udp);
1132 			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1133 				.type = inner | ((i == HASH_RXQ_UDPV4 ||
1134 					  i == HASH_RXQ_UDPV6) ?
1135 					 IBV_FLOW_SPEC_UDP :
1136 					 IBV_FLOW_SPEC_TCP),
1137 				.size = size,
1138 			};
1139 			if (parser->queue[i].ibv_attr) {
1140 				dst = (void *)((uintptr_t)
1141 					       parser->queue[i].ibv_attr +
1142 					       parser->queue[i].offset);
1143 				memcpy(dst, &specs, size);
1144 				++parser->queue[i].ibv_attr->num_of_specs;
1145 			}
1146 			parser->queue[i].offset += size;
1147 		}
1148 	}
1149 }
1150 
1151 /**
1152  * Update flows according to pattern and RSS hash fields.
1153  *
1154  * @param[in, out] parser
1155  *   Internal parser structure.
1156  *
1157  * @return
1158  *   0 on success, a negative errno value otherwise and rte_errno is set.
1159  */
1160 static int
1161 mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
1162 {
1163 	unsigned int i;
1164 	enum hash_rxq_type start;
1165 	enum hash_rxq_type layer;
1166 	int outer = parser->tunnel && parser->rss_conf.level < 2;
1167 	uint64_t rss = parser->rss_conf.types;
1168 
1169 	/* Default to outer RSS. */
1170 	if (!parser->rss_conf.level)
1171 		parser->rss_conf.level = 1;
1172 	layer = outer ? parser->out_layer : parser->layer;
1173 	if (layer == HASH_RXQ_TUNNEL)
1174 		layer = HASH_RXQ_ETH;
1175 	if (outer) {
1176 		/* Only one hash type for outer RSS. */
1177 		if (rss && layer == HASH_RXQ_ETH) {
1178 			start = HASH_RXQ_TCPV4;
1179 		} else if (rss && layer != HASH_RXQ_ETH &&
1180 			   !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
1181 			/* If RSS does not match the L4 pattern, try L3 RSS. */
1182 			if (layer < HASH_RXQ_IPV4)
1183 				layer = HASH_RXQ_IPV4;
1184 			else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
1185 				layer = HASH_RXQ_IPV6;
1186 			start = layer;
1187 		} else {
1188 			start = layer;
1189 		}
1190 		/* Scan first valid hash type. */
1191 		for (i = start; rss && i <= layer; ++i) {
1192 			if (!parser->queue[i].ibv_attr)
1193 				continue;
1194 			if (hash_rxq_init[i].dpdk_rss_hf & rss)
1195 				break;
1196 		}
1197 		if (rss && i <= layer)
1198 			parser->queue[layer].hash_fields =
1199 					hash_rxq_init[i].hash_fields;
1200 		/* Trim unused hash types. */
1201 		for (i = 0; i != hash_rxq_init_n; ++i) {
1202 			if (parser->queue[i].ibv_attr && i != layer) {
1203 				rte_free(parser->queue[i].ibv_attr);
1204 				parser->queue[i].ibv_attr = NULL;
1205 			}
1206 		}
1207 	} else {
1208 		/* Expand for inner or normal RSS. */
1209 		if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
1210 			start = HASH_RXQ_TCPV4;
1211 		else if (rss && layer == HASH_RXQ_IPV6)
1212 			start = HASH_RXQ_TCPV6;
1213 		else
1214 			start = layer;
1215 		/* For L4 pattern, try L3 RSS if no L4 RSS. */
1216 		/* Trim unused hash types. */
1217 		for (i = 0; i != hash_rxq_init_n; ++i) {
1218 			if (!parser->queue[i].ibv_attr)
1219 				continue;
1220 			if (i < start || i > layer) {
1221 				rte_free(parser->queue[i].ibv_attr);
1222 				parser->queue[i].ibv_attr = NULL;
1223 				continue;
1224 			}
1225 			if (!rss)
1226 				continue;
1227 			if (hash_rxq_init[i].dpdk_rss_hf & rss) {
1228 				parser->queue[i].hash_fields =
1229 						hash_rxq_init[i].hash_fields;
1230 			} else if (i != layer) {
1231 				/* Remove unused RSS expansion. */
1232 				rte_free(parser->queue[i].ibv_attr);
1233 				parser->queue[i].ibv_attr = NULL;
1234 			} else if (layer < HASH_RXQ_IPV4 &&
1235 				   (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &
1236 				    rss)) {
1237 				/* Allow IPv4 RSS on L4 pattern. */
1238 				parser->queue[i].hash_fields =
1239 					hash_rxq_init[HASH_RXQ_IPV4]
1240 						.hash_fields;
1241 			} else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
1242 				   (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &
1243 				    rss)) {
1244 				/* Allow IPv6 RSS on L4 pattern. */
1245 				parser->queue[i].hash_fields =
1246 					hash_rxq_init[HASH_RXQ_IPV6]
1247 						.hash_fields;
1248 			}
1249 		}
1250 	}
1251 	return 0;
1252 }
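/*
 * Example (sketch): for a plain eth / ipv4 / tcp pattern (no tunnel) with
 * an RSS action requesting ETH_RSS_NONFRAG_IPV4_TCP, layer is
 * HASH_RXQ_TCPV4 and start == layer, so every other queue[i].ibv_attr is
 * freed and only the TCPv4 hash type keeps its Verbs attribute, with
 * hash_fields set to the IPv4/TCP 4-tuple.
 */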
1253 
1254 /**
1255  * Validate and convert a flow supported by the NIC.
1256  *
1257  * @param dev
1258  *   Pointer to Ethernet device.
1259  * @param[in] attr
1260  *   Flow rule attributes.
1261  * @param[in] pattern
1262  *   Pattern specification (list terminated by the END pattern item).
1263  * @param[in] actions
1264  *   Associated actions (list terminated by the END action).
1265  * @param[out] error
1266  *   Perform verbose error reporting if not NULL.
1267  * @param[in, out] parser
1268  *   Internal parser structure.
1269  *
1270  * @return
1271  *   0 on success, a negative errno value otherwise and rte_errno is set.
1272  */
1273 static int
1274 mlx5_flow_convert(struct rte_eth_dev *dev,
1275 		  const struct rte_flow_attr *attr,
1276 		  const struct rte_flow_item items[],
1277 		  const struct rte_flow_action actions[],
1278 		  struct rte_flow_error *error,
1279 		  struct mlx5_flow_parse *parser)
1280 {
1281 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1282 	unsigned int i;
1283 	int ret;
1284 
1285 	/* First step. Validate the attributes, items and actions. */
1286 	*parser = (struct mlx5_flow_parse){
1287 		.create = parser->create,
1288 		.layer = HASH_RXQ_ETH,
1289 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
1290 	};
1291 	ret = mlx5_flow_convert_attributes(attr, error);
1292 	if (ret)
1293 		return ret;
1294 	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1295 	if (ret)
1296 		return ret;
1297 	ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
1298 	if (ret)
1299 		return ret;
1300 	mlx5_flow_convert_finalise(parser);
1301 	/*
1302 	 * Second step.
1303 	 * Allocate the memory space to store verbs specifications.
1304 	 */
1305 	if (parser->drop) {
1306 		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1307 
1308 		parser->queue[HASH_RXQ_ETH].ibv_attr =
1309 			mlx5_flow_convert_allocate(offset, error);
1310 		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1311 			goto exit_enomem;
1312 		parser->queue[HASH_RXQ_ETH].offset =
1313 			sizeof(struct ibv_flow_attr);
1314 	} else {
1315 		for (i = 0; i != hash_rxq_init_n; ++i) {
1316 			unsigned int offset;
1317 
1318 			offset = parser->queue[i].offset;
1319 			parser->queue[i].ibv_attr =
1320 				mlx5_flow_convert_allocate(offset, error);
1321 			if (!parser->queue[i].ibv_attr)
1322 				goto exit_enomem;
1323 			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1324 		}
1325 	}
1326 	/* Third step. Convert each item and fill the specifications. */
1327 	parser->inner = 0;
1328 	parser->tunnel = 0;
1329 	parser->layer = HASH_RXQ_ETH;
1330 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1331 		struct mlx5_flow_data data = {
1332 			.dev = dev,
1333 			.parser = parser,
1334 			.error = error,
1335 		};
1336 
1337 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1338 			continue;
1339 		cur_item = &mlx5_flow_items[items->type];
1340 		ret = cur_item->convert(items,
1341 					(cur_item->default_mask ?
1342 					 cur_item->default_mask :
1343 					 cur_item->mask),
1344 					 &data);
1345 		if (ret)
1346 			goto exit_free;
1347 	}
1348 	if (!parser->drop) {
1349 		/* RSS check, remove unused hash types. */
1350 		ret = mlx5_flow_convert_rss(parser);
1351 		if (ret)
1352 			goto exit_free;
1353 		/* Complete missing specification. */
1354 		mlx5_flow_convert_finalise(parser);
1355 	}
1356 	mlx5_flow_update_priority(dev, parser, attr);
1357 	if (parser->mark)
1358 		mlx5_flow_create_flag_mark(parser, parser->mark_id);
1359 	if (parser->count && parser->create) {
1360 		mlx5_flow_create_count(dev, parser);
1361 		if (!parser->cs)
1362 			goto exit_count_error;
1363 	}
1364 exit_free:
1365 	/* Only verification is expected, all resources should be released. */
1366 	if (!parser->create) {
1367 		for (i = 0; i != hash_rxq_init_n; ++i) {
1368 			if (parser->queue[i].ibv_attr) {
1369 				rte_free(parser->queue[i].ibv_attr);
1370 				parser->queue[i].ibv_attr = NULL;
1371 			}
1372 		}
1373 	}
1374 	return ret;
1375 exit_enomem:
1376 	for (i = 0; i != hash_rxq_init_n; ++i) {
1377 		if (parser->queue[i].ibv_attr) {
1378 			rte_free(parser->queue[i].ibv_attr);
1379 			parser->queue[i].ibv_attr = NULL;
1380 		}
1381 	}
1382 	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1383 			   NULL, "cannot allocate verbs spec attributes");
1384 	return -rte_errno;
1385 exit_count_error:
1386 	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1387 			   NULL, "cannot create counter");
1388 	return -rte_errno;
1389 }
1390 
1391 /**
1392  * Copy the created specification into each active Verbs flow attribute.
1393  *
1394  * @param parser
1395  *   Internal parser structure.
1396  * @param src
1397  *   Created specification.
1398  * @param size
1399  *   Size in bytes of the specification to copy.
1400  */
1401 static void
1402 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1403 		      unsigned int size)
1404 {
1405 	unsigned int i;
1406 	void *dst;
1407 
1408 	for (i = 0; i != hash_rxq_init_n; ++i) {
1409 		if (!parser->queue[i].ibv_attr)
1410 			continue;
1411 		dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1412 				parser->queue[i].offset);
1413 		memcpy(dst, src, size);
1414 		++parser->queue[i].ibv_attr->num_of_specs;
1415 		parser->queue[i].offset += size;
1416 	}
1417 }
1418 
1419 /**
1420  * Convert Ethernet item to Verbs specification.
1421  *
1422  * @param item[in]
1423  *   Item specification.
1424  * @param default_mask[in]
1425  *   Default bit-masks to use when item->mask is not provided.
1426  * @param data[in, out]
1427  *   User structure.
1428  *
1429  * @return
1430  *   0 on success, a negative errno value otherwise and rte_errno is set.
1431  */
1432 static int
1433 mlx5_flow_create_eth(const struct rte_flow_item *item,
1434 		     const void *default_mask,
1435 		     struct mlx5_flow_data *data)
1436 {
1437 	const struct rte_flow_item_eth *spec = item->spec;
1438 	const struct rte_flow_item_eth *mask = item->mask;
1439 	struct mlx5_flow_parse *parser = data->parser;
1440 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1441 	struct ibv_flow_spec_eth eth = {
1442 		.type = parser->inner | IBV_FLOW_SPEC_ETH,
1443 		.size = eth_size,
1444 	};
1445 
1446 	parser->layer = HASH_RXQ_ETH;
1447 	if (spec) {
1448 		unsigned int i;
1449 
1450 		if (!mask)
1451 			mask = default_mask;
1452 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1453 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1454 		eth.val.ether_type = spec->type;
1455 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1456 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1457 		eth.mask.ether_type = mask->type;
1458 		/* Remove unwanted bits from values. */
1459 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1460 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1461 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
1462 		}
1463 		eth.val.ether_type &= eth.mask.ether_type;
1464 	}
1465 	mlx5_flow_create_copy(parser, &eth, eth_size);
1466 	return 0;
1467 }
1468 
1469 /**
1470  * Convert VLAN item to Verbs specification.
1471  *
1472  * @param item[in]
1473  *   Item specification.
1474  * @param default_mask[in]
1475  *   Default bit-masks to use when item->mask is not provided.
1476  * @param data[in, out]
1477  *   User structure.
1478  *
1479  * @return
1480  *   0 on success, a negative errno value otherwise and rte_errno is set.
1481  */
1482 static int
1483 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1484 		      const void *default_mask,
1485 		      struct mlx5_flow_data *data)
1486 {
1487 	const struct rte_flow_item_vlan *spec = item->spec;
1488 	const struct rte_flow_item_vlan *mask = item->mask;
1489 	struct mlx5_flow_parse *parser = data->parser;
1490 	struct ibv_flow_spec_eth *eth;
1491 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1492 	const char *msg = "VLAN cannot be empty";
1493 
1494 	if (spec) {
1495 		unsigned int i;
1496 		if (!mask)
1497 			mask = default_mask;
1498 
1499 		for (i = 0; i != hash_rxq_init_n; ++i) {
1500 			if (!parser->queue[i].ibv_attr)
1501 				continue;
1502 
1503 			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1504 				       parser->queue[i].offset - eth_size);
1505 			eth->val.vlan_tag = spec->tci;
1506 			eth->mask.vlan_tag = mask->tci;
1507 			eth->val.vlan_tag &= eth->mask.vlan_tag;
1508 			/*
1509 			 * From the Verbs perspective an empty VLAN is
1510 			 * equivalent to a packet without a VLAN layer.
1511 			 */
1512 			if (!eth->mask.vlan_tag)
1513 				goto error;
1514 			/* Outer TPID cannot be matched. */
1515 			if (eth->mask.ether_type) {
1516 				msg = "VLAN TPID matching is not supported";
1517 				goto error;
1518 			}
1519 			eth->val.ether_type = spec->inner_type;
1520 			eth->mask.ether_type = mask->inner_type;
1521 			eth->val.ether_type &= eth->mask.ether_type;
1522 		}
1523 		return 0;
1524 	}
1525 error:
1526 	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1527 				  item, msg);
1528 }
1529 
1530 /**
1531  * Convert IPv4 item to Verbs specification.
1532  *
1533  * @param item[in]
1534  *   Item specification.
1535  * @param default_mask[in]
1536  *   Default bit-masks to use when item->mask is not provided.
1537  * @param data[in, out]
1538  *   User structure.
1539  *
1540  * @return
1541  *   0 on success, a negative errno value otherwise and rte_errno is set.
1542  */
1543 static int
1544 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1545 		      const void *default_mask,
1546 		      struct mlx5_flow_data *data)
1547 {
1548 	struct priv *priv = data->dev->data->dev_private;
1549 	const struct rte_flow_item_ipv4 *spec = item->spec;
1550 	const struct rte_flow_item_ipv4 *mask = item->mask;
1551 	struct mlx5_flow_parse *parser = data->parser;
1552 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1553 	struct ibv_flow_spec_ipv4_ext ipv4 = {
1554 		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1555 		.size = ipv4_size,
1556 	};
1557 
1558 	if (parser->layer == HASH_RXQ_TUNNEL &&
1559 	    parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1560 	    !priv->config.l3_vxlan_en)
1561 		return rte_flow_error_set(data->error, EINVAL,
1562 					  RTE_FLOW_ERROR_TYPE_ITEM,
1563 					  item,
1564 					  "L3 VXLAN not enabled by device"
1565 					  " parameter and/or not configured"
1566 					  " in firmware");
1567 	parser->layer = HASH_RXQ_IPV4;
1568 	if (spec) {
1569 		if (!mask)
1570 			mask = default_mask;
1571 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1572 			.src_ip = spec->hdr.src_addr,
1573 			.dst_ip = spec->hdr.dst_addr,
1574 			.proto = spec->hdr.next_proto_id,
1575 			.tos = spec->hdr.type_of_service,
1576 		};
1577 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1578 			.src_ip = mask->hdr.src_addr,
1579 			.dst_ip = mask->hdr.dst_addr,
1580 			.proto = mask->hdr.next_proto_id,
1581 			.tos = mask->hdr.type_of_service,
1582 		};
1583 		/* Remove unwanted bits from values. */
1584 		ipv4.val.src_ip &= ipv4.mask.src_ip;
1585 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1586 		ipv4.val.proto &= ipv4.mask.proto;
1587 		ipv4.val.tos &= ipv4.mask.tos;
1588 	}
1589 	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1590 	return 0;
1591 }
1592 
1593 /**
1594  * Convert IPv6 item to Verbs specification.
1595  *
1596  * @param item[in]
1597  *   Item specification.
1598  * @param default_mask[in]
1599  *   Default bit-masks to use when item->mask is not provided.
1600  * @param data[in, out]
1601  *   User structure.
1602  *
1603  * @return
1604  *   0 on success, a negative errno value otherwise and rte_errno is set.
1605  */
1606 static int
1607 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1608 		      const void *default_mask,
1609 		      struct mlx5_flow_data *data)
1610 {
1611 	struct priv *priv = data->dev->data->dev_private;
1612 	const struct rte_flow_item_ipv6 *spec = item->spec;
1613 	const struct rte_flow_item_ipv6 *mask = item->mask;
1614 	struct mlx5_flow_parse *parser = data->parser;
1615 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1616 	struct ibv_flow_spec_ipv6 ipv6 = {
1617 		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
1618 		.size = ipv6_size,
1619 	};
1620 
1621 	if (parser->layer == HASH_RXQ_TUNNEL &&
1622 	    parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1623 	    !priv->config.l3_vxlan_en)
1624 		return rte_flow_error_set(data->error, EINVAL,
1625 					  RTE_FLOW_ERROR_TYPE_ITEM,
1626 					  item,
1627 					  "L3 VXLAN not enabled by device"
1628 					  " parameter and/or not configured"
1629 					  " in firmware");
1630 	parser->layer = HASH_RXQ_IPV6;
1631 	if (spec) {
1632 		unsigned int i;
1633 		uint32_t vtc_flow_val;
1634 		uint32_t vtc_flow_mask;
1635 
1636 		if (!mask)
1637 			mask = default_mask;
1638 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1639 		       RTE_DIM(ipv6.val.src_ip));
1640 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1641 		       RTE_DIM(ipv6.val.dst_ip));
1642 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1643 		       RTE_DIM(ipv6.mask.src_ip));
1644 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1645 		       RTE_DIM(ipv6.mask.dst_ip));
1646 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1647 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1648 		ipv6.val.flow_label =
1649 			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1650 					 IPV6_HDR_FL_SHIFT);
1651 		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1652 					 IPV6_HDR_TC_SHIFT;
1653 		ipv6.val.next_hdr = spec->hdr.proto;
1654 		ipv6.val.hop_limit = spec->hdr.hop_limits;
1655 		ipv6.mask.flow_label =
1656 			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1657 					 IPV6_HDR_FL_SHIFT);
1658 		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1659 					  IPV6_HDR_TC_SHIFT;
1660 		ipv6.mask.next_hdr = mask->hdr.proto;
1661 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
1662 		/* Remove unwanted bits from values. */
1663 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1664 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1665 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1666 		}
1667 		ipv6.val.flow_label &= ipv6.mask.flow_label;
1668 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1669 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1670 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1671 	}
1672 	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1673 	return 0;
1674 }
1675 
1676 /**
1677  * Convert UDP item to Verbs specification.
1678  *
1679  * @param item[in]
1680  *   Item specification.
1681  * @param default_mask[in]
1682  *   Default bit-masks to use when item->mask is not provided.
1683  * @param data[in, out]
1684  *   User structure.
1685  *
1686  * @return
1687  *   0 on success, a negative errno value otherwise and rte_errno is set.
1688  */
1689 static int
1690 mlx5_flow_create_udp(const struct rte_flow_item *item,
1691 		     const void *default_mask,
1692 		     struct mlx5_flow_data *data)
1693 {
1694 	const struct rte_flow_item_udp *spec = item->spec;
1695 	const struct rte_flow_item_udp *mask = item->mask;
1696 	struct mlx5_flow_parse *parser = data->parser;
1697 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1698 	struct ibv_flow_spec_tcp_udp udp = {
1699 		.type = parser->inner | IBV_FLOW_SPEC_UDP,
1700 		.size = udp_size,
1701 	};
1702 
1703 	if (parser->layer == HASH_RXQ_IPV4)
1704 		parser->layer = HASH_RXQ_UDPV4;
1705 	else
1706 		parser->layer = HASH_RXQ_UDPV6;
1707 	if (spec) {
1708 		if (!mask)
1709 			mask = default_mask;
1710 		udp.val.dst_port = spec->hdr.dst_port;
1711 		udp.val.src_port = spec->hdr.src_port;
1712 		udp.mask.dst_port = mask->hdr.dst_port;
1713 		udp.mask.src_port = mask->hdr.src_port;
1714 		/* Remove unwanted bits from values. */
1715 		udp.val.src_port &= udp.mask.src_port;
1716 		udp.val.dst_port &= udp.mask.dst_port;
1717 	}
1718 	mlx5_flow_create_copy(parser, &udp, udp_size);
1719 	return 0;
1720 }
1721 
1722 /**
1723  * Convert TCP item to Verbs specification.
1724  *
1725  * @param item[in]
1726  *   Item specification.
1727  * @param default_mask[in]
1728  *   Default bit-masks to use when item->mask is not provided.
1729  * @param data[in, out]
1730  *   User structure.
1731  *
1732  * @return
1733  *   0 on success, a negative errno value otherwise and rte_errno is set.
1734  */
1735 static int
1736 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1737 		     const void *default_mask,
1738 		     struct mlx5_flow_data *data)
1739 {
1740 	const struct rte_flow_item_tcp *spec = item->spec;
1741 	const struct rte_flow_item_tcp *mask = item->mask;
1742 	struct mlx5_flow_parse *parser = data->parser;
1743 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1744 	struct ibv_flow_spec_tcp_udp tcp = {
1745 		.type = parser->inner | IBV_FLOW_SPEC_TCP,
1746 		.size = tcp_size,
1747 	};
1748 
1749 	if (parser->layer == HASH_RXQ_IPV4)
1750 		parser->layer = HASH_RXQ_TCPV4;
1751 	else
1752 		parser->layer = HASH_RXQ_TCPV6;
1753 	if (spec) {
1754 		if (!mask)
1755 			mask = default_mask;
1756 		tcp.val.dst_port = spec->hdr.dst_port;
1757 		tcp.val.src_port = spec->hdr.src_port;
1758 		tcp.mask.dst_port = mask->hdr.dst_port;
1759 		tcp.mask.src_port = mask->hdr.src_port;
1760 		/* Remove unwanted bits from values. */
1761 		tcp.val.src_port &= tcp.mask.src_port;
1762 		tcp.val.dst_port &= tcp.mask.dst_port;
1763 	}
1764 	mlx5_flow_create_copy(parser, &tcp, tcp_size);
1765 	return 0;
1766 }
1767 
1768 /**
1769  * Convert VXLAN item to Verbs specification.
1770  *
1771  * @param item[in]
1772  *   Item specification.
1773  * @param default_mask[in]
1774  *   Default bit-masks to use when item->mask is not provided.
1775  * @param data[in, out]
1776  *   User structure.
1777  *
1778  * @return
1779  *   0 on success, a negative errno value otherwise and rte_errno is set.
1780  */
1781 static int
1782 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1783 		       const void *default_mask,
1784 		       struct mlx5_flow_data *data)
1785 {
1786 	const struct rte_flow_item_vxlan *spec = item->spec;
1787 	const struct rte_flow_item_vxlan *mask = item->mask;
1788 	struct mlx5_flow_parse *parser = data->parser;
1789 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1790 	struct ibv_flow_spec_tunnel vxlan = {
1791 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1792 		.size = size,
1793 	};
1794 	union vni {
1795 		uint32_t vlan_id;
1796 		uint8_t vni[4];
1797 	} id;
1798 
1799 	id.vni[0] = 0;
1800 	parser->inner = IBV_FLOW_SPEC_INNER;
1801 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
1802 	parser->out_layer = parser->layer;
1803 	parser->layer = HASH_RXQ_TUNNEL;
1804 	/* Default VXLAN to outer RSS. */
1805 	if (!parser->rss_conf.level)
1806 		parser->rss_conf.level = 1;
1807 	if (spec) {
1808 		if (!mask)
1809 			mask = default_mask;
1810 		memcpy(&id.vni[1], spec->vni, 3);
1811 		vxlan.val.tunnel_id = id.vlan_id;
1812 		memcpy(&id.vni[1], mask->vni, 3);
1813 		vxlan.mask.tunnel_id = id.vlan_id;
1814 		/* Remove unwanted bits from values. */
1815 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1816 	}
1817 	/*
1818 	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if it is the
1819 	 * only layer defined in the Verbs specification, it is interpreted as
1820 	 * a wildcard and every packet matches this rule; if it follows a full
1821 	 * stack (e.g. eth / ipv4 / udp), every packet matching those outer
1822 	 * layers also matches this rule.
1823 	 * To avoid such a situation, VNI 0 is currently refused.
1824 	 */
1825 	/* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1826 	if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1827 		return rte_flow_error_set(data->error, EINVAL,
1828 					  RTE_FLOW_ERROR_TYPE_ITEM,
1829 					  item,
1830 					  "VxLAN vni cannot be 0");
1831 	mlx5_flow_create_copy(parser, &vxlan, size);
1832 	return 0;
1833 }
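
/*
 * Illustrative VXLAN item as an application would provide it (a
 * sketch; the VNI value is a placeholder).  The 24-bit VNI is copied
 * into bytes 1..3 of the union above, and a VNI that is zero after
 * masking is refused when the outer pattern is Ethernet only:
 *
 *	struct rte_flow_item_vxlan vxlan_spec = {
 *		.vni = { 0x00, 0x00, 0x2a },
 *	};
 *	struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
 *		.spec = &vxlan_spec,
 *		.mask = NULL,
 *	};
 *
 * Leaving .mask NULL selects the default VXLAN mask.
 */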
1834 
1835 /**
1836  * Convert VXLAN-GPE item to Verbs specification.
1837  *
1838  * @param item[in]
1839  *   Item specification.
1840  * @param default_mask[in]
1841  *   Default bit-masks to use when item->mask is not provided.
1842  * @param data[in, out]
1843  *   User structure.
1844  *
1845  * @return
1846  *   0 on success, a negative errno value otherwise and rte_errno is set.
1847  */
1848 static int
1849 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
1850 			   const void *default_mask,
1851 			   struct mlx5_flow_data *data)
1852 {
1853 	struct priv *priv = data->dev->data->dev_private;
1854 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1855 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1856 	struct mlx5_flow_parse *parser = data->parser;
1857 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1858 	struct ibv_flow_spec_tunnel vxlan = {
1859 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1860 		.size = size,
1861 	};
1862 	union vni {
1863 		uint32_t vlan_id;
1864 		uint8_t vni[4];
1865 	} id;
1866 
1867 	if (!priv->config.l3_vxlan_en)
1868 		return rte_flow_error_set(data->error, EINVAL,
1869 					  RTE_FLOW_ERROR_TYPE_ITEM,
1870 					  item,
1871 					  "L3 VXLAN not enabled by device"
1872 					  " parameter and/or not configured"
1873 					  " in firmware");
1874 	id.vni[0] = 0;
1875 	parser->inner = IBV_FLOW_SPEC_INNER;
1876 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)];
1877 	parser->out_layer = parser->layer;
1878 	parser->layer = HASH_RXQ_TUNNEL;
1879 	/* Default VXLAN-GPE to outer RSS. */
1880 	if (!parser->rss_conf.level)
1881 		parser->rss_conf.level = 1;
1882 	if (spec) {
1883 		if (!mask)
1884 			mask = default_mask;
1885 		memcpy(&id.vni[1], spec->vni, 3);
1886 		vxlan.val.tunnel_id = id.vlan_id;
1887 		memcpy(&id.vni[1], mask->vni, 3);
1888 		vxlan.mask.tunnel_id = id.vlan_id;
1889 		if (spec->protocol)
1890 			return rte_flow_error_set(data->error, EINVAL,
1891 						  RTE_FLOW_ERROR_TYPE_ITEM,
1892 						  item,
1893 						  "VxLAN-GPE protocol not"
1894 						  " supported");
1895 		/* Remove unwanted bits from values. */
1896 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1897 	}
1898 	/*
1899 	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if it is the
1900 	 * only layer defined in the Verbs specification, it is interpreted as
1901 	 * a wildcard and every packet matches this rule; if it follows a full
1902 	 * stack (e.g. eth / ipv4 / udp), every packet matching those outer
1903 	 * layers also matches this rule.
1904 	 * To avoid such a situation, VNI 0 is currently refused.
1905 	 */
1906 	/* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1907 	if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1908 		return rte_flow_error_set(data->error, EINVAL,
1909 					  RTE_FLOW_ERROR_TYPE_ITEM,
1910 					  item,
1911 					  "VxLAN-GPE vni cannot be 0");
1912 	mlx5_flow_create_copy(parser, &vxlan, size);
1913 	return 0;
1914 }
1915 
1916 /**
1917  * Convert GRE item to Verbs specification.
1918  *
1919  * @param item[in]
1920  *   Item specification.
1921  * @param default_mask[in]
1922  *   Default bit-masks to use when item->mask is not provided.
1923  * @param data[in, out]
1924  *   User structure.
1925  *
1926  * @return
1927  *   0 on success, a negative errno value otherwise and rte_errno is set.
1928  */
1929 static int
1930 mlx5_flow_create_gre(const struct rte_flow_item *item,
1931 		     const void *default_mask,
1932 		     struct mlx5_flow_data *data)
1933 {
1934 	struct mlx5_flow_parse *parser = data->parser;
1935 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
1936 	(void)default_mask;
1937 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1938 	struct ibv_flow_spec_tunnel tunnel = {
1939 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1940 		.size = size,
1941 	};
1942 #else
1943 	const struct rte_flow_item_gre *spec = item->spec;
1944 	const struct rte_flow_item_gre *mask = item->mask;
1945 	unsigned int size = sizeof(struct ibv_flow_spec_gre);
1946 	struct ibv_flow_spec_gre tunnel = {
1947 		.type = parser->inner | IBV_FLOW_SPEC_GRE,
1948 		.size = size,
1949 	};
1950 #endif
1951 	struct ibv_flow_spec_ipv4_ext *ipv4;
1952 	struct ibv_flow_spec_ipv6 *ipv6;
1953 	unsigned int i;
1954 
1955 	parser->inner = IBV_FLOW_SPEC_INNER;
1956 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
1957 	parser->out_layer = parser->layer;
1958 	parser->layer = HASH_RXQ_TUNNEL;
1959 	/* Default GRE to inner RSS. */
1960 	if (!parser->rss_conf.level)
1961 		parser->rss_conf.level = 2;
1962 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1963 	if (spec) {
1964 		if (!mask)
1965 			mask = default_mask;
1966 		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
1967 		tunnel.val.protocol = spec->protocol;
1968 		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
1969 		tunnel.mask.protocol = mask->protocol;
1970 		/* Remove unwanted bits from values. */
1971 		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
1972 		tunnel.val.protocol &= tunnel.mask.protocol;
1973 		tunnel.val.key &= tunnel.mask.key;
1974 	}
1975 #endif
1976 	/* Update encapsulation IP layer protocol. */
1977 	for (i = 0; i != hash_rxq_init_n; ++i) {
1978 		if (!parser->queue[i].ibv_attr)
1979 			continue;
1980 		if (parser->out_layer == HASH_RXQ_IPV4) {
1981 			ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1982 				parser->queue[i].offset -
1983 				sizeof(struct ibv_flow_spec_ipv4_ext));
1984 			if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
1985 				break;
1986 			ipv4->val.proto = MLX5_GRE;
1987 			ipv4->mask.proto = 0xff;
1988 		} else if (parser->out_layer == HASH_RXQ_IPV6) {
1989 			ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1990 				parser->queue[i].offset -
1991 				sizeof(struct ibv_flow_spec_ipv6));
1992 			if (ipv6->mask.next_hdr &&
1993 			    ipv6->val.next_hdr != MLX5_GRE)
1994 				break;
1995 			ipv6->val.next_hdr = MLX5_GRE;
1996 			ipv6->mask.next_hdr = 0xff;
1997 		}
1998 	}
1999 	if (i != hash_rxq_init_n)
2000 		return rte_flow_error_set(data->error, EINVAL,
2001 					  RTE_FLOW_ERROR_TYPE_ITEM,
2002 					  item,
2003 					  "IP protocol of GRE must be 47");
2004 	mlx5_flow_create_copy(parser, &tunnel, size);
2005 	return 0;
2006 }
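
/*
 * Illustrative outer IPv4 item compatible with the GRE fix-up above (a
 * sketch; addresses are omitted).  Leaving next_proto_id unmasked lets
 * the loop above force protocol 47/0xff into the outer spec; matching
 * it explicitly also works, while any other masked value is rejected:
 *
 *	struct rte_flow_item_ipv4 ipv4_spec = {
 *		.hdr = { .next_proto_id = 47 },
 *	};
 *	struct rte_flow_item_ipv4 ipv4_mask = {
 *		.hdr = { .next_proto_id = 0xff },
 *	};
 */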
2007 
2008 /**
2009  * Convert MPLS item to Verbs specification.
2010  * MPLS tunnel types currently supported are MPLS-in-GRE and MPLS-in-UDP.
2011  *
2012  * @param item[in]
2013  *   Item specification.
2014  * @param default_mask[in]
2015  *   Default bit-masks to use when item->mask is not provided.
2016  * @param data[in, out]
2017  *   User structure.
2018  *
2019  * @return
2020  *   0 on success, a negative errno value otherwise and rte_errno is set.
2021  */
2022 static int
2023 mlx5_flow_create_mpls(const struct rte_flow_item *item,
2024 		      const void *default_mask,
2025 		      struct mlx5_flow_data *data)
2026 {
2027 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2028 	(void)default_mask;
2029 	return rte_flow_error_set(data->error, ENOTSUP,
2030 				  RTE_FLOW_ERROR_TYPE_ITEM,
2031 				  item,
2032 				  "MPLS is not supported by driver");
2033 #else
2034 	const struct rte_flow_item_mpls *spec = item->spec;
2035 	const struct rte_flow_item_mpls *mask = item->mask;
2036 	struct mlx5_flow_parse *parser = data->parser;
2037 	unsigned int size = sizeof(struct ibv_flow_spec_mpls);
2038 	struct ibv_flow_spec_mpls mpls = {
2039 		.type = IBV_FLOW_SPEC_MPLS,
2040 		.size = size,
2041 	};
2042 
2043 	parser->inner = IBV_FLOW_SPEC_INNER;
2044 	if (parser->layer == HASH_RXQ_UDPV4 ||
2045 	    parser->layer == HASH_RXQ_UDPV6) {
2046 		parser->tunnel =
2047 			ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)];
2048 		parser->out_layer = parser->layer;
2049 	} else {
2050 		parser->tunnel =
2051 			ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)];
2052 		/* parser->out_layer keeps the value set by the GRE item. */
2053 	}
2054 	parser->layer = HASH_RXQ_TUNNEL;
2055 	/*
2056 	 * For MPLS-in-GRE, RSS level should have been set.
2057 	 * For MPLS-in-UDP, use outer RSS.
2058 	 */
2059 	if (!parser->rss_conf.level)
2060 		parser->rss_conf.level = 1;
2061 	if (spec) {
2062 		if (!mask)
2063 			mask = default_mask;
2064 		/*
2065 		 * The Verbs label field includes the entire MPLS header:
2066 		 * bits 0:19 - label value field.
2067 		 * bits 20:22 - traffic class field.
2068 		 * bit 23 - bottom of stack bit.
2069 		 * bits 24:31 - ttl field.
2070 		 */
2071 		mpls.val.label = *(const uint32_t *)spec;
2072 		mpls.mask.label = *(const uint32_t *)mask;
2073 		/* Remove unwanted bits from values. */
2074 		mpls.val.label &= mpls.mask.label;
2075 	}
2076 	mlx5_flow_create_copy(parser, &mpls, size);
2077 	return 0;
2078 #endif
2079 }
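
/*
 * Illustrative MPLS item (a sketch; the label value is a placeholder).
 * rte_flow_item_mpls stores label/TC/S in label_tc_s[3] followed by
 * ttl, which matches the raw 32-bit load performed above.  The example
 * below encodes label 16, traffic class 0, bottom-of-stack set, ttl 0:
 *
 *	struct rte_flow_item_mpls mpls_spec = {
 *		.label_tc_s = { 0x00, 0x01, 0x01 },
 *		.ttl = 0,
 *	};
 *	struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_MPLS,
 *		.spec = &mpls_spec,
 *		.mask = NULL,
 *	};
 */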
2080 
2081 /**
2082  * Convert mark/flag action to Verbs specification.
2083  *
2084  * @param parser
2085  *   Internal parser structure.
2086  * @param mark_id
2087  *   Mark identifier.
2088  *
2089  * @return
2090  *   0 on success, a negative errno value otherwise and rte_errno is set.
2091  */
2092 static int
2093 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
2094 {
2095 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
2096 	struct ibv_flow_spec_action_tag tag = {
2097 		.type = IBV_FLOW_SPEC_ACTION_TAG,
2098 		.size = size,
2099 		.tag_id = mlx5_flow_mark_set(mark_id),
2100 	};
2101 
2102 	assert(parser->mark);
2103 	mlx5_flow_create_copy(parser, &tag, size);
2104 	return 0;
2105 }
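
/*
 * Illustrative MARK action as provided by an application (a sketch;
 * the id is a placeholder).  The FLAG action takes no configuration
 * and goes through the same conversion:
 *
 *	struct rte_flow_action_mark mark = { .id = 0xcafe };
 *	struct rte_flow_action action = {
 *		.type = RTE_FLOW_ACTION_TYPE_MARK,
 *		.conf = &mark,
 *	};
 */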
2106 
2107 /**
2108  * Convert count action to Verbs specification.
2109  *
2110  * @param dev
2111  *   Pointer to Ethernet device.
2112  * @param parser
2113  *   Pointer to MLX5 flow parser structure.
2114  *
2115  * @return
2116  *   0 on success, a negative errno value otherwise and rte_errno is set.
2117  */
2118 static int
2119 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
2120 		       struct mlx5_flow_parse *parser __rte_unused)
2121 {
2122 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2123 	struct priv *priv = dev->data->dev_private;
2124 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
2125 	struct ibv_counter_set_init_attr init_attr = {0};
2126 	struct ibv_flow_spec_counter_action counter = {
2127 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
2128 		.size = size,
2129 		.counter_set_handle = 0,
2130 	};
2131 
2132 	init_attr.counter_set_id = 0;
2133 	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
2134 	if (!parser->cs) {
2135 		rte_errno = EINVAL;
2136 		return -rte_errno;
2137 	}
2138 	counter.counter_set_handle = parser->cs->handle;
2139 	mlx5_flow_create_copy(parser, &counter, size);
2140 #endif
2141 	return 0;
2142 }
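
/*
 * Illustrative COUNT action (a sketch).  The conversion above does not
 * read any configuration from the action; the counter is read back
 * later through the flow query path with struct rte_flow_query_count:
 *
 *	struct rte_flow_action action = {
 *		.type = RTE_FLOW_ACTION_TYPE_COUNT,
 *		.conf = NULL,
 *	};
 */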
2143 
2144 /**
2145  * Complete flow rule creation with a drop queue.
2146  *
2147  * @param dev
2148  *   Pointer to Ethernet device.
2149  * @param parser
2150  *   Internal parser structure.
2151  * @param flow
2152  *   Pointer to the rte_flow.
2153  * @param[out] error
2154  *   Perform verbose error reporting if not NULL.
2155  *
2156  * @return
2157  *   0 on success, a negative errno value otherwise and rte_errno is set.
2158  */
2159 static int
2160 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
2161 				   struct mlx5_flow_parse *parser,
2162 				   struct rte_flow *flow,
2163 				   struct rte_flow_error *error)
2164 {
2165 	struct priv *priv = dev->data->dev_private;
2166 	struct ibv_flow_spec_action_drop *drop;
2167 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
2168 
2169 	assert(priv->pd);
2170 	assert(priv->ctx);
2171 	flow->drop = 1;
2172 	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
2173 			parser->queue[HASH_RXQ_ETH].offset);
2174 	*drop = (struct ibv_flow_spec_action_drop){
2175 			.type = IBV_FLOW_SPEC_ACTION_DROP,
2176 			.size = size,
2177 	};
2178 	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
2179 	parser->queue[HASH_RXQ_ETH].offset += size;
2180 	flow->frxq[HASH_RXQ_ETH].ibv_attr =
2181 		parser->queue[HASH_RXQ_ETH].ibv_attr;
2182 	if (parser->count)
2183 		flow->cs = parser->cs;
2184 	if (!dev->data->dev_started)
2185 		return 0;
2186 	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
2187 	flow->frxq[HASH_RXQ_ETH].ibv_flow =
2188 		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
2189 				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
2190 	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2191 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
2192 				   NULL, "flow rule creation failure");
2193 		goto error;
2194 	}
2195 	return 0;
2196 error:
2197 	assert(flow);
2198 	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2199 		claim_zero(mlx5_glue->destroy_flow
2200 			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2201 		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2202 	}
2203 	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
2204 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2205 		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
2206 	}
2207 	if (flow->cs) {
2208 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2209 		flow->cs = NULL;
2210 		parser->cs = NULL;
2211 	}
2212 	return -rte_errno;
2213 }
2214 
2215 /**
2216  * Create hash Rx queues when RSS is enabled.
2217  *
2218  * @param dev
2219  *   Pointer to Ethernet device.
2220  * @param parser
2221  *   Internal parser structure.
2222  * @param flow
2223  *   Pointer to the rte_flow.
2224  * @param[out] error
2225  *   Perform verbose error reporting if not NULL.
2226  *
2227  * @return
2228  *   0 on success, a negative errno value otherwise and rte_errno is set.
2229  */
2230 static int
2231 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
2232 				  struct mlx5_flow_parse *parser,
2233 				  struct rte_flow *flow,
2234 				  struct rte_flow_error *error)
2235 {
2236 	unsigned int i;
2237 
2238 	for (i = 0; i != hash_rxq_init_n; ++i) {
2239 		if (!parser->queue[i].ibv_attr)
2240 			continue;
2241 		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
2242 		parser->queue[i].ibv_attr = NULL;
2243 		flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
2244 		if (!dev->data->dev_started)
2245 			continue;
2246 		flow->frxq[i].hrxq =
2247 			mlx5_hrxq_get(dev,
2248 				      parser->rss_conf.key,
2249 				      parser->rss_conf.key_len,
2250 				      flow->frxq[i].hash_fields,
2251 				      parser->rss_conf.queue,
2252 				      parser->rss_conf.queue_num,
2253 				      parser->tunnel,
2254 				      parser->rss_conf.level);
2255 		if (flow->frxq[i].hrxq)
2256 			continue;
2257 		flow->frxq[i].hrxq =
2258 			mlx5_hrxq_new(dev,
2259 				      parser->rss_conf.key,
2260 				      parser->rss_conf.key_len,
2261 				      flow->frxq[i].hash_fields,
2262 				      parser->rss_conf.queue,
2263 				      parser->rss_conf.queue_num,
2264 				      parser->tunnel,
2265 				      parser->rss_conf.level);
2266 		if (!flow->frxq[i].hrxq) {
2267 			return rte_flow_error_set(error, ENOMEM,
2268 						  RTE_FLOW_ERROR_TYPE_HANDLE,
2269 						  NULL,
2270 						  "cannot create hash rxq");
2271 		}
2272 	}
2273 	return 0;
2274 }
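
/*
 * Illustrative RSS action feeding the hash Rx queue creation above (a
 * sketch; queues, types and key are placeholders).  Level 2 requests
 * hashing on the inner headers of tunnelled packets, level 0/1 on the
 * outer ones, as handled by the tunnel items earlier in this file:
 *
 *	uint16_t queues[] = { 0, 1, 2, 3 };
 *	struct rte_flow_action_rss rss = {
 *		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
 *		.level = 2,
 *		.types = ETH_RSS_IP | ETH_RSS_UDP,
 *		.key_len = 0,
 *		.queue_num = RTE_DIM(queues),
 *		.key = NULL,
 *		.queue = queues,
 *	};
 */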
2275 
2276 /**
2277  * RXQ update after flow rule creation.
2278  *
2279  * @param dev
2280  *   Pointer to Ethernet device.
2281  * @param flow
2282  *   Pointer to the flow rule.
2283  */
2284 static void
2285 mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
2286 {
2287 	struct priv *priv = dev->data->dev_private;
2288 	unsigned int i;
2289 	unsigned int j;
2290 
2291 	if (!dev->data->dev_started)
2292 		return;
2293 	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2294 		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2295 						 [(*flow->queues)[i]];
2296 		struct mlx5_rxq_ctrl *rxq_ctrl =
2297 			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2298 		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2299 
2300 		rxq_data->mark |= flow->mark;
2301 		if (!tunnel)
2302 			continue;
2303 		rxq_ctrl->tunnel_types[tunnel] += 1;
2304 		/* Clear the tunnel type if more than one tunnel type is set. */
2305 		for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
2306 			if (j == tunnel)
2307 				continue;
2308 			if (rxq_ctrl->tunnel_types[j] > 0) {
2309 				rxq_data->tunnel = 0;
2310 				break;
2311 			}
2312 		}
2313 		if (j == RTE_DIM(rxq_ctrl->tunnel_types))
2314 			rxq_data->tunnel = flow->tunnel;
2315 	}
2316 }
2317 
2318 /**
2319  * Dump flow hash RX queue detail.
2320  *
2321  * @param dev
2322  *   Pointer to Ethernet device.
2323  * @param flow
2324  *   Pointer to the rte_flow.
2325  * @param hrxq_idx
2326  *   Hash RX queue index.
2327  */
2328 static void
2329 mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused,
2330 	       struct rte_flow *flow __rte_unused,
2331 	       unsigned int hrxq_idx __rte_unused)
2332 {
2333 #ifndef NDEBUG
2334 	uintptr_t spec_ptr;
2335 	uint16_t j;
2336 	char buf[256];
2337 	uint8_t off;
2338 	uint64_t extra_hash_fields = 0;
2339 
2340 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
2341 	if (flow->tunnel && flow->rss_conf.level > 1)
2342 		extra_hash_fields = (uint32_t)IBV_RX_HASH_INNER;
2343 #endif
2344 	spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1);
2345 	for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs;
2346 	     j++) {
2347 		struct ibv_flow_spec *spec = (void *)spec_ptr;
2348 		off += sprintf(buf + off, " %x(%hu)", spec->hdr.type,
2349 			       spec->hdr.size);
2350 		spec_ptr += spec->hdr.size;
2351 	}
2352 	DRV_LOG(DEBUG,
2353 		"port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p,"
2354 		" hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d,"
2355 		" flags:%x, comp_mask:%x specs:%s",
2356 		dev->data->port_id, (void *)flow, hrxq_idx,
2357 		(void *)flow->frxq[hrxq_idx].hrxq,
2358 		(void *)flow->frxq[hrxq_idx].hrxq->qp,
2359 		(void *)flow->frxq[hrxq_idx].hrxq->ind_table,
2360 		(flow->frxq[hrxq_idx].hash_fields | extra_hash_fields),
2361 		flow->rss_conf.queue_num,
2362 		flow->frxq[hrxq_idx].ibv_attr->num_of_specs,
2363 		flow->frxq[hrxq_idx].ibv_attr->size,
2364 		flow->frxq[hrxq_idx].ibv_attr->priority,
2365 		flow->frxq[hrxq_idx].ibv_attr->type,
2366 		flow->frxq[hrxq_idx].ibv_attr->flags,
2367 		flow->frxq[hrxq_idx].ibv_attr->comp_mask,
2368 		buf);
2369 #endif
2370 }
2371 
2372 /**
2373  * Complete flow rule creation.
2374  *
2375  * @param dev
2376  *   Pointer to Ethernet device.
2377  * @param parser
2378  *   Internal parser structure.
2379  * @param flow
2380  *   Pointer to the rte_flow.
2381  * @param[out] error
2382  *   Perform verbose error reporting if not NULL.
2383  *
2384  * @return
2385  *   0 on success, a negative errno value otherwise and rte_errno is set.
2386  */
2387 static int
2388 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
2389 			      struct mlx5_flow_parse *parser,
2390 			      struct rte_flow *flow,
2391 			      struct rte_flow_error *error)
2392 {
2393 	struct priv *priv __rte_unused = dev->data->dev_private;
2394 	int ret;
2395 	unsigned int i;
2396 	unsigned int flows_n = 0;
2397 
2398 	assert(priv->pd);
2399 	assert(priv->ctx);
2400 	assert(!parser->drop);
2401 	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
2402 	if (ret)
2403 		goto error;
2404 	if (parser->count)
2405 		flow->cs = parser->cs;
2406 	if (!dev->data->dev_started)
2407 		return 0;
2408 	for (i = 0; i != hash_rxq_init_n; ++i) {
2409 		if (!flow->frxq[i].hrxq)
2410 			continue;
2411 		flow->frxq[i].ibv_flow =
2412 			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2413 					       flow->frxq[i].ibv_attr);
2414 		mlx5_flow_dump(dev, flow, i);
2415 		if (!flow->frxq[i].ibv_flow) {
2416 			rte_flow_error_set(error, ENOMEM,
2417 					   RTE_FLOW_ERROR_TYPE_HANDLE,
2418 					   NULL, "flow rule creation failure");
2419 			goto error;
2420 		}
2421 		++flows_n;
2422 	}
2423 	if (!flows_n) {
2424 		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
2425 				   NULL, "internal error in flow creation");
2426 		goto error;
2427 	}
2428 	mlx5_flow_create_update_rxqs(dev, flow);
2429 	return 0;
2430 error:
2431 	ret = rte_errno; /* Save rte_errno before cleanup. */
2432 	assert(flow);
2433 	for (i = 0; i != hash_rxq_init_n; ++i) {
2434 		if (flow->frxq[i].ibv_flow) {
2435 			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
2436 
2437 			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
2438 		}
2439 		if (flow->frxq[i].hrxq)
2440 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2441 		if (flow->frxq[i].ibv_attr)
2442 			rte_free(flow->frxq[i].ibv_attr);
2443 	}
2444 	if (flow->cs) {
2445 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2446 		flow->cs = NULL;
2447 		parser->cs = NULL;
2448 	}
2449 	rte_errno = ret; /* Restore rte_errno. */
2450 	return -rte_errno;
2451 }
2452 
2453 /**
2454  * Convert a flow.
2455  *
2456  * @param dev
2457  *   Pointer to Ethernet device.
2458  * @param list
2459  *   Pointer to a TAILQ flow list.
2460  * @param[in] attr
2461  *   Flow rule attributes.
2462  * @param[in] pattern
2463  *   Pattern specification (list terminated by the END pattern item).
2464  * @param[in] actions
2465  *   Associated actions (list terminated by the END action).
2466  * @param[out] error
2467  *   Perform verbose error reporting if not NULL.
2468  *
2469  * @return
2470  *   A flow on success, NULL otherwise and rte_errno is set.
2471  */
2472 static struct rte_flow *
2473 mlx5_flow_list_create(struct rte_eth_dev *dev,
2474 		      struct mlx5_flows *list,
2475 		      const struct rte_flow_attr *attr,
2476 		      const struct rte_flow_item items[],
2477 		      const struct rte_flow_action actions[],
2478 		      struct rte_flow_error *error)
2479 {
2480 	struct mlx5_flow_parse parser = { .create = 1, };
2481 	struct rte_flow *flow = NULL;
2482 	unsigned int i;
2483 	int ret;
2484 
2485 	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2486 	if (ret)
2487 		goto exit;
2488 	flow = rte_calloc(__func__, 1,
2489 			  sizeof(*flow) +
2490 			  parser.rss_conf.queue_num * sizeof(uint16_t),
2491 			  0);
2492 	if (!flow) {
2493 		rte_flow_error_set(error, ENOMEM,
2494 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2495 				   NULL,
2496 				   "cannot allocate flow memory");
2497 		return NULL;
2498 	}
2499 	/* Copy configuration. */
2500 	flow->queues = (uint16_t (*)[])(flow + 1);
2501 	flow->tunnel = parser.tunnel;
2502 	flow->rss_conf = (struct rte_flow_action_rss){
2503 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2504 		.level = 0,
2505 		.types = parser.rss_conf.types,
2506 		.key_len = parser.rss_conf.key_len,
2507 		.queue_num = parser.rss_conf.queue_num,
2508 		.key = memcpy(flow->rss_key, parser.rss_conf.key,
2509 			      sizeof(*parser.rss_conf.key) *
2510 			      parser.rss_conf.key_len),
2511 		.queue = memcpy(flow->queues, parser.rss_conf.queue,
2512 				sizeof(*parser.rss_conf.queue) *
2513 				parser.rss_conf.queue_num),
2514 	};
2515 	flow->mark = parser.mark;
2516 	/* Finalise the flow. */
2517 	if (parser.drop)
2518 		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
2519 							 error);
2520 	else
2521 		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
2522 	if (ret)
2523 		goto exit;
2524 	TAILQ_INSERT_TAIL(list, flow, next);
2525 	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
2526 		(void *)flow);
2527 	return flow;
2528 exit:
2529 	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
2530 		error->message);
2531 	for (i = 0; i != hash_rxq_init_n; ++i) {
2532 		if (parser.queue[i].ibv_attr)
2533 			rte_free(parser.queue[i].ibv_attr);
2534 	}
2535 	rte_free(flow);
2536 	return NULL;
2537 }
2538 
2539 /**
2540  * Validate a flow supported by the NIC.
2541  *
2542  * @see rte_flow_validate()
2543  * @see rte_flow_ops
2544  */
2545 int
2546 mlx5_flow_validate(struct rte_eth_dev *dev,
2547 		   const struct rte_flow_attr *attr,
2548 		   const struct rte_flow_item items[],
2549 		   const struct rte_flow_action actions[],
2550 		   struct rte_flow_error *error)
2551 {
2552 	struct mlx5_flow_parse parser = { .create = 0, };
2553 
2554 	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2555 }
2556 
2557 /**
2558  * Create a flow.
2559  *
2560  * @see rte_flow_create()
2561  * @see rte_flow_ops
2562  */
2563 struct rte_flow *
2564 mlx5_flow_create(struct rte_eth_dev *dev,
2565 		 const struct rte_flow_attr *attr,
2566 		 const struct rte_flow_item items[],
2567 		 const struct rte_flow_action actions[],
2568 		 struct rte_flow_error *error)
2569 {
2570 	struct priv *priv = dev->data->dev_private;
2571 
2572 	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2573 				     error);
2574 }
2575 
2576 /**
2577  * Destroy a flow in a list.
2578  *
2579  * @param dev
2580  *   Pointer to Ethernet device.
2581  * @param list
2582  *   Pointer to a TAILQ flow list.
2583  * @param[in] flow
2584  *   Flow to destroy.
2585  */
2586 static void
2587 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2588 		       struct rte_flow *flow)
2589 {
2590 	struct priv *priv = dev->data->dev_private;
2591 	unsigned int i;
2592 
2593 	if (flow->drop || !dev->data->dev_started)
2594 		goto free;
2595 	for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
2596 		/* Update queue tunnel type. */
2597 		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2598 						 [(*flow->queues)[i]];
2599 		struct mlx5_rxq_ctrl *rxq_ctrl =
2600 			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2601 		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2602 
2603 		assert(rxq_ctrl->tunnel_types[tunnel] > 0);
2604 		rxq_ctrl->tunnel_types[tunnel] -= 1;
2605 		if (!rxq_ctrl->tunnel_types[tunnel]) {
2606 			/* Update tunnel type. */
2607 			uint8_t j;
2608 			uint8_t types = 0;
2609 			uint8_t last;
2610 
2611 			for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
2612 				if (rxq_ctrl->tunnel_types[j]) {
2613 					types += 1;
2614 					last = j;
2615 				}
2616 			/* Keep it unchanged if more than one tunnel type is left. */
2617 			if (types == 1)
2618 				rxq_data->tunnel = ptype_ext[last];
2619 			else if (types == 0)
2620 				/* No tunnel type left. */
2621 				rxq_data->tunnel = 0;
2622 		}
2623 	}
2624 	for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
2625 		struct rte_flow *tmp;
2626 		int mark = 0;
2627 
2628 		/*
2629 		 * To remove the mark from the queue, the queue must not be
2630 		 * present in any other marked flow (RSS or not).
2631 		 */
2632 		TAILQ_FOREACH(tmp, list, next) {
2633 			unsigned int j;
2634 			uint16_t *tqs = NULL;
2635 			uint16_t tq_n = 0;
2636 
2637 			if (!tmp->mark)
2638 				continue;
2639 			for (j = 0; j != hash_rxq_init_n; ++j) {
2640 				if (!tmp->frxq[j].hrxq)
2641 					continue;
2642 				tqs = tmp->frxq[j].hrxq->ind_table->queues;
2643 				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2644 			}
2645 			if (!tq_n)
2646 				continue;
2647 			for (j = 0; (j != tq_n) && !mark; j++)
2648 				if (tqs[j] == (*flow->queues)[i])
2649 					mark = 1;
2650 		}
2651 		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2652 	}
2653 free:
2654 	if (flow->drop) {
2655 		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2656 			claim_zero(mlx5_glue->destroy_flow
2657 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2658 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2659 	} else {
2660 		for (i = 0; i != hash_rxq_init_n; ++i) {
2661 			struct mlx5_flow *frxq = &flow->frxq[i];
2662 
2663 			if (frxq->ibv_flow)
2664 				claim_zero(mlx5_glue->destroy_flow
2665 					   (frxq->ibv_flow));
2666 			if (frxq->hrxq)
2667 				mlx5_hrxq_release(dev, frxq->hrxq);
2668 			if (frxq->ibv_attr)
2669 				rte_free(frxq->ibv_attr);
2670 		}
2671 	}
2672 	if (flow->cs) {
2673 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2674 		flow->cs = NULL;
2675 	}
2676 	TAILQ_REMOVE(list, flow, next);
2677 	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2678 		(void *)flow);
2679 	rte_free(flow);
2680 }
2681 
2682 /**
2683  * Destroy all flows.
2684  *
2685  * @param dev
2686  *   Pointer to Ethernet device.
2687  * @param list
2688  *   Pointer to a TAILQ flow list.
2689  */
2690 void
2691 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2692 {
2693 	while (!TAILQ_EMPTY(list)) {
2694 		struct rte_flow *flow;
2695 
2696 		flow = TAILQ_FIRST(list);
2697 		mlx5_flow_list_destroy(dev, list, flow);
2698 	}
2699 }
2700 
2701 /**
2702  * Create drop queue.
2703  *
2704  * @param dev
2705  *   Pointer to Ethernet device.
2706  *
2707  * @return
2708  *   0 on success, a negative errno value otherwise and rte_errno is set.
2709  */
2710 int
2711 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2712 {
2713 	struct priv *priv = dev->data->dev_private;
2714 	struct mlx5_hrxq_drop *fdq = NULL;
2715 
2716 	assert(priv->pd);
2717 	assert(priv->ctx);
2718 	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2719 	if (!fdq) {
2720 		DRV_LOG(WARNING,
2721 			"port %u cannot allocate memory for drop queue",
2722 			dev->data->port_id);
2723 		rte_errno = ENOMEM;
2724 		return -rte_errno;
2725 	}
2726 	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2727 	if (!fdq->cq) {
2728 		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2729 			dev->data->port_id);
2730 		rte_errno = errno;
2731 		goto error;
2732 	}
2733 	fdq->wq = mlx5_glue->create_wq
2734 		(priv->ctx,
2735 		 &(struct ibv_wq_init_attr){
2736 			.wq_type = IBV_WQT_RQ,
2737 			.max_wr = 1,
2738 			.max_sge = 1,
2739 			.pd = priv->pd,
2740 			.cq = fdq->cq,
2741 		 });
2742 	if (!fdq->wq) {
2743 		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2744 			dev->data->port_id);
2745 		rte_errno = errno;
2746 		goto error;
2747 	}
2748 	fdq->ind_table = mlx5_glue->create_rwq_ind_table
2749 		(priv->ctx,
2750 		 &(struct ibv_rwq_ind_table_init_attr){
2751 			.log_ind_tbl_size = 0,
2752 			.ind_tbl = &fdq->wq,
2753 			.comp_mask = 0,
2754 		 });
2755 	if (!fdq->ind_table) {
2756 		DRV_LOG(WARNING,
2757 			"port %u cannot allocate indirection table for drop"
2758 			" queue",
2759 			dev->data->port_id);
2760 		rte_errno = errno;
2761 		goto error;
2762 	}
2763 	fdq->qp = mlx5_glue->create_qp_ex
2764 		(priv->ctx,
2765 		 &(struct ibv_qp_init_attr_ex){
2766 			.qp_type = IBV_QPT_RAW_PACKET,
2767 			.comp_mask =
2768 				IBV_QP_INIT_ATTR_PD |
2769 				IBV_QP_INIT_ATTR_IND_TABLE |
2770 				IBV_QP_INIT_ATTR_RX_HASH,
2771 			.rx_hash_conf = (struct ibv_rx_hash_conf){
2772 				.rx_hash_function =
2773 					IBV_RX_HASH_FUNC_TOEPLITZ,
2774 				.rx_hash_key_len = rss_hash_default_key_len,
2775 				.rx_hash_key = rss_hash_default_key,
2776 				.rx_hash_fields_mask = 0,
2777 				},
2778 			.rwq_ind_tbl = fdq->ind_table,
2779 			.pd = priv->pd
2780 		 });
2781 	if (!fdq->qp) {
2782 		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2783 			dev->data->port_id);
2784 		rte_errno = errno;
2785 		goto error;
2786 	}
2787 	priv->flow_drop_queue = fdq;
2788 	return 0;
2789 error:
2790 	if (fdq->qp)
2791 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2792 	if (fdq->ind_table)
2793 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2794 	if (fdq->wq)
2795 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2796 	if (fdq->cq)
2797 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2798 	if (fdq)
2799 		rte_free(fdq);
2800 	priv->flow_drop_queue = NULL;
2801 	return -rte_errno;
2802 }
2803 
2804 /**
2805  * Delete drop queue.
2806  *
2807  * @param dev
2808  *   Pointer to Ethernet device.
2809  */
2810 void
2811 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2812 {
2813 	struct priv *priv = dev->data->dev_private;
2814 	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2815 
2816 	if (!fdq)
2817 		return;
2818 	if (fdq->qp)
2819 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2820 	if (fdq->ind_table)
2821 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2822 	if (fdq->wq)
2823 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2824 	if (fdq->cq)
2825 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2826 	rte_free(fdq);
2827 	priv->flow_drop_queue = NULL;
2828 }
2829 
2830 /**
2831  * Remove all flows.
2832  *
2833  * @param dev
2834  *   Pointer to Ethernet device.
2835  * @param list
2836  *   Pointer to a TAILQ flow list.
2837  */
2838 void
2839 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2840 {
2841 	struct priv *priv = dev->data->dev_private;
2842 	struct rte_flow *flow;
2843 	unsigned int i;
2844 
2845 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2846 		struct mlx5_ind_table_ibv *ind_tbl = NULL;
2847 
2848 		if (flow->drop) {
2849 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2850 				continue;
2851 			claim_zero(mlx5_glue->destroy_flow
2852 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2853 			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2854 			DRV_LOG(DEBUG, "port %u flow %p removed",
2855 				dev->data->port_id, (void *)flow);
2856 			/* Next flow. */
2857 			continue;
2858 		}
2859 		/* Verify the flow has not already been cleaned. */
2860 		for (i = 0; i != hash_rxq_init_n; ++i) {
2861 			if (!flow->frxq[i].ibv_flow)
2862 				continue;
2863 			/*
2864 			 * The indirection table may be needed to clear the
2865 			 * mark flags in the Rx queues.
2866 			 * Grabbing it here speeds up the process by avoiding
2867 			 * another loop.
2868 			 */
2869 			ind_tbl = flow->frxq[i].hrxq->ind_table;
2870 			break;
2871 		}
2872 		if (i == hash_rxq_init_n)
2873 			return;
2874 		if (flow->mark) {
2875 			assert(ind_tbl);
2876 			for (i = 0; i != ind_tbl->queues_n; ++i)
2877 				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2878 		}
2879 		for (i = 0; i != hash_rxq_init_n; ++i) {
2880 			if (!flow->frxq[i].ibv_flow)
2881 				continue;
2882 			claim_zero(mlx5_glue->destroy_flow
2883 				   (flow->frxq[i].ibv_flow));
2884 			flow->frxq[i].ibv_flow = NULL;
2885 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2886 			flow->frxq[i].hrxq = NULL;
2887 		}
2888 		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2889 			(void *)flow);
2890 	}
2891 	/* Cleanup Rx queue tunnel info. */
2892 	for (i = 0; i != priv->rxqs_n; ++i) {
2893 		struct mlx5_rxq_data *q = (*priv->rxqs)[i];
2894 		struct mlx5_rxq_ctrl *rxq_ctrl =
2895 			container_of(q, struct mlx5_rxq_ctrl, rxq);
2896 
2897 		if (!q)
2898 			continue;
2899 		memset((void *)rxq_ctrl->tunnel_types, 0,
2900 		       sizeof(rxq_ctrl->tunnel_types));
2901 		q->tunnel = 0;
2902 	}
2903 }
2904 
2905 /**
2906  * Add all flows.
2907  *
2908  * @param dev
2909  *   Pointer to Ethernet device.
2910  * @param list
2911  *   Pointer to a TAILQ flow list.
2912  *
2913  * @return
2914  *   0 on success, a negative errno value otherwise and rte_errno is set.
2915  */
2916 int
2917 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2918 {
2919 	struct priv *priv = dev->data->dev_private;
2920 	struct rte_flow *flow;
2921 
2922 	TAILQ_FOREACH(flow, list, next) {
2923 		unsigned int i;
2924 
2925 		if (flow->drop) {
2926 			flow->frxq[HASH_RXQ_ETH].ibv_flow =
2927 				mlx5_glue->create_flow
2928 				(priv->flow_drop_queue->qp,
2929 				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2930 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2931 				DRV_LOG(DEBUG,
2932 					"port %u flow %p cannot be applied",
2933 					dev->data->port_id, (void *)flow);
2934 				rte_errno = EINVAL;
2935 				return -rte_errno;
2936 			}
2937 			DRV_LOG(DEBUG, "port %u flow %p applied",
2938 				dev->data->port_id, (void *)flow);
2939 			/* Next flow. */
2940 			continue;
2941 		}
2942 		for (i = 0; i != hash_rxq_init_n; ++i) {
2943 			if (!flow->frxq[i].ibv_attr)
2944 				continue;
2945 			flow->frxq[i].hrxq =
2946 				mlx5_hrxq_get(dev, flow->rss_conf.key,
2947 					      flow->rss_conf.key_len,
2948 					      flow->frxq[i].hash_fields,
2949 					      flow->rss_conf.queue,
2950 					      flow->rss_conf.queue_num,
2951 					      flow->tunnel,
2952 					      flow->rss_conf.level);
2953 			if (flow->frxq[i].hrxq)
2954 				goto flow_create;
2955 			flow->frxq[i].hrxq =
2956 				mlx5_hrxq_new(dev, flow->rss_conf.key,
2957 					      flow->rss_conf.key_len,
2958 					      flow->frxq[i].hash_fields,
2959 					      flow->rss_conf.queue,
2960 					      flow->rss_conf.queue_num,
2961 					      flow->tunnel,
2962 					      flow->rss_conf.level);
2963 			if (!flow->frxq[i].hrxq) {
2964 				DRV_LOG(DEBUG,
2965 					"port %u flow %p cannot create hash"
2966 					" rxq",
2967 					dev->data->port_id, (void *)flow);
2968 				rte_errno = EINVAL;
2969 				return -rte_errno;
2970 			}
2971 flow_create:
2972 			mlx5_flow_dump(dev, flow, i);
2973 			flow->frxq[i].ibv_flow =
2974 				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2975 						       flow->frxq[i].ibv_attr);
2976 			if (!flow->frxq[i].ibv_flow) {
2977 				DRV_LOG(DEBUG,
2978 					"port %u flow %p type %u cannot be"
2979 					" applied",
2980 					dev->data->port_id, (void *)flow, i);
2981 				rte_errno = EINVAL;
2982 				return -rte_errno;
2983 			}
2984 		}
2985 		mlx5_flow_create_update_rxqs(dev, flow);
2986 	}
2987 	return 0;
2988 }
2989 
2990 /**
2991  * Verify the flow list is empty
2992  *
2993  * @param dev
2994  *  Pointer to Ethernet device.
2995  *
2996  * @return the number of flows not released.
2997  */
2998 int
2999 mlx5_flow_verify(struct rte_eth_dev *dev)
3000 {
3001 	struct priv *priv = dev->data->dev_private;
3002 	struct rte_flow *flow;
3003 	int ret = 0;
3004 
3005 	TAILQ_FOREACH(flow, &priv->flows, next) {
3006 		DRV_LOG(DEBUG, "port %u flow %p still referenced",
3007 			dev->data->port_id, (void *)flow);
3008 		++ret;
3009 	}
3010 	return ret;
3011 }
3012 
3013 /**
3014  * Enable a control flow configured from the control plane.
3015  *
3016  * @param dev
3017  *   Pointer to Ethernet device.
3018  * @param eth_spec
3019  *   An Ethernet flow spec to apply.
3020  * @param eth_mask
3021  *   An Ethernet flow mask to apply.
3022  * @param vlan_spec
3023  *   A VLAN flow spec to apply.
3024  * @param vlan_mask
3025  *   A VLAN flow mask to apply.
3026  *
3027  * @return
3028  *   0 on success, a negative errno value otherwise and rte_errno is set.
3029  */
3030 int
3031 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
3032 		    struct rte_flow_item_eth *eth_spec,
3033 		    struct rte_flow_item_eth *eth_mask,
3034 		    struct rte_flow_item_vlan *vlan_spec,
3035 		    struct rte_flow_item_vlan *vlan_mask)
3036 {
3037 	struct priv *priv = dev->data->dev_private;
3038 	const struct rte_flow_attr attr = {
3039 		.ingress = 1,
3040 		.priority = MLX5_CTRL_FLOW_PRIORITY,
3041 	};
3042 	struct rte_flow_item items[] = {
3043 		{
3044 			.type = RTE_FLOW_ITEM_TYPE_ETH,
3045 			.spec = eth_spec,
3046 			.last = NULL,
3047 			.mask = eth_mask,
3048 		},
3049 		{
3050 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
3051 				RTE_FLOW_ITEM_TYPE_END,
3052 			.spec = vlan_spec,
3053 			.last = NULL,
3054 			.mask = vlan_mask,
3055 		},
3056 		{
3057 			.type = RTE_FLOW_ITEM_TYPE_END,
3058 		},
3059 	};
3060 	uint16_t queue[priv->reta_idx_n];
3061 	struct rte_flow_action_rss action_rss = {
3062 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
3063 		.level = 0,
3064 		.types = priv->rss_conf.rss_hf,
3065 		.key_len = priv->rss_conf.rss_key_len,
3066 		.queue_num = priv->reta_idx_n,
3067 		.key = priv->rss_conf.rss_key,
3068 		.queue = queue,
3069 	};
3070 	struct rte_flow_action actions[] = {
3071 		{
3072 			.type = RTE_FLOW_ACTION_TYPE_RSS,
3073 			.conf = &action_rss,
3074 		},
3075 		{
3076 			.type = RTE_FLOW_ACTION_TYPE_END,
3077 		},
3078 	};
3079 	struct rte_flow *flow;
3080 	struct rte_flow_error error;
3081 	unsigned int i;
3082 
3083 	if (!priv->reta_idx_n) {
3084 		rte_errno = EINVAL;
3085 		return -rte_errno;
3086 	}
3087 	for (i = 0; i != priv->reta_idx_n; ++i)
3088 		queue[i] = (*priv->reta_idx)[i];
3089 	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
3090 				     actions, &error);
3091 	if (!flow)
3092 		return -rte_errno;
3093 	return 0;
3094 }
3095 
3096 /**
3097  * Enable a control flow configured from the control plane.
3098  *
3099  * @param dev
3100  *   Pointer to Ethernet device.
3101  * @param eth_spec
3102  *   An Ethernet flow spec to apply.
3103  * @param eth_mask
3104  *   An Ethernet flow mask to apply.
3105  *
3106  * @return
3107  *   0 on success, a negative errno value otherwise and rte_errno is set.
3108  */
3109 int
3110 mlx5_ctrl_flow(struct rte_eth_dev *dev,
3111 	       struct rte_flow_item_eth *eth_spec,
3112 	       struct rte_flow_item_eth *eth_mask)
3113 {
3114 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
3115 }
3116 
3117 /**
3118  * Destroy a flow.
3119  *
3120  * @see rte_flow_destroy()
3121  * @see rte_flow_ops
3122  */
3123 int
3124 mlx5_flow_destroy(struct rte_eth_dev *dev,
3125 		  struct rte_flow *flow,
3126 		  struct rte_flow_error *error __rte_unused)
3127 {
3128 	struct priv *priv = dev->data->dev_private;
3129 
3130 	mlx5_flow_list_destroy(dev, &priv->flows, flow);
3131 	return 0;
3132 }
3133 
3134 /**
3135  * Destroy all flows.
3136  *
3137  * @see rte_flow_flush()
3138  * @see rte_flow_ops
3139  */
3140 int
3141 mlx5_flow_flush(struct rte_eth_dev *dev,
3142 		struct rte_flow_error *error __rte_unused)
3143 {
3144 	struct priv *priv = dev->data->dev_private;
3145 
3146 	mlx5_flow_list_flush(dev, &priv->flows);
3147 	return 0;
3148 }
3149 
3150 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
3151 /**
3152  * Query flow counter.
3153  *
3154  * @param cs
3155  *   The counter set to read.
3156  * @param counter_stats
3157  *   Last counter values, used to compute the delta and updated on reset.
3158  *
3159  * @return
3160  *   0 on success, a negative errno value otherwise and rte_errno is set.
3161  */
3162 static int
3163 mlx5_flow_query_count(struct ibv_counter_set *cs,
3164 		      struct mlx5_flow_counter_stats *counter_stats,
3165 		      struct rte_flow_query_count *query_count,
3166 		      struct rte_flow_error *error)
3167 {
3168 	uint64_t counters[2];
3169 	struct ibv_query_counter_set_attr query_cs_attr = {
3170 		.cs = cs,
3171 		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
3172 	};
3173 	struct ibv_counter_set_data query_out = {
3174 		.out = counters,
3175 		.outlen = 2 * sizeof(uint64_t),
3176 	};
3177 	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
3178 
3179 	if (err)
3180 		return rte_flow_error_set(error, err,
3181 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3182 					  NULL,
3183 					  "cannot read counter");
3184 	query_count->hits_set = 1;
3185 	query_count->bytes_set = 1;
3186 	query_count->hits = counters[0] - counter_stats->hits;
3187 	query_count->bytes = counters[1] - counter_stats->bytes;
3188 	if (query_count->reset) {
3189 		counter_stats->hits = counters[0];
3190 		counter_stats->bytes = counters[1];
3191 	}
3192 	return 0;
3193 }
3194 
3195 /**
3196  * Query a flow.
3197  *
3198  * @see rte_flow_query()
3199  * @see rte_flow_ops
3200  */
3201 int
3202 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
3203 		struct rte_flow *flow,
3204 		const struct rte_flow_action *action __rte_unused,
3205 		void *data,
3206 		struct rte_flow_error *error)
3207 {
3208 	if (flow->cs) {
3209 		int ret;
3210 
3211 		ret = mlx5_flow_query_count(flow->cs,
3212 					    &flow->counter_stats,
3213 					    (struct rte_flow_query_count *)data,
3214 					    error);
3215 		if (ret)
3216 			return ret;
3217 	} else {
3218 		return rte_flow_error_set(error, EINVAL,
3219 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3220 					  NULL,
3221 					  "no counter found for flow");
3222 	}
3223 	return 0;
3224 }
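
/*
 * Illustrative counter read-back (a sketch; port_id and flow are
 * placeholders), assuming the rte_flow_query() prototype of this DPDK
 * revision which takes the action describing what to query:
 *
 *	struct rte_flow_query_count qc = { .reset = 1 };
 *	struct rte_flow_action count = {
 *		.type = RTE_FLOW_ACTION_TYPE_COUNT,
 *	};
 *	struct rte_flow_error err;
 *	uint64_t hits = 0;
 *
 *	if (!rte_flow_query(port_id, flow, &count, &qc, &err) &&
 *	    qc.hits_set)
 *		hits = qc.hits;
 */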
3225 #endif
3226 
3227 /**
3228  * Isolated mode.
3229  *
3230  * @see rte_flow_isolate()
3231  * @see rte_flow_ops
3232  */
3233 int
3234 mlx5_flow_isolate(struct rte_eth_dev *dev,
3235 		  int enable,
3236 		  struct rte_flow_error *error)
3237 {
3238 	struct priv *priv = dev->data->dev_private;
3239 
3240 	if (dev->data->dev_started) {
3241 		rte_flow_error_set(error, EBUSY,
3242 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3243 				   NULL,
3244 				   "port must be stopped first");
3245 		return -rte_errno;
3246 	}
3247 	priv->isolated = !!enable;
3248 	if (enable)
3249 		dev->dev_ops = &mlx5_dev_ops_isolate;
3250 	else
3251 		dev->dev_ops = &mlx5_dev_ops;
3252 	return 0;
3253 }
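
/*
 * Illustrative use from an application (a sketch; the port id is a
 * placeholder).  Isolated mode must be selected while the port is
 * stopped, i.e. before rte_eth_dev_start():
 *
 *	struct rte_flow_error err;
 *
 *	if (rte_flow_isolate(0, 1, &err))
 *		rte_exit(EXIT_FAILURE, "cannot enter isolated mode: %s\n",
 *			 err.message ? err.message : "unknown");
 */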
3254 
3255 /**
3256  * Convert a flow director filter to a generic flow.
3257  *
3258  * @param dev
3259  *   Pointer to Ethernet device.
3260  * @param fdir_filter
3261  *   Flow director filter to add.
3262  * @param attributes
3263  *   Generic flow parameters structure.
3264  *
3265  * @return
3266  *   0 on success, a negative errno value otherwise and rte_errno is set.
3267  */
3268 static int
3269 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
3270 			 const struct rte_eth_fdir_filter *fdir_filter,
3271 			 struct mlx5_fdir *attributes)
3272 {
3273 	struct priv *priv = dev->data->dev_private;
3274 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
3275 	const struct rte_eth_fdir_masks *mask =
3276 		&dev->data->dev_conf.fdir_conf.mask;
3277 
3278 	/* Validate queue number. */
3279 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3280 		DRV_LOG(ERR, "port %u invalid queue number %d",
3281 			dev->data->port_id, fdir_filter->action.rx_queue);
3282 		rte_errno = EINVAL;
3283 		return -rte_errno;
3284 	}
3285 	attributes->attr.ingress = 1;
3286 	attributes->items[0] = (struct rte_flow_item) {
3287 		.type = RTE_FLOW_ITEM_TYPE_ETH,
3288 		.spec = &attributes->l2,
3289 		.mask = &attributes->l2_mask,
3290 	};
3291 	switch (fdir_filter->action.behavior) {
3292 	case RTE_ETH_FDIR_ACCEPT:
3293 		attributes->actions[0] = (struct rte_flow_action){
3294 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
3295 			.conf = &attributes->queue,
3296 		};
3297 		break;
3298 	case RTE_ETH_FDIR_REJECT:
3299 		attributes->actions[0] = (struct rte_flow_action){
3300 			.type = RTE_FLOW_ACTION_TYPE_DROP,
3301 		};
3302 		break;
3303 	default:
3304 		DRV_LOG(ERR, "port %u invalid behavior %d",
3305 			dev->data->port_id,
3306 			fdir_filter->action.behavior);
3307 		rte_errno = ENOTSUP;
3308 		return -rte_errno;
3309 	}
3310 	attributes->queue.index = fdir_filter->action.rx_queue;
3311 	/* Handle L3. */
3312 	switch (fdir_filter->input.flow_type) {
3313 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3314 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3315 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3316 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
3317 			.src_addr = input->flow.ip4_flow.src_ip,
3318 			.dst_addr = input->flow.ip4_flow.dst_ip,
3319 			.time_to_live = input->flow.ip4_flow.ttl,
3320 			.type_of_service = input->flow.ip4_flow.tos,
3321 			.next_proto_id = input->flow.ip4_flow.proto,
3322 		};
3323 		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
3324 			.src_addr = mask->ipv4_mask.src_ip,
3325 			.dst_addr = mask->ipv4_mask.dst_ip,
3326 			.time_to_live = mask->ipv4_mask.ttl,
3327 			.type_of_service = mask->ipv4_mask.tos,
3328 			.next_proto_id = mask->ipv4_mask.proto,
3329 		};
3330 		attributes->items[1] = (struct rte_flow_item){
3331 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
3332 			.spec = &attributes->l3,
3333 			.mask = &attributes->l3_mask,
3334 		};
3335 		break;
3336 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3337 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3338 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3339 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3340 			.hop_limits = input->flow.ipv6_flow.hop_limits,
3341 			.proto = input->flow.ipv6_flow.proto,
3342 		};
3343 
3344 		memcpy(attributes->l3.ipv6.hdr.src_addr,
3345 		       input->flow.ipv6_flow.src_ip,
3346 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3347 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
3348 		       input->flow.ipv6_flow.dst_ip,
3349 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
3350 		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3351 		       mask->ipv6_mask.src_ip,
3352 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3353 		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3354 		       mask->ipv6_mask.dst_ip,
3355 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
3356 		attributes->items[1] = (struct rte_flow_item){
3357 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
3358 			.spec = &attributes->l3,
3359 			.mask = &attributes->l3_mask,
3360 		};
3361 		break;
3362 	default:
3363 		DRV_LOG(ERR, "port %u invalid flow type%d",
3364 			dev->data->port_id, fdir_filter->input.flow_type);
3365 		rte_errno = ENOTSUP;
3366 		return -rte_errno;
3367 	}
3368 	/* Handle L4. */
3369 	switch (fdir_filter->input.flow_type) {
3370 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3371 		attributes->l4.udp.hdr = (struct udp_hdr){
3372 			.src_port = input->flow.udp4_flow.src_port,
3373 			.dst_port = input->flow.udp4_flow.dst_port,
3374 		};
3375 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
3376 			.src_port = mask->src_port_mask,
3377 			.dst_port = mask->dst_port_mask,
3378 		};
3379 		attributes->items[2] = (struct rte_flow_item){
3380 			.type = RTE_FLOW_ITEM_TYPE_UDP,
3381 			.spec = &attributes->l4,
3382 			.mask = &attributes->l4_mask,
3383 		};
3384 		break;
3385 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3386 		attributes->l4.tcp.hdr = (struct tcp_hdr){
3387 			.src_port = input->flow.tcp4_flow.src_port,
3388 			.dst_port = input->flow.tcp4_flow.dst_port,
3389 		};
3390 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3391 			.src_port = mask->src_port_mask,
3392 			.dst_port = mask->dst_port_mask,
3393 		};
3394 		attributes->items[2] = (struct rte_flow_item){
3395 			.type = RTE_FLOW_ITEM_TYPE_TCP,
3396 			.spec = &attributes->l4,
3397 			.mask = &attributes->l4_mask,
3398 		};
3399 		break;
3400 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3401 		attributes->l4.udp.hdr = (struct udp_hdr){
3402 			.src_port = input->flow.udp6_flow.src_port,
3403 			.dst_port = input->flow.udp6_flow.dst_port,
3404 		};
3405 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
3406 			.src_port = mask->src_port_mask,
3407 			.dst_port = mask->dst_port_mask,
3408 		};
3409 		attributes->items[2] = (struct rte_flow_item){
3410 			.type = RTE_FLOW_ITEM_TYPE_UDP,
3411 			.spec = &attributes->l4,
3412 			.mask = &attributes->l4_mask,
3413 		};
3414 		break;
3415 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3416 		attributes->l4.tcp.hdr = (struct tcp_hdr){
3417 			.src_port = input->flow.tcp6_flow.src_port,
3418 			.dst_port = input->flow.tcp6_flow.dst_port,
3419 		};
3420 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3421 			.src_port = mask->src_port_mask,
3422 			.dst_port = mask->dst_port_mask,
3423 		};
3424 		attributes->items[2] = (struct rte_flow_item){
3425 			.type = RTE_FLOW_ITEM_TYPE_TCP,
3426 			.spec = &attributes->l4,
3427 			.mask = &attributes->l4_mask,
3428 		};
3429 		break;
3430 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3431 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3432 		break;
3433 	default:
3434 		DRV_LOG(ERR, "port %u invalid flow type%d",
3435 			dev->data->port_id, fdir_filter->input.flow_type);
3436 		rte_errno = ENOTSUP;
3437 		return -rte_errno;
3438 	}
3439 	return 0;
3440 }
3441 
3442 /**
3443  * Add new flow director filter and store it in list.
3444  *
3445  * @param dev
3446  *   Pointer to Ethernet device.
3447  * @param fdir_filter
3448  *   Flow director filter to add.
3449  *
3450  * @return
3451  *   0 on success, a negative errno value otherwise and rte_errno is set.
3452  */
3453 static int
3454 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3455 		     const struct rte_eth_fdir_filter *fdir_filter)
3456 {
3457 	struct priv *priv = dev->data->dev_private;
3458 	struct mlx5_fdir attributes = {
3459 		.attr.group = 0,
3460 		.l2_mask = {
3461 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3462 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3463 			.type = 0,
3464 		},
3465 	};
3466 	struct mlx5_flow_parse parser = {
3467 		.layer = HASH_RXQ_ETH,
3468 	};
3469 	struct rte_flow_error error;
3470 	struct rte_flow *flow;
3471 	int ret;
3472 
3473 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3474 	if (ret)
3475 		return ret;
3476 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3477 				attributes.actions, &error, &parser);
3478 	if (ret)
3479 		return ret;
3480 	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3481 				     attributes.items, attributes.actions,
3482 				     &error);
3483 	if (flow) {
3484 		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3485 			(void *)flow);
3486 		return 0;
3487 	}
3488 	return -rte_errno;
3489 }
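
/*
 * Illustrative flow director filter accepted by the conversion above
 * (a sketch; address, port and queue are placeholders).  It is
 * translated into the generic pattern eth / ipv4 / udp with a QUEUE
 * action:
 *
 *	struct rte_eth_fdir_filter f = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip.dst_ip = rte_cpu_to_be_32(0xc0a80001),
 *				.dst_port = rte_cpu_to_be_16(5000),
 *			},
 *		},
 *		.action = {
 *			.rx_queue = 3,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *	};
 */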
3490 
3491 /**
3492  * Delete specific filter.
3493  *
3494  * @param dev
3495  *   Pointer to Ethernet device.
3496  * @param fdir_filter
3497  *   Filter to be deleted.
3498  *
3499  * @return
3500  *   0 on success, a negative errno value otherwise and rte_errno is set.
3501  */
3502 static int
3503 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
3504 			const struct rte_eth_fdir_filter *fdir_filter)
3505 {
3506 	struct priv *priv = dev->data->dev_private;
3507 	struct mlx5_fdir attributes = {
3508 		.attr.group = 0,
3509 	};
3510 	struct mlx5_flow_parse parser = {
3511 		.create = 1,
3512 		.layer = HASH_RXQ_ETH,
3513 	};
3514 	struct rte_flow_error error;
3515 	struct rte_flow *flow;
3516 	unsigned int i;
3517 	int ret;
3518 
3519 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3520 	if (ret)
3521 		return ret;
3522 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3523 				attributes.actions, &error, &parser);
3524 	if (ret)
3525 		goto exit;
3526 	/*
3527 	 * Special case for the drop action: its specification is only added
3528 	 * when a flow is actually created, so it is missing after a bare
3529 	 * conversion and must be appended here for the comparison below.
3530 	 */
3531 	if (parser.drop) {
3532 		struct ibv_flow_spec_action_drop *drop;
3533 
3534 		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
3535 				parser.queue[HASH_RXQ_ETH].offset);
3536 		*drop = (struct ibv_flow_spec_action_drop){
3537 			.type = IBV_FLOW_SPEC_ACTION_DROP,
3538 			.size = sizeof(struct ibv_flow_spec_action_drop),
3539 		};
3540 		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
3541 	}
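	/*
	 * Search the stored flows for one whose Verbs attributes and every
	 * specification header match the freshly converted filter.
	 */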
3542 	TAILQ_FOREACH(flow, &priv->flows, next) {
3543 		struct ibv_flow_attr *attr;
3544 		struct ibv_spec_header *attr_h;
3545 		void *spec;
3546 		struct ibv_flow_attr *flow_attr;
3547 		struct ibv_spec_header *flow_h;
3548 		void *flow_spec;
3549 		unsigned int specs_n;
3550 		unsigned int queue_id = parser.drop ? HASH_RXQ_ETH :
3551 						      parser.layer;
3552 
3553 		attr = parser.queue[queue_id].ibv_attr;
3554 		flow_attr = flow->frxq[queue_id].ibv_attr;
3555 		/* Compare the attributes first. */
3556 		if (!flow_attr ||
3557 		    memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
3558 			continue;
3559 		if (attr->num_of_specs == 0)
3560 			continue;
3561 		spec = (void *)((uintptr_t)attr +
3562 				sizeof(struct ibv_flow_attr));
3563 		flow_spec = (void *)((uintptr_t)flow_attr +
3564 				     sizeof(struct ibv_flow_attr));
3565 		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
3566 		for (i = 0; i != specs_n; ++i) {
3567 			attr_h = spec;
3568 			flow_h = flow_spec;
3569 			if (memcmp(spec, flow_spec,
3570 				   RTE_MIN(attr_h->size, flow_h->size)))
3571 				goto wrong_flow;
3572 			spec = (void *)((uintptr_t)spec + attr_h->size);
3573 			flow_spec = (void *)((uintptr_t)flow_spec +
3574 					     flow_h->size);
3575 		}
3576 		/* At this point, the flows match. */
3577 		break;
3578 wrong_flow:
3579 		/* The flows do not match, try the next one. */
3580 		continue;
3581 	}
3582 	ret = flow ? 0 : ENOENT; /* No stored flow matched the filter. */
3583 	if (flow)
3584 		mlx5_flow_list_destroy(dev, &priv->flows, flow);
3585 exit:
3586 	for (i = 0; i != hash_rxq_init_n; ++i) {
3587 		if (parser.queue[i].ibv_attr)
3588 			rte_free(parser.queue[i].ibv_attr);
3589 	}
3590 	rte_errno = ret < 0 ? -ret : ret; /* Normalize to a positive errno. */
3591 	return -rte_errno;
3592 }
3593 
3594 /**
3595  * Update a specific flow director filter (delete then re-add).
3596  *
3597  * @param dev
3598  *   Pointer to Ethernet device.
3599  * @param fdir_filter
3600  *   Filter to be updated.
3601  *
3602  * @return
3603  *   0 on success, a negative errno value otherwise and rte_errno is set.
3604  */
3605 static int
3606 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3607 			const struct rte_eth_fdir_filter *fdir_filter)
3608 {
3609 	int ret;
3610 
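	/*
	 * Update is implemented as delete followed by add; if the add step
	 * fails, the previously deleted filter is not restored.
	 */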
3611 	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3612 	if (ret)
3613 		return ret;
3614 	return mlx5_fdir_filter_add(dev, fdir_filter);
3615 }
3616 
3617 /**
3618  * Flush all filters.
3619  *
3620  * @param dev
3621  *   Pointer to Ethernet device.
3622  */
3623 static void
3624 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3625 {
3626 	struct priv *priv = dev->data->dev_private;
3627 
3628 	mlx5_flow_list_flush(dev, &priv->flows);
3629 }
3630 
3631 /**
3632  * Get flow director information.
3633  *
3634  * @param dev
3635  *   Pointer to Ethernet device.
3636  * @param[out] fdir_info
3637  *   Resulting flow director information.
3638  */
3639 static void
3640 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3641 {
3642 	struct rte_eth_fdir_masks *mask =
3643 		&dev->data->dev_conf.fdir_conf.mask;
3644 
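	/*
	 * Only the FDIR mode and the configured masks are meaningful here;
	 * flexible payload is not supported, hence the zeroed fields below.
	 */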
3645 	fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
3646 	fdir_info->guarant_spc = 0;
3647 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3648 	fdir_info->max_flexpayload = 0;
3649 	fdir_info->flow_types_mask[0] = 0;
3650 	fdir_info->flex_payload_unit = 0;
3651 	fdir_info->max_flex_payload_segment_num = 0;
3652 	fdir_info->flex_payload_limit = 0;
3653 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3654 }
3655 
3656 /**
3657  * Deal with flow director operations.
3658  *
3659  * @param dev
3660  *   Pointer to Ethernet device.
3661  * @param filter_op
3662  *   Operation to perform.
3663  * @param arg
3664  *   Pointer to operation-specific structure.
3665  *
3666  * @return
3667  *   0 on success, a negative errno value otherwise and rte_errno is set.
3668  */
3669 static int
3670 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3671 		    void *arg)
3672 {
3673 	enum rte_fdir_mode fdir_mode =
3674 		dev->data->dev_conf.fdir_conf.mode;
3675 
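	/*
	 * NOP is always accepted; any other operation requires the port to
	 * be configured in one of the perfect match modes checked below.
	 */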
3676 	if (filter_op == RTE_ETH_FILTER_NOP)
3677 		return 0;
3678 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3679 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3680 		DRV_LOG(ERR, "port %u flow director mode %d not supported",
3681 			dev->data->port_id, fdir_mode);
3682 		rte_errno = EINVAL;
3683 		return -rte_errno;
3684 	}
3685 	switch (filter_op) {
3686 	case RTE_ETH_FILTER_ADD:
3687 		return mlx5_fdir_filter_add(dev, arg);
3688 	case RTE_ETH_FILTER_UPDATE:
3689 		return mlx5_fdir_filter_update(dev, arg);
3690 	case RTE_ETH_FILTER_DELETE:
3691 		return mlx5_fdir_filter_delete(dev, arg);
3692 	case RTE_ETH_FILTER_FLUSH:
3693 		mlx5_fdir_filter_flush(dev);
3694 		break;
3695 	case RTE_ETH_FILTER_INFO:
3696 		mlx5_fdir_info_get(dev, arg);
3697 		break;
3698 	default:
3699 		DRV_LOG(DEBUG, "port %u unknown operation %u",
3700 			dev->data->port_id, filter_op);
3701 		rte_errno = EINVAL;
3702 		return -rte_errno;
3703 	}
3704 	return 0;
3705 }
3706 
3707 /**
3708  * Manage filter operations.
3709  *
3710  * @param dev
3711  *   Pointer to Ethernet device structure.
3712  * @param filter_type
3713  *   Filter type.
3714  * @param filter_op
3715  *   Operation to perform.
3716  * @param arg
3717  *   Pointer to operation-specific structure.
3718  *
3719  * @return
3720  *   0 on success, a negative errno value otherwise and rte_errno is set.
3721  */
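/*
 * Illustrative call path (not part of this file): the rte_flow API reaches
 * this entry point through the generic filter control, e.g.
 *
 *	const struct rte_flow_ops *ops = NULL;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				RTE_ETH_FILTER_GET, &ops);
 *
 * which returns &mlx5_flow_ops through the arg pointer.
 */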
3722 int
3723 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3724 		     enum rte_filter_type filter_type,
3725 		     enum rte_filter_op filter_op,
3726 		     void *arg)
3727 {
3728 	switch (filter_type) {
3729 	case RTE_ETH_FILTER_GENERIC:
3730 		if (filter_op != RTE_ETH_FILTER_GET) {
3731 			rte_errno = EINVAL;
3732 			return -rte_errno;
3733 		}
3734 		*(const void **)arg = &mlx5_flow_ops;
3735 		return 0;
3736 	case RTE_ETH_FILTER_FDIR:
3737 		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3738 	default:
3739 		DRV_LOG(ERR, "port %u filter type (%d) not supported",
3740 			dev->data->port_id, filter_type);
3741 		rte_errno = ENOTSUP;
3742 		return -rte_errno;
3743 	}
3744 	return 0;
3745 }
3746 
3747 /**
3748  * Detect the number of supported Verbs flow priorities.
3749  *
3750  * @param dev
3751  *   Pointer to Ethernet device.
3752  *
3753  * @return
3754  *   Number of supported Verbs flow priorities.
3755  */
3756 unsigned int
3757 mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
3758 {
3759 	struct priv *priv = dev->data->dev_private;
3760 	unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
3761 	struct {
3762 		struct ibv_flow_attr attr;
3763 		struct ibv_flow_spec_eth eth;
3764 		struct ibv_flow_spec_action_drop drop;
3765 	} flow_attr = {
3766 		.attr = {
3767 			.num_of_specs = 2,
3768 		},
3769 		.eth = {
3770 			.type = IBV_FLOW_SPEC_ETH,
3771 			.size = sizeof(struct ibv_flow_spec_eth),
3772 		},
3773 		.drop = {
3774 			.size = sizeof(struct ibv_flow_spec_action_drop),
3775 			.type = IBV_FLOW_SPEC_ACTION_DROP,
3776 		},
3777 	};
3778 	struct ibv_flow *flow;
3779 
3780 	do {
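	/*
	 * Probe the supported range by doubling: create a throwaway drop
	 * flow at the highest priority of the current guess, double the
	 * guess while creation succeeds and halve back once it fails.
	 */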
3781 		flow_attr.attr.priority = verb_priorities - 1;
3782 		flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
3783 					      &flow_attr.attr);
3784 		if (flow) {
3785 			claim_zero(mlx5_glue->destroy_flow(flow));
3786 			/* Try more priorities. */
3787 			verb_priorities *= 2;
3788 		} else {
3789 			/* Creation failed, fall back to the last supported count. */
3790 			verb_priorities /= 2;
3791 			break;
3792 		}
3793 	} while (1);
3794 	DRV_LOG(DEBUG, "port %u Verbs flow priorities: %u,"
3795 		" user flow priorities: %d",
3796 		dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
3797 	return verb_priorities;
3798 }
3799