xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision 2f82d143fb318042f47a50694baa4507b51b7381)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9 
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19 
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28 
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33 
34 /* Flow priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 1
36 
37 /* Internet Protocol versions. */
38 #define MLX5_IPV4 4
39 #define MLX5_IPV6 6
40 #define MLX5_GRE 47
41 
42 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
43 struct ibv_flow_spec_counter_action {
44 	int dummy;
45 };
46 #endif
47 
48 /* Dev ops structure defined in mlx5.c */
49 extern const struct eth_dev_ops mlx5_dev_ops;
50 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
51 
52 /** Structure given to the conversion functions. */
53 struct mlx5_flow_data {
54 	struct rte_eth_dev *dev; /**< Ethernet device. */
55 	struct mlx5_flow_parse *parser; /**< Parser context. */
56 	struct rte_flow_error *error; /**< Error context. */
57 };
58 
59 static int
60 mlx5_flow_create_eth(const struct rte_flow_item *item,
61 		     const void *default_mask,
62 		     struct mlx5_flow_data *data);
63 
64 static int
65 mlx5_flow_create_vlan(const struct rte_flow_item *item,
66 		      const void *default_mask,
67 		      struct mlx5_flow_data *data);
68 
69 static int
70 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
71 		      const void *default_mask,
72 		      struct mlx5_flow_data *data);
73 
74 static int
75 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
76 		      const void *default_mask,
77 		      struct mlx5_flow_data *data);
78 
79 static int
80 mlx5_flow_create_udp(const struct rte_flow_item *item,
81 		     const void *default_mask,
82 		     struct mlx5_flow_data *data);
83 
84 static int
85 mlx5_flow_create_tcp(const struct rte_flow_item *item,
86 		     const void *default_mask,
87 		     struct mlx5_flow_data *data);
88 
89 static int
90 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
91 		       const void *default_mask,
92 		       struct mlx5_flow_data *data);
93 
94 static int
95 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
96 			   const void *default_mask,
97 			   struct mlx5_flow_data *data);
98 
99 static int
100 mlx5_flow_create_gre(const struct rte_flow_item *item,
101 		     const void *default_mask,
102 		     struct mlx5_flow_data *data);
103 
104 struct mlx5_flow_parse;
105 
106 static void
107 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
108 		      unsigned int size);
109 
110 static int
111 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
112 
113 static int
114 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
115 
116 /* Hash RX queue types. */
117 enum hash_rxq_type {
118 	HASH_RXQ_TCPV4,
119 	HASH_RXQ_UDPV4,
120 	HASH_RXQ_IPV4,
121 	HASH_RXQ_TCPV6,
122 	HASH_RXQ_UDPV6,
123 	HASH_RXQ_IPV6,
124 	HASH_RXQ_ETH,
125 	HASH_RXQ_TUNNEL,
126 };
127 
128 /* Initialization data for hash RX queue. */
129 struct hash_rxq_init {
130 	uint64_t hash_fields; /* Fields that participate in the hash. */
131 	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
132 	unsigned int flow_priority; /* Flow priority to use. */
133 	unsigned int ip_version; /* Internet protocol. */
134 };
135 
136 /* Initialization data for hash RX queues. */
137 const struct hash_rxq_init hash_rxq_init[] = {
138 	[HASH_RXQ_TCPV4] = {
139 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
140 				IBV_RX_HASH_DST_IPV4 |
141 				IBV_RX_HASH_SRC_PORT_TCP |
142 				IBV_RX_HASH_DST_PORT_TCP),
143 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
144 		.flow_priority = 0,
145 		.ip_version = MLX5_IPV4,
146 	},
147 	[HASH_RXQ_UDPV4] = {
148 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
149 				IBV_RX_HASH_DST_IPV4 |
150 				IBV_RX_HASH_SRC_PORT_UDP |
151 				IBV_RX_HASH_DST_PORT_UDP),
152 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
153 		.flow_priority = 0,
154 		.ip_version = MLX5_IPV4,
155 	},
156 	[HASH_RXQ_IPV4] = {
157 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
158 				IBV_RX_HASH_DST_IPV4),
159 		.dpdk_rss_hf = (ETH_RSS_IPV4 |
160 				ETH_RSS_FRAG_IPV4),
161 		.flow_priority = 1,
162 		.ip_version = MLX5_IPV4,
163 	},
164 	[HASH_RXQ_TCPV6] = {
165 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
166 				IBV_RX_HASH_DST_IPV6 |
167 				IBV_RX_HASH_SRC_PORT_TCP |
168 				IBV_RX_HASH_DST_PORT_TCP),
169 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
170 		.flow_priority = 0,
171 		.ip_version = MLX5_IPV6,
172 	},
173 	[HASH_RXQ_UDPV6] = {
174 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
175 				IBV_RX_HASH_DST_IPV6 |
176 				IBV_RX_HASH_SRC_PORT_UDP |
177 				IBV_RX_HASH_DST_PORT_UDP),
178 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
179 		.flow_priority = 0,
180 		.ip_version = MLX5_IPV6,
181 	},
182 	[HASH_RXQ_IPV6] = {
183 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
184 				IBV_RX_HASH_DST_IPV6),
185 		.dpdk_rss_hf = (ETH_RSS_IPV6 |
186 				ETH_RSS_FRAG_IPV6),
187 		.flow_priority = 1,
188 		.ip_version = MLX5_IPV6,
189 	},
190 	[HASH_RXQ_ETH] = {
191 		.hash_fields = 0,
192 		.dpdk_rss_hf = 0,
193 		.flow_priority = 2,
194 	},
195 };
196 
197 /* Number of entries in hash_rxq_init[]. */
198 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
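
/*
 * Illustrative note (not upstream): hash_rxq_init[] maps DPDK RSS hash
 * types to the Verbs hash fields programmed on the hash Rx queues. For
 * instance, an RSS action requesting ETH_RSS_NONFRAG_IPV4_TCP ends up
 * selecting the HASH_RXQ_TCPV4 entry, i.e. hashing on:
 *
 *	IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 |
 *	IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP
 *
 * Less specific entries (HASH_RXQ_IPV4, HASH_RXQ_ETH) carry a higher
 * flow_priority so that more specific matches take precedence.
 */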
199 
200 /** Structure for holding counter stats. */
201 struct mlx5_flow_counter_stats {
202 	uint64_t hits; /**< Number of packets matched by the rule. */
203 	uint64_t bytes; /**< Number of bytes matched by the rule. */
204 };
205 
206 /** Structure for Drop queue. */
207 struct mlx5_hrxq_drop {
208 	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
209 	struct ibv_qp *qp; /**< Verbs queue pair. */
210 	struct ibv_wq *wq; /**< Verbs work queue. */
211 	struct ibv_cq *cq; /**< Verbs completion queue. */
212 };
213 
214 /* Flow structures. */
215 struct mlx5_flow {
216 	uint64_t hash_fields; /**< Fields that participate in the hash. */
217 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
218 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
219 	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
220 };
221 
222 /* Drop flow structures. */
223 struct mlx5_flow_drop {
224 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
225 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
226 };
227 
228 struct rte_flow {
229 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
230 	uint32_t mark:1; /**< Set if the flow is marked. */
231 	uint32_t drop:1; /**< Drop queue. */
232 	struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
233 	uint16_t (*queues)[]; /**< Queue indexes to use. */
234 	uint8_t rss_key[40]; /**< Copy of the RSS key. */
235 	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
236 	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
237 	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
238 	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
239 	/**< Flow with Rx queue. */
240 };
241 
242 /** Static initializer for items. */
243 #define ITEMS(...) \
244 	(const enum rte_flow_item_type []){ \
245 		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
246 	}
247 
248 #define IS_TUNNEL(type) ( \
249 	(type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
250 	(type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
251 	(type) == RTE_FLOW_ITEM_TYPE_GRE)
252 
253 const uint32_t flow_ptype[] = {
254 	[RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
255 	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
256 	[RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
257 };
258 
259 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
260 
261 const uint32_t ptype_ext[] = {
262 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
263 					      RTE_PTYPE_L4_UDP,
264 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)]	= RTE_PTYPE_TUNNEL_VXLAN_GPE |
265 						  RTE_PTYPE_L4_UDP,
266 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
267 };
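
/*
 * Illustrative note (not upstream): flow_ptype[] maps rte_flow tunnel
 * item types to their RTE_PTYPE_TUNNEL_* value, while ptype_ext[],
 * indexed through PTYPE_IDX() (the tunnel bits shifted down by 12), also
 * records the outer L4 type where relevant. A typical use, found later
 * in this file, is:
 *
 *	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
 *
 * which remembers "VXLAN over UDP" for the flow being parsed.
 */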
268 
269 /** Structure to generate a simple graph of layers supported by the NIC. */
270 struct mlx5_flow_items {
271 	/** List of possible actions for these items. */
272 	const enum rte_flow_action_type *const actions;
273 	/** Bit-masks corresponding to the possibilities for the item. */
274 	const void *mask;
275 	/**
276 	 * Default bit-masks to use when item->mask is not provided. When
277 	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
278 	 * used instead.
279 	 */
280 	const void *default_mask;
281 	/** Bit-masks size in bytes. */
282 	const unsigned int mask_sz;
283 	/**
284 	 * Conversion function from rte_flow to NIC specific flow.
285 	 *
286 	 * @param item
287 	 *   rte_flow item to convert.
288 	 * @param default_mask
289 	 *   Default bit-masks to use when item->mask is not provided.
290 	 * @param data
291 	 *   Internal structure to store the conversion.
292 	 *
293 	 * @return
294 	 *   0 on success, a negative errno value otherwise and rte_errno is
295 	 *   set.
296 	 */
297 	int (*convert)(const struct rte_flow_item *item,
298 		       const void *default_mask,
299 		       struct mlx5_flow_data *data);
300 	/** Size in bytes of the destination structure. */
301 	const unsigned int dst_sz;
302 	/** List of possible following items.  */
303 	const enum rte_flow_item_type *const items;
304 };
305 
306 /** Valid actions for this PMD. */
307 static const enum rte_flow_action_type valid_actions[] = {
308 	RTE_FLOW_ACTION_TYPE_DROP,
309 	RTE_FLOW_ACTION_TYPE_QUEUE,
310 	RTE_FLOW_ACTION_TYPE_MARK,
311 	RTE_FLOW_ACTION_TYPE_FLAG,
312 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
313 	RTE_FLOW_ACTION_TYPE_COUNT,
314 #endif
315 	RTE_FLOW_ACTION_TYPE_END,
316 };
317 
318 /** Graph of supported items and associated actions. */
319 static const struct mlx5_flow_items mlx5_flow_items[] = {
320 	[RTE_FLOW_ITEM_TYPE_END] = {
321 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
322 			       RTE_FLOW_ITEM_TYPE_VXLAN,
323 			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
324 			       RTE_FLOW_ITEM_TYPE_GRE),
325 	},
326 	[RTE_FLOW_ITEM_TYPE_ETH] = {
327 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
328 			       RTE_FLOW_ITEM_TYPE_IPV4,
329 			       RTE_FLOW_ITEM_TYPE_IPV6),
330 		.actions = valid_actions,
331 		.mask = &(const struct rte_flow_item_eth){
332 			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
333 			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
334 			.type = -1,
335 		},
336 		.default_mask = &rte_flow_item_eth_mask,
337 		.mask_sz = sizeof(struct rte_flow_item_eth),
338 		.convert = mlx5_flow_create_eth,
339 		.dst_sz = sizeof(struct ibv_flow_spec_eth),
340 	},
341 	[RTE_FLOW_ITEM_TYPE_VLAN] = {
342 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
343 			       RTE_FLOW_ITEM_TYPE_IPV6),
344 		.actions = valid_actions,
345 		.mask = &(const struct rte_flow_item_vlan){
346 			.tci = -1,
347 			.inner_type = -1,
348 		},
349 		.default_mask = &rte_flow_item_vlan_mask,
350 		.mask_sz = sizeof(struct rte_flow_item_vlan),
351 		.convert = mlx5_flow_create_vlan,
352 		.dst_sz = 0,
353 	},
354 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
355 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
356 			       RTE_FLOW_ITEM_TYPE_TCP,
357 			       RTE_FLOW_ITEM_TYPE_GRE),
358 		.actions = valid_actions,
359 		.mask = &(const struct rte_flow_item_ipv4){
360 			.hdr = {
361 				.src_addr = -1,
362 				.dst_addr = -1,
363 				.type_of_service = -1,
364 				.next_proto_id = -1,
365 			},
366 		},
367 		.default_mask = &rte_flow_item_ipv4_mask,
368 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
369 		.convert = mlx5_flow_create_ipv4,
370 		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
371 	},
372 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
373 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
374 			       RTE_FLOW_ITEM_TYPE_TCP,
375 			       RTE_FLOW_ITEM_TYPE_GRE),
376 		.actions = valid_actions,
377 		.mask = &(const struct rte_flow_item_ipv6){
378 			.hdr = {
379 				.src_addr = {
380 					0xff, 0xff, 0xff, 0xff,
381 					0xff, 0xff, 0xff, 0xff,
382 					0xff, 0xff, 0xff, 0xff,
383 					0xff, 0xff, 0xff, 0xff,
384 				},
385 				.dst_addr = {
386 					0xff, 0xff, 0xff, 0xff,
387 					0xff, 0xff, 0xff, 0xff,
388 					0xff, 0xff, 0xff, 0xff,
389 					0xff, 0xff, 0xff, 0xff,
390 				},
391 				.vtc_flow = -1,
392 				.proto = -1,
393 				.hop_limits = -1,
394 			},
395 		},
396 		.default_mask = &rte_flow_item_ipv6_mask,
397 		.mask_sz = sizeof(struct rte_flow_item_ipv6),
398 		.convert = mlx5_flow_create_ipv6,
399 		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
400 	},
401 	[RTE_FLOW_ITEM_TYPE_UDP] = {
402 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
403 			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
404 		.actions = valid_actions,
405 		.mask = &(const struct rte_flow_item_udp){
406 			.hdr = {
407 				.src_port = -1,
408 				.dst_port = -1,
409 			},
410 		},
411 		.default_mask = &rte_flow_item_udp_mask,
412 		.mask_sz = sizeof(struct rte_flow_item_udp),
413 		.convert = mlx5_flow_create_udp,
414 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
415 	},
416 	[RTE_FLOW_ITEM_TYPE_TCP] = {
417 		.actions = valid_actions,
418 		.mask = &(const struct rte_flow_item_tcp){
419 			.hdr = {
420 				.src_port = -1,
421 				.dst_port = -1,
422 			},
423 		},
424 		.default_mask = &rte_flow_item_tcp_mask,
425 		.mask_sz = sizeof(struct rte_flow_item_tcp),
426 		.convert = mlx5_flow_create_tcp,
427 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
428 	},
429 	[RTE_FLOW_ITEM_TYPE_GRE] = {
430 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
431 			       RTE_FLOW_ITEM_TYPE_IPV4,
432 			       RTE_FLOW_ITEM_TYPE_IPV6),
433 		.actions = valid_actions,
434 		.mask = &(const struct rte_flow_item_gre){
435 			.protocol = -1,
436 		},
437 		.default_mask = &rte_flow_item_gre_mask,
438 		.mask_sz = sizeof(struct rte_flow_item_gre),
439 		.convert = mlx5_flow_create_gre,
440 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
441 	},
442 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
443 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
444 			       RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
445 			       RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
446 		.actions = valid_actions,
447 		.mask = &(const struct rte_flow_item_vxlan){
448 			.vni = "\xff\xff\xff",
449 		},
450 		.default_mask = &rte_flow_item_vxlan_mask,
451 		.mask_sz = sizeof(struct rte_flow_item_vxlan),
452 		.convert = mlx5_flow_create_vxlan,
453 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
454 	},
455 	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = {
456 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
457 			       RTE_FLOW_ITEM_TYPE_IPV4,
458 			       RTE_FLOW_ITEM_TYPE_IPV6),
459 		.actions = valid_actions,
460 		.mask = &(const struct rte_flow_item_vxlan_gpe){
461 			.vni = "\xff\xff\xff",
462 		},
463 		.default_mask = &rte_flow_item_vxlan_gpe_mask,
464 		.mask_sz = sizeof(struct rte_flow_item_vxlan_gpe),
465 		.convert = mlx5_flow_create_vxlan_gpe,
466 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
467 	},
468 };
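
/*
 * Illustrative note (not upstream): mlx5_flow_items[] is walked as a
 * graph, starting from the RTE_FLOW_ITEM_TYPE_END entry; each entry
 * lists the item types allowed to follow it. A pattern such as the
 * sketch below is therefore accepted (ETH -> IPV4 -> UDP -> VXLAN are
 * all chained in the graph), while e.g. TCP followed by VXLAN is not:
 *
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_VXLAN },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *
 * Each entry also provides the validation mask and the size (dst_sz) of
 * the Verbs specification it converts into.
 */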
469 
470 /** Structure to pass to the conversion function. */
471 struct mlx5_flow_parse {
472 	uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
473 	uint32_t create:1;
474 	/**< Whether resources should remain after a validate. */
475 	uint32_t drop:1; /**< Target is a drop queue. */
476 	uint32_t mark:1; /**< Mark is present in the flow. */
477 	uint32_t count:1; /**< Count is present in the flow. */
478 	uint32_t mark_id; /**< Mark identifier. */
479 	struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
480 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
481 	uint8_t rss_key[40]; /**< Copy of the RSS key. */
482 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
483 	enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
484 	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
485 	struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
486 	struct {
487 		struct ibv_flow_attr *ibv_attr;
488 		/**< Pointer to Verbs attributes. */
489 		unsigned int offset;
490 		/**< Current position or total size of the attribute. */
491 		uint64_t hash_fields; /**< Verbs hash fields. */
492 	} queue[RTE_DIM(hash_rxq_init)];
493 };
494 
495 static const struct rte_flow_ops mlx5_flow_ops = {
496 	.validate = mlx5_flow_validate,
497 	.create = mlx5_flow_create,
498 	.destroy = mlx5_flow_destroy,
499 	.flush = mlx5_flow_flush,
500 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
501 	.query = mlx5_flow_query,
502 #else
503 	.query = NULL,
504 #endif
505 	.isolate = mlx5_flow_isolate,
506 };
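
/*
 * Illustrative note (not upstream): this table is what applications
 * reach when calling the rte_flow API on a mlx5 port; in this driver
 * generation it is presumably handed back through the generic filter
 * control path (RTE_ETH_FILTER_GENERIC) implemented elsewhere in this
 * file.
 */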
507 
508 /* Convert FDIR request to Generic flow. */
509 struct mlx5_fdir {
510 	struct rte_flow_attr attr;
511 	struct rte_flow_action actions[2];
512 	struct rte_flow_item items[4];
513 	struct rte_flow_item_eth l2;
514 	struct rte_flow_item_eth l2_mask;
515 	union {
516 		struct rte_flow_item_ipv4 ipv4;
517 		struct rte_flow_item_ipv6 ipv6;
518 	} l3;
519 	union {
520 		struct rte_flow_item_ipv4 ipv4;
521 		struct rte_flow_item_ipv6 ipv6;
522 	} l3_mask;
523 	union {
524 		struct rte_flow_item_udp udp;
525 		struct rte_flow_item_tcp tcp;
526 	} l4;
527 	union {
528 		struct rte_flow_item_udp udp;
529 		struct rte_flow_item_tcp tcp;
530 	} l4_mask;
531 	struct rte_flow_action_queue queue;
532 };
533 
534 /* Verbs specification header. */
535 struct ibv_spec_header {
536 	enum ibv_flow_spec_type type;
537 	uint16_t size;
538 };
539 
540 /**
541  * Check support for a given item.
542  *
543  * @param item[in]
544  *   Item specification.
545  * @param mask[in]
546  *   Bit-masks covering supported fields to compare with spec, last and mask in
547  *   \item.
548  * @param size
549  *   Bit-mask size in bytes.
550  *
551  * @return
552  *   0 on success, a negative errno value otherwise and rte_errno is set.
553  */
554 static int
555 mlx5_flow_item_validate(const struct rte_flow_item *item,
556 			const uint8_t *mask, unsigned int size)
557 {
558 	if (!item->spec && (item->mask || item->last)) {
559 		rte_errno = EINVAL;
560 		return -rte_errno;
561 	}
562 	if (item->spec && !item->mask) {
563 		unsigned int i;
564 		const uint8_t *spec = item->spec;
565 
566 		for (i = 0; i < size; ++i)
567 			if ((spec[i] | mask[i]) != mask[i]) {
568 				rte_errno = EINVAL;
569 				return -rte_errno;
570 			}
571 	}
572 	if (item->last && !item->mask) {
573 		unsigned int i;
574 		const uint8_t *spec = item->last;
575 
576 		for (i = 0; i < size; ++i)
577 			if ((spec[i] | mask[i]) != mask[i]) {
578 				rte_errno = EINVAL;
579 				return -rte_errno;
580 			}
581 	}
582 	if (item->mask) {
583 		unsigned int i;
584 		const uint8_t *spec = item->spec;
585 
586 		for (i = 0; i < size; ++i)
587 			if ((spec[i] | mask[i]) != mask[i]) {
588 				rte_errno = EINVAL;
589 				return -rte_errno;
590 			}
591 	}
592 	if (item->spec && item->last) {
593 		uint8_t spec[size];
594 		uint8_t last[size];
595 		const uint8_t *apply = mask;
596 		unsigned int i;
597 		int ret;
598 
599 		if (item->mask)
600 			apply = item->mask;
601 		for (i = 0; i < size; ++i) {
602 			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
603 			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
604 		}
605 		ret = memcmp(spec, last, size);
606 		if (ret != 0) {
607 			rte_errno = EINVAL;
608 			return -rte_errno;
609 		}
610 	}
611 	return 0;
612 }
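
/*
 * Illustrative note (not upstream): the last check above means ranges
 * are effectively rejected; once masked, "spec" and "last" must be
 * identical. Assuming a TCP item such as:
 *
 *	.spec = &(struct rte_flow_item_tcp){ .hdr.dst_port = RTE_BE16(80) },
 *	.last = &(struct rte_flow_item_tcp){ .hdr.dst_port = RTE_BE16(90) },
 *	.mask = &(struct rte_flow_item_tcp){ .hdr.dst_port = RTE_BE16(0xffff) },
 *
 * validation fails with EINVAL, whereas the same item with .last equal
 * to .spec (or with a mask hiding the differing bits) passes.
 */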
613 
614 /**
615  * Extract attribute to the parser.
616  *
617  * @param[in] attr
618  *   Flow rule attributes.
619  * @param[out] error
620  *   Perform verbose error reporting if not NULL.
621  *
622  * @return
623  *   0 on success, a negative errno value otherwise and rte_errno is set.
624  */
625 static int
626 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
627 			     struct rte_flow_error *error)
628 {
629 	if (attr->group) {
630 		rte_flow_error_set(error, ENOTSUP,
631 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
632 				   NULL,
633 				   "groups are not supported");
634 		return -rte_errno;
635 	}
636 	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
637 		rte_flow_error_set(error, ENOTSUP,
638 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
639 				   NULL,
640 				   "priorities are not supported");
641 		return -rte_errno;
642 	}
643 	if (attr->egress) {
644 		rte_flow_error_set(error, ENOTSUP,
645 				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
646 				   NULL,
647 				   "egress is not supported");
648 		return -rte_errno;
649 	}
650 	if (attr->transfer) {
651 		rte_flow_error_set(error, ENOTSUP,
652 				   RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
653 				   NULL,
654 				   "transfer is not supported");
655 		return -rte_errno;
656 	}
657 	if (!attr->ingress) {
658 		rte_flow_error_set(error, ENOTSUP,
659 				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
660 				   NULL,
661 				   "only ingress is supported");
662 		return -rte_errno;
663 	}
664 	return 0;
665 }
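
/*
 * Illustrative note (not upstream): the only attribute combination
 * accepted here is an ingress rule in group 0 with priority 0 or
 * MLX5_CTRL_FLOW_PRIORITY, e.g.:
 *
 *	const struct rte_flow_attr attr = { .ingress = 1 };
 *
 * Egress, transfer, other groups and other priorities are rejected with
 * ENOTSUP.
 */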
666 
667 /**
668  * Extract actions request to the parser.
669  *
670  * @param dev
671  *   Pointer to Ethernet device.
672  * @param[in] actions
673  *   Associated actions (list terminated by the END action).
674  * @param[out] error
675  *   Perform verbose error reporting if not NULL.
676  * @param[in, out] parser
677  *   Internal parser structure.
678  *
679  * @return
680  *   0 on success, a negative errno value otherwise and rte_errno is set.
681  */
682 static int
683 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
684 			  const struct rte_flow_action actions[],
685 			  struct rte_flow_error *error,
686 			  struct mlx5_flow_parse *parser)
687 {
688 	enum { FATE = 1, MARK = 2, COUNT = 4, };
689 	uint32_t overlap = 0;
690 	struct priv *priv = dev->data->dev_private;
691 
692 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
693 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
694 			continue;
695 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
696 			if (overlap & FATE)
697 				goto exit_action_overlap;
698 			overlap |= FATE;
699 			parser->drop = 1;
700 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
701 			const struct rte_flow_action_queue *queue =
702 				(const struct rte_flow_action_queue *)
703 				actions->conf;
704 
705 			if (overlap & FATE)
706 				goto exit_action_overlap;
707 			overlap |= FATE;
708 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
709 				goto exit_action_not_supported;
710 			parser->queues[0] = queue->index;
711 			parser->rss_conf = (struct rte_flow_action_rss){
712 				.queue_num = 1,
713 				.queue = parser->queues,
714 			};
715 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
716 			const struct rte_flow_action_rss *rss =
717 				(const struct rte_flow_action_rss *)
718 				actions->conf;
719 			const uint8_t *rss_key;
720 			uint32_t rss_key_len;
721 			uint16_t n;
722 
723 			if (overlap & FATE)
724 				goto exit_action_overlap;
725 			overlap |= FATE;
726 			if (rss->func &&
727 			    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
728 				rte_flow_error_set(error, EINVAL,
729 						   RTE_FLOW_ERROR_TYPE_ACTION,
730 						   actions,
731 						   "the only supported RSS hash"
732 						   " function is Toeplitz");
733 				return -rte_errno;
734 			}
735 #ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
736 			if (parser->rss_conf.level > 1) {
737 				rte_flow_error_set(error, EINVAL,
738 						   RTE_FLOW_ERROR_TYPE_ACTION,
739 						   actions,
740 						   "a nonzero RSS encapsulation"
741 						   " level is not supported");
742 				return -rte_errno;
743 			}
744 #endif
745 			if (parser->rss_conf.level > 2) {
746 				rte_flow_error_set(error, EINVAL,
747 						   RTE_FLOW_ERROR_TYPE_ACTION,
748 						   actions,
749 						   "RSS encapsulation level"
750 						   " > 1 is not supported");
751 				return -rte_errno;
752 			}
753 			if (rss->types & MLX5_RSS_HF_MASK) {
754 				rte_flow_error_set(error, EINVAL,
755 						   RTE_FLOW_ERROR_TYPE_ACTION,
756 						   actions,
757 						   "unsupported RSS type"
758 						   " requested");
759 				return -rte_errno;
760 			}
761 			if (rss->key_len) {
762 				rss_key_len = rss->key_len;
763 				rss_key = rss->key;
764 			} else {
765 				rss_key_len = rss_hash_default_key_len;
766 				rss_key = rss_hash_default_key;
767 			}
768 			if (rss_key_len != RTE_DIM(parser->rss_key)) {
769 				rte_flow_error_set(error, EINVAL,
770 						   RTE_FLOW_ERROR_TYPE_ACTION,
771 						   actions,
772 						   "RSS hash key must be"
773 						   " exactly 40 bytes long");
774 				return -rte_errno;
775 			}
776 			if (!rss->queue_num) {
777 				rte_flow_error_set(error, EINVAL,
778 						   RTE_FLOW_ERROR_TYPE_ACTION,
779 						   actions,
780 						   "no valid queues");
781 				return -rte_errno;
782 			}
783 			if (rss->queue_num > RTE_DIM(parser->queues)) {
784 				rte_flow_error_set(error, EINVAL,
785 						   RTE_FLOW_ERROR_TYPE_ACTION,
786 						   actions,
787 						   "too many queues for RSS"
788 						   " context");
789 				return -rte_errno;
790 			}
791 			for (n = 0; n < rss->queue_num; ++n) {
792 				if (rss->queue[n] >= priv->rxqs_n) {
793 					rte_flow_error_set(error, EINVAL,
794 						   RTE_FLOW_ERROR_TYPE_ACTION,
795 						   actions,
796 						   "queue id > number of"
797 						   " queues");
798 					return -rte_errno;
799 				}
800 			}
801 			parser->rss_conf = (struct rte_flow_action_rss){
802 				.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
803 				.level = rss->level,
804 				.types = rss->types,
805 				.key_len = rss_key_len,
806 				.queue_num = rss->queue_num,
807 				.key = memcpy(parser->rss_key, rss_key,
808 					      sizeof(*rss_key) * rss_key_len),
809 				.queue = memcpy(parser->queues, rss->queue,
810 						sizeof(*rss->queue) *
811 						rss->queue_num),
812 			};
813 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
814 			const struct rte_flow_action_mark *mark =
815 				(const struct rte_flow_action_mark *)
816 				actions->conf;
817 
818 			if (overlap & MARK)
819 				goto exit_action_overlap;
820 			overlap |= MARK;
821 			if (!mark) {
822 				rte_flow_error_set(error, EINVAL,
823 						   RTE_FLOW_ERROR_TYPE_ACTION,
824 						   actions,
825 						   "mark must be defined");
826 				return -rte_errno;
827 			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
828 				rte_flow_error_set(error, ENOTSUP,
829 						   RTE_FLOW_ERROR_TYPE_ACTION,
830 						   actions,
831 						   "mark must be between 0"
832 						   " and 16777199");
833 				return -rte_errno;
834 			}
835 			parser->mark = 1;
836 			parser->mark_id = mark->id;
837 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
838 			if (overlap & MARK)
839 				goto exit_action_overlap;
840 			overlap |= MARK;
841 			parser->mark = 1;
842 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
843 			   priv->config.flow_counter_en) {
844 			if (overlap & COUNT)
845 				goto exit_action_overlap;
846 			overlap |= COUNT;
847 			parser->count = 1;
848 		} else {
849 			goto exit_action_not_supported;
850 		}
851 	}
852 	/* When fate is unknown, drop traffic. */
853 	if (!(overlap & FATE))
854 		parser->drop = 1;
855 	if (parser->drop && parser->mark)
856 		parser->mark = 0;
857 	if (!parser->rss_conf.queue_num && !parser->drop) {
858 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
859 				   NULL, "no valid action");
860 		return -rte_errno;
861 	}
862 	return 0;
863 exit_action_not_supported:
864 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
865 			   actions, "action not supported");
866 	return -rte_errno;
867 exit_action_overlap:
868 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
869 			   actions, "overlapping actions are not supported");
870 	return -rte_errno;
871 }
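
/*
 * Illustrative note (not upstream): a plain QUEUE action is normalized
 * into a single-entry RSS configuration so the rest of the parser only
 * deals with parser->rss_conf. For instance, assuming:
 *
 *	const struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *		  .conf = &(struct rte_flow_action_queue){ .index = 3 } },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *
 * parser->queues[0] becomes 3 and rss_conf.queue_num becomes 1.
 * Combining two fate actions (e.g. QUEUE + DROP) is rejected as
 * overlapping.
 */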
872 
873 /**
874  * Validate items.
875  *
876  * @param[in] items
877  *   Pattern specification (list terminated by the END pattern item).
878  * @param[out] error
879  *   Perform verbose error reporting if not NULL.
880  * @param[in, out] parser
881  *   Internal parser structure.
882  *
883  * @return
884  *   0 on success, a negative errno value otherwise and rte_errno is set.
885  */
886 static int
887 mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
888 				 const struct rte_flow_item items[],
889 				 struct rte_flow_error *error,
890 				 struct mlx5_flow_parse *parser)
891 {
892 	struct priv *priv = dev->data->dev_private;
893 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
894 	unsigned int i;
895 	int ret = 0;
896 
897 	/* Initialise the offsets to start after the Verbs attribute. */
898 	for (i = 0; i != hash_rxq_init_n; ++i)
899 		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
900 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
901 		const struct mlx5_flow_items *token = NULL;
902 		unsigned int n;
903 
904 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
905 			continue;
906 		for (i = 0;
907 		     cur_item->items &&
908 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
909 		     ++i) {
910 			if (cur_item->items[i] == items->type) {
911 				token = &mlx5_flow_items[items->type];
912 				break;
913 			}
914 		}
915 		if (!token) {
916 			ret = -ENOTSUP;
917 			goto exit_item_not_supported;
918 		}
919 		cur_item = token;
920 		ret = mlx5_flow_item_validate(items,
921 					      (const uint8_t *)cur_item->mask,
922 					      cur_item->mask_sz);
923 		if (ret)
924 			goto exit_item_not_supported;
925 		if (IS_TUNNEL(items->type)) {
926 			if (parser->tunnel) {
927 				rte_flow_error_set(error, ENOTSUP,
928 						   RTE_FLOW_ERROR_TYPE_ITEM,
929 						   items,
930 						   "Cannot recognize multiple"
931 						   " tunnel encapsulations.");
932 				return -rte_errno;
933 			}
934 			if (!priv->config.tunnel_en &&
935 			    parser->rss_conf.level > 1) {
936 				rte_flow_error_set(error, ENOTSUP,
937 					RTE_FLOW_ERROR_TYPE_ITEM,
938 					items,
939 					"RSS on tunnel is not supported");
940 				return -rte_errno;
941 			}
942 			parser->inner = IBV_FLOW_SPEC_INNER;
943 			parser->tunnel = flow_ptype[items->type];
944 		}
945 		if (parser->drop) {
946 			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
947 		} else {
948 			for (n = 0; n != hash_rxq_init_n; ++n)
949 				parser->queue[n].offset += cur_item->dst_sz;
950 		}
951 	}
952 	if (parser->drop) {
953 		parser->queue[HASH_RXQ_ETH].offset +=
954 			sizeof(struct ibv_flow_spec_action_drop);
955 	}
956 	if (parser->mark) {
957 		for (i = 0; i != hash_rxq_init_n; ++i)
958 			parser->queue[i].offset +=
959 				sizeof(struct ibv_flow_spec_action_tag);
960 	}
961 	if (parser->count) {
962 		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
963 
964 		for (i = 0; i != hash_rxq_init_n; ++i)
965 			parser->queue[i].offset += size;
966 	}
967 	return 0;
968 exit_item_not_supported:
969 	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
970 				  items, "item not supported");
971 }
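
/*
 * Illustrative note (not upstream): this pass only sizes the Verbs
 * attributes; parser->queue[i].offset accumulates dst_sz for every item
 * so that mlx5_flow_convert() can allocate buffers of the right size.
 * For an eth / ipv4 / tcp pattern without mark or counter, each non-drop
 * queue ends up with roughly:
 *
 *	sizeof(struct ibv_flow_attr) +
 *	sizeof(struct ibv_flow_spec_eth) +
 *	sizeof(struct ibv_flow_spec_ipv4_ext) +
 *	sizeof(struct ibv_flow_spec_tcp_udp)
 *
 * mlx5_flow_convert_finalise() later accounts for the layers missing on
 * the other hash Rx queue types.
 */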
972 
973 /**
974  * Allocate memory space to store verbs flow attributes.
975  *
976  * @param[in] size
977  *   Amount of bytes to allocate.
978  * @param[out] error
979  *   Perform verbose error reporting if not NULL.
980  *
981  * @return
982  *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
983  */
984 static struct ibv_flow_attr *
985 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
986 {
987 	struct ibv_flow_attr *ibv_attr;
988 
989 	ibv_attr = rte_calloc(__func__, 1, size, 0);
990 	if (!ibv_attr) {
991 		rte_flow_error_set(error, ENOMEM,
992 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
993 				   NULL,
994 				   "cannot allocate verbs spec attributes");
995 		return NULL;
996 	}
997 	return ibv_attr;
998 }
999 
1000 /**
1001  * Give inner packet matching a higher priority than non-inner
1002  * matching.
1003  *
1004  * @param dev
1005  *   Pointer to Ethernet device.
1006  * @param[in, out] parser
1007  *   Internal parser structure.
1008  * @param attr
1009  *   User flow attribute.
1010  */
1011 static void
1012 mlx5_flow_update_priority(struct rte_eth_dev *dev,
1013 			  struct mlx5_flow_parse *parser,
1014 			  const struct rte_flow_attr *attr)
1015 {
1016 	struct priv *priv = dev->data->dev_private;
1017 	unsigned int i;
1018 	uint16_t priority;
1019 
1020 	/*			8 priorities	>= 16 priorities
1021 	 * Control flow:	4-7		8-15
1022 	 * User normal flow:	1-3		4-7
1023 	 * User tunnel flow:	0-2		0-3
1024 	 */
1025 	priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
1026 	if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1027 		priority /= 2;
1028 	/*
1029 	 * Lower the Verbs priority of non-tunnel flows by 1 when only 8 Verbs
1030 	 * priorities are supported, by 4 otherwise.
1031 	 */
1032 	if (!parser->inner) {
1033 		if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1034 			priority += 1;
1035 		else
1036 			priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
1037 	}
1038 	if (parser->drop) {
1039 		parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
1040 				hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1041 		return;
1042 	}
1043 	for (i = 0; i != hash_rxq_init_n; ++i) {
1044 		if (!parser->queue[i].ibv_attr)
1045 			continue;
1046 		parser->queue[i].ibv_attr->priority = priority +
1047 				hash_rxq_init[i].flow_priority;
1048 	}
1049 }
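
/*
 * Worked example (illustrative, not upstream), assuming
 * MLX5_VERBS_FLOW_PRIO_8 is 8: with 16 Verbs priorities,
 * attr->priority == 0 and a non-tunnel TCPv4 pattern, the result is
 * priority = 0 * 8, not halved, + 4 (non-tunnel) +
 * hash_rxq_init[HASH_RXQ_TCPV4].flow_priority (0) = 4, matching the
 * "User normal flow: 4-7" row of the table above. The same rule on a
 * device with only 8 Verbs priorities lands on 1.
 */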
1050 
1051 /**
1052  * Finalise verbs flow attributes.
1053  *
1054  * @param[in, out] parser
1055  *   Internal parser structure.
1056  */
1057 static void
1058 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
1059 {
1060 	unsigned int i;
1061 	uint32_t inner = parser->inner;
1062 
1063 	/* Don't create extra flows for outer RSS. */
1064 	if (parser->tunnel && parser->rss_conf.level < 2)
1065 		return;
1066 	/*
1067 	 * Fill in the missing layers in the Verbs specifications, or, when the
1068 	 * attributes are not allocated yet, only compute the correct offsets to
1069 	 * allocate the memory space for the attributes and specifications.
1070 	 */
1071 	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1072 		union {
1073 			struct ibv_flow_spec_ipv4_ext ipv4;
1074 			struct ibv_flow_spec_ipv6 ipv6;
1075 			struct ibv_flow_spec_tcp_udp udp_tcp;
1076 			struct ibv_flow_spec_eth eth;
1077 		} specs;
1078 		void *dst;
1079 		uint16_t size;
1080 
1081 		if (i == parser->layer)
1082 			continue;
1083 		if (parser->layer == HASH_RXQ_ETH ||
1084 		    parser->layer == HASH_RXQ_TUNNEL) {
1085 			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1086 				size = sizeof(struct ibv_flow_spec_ipv4_ext);
1087 				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1088 					.type = inner | IBV_FLOW_SPEC_IPV4_EXT,
1089 					.size = size,
1090 				};
1091 			} else {
1092 				size = sizeof(struct ibv_flow_spec_ipv6);
1093 				specs.ipv6 = (struct ibv_flow_spec_ipv6){
1094 					.type = inner | IBV_FLOW_SPEC_IPV6,
1095 					.size = size,
1096 				};
1097 			}
1098 			if (parser->queue[i].ibv_attr) {
1099 				dst = (void *)((uintptr_t)
1100 					       parser->queue[i].ibv_attr +
1101 					       parser->queue[i].offset);
1102 				memcpy(dst, &specs, size);
1103 				++parser->queue[i].ibv_attr->num_of_specs;
1104 			}
1105 			parser->queue[i].offset += size;
1106 		}
1107 		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1108 		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1109 			size = sizeof(struct ibv_flow_spec_tcp_udp);
1110 			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1111 				.type = inner | ((i == HASH_RXQ_UDPV4 ||
1112 					  i == HASH_RXQ_UDPV6) ?
1113 					 IBV_FLOW_SPEC_UDP :
1114 					 IBV_FLOW_SPEC_TCP),
1115 				.size = size,
1116 			};
1117 			if (parser->queue[i].ibv_attr) {
1118 				dst = (void *)((uintptr_t)
1119 					       parser->queue[i].ibv_attr +
1120 					       parser->queue[i].offset);
1121 				memcpy(dst, &specs, size);
1122 				++parser->queue[i].ibv_attr->num_of_specs;
1123 			}
1124 			parser->queue[i].offset += size;
1125 		}
1126 	}
1127 }
1128 
1129 /**
1130  * Update flows according to pattern and RSS hash fields.
1131  *
1132  * @param[in, out] parser
1133  *   Internal parser structure.
1134  *
1135  * @return
1136  *   0 on success, a negative errno value otherwise and rte_errno is set.
1137  */
1138 static int
1139 mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
1140 {
1141 	unsigned int i;
1142 	enum hash_rxq_type start;
1143 	enum hash_rxq_type layer;
1144 	int outer = parser->tunnel && parser->rss_conf.level < 2;
1145 	uint64_t rss = parser->rss_conf.types;
1146 
1147 	/* Default to outer RSS. */
1148 	if (!parser->rss_conf.level)
1149 		parser->rss_conf.level = 1;
1150 	layer = outer ? parser->out_layer : parser->layer;
1151 	if (layer == HASH_RXQ_TUNNEL)
1152 		layer = HASH_RXQ_ETH;
1153 	if (outer) {
1154 		/* Only one hash type for outer RSS. */
1155 		if (rss && layer == HASH_RXQ_ETH) {
1156 			start = HASH_RXQ_TCPV4;
1157 		} else if (rss && layer != HASH_RXQ_ETH &&
1158 			   !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
1159 			/* If RSS does not match the L4 pattern, try L3 RSS. */
1160 			if (layer < HASH_RXQ_IPV4)
1161 				layer = HASH_RXQ_IPV4;
1162 			else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
1163 				layer = HASH_RXQ_IPV6;
1164 			start = layer;
1165 		} else {
1166 			start = layer;
1167 		}
1168 		/* Scan first valid hash type. */
1169 		for (i = start; rss && i <= layer; ++i) {
1170 			if (!parser->queue[i].ibv_attr)
1171 				continue;
1172 			if (hash_rxq_init[i].dpdk_rss_hf & rss)
1173 				break;
1174 		}
1175 		if (rss && i <= layer)
1176 			parser->queue[layer].hash_fields =
1177 					hash_rxq_init[i].hash_fields;
1178 		/* Trim unused hash types. */
1179 		for (i = 0; i != hash_rxq_init_n; ++i) {
1180 			if (parser->queue[i].ibv_attr && i != layer) {
1181 				rte_free(parser->queue[i].ibv_attr);
1182 				parser->queue[i].ibv_attr = NULL;
1183 			}
1184 		}
1185 	} else {
1186 		/* Expand for inner or normal RSS. */
1187 		if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
1188 			start = HASH_RXQ_TCPV4;
1189 		else if (rss && layer == HASH_RXQ_IPV6)
1190 			start = HASH_RXQ_TCPV6;
1191 		else
1192 			start = layer;
1193 		/* For an L4 pattern, fall back to L3 RSS when no L4 RSS matches. */
1194 		/* Trim unused hash types. */
1195 		for (i = 0; i != hash_rxq_init_n; ++i) {
1196 			if (!parser->queue[i].ibv_attr)
1197 				continue;
1198 			if (i < start || i > layer) {
1199 				rte_free(parser->queue[i].ibv_attr);
1200 				parser->queue[i].ibv_attr = NULL;
1201 				continue;
1202 			}
1203 			if (!rss)
1204 				continue;
1205 			if (hash_rxq_init[i].dpdk_rss_hf & rss) {
1206 				parser->queue[i].hash_fields =
1207 						hash_rxq_init[i].hash_fields;
1208 			} else if (i != layer) {
1209 				/* Remove unused RSS expansion. */
1210 				rte_free(parser->queue[i].ibv_attr);
1211 				parser->queue[i].ibv_attr = NULL;
1212 			} else if (layer < HASH_RXQ_IPV4 &&
1213 				   (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &
1214 				    rss)) {
1215 				/* Allow IPv4 RSS on L4 pattern. */
1216 				parser->queue[i].hash_fields =
1217 					hash_rxq_init[HASH_RXQ_IPV4]
1218 						.hash_fields;
1219 			} else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
1220 				   (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &
1221 				    rss)) {
1222 				/* Allow IPv6 RSS on L4 pattern. */
1223 				parser->queue[i].hash_fields =
1224 					hash_rxq_init[HASH_RXQ_IPV6]
1225 						.hash_fields;
1226 			}
1227 		}
1228 	}
1229 	return 0;
1230 }
1231 
1232 /**
1233  * Validate and convert a flow supported by the NIC.
1234  *
1235  * @param dev
1236  *   Pointer to Ethernet device.
1237  * @param[in] attr
1238  *   Flow rule attributes.
1239  * @param[in] pattern
1240  *   Pattern specification (list terminated by the END pattern item).
1241  * @param[in] actions
1242  *   Associated actions (list terminated by the END action).
1243  * @param[out] error
1244  *   Perform verbose error reporting if not NULL.
1245  * @param[in, out] parser
1246  *   Internal parser structure.
1247  *
1248  * @return
1249  *   0 on success, a negative errno value otherwise and rte_errno is set.
1250  */
1251 static int
1252 mlx5_flow_convert(struct rte_eth_dev *dev,
1253 		  const struct rte_flow_attr *attr,
1254 		  const struct rte_flow_item items[],
1255 		  const struct rte_flow_action actions[],
1256 		  struct rte_flow_error *error,
1257 		  struct mlx5_flow_parse *parser)
1258 {
1259 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1260 	unsigned int i;
1261 	int ret;
1262 
1263 	/* First step. Validate the attributes, items and actions. */
1264 	*parser = (struct mlx5_flow_parse){
1265 		.create = parser->create,
1266 		.layer = HASH_RXQ_ETH,
1267 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
1268 	};
1269 	ret = mlx5_flow_convert_attributes(attr, error);
1270 	if (ret)
1271 		return ret;
1272 	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1273 	if (ret)
1274 		return ret;
1275 	ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
1276 	if (ret)
1277 		return ret;
1278 	mlx5_flow_convert_finalise(parser);
1279 	/*
1280 	 * Second step.
1281 	 * Allocate the memory space to store verbs specifications.
1282 	 */
1283 	if (parser->drop) {
1284 		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1285 
1286 		parser->queue[HASH_RXQ_ETH].ibv_attr =
1287 			mlx5_flow_convert_allocate(offset, error);
1288 		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1289 			goto exit_enomem;
1290 		parser->queue[HASH_RXQ_ETH].offset =
1291 			sizeof(struct ibv_flow_attr);
1292 	} else {
1293 		for (i = 0; i != hash_rxq_init_n; ++i) {
1294 			unsigned int offset;
1295 
1296 			offset = parser->queue[i].offset;
1297 			parser->queue[i].ibv_attr =
1298 				mlx5_flow_convert_allocate(offset, error);
1299 			if (!parser->queue[i].ibv_attr)
1300 				goto exit_enomem;
1301 			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1302 		}
1303 	}
1304 	/* Third step. Conversion parse, fill the specifications. */
1305 	parser->inner = 0;
1306 	parser->tunnel = 0;
1307 	parser->layer = HASH_RXQ_ETH;
1308 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1309 		struct mlx5_flow_data data = {
1310 			.dev = dev,
1311 			.parser = parser,
1312 			.error = error,
1313 		};
1314 
1315 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1316 			continue;
1317 		cur_item = &mlx5_flow_items[items->type];
1318 		ret = cur_item->convert(items,
1319 					(cur_item->default_mask ?
1320 					 cur_item->default_mask :
1321 					 cur_item->mask),
1322 					 &data);
1323 		if (ret)
1324 			goto exit_free;
1325 	}
1326 	if (!parser->drop) {
1327 		/* RSS check, remove unused hash types. */
1328 		ret = mlx5_flow_convert_rss(parser);
1329 		if (ret)
1330 			goto exit_free;
1331 		/* Complete missing specification. */
1332 		mlx5_flow_convert_finalise(parser);
1333 	}
1334 	mlx5_flow_update_priority(dev, parser, attr);
1335 	if (parser->mark)
1336 		mlx5_flow_create_flag_mark(parser, parser->mark_id);
1337 	if (parser->count && parser->create) {
1338 		mlx5_flow_create_count(dev, parser);
1339 		if (!parser->cs)
1340 			goto exit_count_error;
1341 	}
1342 exit_free:
1343 	/* Only verification is expected, all resources should be released. */
1344 	if (!parser->create) {
1345 		for (i = 0; i != hash_rxq_init_n; ++i) {
1346 			if (parser->queue[i].ibv_attr) {
1347 				rte_free(parser->queue[i].ibv_attr);
1348 				parser->queue[i].ibv_attr = NULL;
1349 			}
1350 		}
1351 	}
1352 	return ret;
1353 exit_enomem:
1354 	for (i = 0; i != hash_rxq_init_n; ++i) {
1355 		if (parser->queue[i].ibv_attr) {
1356 			rte_free(parser->queue[i].ibv_attr);
1357 			parser->queue[i].ibv_attr = NULL;
1358 		}
1359 	}
1360 	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1361 			   NULL, "cannot allocate verbs spec attributes");
1362 	return -rte_errno;
1363 exit_count_error:
1364 	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1365 			   NULL, "cannot create counter");
1366 	return -rte_errno;
1367 }
1368 
1369 /**
1370  * Copy the specification created into the flow.
1371  *
1372  * @param parser
1373  *   Internal parser structure.
1374  * @param src
1375  *   Create specification.
1376  * @param size
1377  *   Size in bytes of the specification to copy.
1378  */
1379 static void
1380 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1381 		      unsigned int size)
1382 {
1383 	unsigned int i;
1384 	void *dst;
1385 
1386 	for (i = 0; i != hash_rxq_init_n; ++i) {
1387 		if (!parser->queue[i].ibv_attr)
1388 			continue;
1389 		dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1390 				parser->queue[i].offset);
1391 		memcpy(dst, src, size);
1392 		++parser->queue[i].ibv_attr->num_of_specs;
1393 		parser->queue[i].offset += size;
1394 	}
1395 }
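
/*
 * Illustrative note (not upstream): this helper appends the same Verbs
 * specification to every hash Rx queue type still carrying an ibv_attr,
 * bumping num_of_specs and the write offset each time. Essentially all
 * item conversion callbacks funnel through it; VLAN is the notable
 * exception, patching the Ethernet specification in place instead (see
 * mlx5_flow_create_vlan()).
 */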
1396 
1397 /**
1398  * Convert Ethernet item to Verbs specification.
1399  *
1400  * @param item[in]
1401  *   Item specification.
1402  * @param default_mask[in]
1403  *   Default bit-masks to use when item->mask is not provided.
1404  * @param data[in, out]
1405  *   User structure.
1406  *
1407  * @return
1408  *   0 on success, a negative errno value otherwise and rte_errno is set.
1409  */
1410 static int
1411 mlx5_flow_create_eth(const struct rte_flow_item *item,
1412 		     const void *default_mask,
1413 		     struct mlx5_flow_data *data)
1414 {
1415 	const struct rte_flow_item_eth *spec = item->spec;
1416 	const struct rte_flow_item_eth *mask = item->mask;
1417 	struct mlx5_flow_parse *parser = data->parser;
1418 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1419 	struct ibv_flow_spec_eth eth = {
1420 		.type = parser->inner | IBV_FLOW_SPEC_ETH,
1421 		.size = eth_size,
1422 	};
1423 
1424 	parser->layer = HASH_RXQ_ETH;
1425 	if (spec) {
1426 		unsigned int i;
1427 
1428 		if (!mask)
1429 			mask = default_mask;
1430 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1431 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1432 		eth.val.ether_type = spec->type;
1433 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1434 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1435 		eth.mask.ether_type = mask->type;
1436 		/* Remove unwanted bits from values. */
1437 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1438 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1439 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
1440 		}
1441 		eth.val.ether_type &= eth.mask.ether_type;
1442 	}
1443 	mlx5_flow_create_copy(parser, &eth, eth_size);
1444 	return 0;
1445 }
1446 
1447 /**
1448  * Convert VLAN item to Verbs specification.
1449  *
1450  * @param item[in]
1451  *   Item specification.
1452  * @param default_mask[in]
1453  *   Default bit-masks to use when item->mask is not provided.
1454  * @param data[in, out]
1455  *   User structure.
1456  *
1457  * @return
1458  *   0 on success, a negative errno value otherwise and rte_errno is set.
1459  */
1460 static int
1461 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1462 		      const void *default_mask,
1463 		      struct mlx5_flow_data *data)
1464 {
1465 	const struct rte_flow_item_vlan *spec = item->spec;
1466 	const struct rte_flow_item_vlan *mask = item->mask;
1467 	struct mlx5_flow_parse *parser = data->parser;
1468 	struct ibv_flow_spec_eth *eth;
1469 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1470 	const char *msg = "VLAN cannot be empty";
1471 
1472 	if (spec) {
1473 		unsigned int i;
1474 		if (!mask)
1475 			mask = default_mask;
1476 
1477 		for (i = 0; i != hash_rxq_init_n; ++i) {
1478 			if (!parser->queue[i].ibv_attr)
1479 				continue;
1480 
1481 			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1482 				       parser->queue[i].offset - eth_size);
1483 			eth->val.vlan_tag = spec->tci;
1484 			eth->mask.vlan_tag = mask->tci;
1485 			eth->val.vlan_tag &= eth->mask.vlan_tag;
1486 			/*
1487 			 * From verbs perspective an empty VLAN is equivalent
1488 			 * to a packet without VLAN layer.
1489 			 */
1490 			if (!eth->mask.vlan_tag)
1491 				goto error;
1492 			/* Outer TPID cannot be matched. */
1493 			if (eth->mask.ether_type) {
1494 				msg = "VLAN TPID matching is not supported";
1495 				goto error;
1496 			}
1497 			eth->val.ether_type = spec->inner_type;
1498 			eth->mask.ether_type = mask->inner_type;
1499 			eth->val.ether_type &= eth->mask.ether_type;
1500 		}
1501 		return 0;
1502 	}
1503 error:
1504 	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1505 				  item, msg);
1506 }
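
/*
 * Illustrative note (not upstream): VLAN has a dst_sz of 0 in
 * mlx5_flow_items[] because the TCI and inner EtherType are folded into
 * the ibv_flow_spec_eth written by the preceding ETH item (hence the
 * "offset - eth_size" arithmetic above) rather than into a specification
 * of their own.
 */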
1507 
1508 /**
1509  * Convert IPv4 item to Verbs specification.
1510  *
1511  * @param item[in]
1512  *   Item specification.
1513  * @param default_mask[in]
1514  *   Default bit-masks to use when item->mask is not provided.
1515  * @param data[in, out]
1516  *   User structure.
1517  *
1518  * @return
1519  *   0 on success, a negative errno value otherwise and rte_errno is set.
1520  */
1521 static int
1522 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1523 		      const void *default_mask,
1524 		      struct mlx5_flow_data *data)
1525 {
1526 	struct priv *priv = data->dev->data->dev_private;
1527 	const struct rte_flow_item_ipv4 *spec = item->spec;
1528 	const struct rte_flow_item_ipv4 *mask = item->mask;
1529 	struct mlx5_flow_parse *parser = data->parser;
1530 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1531 	struct ibv_flow_spec_ipv4_ext ipv4 = {
1532 		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1533 		.size = ipv4_size,
1534 	};
1535 
1536 	if (parser->layer == HASH_RXQ_TUNNEL &&
1537 	    parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1538 	    !priv->config.l3_vxlan_en)
1539 		return rte_flow_error_set(data->error, EINVAL,
1540 					  RTE_FLOW_ERROR_TYPE_ITEM,
1541 					  item,
1542 					  "L3 VXLAN not enabled by device"
1543 					  " parameter and/or not configured"
1544 					  " in firmware");
1545 	parser->layer = HASH_RXQ_IPV4;
1546 	if (spec) {
1547 		if (!mask)
1548 			mask = default_mask;
1549 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1550 			.src_ip = spec->hdr.src_addr,
1551 			.dst_ip = spec->hdr.dst_addr,
1552 			.proto = spec->hdr.next_proto_id,
1553 			.tos = spec->hdr.type_of_service,
1554 		};
1555 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1556 			.src_ip = mask->hdr.src_addr,
1557 			.dst_ip = mask->hdr.dst_addr,
1558 			.proto = mask->hdr.next_proto_id,
1559 			.tos = mask->hdr.type_of_service,
1560 		};
1561 		/* Remove unwanted bits from values. */
1562 		ipv4.val.src_ip &= ipv4.mask.src_ip;
1563 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1564 		ipv4.val.proto &= ipv4.mask.proto;
1565 		ipv4.val.tos &= ipv4.mask.tos;
1566 	}
1567 	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1568 	return 0;
1569 }
1570 
1571 /**
1572  * Convert IPv6 item to Verbs specification.
1573  *
1574  * @param item[in]
1575  *   Item specification.
1576  * @param default_mask[in]
1577  *   Default bit-masks to use when item->mask is not provided.
1578  * @param data[in, out]
1579  *   User structure.
1580  *
1581  * @return
1582  *   0 on success, a negative errno value otherwise and rte_errno is set.
1583  */
1584 static int
1585 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1586 		      const void *default_mask,
1587 		      struct mlx5_flow_data *data)
1588 {
1589 	struct priv *priv = data->dev->data->dev_private;
1590 	const struct rte_flow_item_ipv6 *spec = item->spec;
1591 	const struct rte_flow_item_ipv6 *mask = item->mask;
1592 	struct mlx5_flow_parse *parser = data->parser;
1593 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1594 	struct ibv_flow_spec_ipv6 ipv6 = {
1595 		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
1596 		.size = ipv6_size,
1597 	};
1598 
1599 	if (parser->layer == HASH_RXQ_TUNNEL &&
1600 	    parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1601 	    !priv->config.l3_vxlan_en)
1602 		return rte_flow_error_set(data->error, EINVAL,
1603 					  RTE_FLOW_ERROR_TYPE_ITEM,
1604 					  item,
1605 					  "L3 VXLAN not enabled by device"
1606 					  " parameter and/or not configured"
1607 					  " in firmware");
1608 	parser->layer = HASH_RXQ_IPV6;
1609 	if (spec) {
1610 		unsigned int i;
1611 		uint32_t vtc_flow_val;
1612 		uint32_t vtc_flow_mask;
1613 
1614 		if (!mask)
1615 			mask = default_mask;
1616 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1617 		       RTE_DIM(ipv6.val.src_ip));
1618 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1619 		       RTE_DIM(ipv6.val.dst_ip));
1620 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1621 		       RTE_DIM(ipv6.mask.src_ip));
1622 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1623 		       RTE_DIM(ipv6.mask.dst_ip));
1624 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1625 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1626 		ipv6.val.flow_label =
1627 			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1628 					 IPV6_HDR_FL_SHIFT);
1629 		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1630 					 IPV6_HDR_TC_SHIFT;
1631 		ipv6.val.next_hdr = spec->hdr.proto;
1632 		ipv6.val.hop_limit = spec->hdr.hop_limits;
1633 		ipv6.mask.flow_label =
1634 			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1635 					 IPV6_HDR_FL_SHIFT);
1636 		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1637 					  IPV6_HDR_TC_SHIFT;
1638 		ipv6.mask.next_hdr = mask->hdr.proto;
1639 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
1640 		/* Remove unwanted bits from values. */
1641 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1642 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1643 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1644 		}
1645 		ipv6.val.flow_label &= ipv6.mask.flow_label;
1646 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1647 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1648 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1649 	}
1650 	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1651 	return 0;
1652 }
1653 
1654 /**
1655  * Convert UDP item to Verbs specification.
1656  *
1657  * @param item[in]
1658  *   Item specification.
1659  * @param default_mask[in]
1660  *   Default bit-masks to use when item->mask is not provided.
1661  * @param data[in, out]
1662  *   User structure.
1663  *
1664  * @return
1665  *   0 on success, a negative errno value otherwise and rte_errno is set.
1666  */
1667 static int
1668 mlx5_flow_create_udp(const struct rte_flow_item *item,
1669 		     const void *default_mask,
1670 		     struct mlx5_flow_data *data)
1671 {
1672 	const struct rte_flow_item_udp *spec = item->spec;
1673 	const struct rte_flow_item_udp *mask = item->mask;
1674 	struct mlx5_flow_parse *parser = data->parser;
1675 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1676 	struct ibv_flow_spec_tcp_udp udp = {
1677 		.type = parser->inner | IBV_FLOW_SPEC_UDP,
1678 		.size = udp_size,
1679 	};
1680 
1681 	if (parser->layer == HASH_RXQ_IPV4)
1682 		parser->layer = HASH_RXQ_UDPV4;
1683 	else
1684 		parser->layer = HASH_RXQ_UDPV6;
1685 	if (spec) {
1686 		if (!mask)
1687 			mask = default_mask;
1688 		udp.val.dst_port = spec->hdr.dst_port;
1689 		udp.val.src_port = spec->hdr.src_port;
1690 		udp.mask.dst_port = mask->hdr.dst_port;
1691 		udp.mask.src_port = mask->hdr.src_port;
1692 		/* Remove unwanted bits from values. */
1693 		udp.val.src_port &= udp.mask.src_port;
1694 		udp.val.dst_port &= udp.mask.dst_port;
1695 	}
1696 	mlx5_flow_create_copy(parser, &udp, udp_size);
1697 	return 0;
1698 }
1699 
1700 /**
1701  * Convert TCP item to Verbs specification.
1702  *
1703  * @param item[in]
1704  *   Item specification.
1705  * @param default_mask[in]
1706  *   Default bit-masks to use when item->mask is not provided.
1707  * @param data[in, out]
1708  *   User structure.
1709  *
1710  * @return
1711  *   0 on success, a negative errno value otherwise and rte_errno is set.
1712  */
1713 static int
1714 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1715 		     const void *default_mask,
1716 		     struct mlx5_flow_data *data)
1717 {
1718 	const struct rte_flow_item_tcp *spec = item->spec;
1719 	const struct rte_flow_item_tcp *mask = item->mask;
1720 	struct mlx5_flow_parse *parser = data->parser;
1721 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1722 	struct ibv_flow_spec_tcp_udp tcp = {
1723 		.type = parser->inner | IBV_FLOW_SPEC_TCP,
1724 		.size = tcp_size,
1725 	};
1726 
1727 	if (parser->layer == HASH_RXQ_IPV4)
1728 		parser->layer = HASH_RXQ_TCPV4;
1729 	else
1730 		parser->layer = HASH_RXQ_TCPV6;
1731 	if (spec) {
1732 		if (!mask)
1733 			mask = default_mask;
1734 		tcp.val.dst_port = spec->hdr.dst_port;
1735 		tcp.val.src_port = spec->hdr.src_port;
1736 		tcp.mask.dst_port = mask->hdr.dst_port;
1737 		tcp.mask.src_port = mask->hdr.src_port;
1738 		/* Remove unwanted bits from values. */
1739 		tcp.val.src_port &= tcp.mask.src_port;
1740 		tcp.val.dst_port &= tcp.mask.dst_port;
1741 	}
1742 	mlx5_flow_create_copy(parser, &tcp, tcp_size);
1743 	return 0;
1744 }
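/*
 * A minimal sketch of how the UDP/TCP converters above turn an rte_flow item
 * into a Verbs L4 specification; the item, mask and port values below are
 * hypothetical and only illustrate the spec/mask masking step:
 *
 *	const struct rte_flow_item_udp spec = {
 *		.hdr = { .dst_port = RTE_BE16(4789) },
 *	};
 *	const struct rte_flow_item_udp mask = {
 *		.hdr = { .dst_port = RTE_BE16(0xffff) },
 *	};
 *	const struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_UDP,
 *		.spec = &spec,
 *		.mask = &mask,
 *	};
 *
 * With such an item, udp.val.dst_port holds 4789 (big endian),
 * udp.mask.dst_port holds 0xffff and udp.val.src_port ends up zeroed by the
 * "remove unwanted bits" step, so only the destination port is matched.
 */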
1745 
1746 /**
1747  * Convert VXLAN item to Verbs specification.
1748  *
1749  * @param[in] item
1750  *   Item specification.
1751  * @param[in] default_mask
1752  *   Default bit-masks to use when item->mask is not provided.
1753  * @param[in, out] data
1754  *   User structure.
1755  *
1756  * @return
1757  *   0 on success, a negative errno value otherwise and rte_errno is set.
1758  */
1759 static int
1760 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1761 		       const void *default_mask,
1762 		       struct mlx5_flow_data *data)
1763 {
1764 	const struct rte_flow_item_vxlan *spec = item->spec;
1765 	const struct rte_flow_item_vxlan *mask = item->mask;
1766 	struct mlx5_flow_parse *parser = data->parser;
1767 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1768 	struct ibv_flow_spec_tunnel vxlan = {
1769 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1770 		.size = size,
1771 	};
1772 	union vni {
1773 		uint32_t vlan_id;
1774 		uint8_t vni[4];
1775 	} id;
1776 
1777 	id.vni[0] = 0;
1778 	parser->inner = IBV_FLOW_SPEC_INNER;
1779 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
1780 	parser->out_layer = parser->layer;
1781 	parser->layer = HASH_RXQ_TUNNEL;
1782 	/* Default VXLAN to outer RSS. */
1783 	if (!parser->rss_conf.level)
1784 		parser->rss_conf.level = 1;
1785 	if (spec) {
1786 		if (!mask)
1787 			mask = default_mask;
1788 		memcpy(&id.vni[1], spec->vni, 3);
1789 		vxlan.val.tunnel_id = id.vlan_id;
1790 		memcpy(&id.vni[1], mask->vni, 3);
1791 		vxlan.mask.tunnel_id = id.vlan_id;
1792 		/* Remove unwanted bits from values. */
1793 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1794 	}
1795 	/*
1796 	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1797 	 * layer is defined in the Verbs specification, it is interpreted as a
1798 	 * wildcard and all packets will match this rule; if it follows a full
1799 	 * stack of layers (e.g. eth / ipv4 / udp), all packets matching those
1800 	 * outer layers will also match this rule.
1801 	 * To avoid such a situation, VNI 0 is currently refused.
1802 	 */
1803 	/* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1804 	if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1805 		return rte_flow_error_set(data->error, EINVAL,
1806 					  RTE_FLOW_ERROR_TYPE_ITEM,
1807 					  item,
1808 					  "VxLAN vni cannot be 0");
1809 	mlx5_flow_create_copy(parser, &vxlan, size);
1810 	return 0;
1811 }
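/*
 * A self-contained sketch of the VNI packing performed above (names are
 * illustrative only): the three VNI bytes are copied behind a zero byte and
 * the aggregate is read back as the 32-bit tunnel_id.
 *
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static uint32_t
 *	vni_to_tunnel_id(const uint8_t vni[3])
 *	{
 *		union {
 *			uint32_t vlan_id;
 *			uint8_t vni[4];
 *		} id;
 *
 *		id.vni[0] = 0;
 *		memcpy(&id.vni[1], vni, 3);
 *		return id.vlan_id;
 *	}
 */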
1812 
1813 /**
1814  * Convert VXLAN-GPE item to Verbs specification.
1815  *
1816  * @param[in] item
1817  *   Item specification.
1818  * @param[in] default_mask
1819  *   Default bit-masks to use when item->mask is not provided.
1820  * @param[in, out] data
1821  *   User structure.
1822  *
1823  * @return
1824  *   0 on success, a negative errno value otherwise and rte_errno is set.
1825  */
1826 static int
1827 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
1828 			   const void *default_mask,
1829 			   struct mlx5_flow_data *data)
1830 {
1831 	struct priv *priv = data->dev->data->dev_private;
1832 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1833 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1834 	struct mlx5_flow_parse *parser = data->parser;
1835 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1836 	struct ibv_flow_spec_tunnel vxlan = {
1837 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1838 		.size = size,
1839 	};
1840 	union vni {
1841 		uint32_t vlan_id;
1842 		uint8_t vni[4];
1843 	} id;
1844 
1845 	if (!priv->config.l3_vxlan_en)
1846 		return rte_flow_error_set(data->error, EINVAL,
1847 					  RTE_FLOW_ERROR_TYPE_ITEM,
1848 					  item,
1849 					  "L3 VXLAN not enabled by device"
1850 					  " parameter and/or not configured"
1851 					  " in firmware");
1852 	id.vni[0] = 0;
1853 	parser->inner = IBV_FLOW_SPEC_INNER;
1854 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)];
1855 	parser->out_layer = parser->layer;
1856 	parser->layer = HASH_RXQ_TUNNEL;
1857 	/* Default VXLAN-GPE to outer RSS. */
1858 	if (!parser->rss_conf.level)
1859 		parser->rss_conf.level = 1;
1860 	if (spec) {
1861 		if (!mask)
1862 			mask = default_mask;
1863 		memcpy(&id.vni[1], spec->vni, 3);
1864 		vxlan.val.tunnel_id = id.vlan_id;
1865 		memcpy(&id.vni[1], mask->vni, 3);
1866 		vxlan.mask.tunnel_id = id.vlan_id;
1867 		if (spec->protocol)
1868 			return rte_flow_error_set(data->error, EINVAL,
1869 						  RTE_FLOW_ERROR_TYPE_ITEM,
1870 						  item,
1871 						  "VxLAN-GPE protocol not"
1872 						  " supported");
1873 		/* Remove unwanted bits from values. */
1874 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1875 	}
1876 	/*
1877 	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1878 	 * layer is defined in the Verbs specification, it is interpreted as a
1879 	 * wildcard and all packets will match this rule; if it follows a full
1880 	 * stack of layers (e.g. eth / ipv4 / udp), all packets matching those
1881 	 * outer layers will also match this rule.
1882 	 * To avoid such a situation, VNI 0 is currently refused.
1883 	 */
1884 	/* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1885 	if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1886 		return rte_flow_error_set(data->error, EINVAL,
1887 					  RTE_FLOW_ERROR_TYPE_ITEM,
1888 					  item,
1889 					  "VxLAN-GPE vni cannot be 0");
1890 	mlx5_flow_create_copy(parser, &vxlan, size);
1891 	return 0;
1892 }
1893 
1894 /**
1895  * Convert GRE item to Verbs specification.
1896  *
1897  * @param[in] item
1898  *   Item specification.
1899  * @param[in] default_mask
1900  *   Default bit-masks to use when item->mask is not provided.
1901  * @param[in, out] data
1902  *   User structure.
1903  *
1904  * @return
1905  *   0 on success, a negative errno value otherwise and rte_errno is set.
1906  */
1907 static int
1908 mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
1909 		     const void *default_mask __rte_unused,
1910 		     struct mlx5_flow_data *data)
1911 {
1912 	struct mlx5_flow_parse *parser = data->parser;
1913 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1914 	struct ibv_flow_spec_tunnel tunnel = {
1915 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1916 		.size = size,
1917 	};
1918 	struct ibv_flow_spec_ipv4_ext *ipv4;
1919 	struct ibv_flow_spec_ipv6 *ipv6;
1920 	unsigned int i;
1921 
1922 	parser->inner = IBV_FLOW_SPEC_INNER;
1923 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
1924 	parser->out_layer = parser->layer;
1925 	parser->layer = HASH_RXQ_TUNNEL;
1926 	/* Default GRE to inner RSS. */
1927 	if (!parser->rss_conf.level)
1928 		parser->rss_conf.level = 2;
1929 	/* Update the outer IP layer protocol to GRE. */
1930 	for (i = 0; i != hash_rxq_init_n; ++i) {
1931 		if (!parser->queue[i].ibv_attr)
1932 			continue;
1933 		if (parser->out_layer == HASH_RXQ_IPV4) {
1934 			ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1935 				parser->queue[i].offset -
1936 				sizeof(struct ibv_flow_spec_ipv4_ext));
1937 			if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
1938 				break;
1939 			ipv4->val.proto = MLX5_GRE;
1940 			ipv4->mask.proto = 0xff;
1941 		} else if (parser->out_layer == HASH_RXQ_IPV6) {
1942 			ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1943 				parser->queue[i].offset -
1944 				sizeof(struct ibv_flow_spec_ipv6));
1945 			if (ipv6->mask.next_hdr &&
1946 			    ipv6->val.next_hdr != MLX5_GRE)
1947 				break;
1948 			ipv6->val.next_hdr = MLX5_GRE;
1949 			ipv6->mask.next_hdr = 0xff;
1950 		}
1951 	}
1952 	if (i != hash_rxq_init_n)
1953 		return rte_flow_error_set(data->error, EINVAL,
1954 					  RTE_FLOW_ERROR_TYPE_ITEM,
1955 					  item,
1956 					  "IP protocol of GRE must be 47");
1957 	mlx5_flow_create_copy(parser, &tunnel, size);
1958 	return 0;
1959 }
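/*
 * The loop above patches the previously written outer IP specification in
 * place.  Since specs are appended back to back and parser->queue[i].offset
 * always points past the last one, stepping back by the size of the expected
 * spec reaches it, e.g. for an outer IPv4 layer (sketch of the same pointer
 * arithmetic used above):
 *
 *	struct ibv_flow_spec_ipv4_ext *outer =
 *		(void *)((uintptr_t)parser->queue[i].ibv_attr +
 *			 parser->queue[i].offset -
 *			 sizeof(struct ibv_flow_spec_ipv4_ext));
 *
 * The protocol is then forced to 47 (MLX5_GRE) unless the user already
 * requested a conflicting value, which is rejected right after the loop.
 */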
1960 
1961 /**
1962  * Convert mark/flag action to Verbs specification.
1963  *
1964  * @param parser
1965  *   Internal parser structure.
1966  * @param mark_id
1967  *   Mark identifier.
1968  *
1969  * @return
1970  *   0 on success, a negative errno value otherwise and rte_errno is set.
1971  */
1972 static int
1973 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1974 {
1975 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1976 	struct ibv_flow_spec_action_tag tag = {
1977 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1978 		.size = size,
1979 		.tag_id = mlx5_flow_mark_set(mark_id),
1980 	};
1981 
1982 	assert(parser->mark);
1983 	mlx5_flow_create_copy(parser, &tag, size);
1984 	return 0;
1985 }
1986 
1987 /**
1988  * Convert count action to Verbs specification.
1989  *
1990  * @param dev
1991  *   Pointer to Ethernet device.
1992  * @param parser
1993  *   Pointer to MLX5 flow parser structure.
1994  *
1995  * @return
1996  *   0 on success, a negative errno value otherwise and rte_errno is set.
1997  */
1998 static int
1999 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
2000 		       struct mlx5_flow_parse *parser __rte_unused)
2001 {
2002 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2003 	struct priv *priv = dev->data->dev_private;
2004 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
2005 	struct ibv_counter_set_init_attr init_attr = {0};
2006 	struct ibv_flow_spec_counter_action counter = {
2007 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
2008 		.size = size,
2009 		.counter_set_handle = 0,
2010 	};
2011 
2012 	init_attr.counter_set_id = 0;
2013 	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
2014 	if (!parser->cs) {
2015 		rte_errno = EINVAL;
2016 		return -rte_errno;
2017 	}
2018 	counter.counter_set_handle = parser->cs->handle;
2019 	mlx5_flow_create_copy(parser, &counter, size);
2020 #endif
2021 	return 0;
2022 }
2023 
2024 /**
2025  * Complete flow rule creation with a drop queue.
2026  *
2027  * @param dev
2028  *   Pointer to Ethernet device.
2029  * @param parser
2030  *   Internal parser structure.
2031  * @param flow
2032  *   Pointer to the rte_flow.
2033  * @param[out] error
2034  *   Perform verbose error reporting if not NULL.
2035  *
2036  * @return
2037  *   0 on success, a negative errno value otherwise and rte_errno is set.
2038  */
2039 static int
2040 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
2041 				   struct mlx5_flow_parse *parser,
2042 				   struct rte_flow *flow,
2043 				   struct rte_flow_error *error)
2044 {
2045 	struct priv *priv = dev->data->dev_private;
2046 	struct ibv_flow_spec_action_drop *drop;
2047 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
2048 
2049 	assert(priv->pd);
2050 	assert(priv->ctx);
2051 	flow->drop = 1;
2052 	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
2053 			parser->queue[HASH_RXQ_ETH].offset);
2054 	*drop = (struct ibv_flow_spec_action_drop){
2055 			.type = IBV_FLOW_SPEC_ACTION_DROP,
2056 			.size = size,
2057 	};
2058 	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
2059 	parser->queue[HASH_RXQ_ETH].offset += size;
2060 	flow->frxq[HASH_RXQ_ETH].ibv_attr =
2061 		parser->queue[HASH_RXQ_ETH].ibv_attr;
2062 	if (parser->count)
2063 		flow->cs = parser->cs;
2064 	if (!priv->dev->data->dev_started)
2065 		return 0;
2066 	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
2067 	flow->frxq[HASH_RXQ_ETH].ibv_flow =
2068 		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
2069 				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
2070 	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2071 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
2072 				   NULL, "flow rule creation failure");
2073 		goto error;
2074 	}
2075 	return 0;
2076 error:
2077 	assert(flow);
2078 	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2079 		claim_zero(mlx5_glue->destroy_flow
2080 			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2081 		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2082 	}
2083 	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
2084 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2085 		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
2086 	}
2087 	if (flow->cs) {
2088 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2089 		flow->cs = NULL;
2090 		parser->cs = NULL;
2091 	}
2092 	return -rte_errno;
2093 }
2094 
2095 /**
2096  * Create hash Rx queues when RSS is enabled.
2097  *
2098  * @param dev
2099  *   Pointer to Ethernet device.
2100  * @param parser
2101  *   Internal parser structure.
2102  * @param flow
2103  *   Pointer to the rte_flow.
2104  * @param[out] error
2105  *   Perform verbose error reporting if not NULL.
2106  *
2107  * @return
2108  *   0 on success, a negative errno value otherwise and rte_errno is set.
2109  */
2110 static int
2111 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
2112 				  struct mlx5_flow_parse *parser,
2113 				  struct rte_flow *flow,
2114 				  struct rte_flow_error *error)
2115 {
2116 	struct priv *priv = dev->data->dev_private;
2117 	unsigned int i;
2118 
2119 	for (i = 0; i != hash_rxq_init_n; ++i) {
2120 		if (!parser->queue[i].ibv_attr)
2121 			continue;
2122 		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
2123 		parser->queue[i].ibv_attr = NULL;
2124 		flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
2125 		if (!priv->dev->data->dev_started)
2126 			continue;
2127 		flow->frxq[i].hrxq =
2128 			mlx5_hrxq_get(dev,
2129 				      parser->rss_conf.key,
2130 				      parser->rss_conf.key_len,
2131 				      flow->frxq[i].hash_fields,
2132 				      parser->rss_conf.queue,
2133 				      parser->rss_conf.queue_num,
2134 				      parser->tunnel,
2135 				      parser->rss_conf.level);
2136 		if (flow->frxq[i].hrxq)
2137 			continue;
2138 		flow->frxq[i].hrxq =
2139 			mlx5_hrxq_new(dev,
2140 				      parser->rss_conf.key,
2141 				      parser->rss_conf.key_len,
2142 				      flow->frxq[i].hash_fields,
2143 				      parser->rss_conf.queue,
2144 				      parser->rss_conf.queue_num,
2145 				      parser->tunnel,
2146 				      parser->rss_conf.level);
2147 		if (!flow->frxq[i].hrxq) {
2148 			return rte_flow_error_set(error, ENOMEM,
2149 						  RTE_FLOW_ERROR_TYPE_HANDLE,
2150 						  NULL,
2151 						  "cannot create hash rxq");
2152 		}
2153 	}
2154 	return 0;
2155 }
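/*
 * The function above follows a get-or-create pattern for hash Rx queues: an
 * existing hrxq matching the RSS key, hash fields, queue list, tunnel type
 * and level is reused (and reference counted) through mlx5_hrxq_get(), and a
 * new one is only allocated through mlx5_hrxq_new() when no match exists.
 * In abbreviated pseudo-C (argument names shortened for the sketch):
 *
 *	hrxq = mlx5_hrxq_get(dev, key, key_len, fields, queues, n, tun, lvl);
 *	if (!hrxq)
 *		hrxq = mlx5_hrxq_new(dev, key, key_len, fields, queues, n,
 *				     tun, lvl);
 *	if (!hrxq)
 *		return error;
 *
 * mlx5_hrxq_release() in the destroy path drops the reference again.
 */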
2156 
2157 /**
2158  * RXQ update after flow rule creation.
2159  *
2160  * @param dev
2161  *   Pointer to Ethernet device.
2162  * @param flow
2163  *   Pointer to the flow rule.
2164  */
2165 static void
2166 mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
2167 {
2168 	struct priv *priv = dev->data->dev_private;
2169 	unsigned int i;
2170 	unsigned int j;
2171 
2172 	if (!dev->data->dev_started)
2173 		return;
2174 	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2175 		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2176 						 [(*flow->queues)[i]];
2177 		struct mlx5_rxq_ctrl *rxq_ctrl =
2178 			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2179 		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2180 
2181 		rxq_data->mark |= flow->mark;
2182 		if (!tunnel)
2183 			continue;
2184 		rxq_ctrl->tunnel_types[tunnel] += 1;
2185 		/* Clear the tunnel type if more than one tunnel type is set. */
2186 		for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
2187 			if (j == tunnel)
2188 				continue;
2189 			if (rxq_ctrl->tunnel_types[j] > 0) {
2190 				rxq_data->tunnel = 0;
2191 				break;
2192 			}
2193 		}
2194 		if (j == RTE_DIM(rxq_ctrl->tunnel_types))
2195 			rxq_data->tunnel = flow->tunnel;
2196 	}
2197 }
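/*
 * tunnel_types[] acts as a per-Rx-queue reference counter, one slot per
 * tunnel kind; rxq_data->tunnel is only reported to the datapath while
 * exactly one kind is in use.  A minimal standalone sketch of the same
 * bookkeeping, with hypothetical names and sizes:
 *
 *	#include <stdint.h>
 *
 *	#define TUNNEL_KINDS 4
 *
 *	struct example_rxq {
 *		uint32_t tunnel;              // packet type exposed to users
 *		uint16_t refs[TUNNEL_KINDS];  // flows per tunnel kind
 *	};
 *
 *	static void
 *	example_add_tunnel(struct example_rxq *q, unsigned int kind,
 *			   uint32_t ptype)
 *	{
 *		unsigned int i;
 *
 *		q->refs[kind]++;
 *		for (i = 0; i < TUNNEL_KINDS; i++)
 *			if (i != kind && q->refs[i]) {
 *				q->tunnel = 0; // ambiguous, report nothing
 *				return;
 *			}
 *		q->tunnel = ptype;
 *	}
 */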
2198 
2199 /**
2200  * Dump flow hash RX queue detail.
2201  *
2202  * @param dev
2203  *   Pointer to Ethernet device.
2204  * @param flow
2205  *   Pointer to the rte_flow.
2206  * @param hrxq_idx
2207  *   Hash RX queue index.
2208  */
2209 static void
2210 mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused,
2211 	       struct rte_flow *flow __rte_unused,
2212 	       unsigned int hrxq_idx __rte_unused)
2213 {
2214 #ifndef NDEBUG
2215 	uintptr_t spec_ptr;
2216 	uint16_t j;
2217 	char buf[256];
2218 	uint8_t off;
2219 
2220 	spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1);
2221 	for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs;
2222 	     j++) {
2223 		struct ibv_flow_spec *spec = (void *)spec_ptr;
2224 		off += sprintf(buf + off, " %x(%hu)", spec->hdr.type,
2225 			       spec->hdr.size);
2226 		spec_ptr += spec->hdr.size;
2227 	}
2228 	DRV_LOG(DEBUG,
2229 		"port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p,"
2230 		" hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d,"
2231 		" flags:%x, comp_mask:%x specs:%s",
2232 		dev->data->port_id, (void *)flow, hrxq_idx,
2233 		(void *)flow->frxq[hrxq_idx].hrxq,
2234 		(void *)flow->frxq[hrxq_idx].hrxq->qp,
2235 		(void *)flow->frxq[hrxq_idx].hrxq->ind_table,
2236 		flow->frxq[hrxq_idx].hash_fields |
2237 		(flow->tunnel &&
2238 		 flow->rss_conf.level > 1 ? (uint32_t)IBV_RX_HASH_INNER : 0),
2239 		flow->rss_conf.queue_num,
2240 		flow->frxq[hrxq_idx].ibv_attr->num_of_specs,
2241 		flow->frxq[hrxq_idx].ibv_attr->size,
2242 		flow->frxq[hrxq_idx].ibv_attr->priority,
2243 		flow->frxq[hrxq_idx].ibv_attr->type,
2244 		flow->frxq[hrxq_idx].ibv_attr->flags,
2245 		flow->frxq[hrxq_idx].ibv_attr->comp_mask,
2246 		buf);
2247 #endif
2248 }
2249 
2250 /**
2251  * Complete flow rule creation.
2252  *
2253  * @param dev
2254  *   Pointer to Ethernet device.
2255  * @param parser
2256  *   Internal parser structure.
2257  * @param flow
2258  *   Pointer to the rte_flow.
2259  * @param[out] error
2260  *   Perform verbose error reporting if not NULL.
2261  *
2262  * @return
2263  *   0 on success, a negative errno value otherwise and rte_errno is set.
2264  */
2265 static int
2266 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
2267 			      struct mlx5_flow_parse *parser,
2268 			      struct rte_flow *flow,
2269 			      struct rte_flow_error *error)
2270 {
2271 	struct priv *priv = dev->data->dev_private;
2272 	int ret;
2273 	unsigned int i;
2274 	unsigned int flows_n = 0;
2275 
2276 	assert(priv->pd);
2277 	assert(priv->ctx);
2278 	assert(!parser->drop);
2279 	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
2280 	if (ret)
2281 		goto error;
2282 	if (parser->count)
2283 		flow->cs = parser->cs;
2284 	if (!priv->dev->data->dev_started)
2285 		return 0;
2286 	for (i = 0; i != hash_rxq_init_n; ++i) {
2287 		if (!flow->frxq[i].hrxq)
2288 			continue;
2289 		flow->frxq[i].ibv_flow =
2290 			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2291 					       flow->frxq[i].ibv_attr);
2292 		mlx5_flow_dump(dev, flow, i);
2293 		if (!flow->frxq[i].ibv_flow) {
2294 			rte_flow_error_set(error, ENOMEM,
2295 					   RTE_FLOW_ERROR_TYPE_HANDLE,
2296 					   NULL, "flow rule creation failure");
2297 			goto error;
2298 		}
2299 		++flows_n;
2300 	}
2301 	if (!flows_n) {
2302 		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
2303 				   NULL, "internal error in flow creation");
2304 		goto error;
2305 	}
2306 	mlx5_flow_create_update_rxqs(dev, flow);
2307 	return 0;
2308 error:
2309 	ret = rte_errno; /* Save rte_errno before cleanup. */
2310 	assert(flow);
2311 	for (i = 0; i != hash_rxq_init_n; ++i) {
2312 		if (flow->frxq[i].ibv_flow) {
2313 			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
2314 
2315 			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
2316 		}
2317 		if (flow->frxq[i].hrxq)
2318 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2319 		if (flow->frxq[i].ibv_attr)
2320 			rte_free(flow->frxq[i].ibv_attr);
2321 	}
2322 	if (flow->cs) {
2323 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2324 		flow->cs = NULL;
2325 		parser->cs = NULL;
2326 	}
2327 	rte_errno = ret; /* Restore rte_errno. */
2328 	return -rte_errno;
2329 }
2330 
2331 /**
2332  * Convert a flow.
2333  *
2334  * @param dev
2335  *   Pointer to Ethernet device.
2336  * @param list
2337  *   Pointer to a TAILQ flow list.
2338  * @param[in] attr
2339  *   Flow rule attributes.
2340  * @param[in] pattern
2341  *   Pattern specification (list terminated by the END pattern item).
2342  * @param[in] actions
2343  *   Associated actions (list terminated by the END action).
2344  * @param[out] error
2345  *   Perform verbose error reporting if not NULL.
2346  *
2347  * @return
2348  *   A flow on success, NULL otherwise and rte_errno is set.
2349  */
2350 static struct rte_flow *
2351 mlx5_flow_list_create(struct rte_eth_dev *dev,
2352 		      struct mlx5_flows *list,
2353 		      const struct rte_flow_attr *attr,
2354 		      const struct rte_flow_item items[],
2355 		      const struct rte_flow_action actions[],
2356 		      struct rte_flow_error *error)
2357 {
2358 	struct mlx5_flow_parse parser = { .create = 1, };
2359 	struct rte_flow *flow = NULL;
2360 	unsigned int i;
2361 	int ret;
2362 
2363 	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2364 	if (ret)
2365 		goto exit;
2366 	flow = rte_calloc(__func__, 1,
2367 			  sizeof(*flow) +
2368 			  parser.rss_conf.queue_num * sizeof(uint16_t),
2369 			  0);
2370 	if (!flow) {
2371 		rte_flow_error_set(error, ENOMEM,
2372 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2373 				   NULL,
2374 				   "cannot allocate flow memory");
2375 		return NULL;
2376 	}
2377 	/* Copy configuration. */
2378 	flow->queues = (uint16_t (*)[])(flow + 1);
2379 	flow->tunnel = parser.tunnel;
2380 	flow->rss_conf = (struct rte_flow_action_rss){
2381 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2382 		.level = 0,
2383 		.types = parser.rss_conf.types,
2384 		.key_len = parser.rss_conf.key_len,
2385 		.queue_num = parser.rss_conf.queue_num,
2386 		.key = memcpy(flow->rss_key, parser.rss_conf.key,
2387 			      sizeof(*parser.rss_conf.key) *
2388 			      parser.rss_conf.key_len),
2389 		.queue = memcpy(flow->queues, parser.rss_conf.queue,
2390 				sizeof(*parser.rss_conf.queue) *
2391 				parser.rss_conf.queue_num),
2392 	};
2393 	flow->mark = parser.mark;
2394 	/* Finalize the flow. */
2395 	if (parser.drop)
2396 		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
2397 							 error);
2398 	else
2399 		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
2400 	if (ret)
2401 		goto exit;
2402 	TAILQ_INSERT_TAIL(list, flow, next);
2403 	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
2404 		(void *)flow);
2405 	return flow;
2406 exit:
2407 	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
2408 		error->message);
2409 	for (i = 0; i != hash_rxq_init_n; ++i) {
2410 		if (parser.queue[i].ibv_attr)
2411 			rte_free(parser.queue[i].ibv_attr);
2412 	}
2413 	rte_free(flow);
2414 	return NULL;
2415 }
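/*
 * The queue array is carved out of the same allocation as the flow itself:
 * rte_calloc() above reserves sizeof(*flow) plus queue_num uint16_t entries
 * and flow->queues is pointed right past the structure.  A generic,
 * self-contained sketch of this trailing-array idiom (illustrative names):
 *
 *	#include <stdint.h>
 *	#include <stdlib.h>
 *
 *	struct example_flow {
 *		uint16_t (*queues)[];   // points into the same allocation
 *		unsigned int queue_num;
 *	};
 *
 *	static struct example_flow *
 *	example_flow_alloc(unsigned int queue_num)
 *	{
 *		struct example_flow *f;
 *
 *		f = calloc(1, sizeof(*f) + queue_num * sizeof(uint16_t));
 *		if (!f)
 *			return NULL;
 *		f->queues = (uint16_t (*)[])(f + 1);
 *		f->queue_num = queue_num;
 *		return f;
 *	}
 */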
2416 
2417 /**
2418  * Validate a flow supported by the NIC.
2419  *
2420  * @see rte_flow_validate()
2421  * @see rte_flow_ops
2422  */
2423 int
2424 mlx5_flow_validate(struct rte_eth_dev *dev,
2425 		   const struct rte_flow_attr *attr,
2426 		   const struct rte_flow_item items[],
2427 		   const struct rte_flow_action actions[],
2428 		   struct rte_flow_error *error)
2429 {
2430 	struct mlx5_flow_parse parser = { .create = 0, };
2431 
2432 	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2433 }
2434 
2435 /**
2436  * Create a flow.
2437  *
2438  * @see rte_flow_create()
2439  * @see rte_flow_ops
2440  */
2441 struct rte_flow *
2442 mlx5_flow_create(struct rte_eth_dev *dev,
2443 		 const struct rte_flow_attr *attr,
2444 		 const struct rte_flow_item items[],
2445 		 const struct rte_flow_action actions[],
2446 		 struct rte_flow_error *error)
2447 {
2448 	struct priv *priv = dev->data->dev_private;
2449 
2450 	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2451 				     error);
2452 }
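/*
 * This is the driver callback behind rte_flow_create().  A hedged
 * application-side sketch of a request that eventually reaches
 * mlx5_flow_list_create(), matching all IPv4/UDP traffic and steering it to
 * Rx queue 0 (port_id and values are illustrative):
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error error;
 *	struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
 *						actions, &error);
 *
 * Items without spec/mask act as wildcards for that layer, exactly as the
 * converters above handle a NULL spec.
 */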
2453 
2454 /**
2455  * Destroy a flow in a list.
2456  *
2457  * @param dev
2458  *   Pointer to Ethernet device.
2459  * @param list
2460  *   Pointer to a TAILQ flow list.
2461  * @param[in] flow
2462  *   Flow to destroy.
2463  */
2464 static void
2465 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2466 		       struct rte_flow *flow)
2467 {
2468 	struct priv *priv = dev->data->dev_private;
2469 	unsigned int i;
2470 
2471 	if (flow->drop || !dev->data->dev_started)
2472 		goto free;
2473 	for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
2474 		/* Update queue tunnel type. */
2475 		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2476 						 [(*flow->queues)[i]];
2477 		struct mlx5_rxq_ctrl *rxq_ctrl =
2478 			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2479 		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2480 
2481 		assert(rxq_ctrl->tunnel_types[tunnel] > 0);
2482 		rxq_ctrl->tunnel_types[tunnel] -= 1;
2483 		if (!rxq_ctrl->tunnel_types[tunnel]) {
2484 			/* Update tunnel type. */
2485 			uint8_t j;
2486 			uint8_t types = 0;
2487 			uint8_t last;
2488 
2489 			for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
2490 				if (rxq_ctrl->tunnel_types[j]) {
2491 					types += 1;
2492 					last = j;
2493 				}
2494 			/* Keep the current type if more than one tunnel type is left. */
2495 			if (types == 1)
2496 				rxq_data->tunnel = ptype_ext[last];
2497 			else if (types == 0)
2498 				/* No tunnel type left. */
2499 				rxq_data->tunnel = 0;
2500 		}
2501 	}
2502 	for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
2503 		struct rte_flow *tmp;
2504 		int mark = 0;
2505 
2506 		/*
2507 		 * To remove the mark from the queue, the queue must not be
2508 		 * present in any other marked flow (RSS or not).
2509 		 */
2510 		TAILQ_FOREACH(tmp, list, next) {
2511 			unsigned int j;
2512 			uint16_t *tqs = NULL;
2513 			uint16_t tq_n = 0;
2514 
2515 			if (!tmp->mark)
2516 				continue;
2517 			for (j = 0; j != hash_rxq_init_n; ++j) {
2518 				if (!tmp->frxq[j].hrxq)
2519 					continue;
2520 				tqs = tmp->frxq[j].hrxq->ind_table->queues;
2521 				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2522 			}
2523 			if (!tq_n)
2524 				continue;
2525 			for (j = 0; (j != tq_n) && !mark; j++)
2526 				if (tqs[j] == (*flow->queues)[i])
2527 					mark = 1;
2528 		}
2529 		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2530 	}
2531 free:
2532 	if (flow->drop) {
2533 		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2534 			claim_zero(mlx5_glue->destroy_flow
2535 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2536 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2537 	} else {
2538 		for (i = 0; i != hash_rxq_init_n; ++i) {
2539 			struct mlx5_flow *frxq = &flow->frxq[i];
2540 
2541 			if (frxq->ibv_flow)
2542 				claim_zero(mlx5_glue->destroy_flow
2543 					   (frxq->ibv_flow));
2544 			if (frxq->hrxq)
2545 				mlx5_hrxq_release(dev, frxq->hrxq);
2546 			if (frxq->ibv_attr)
2547 				rte_free(frxq->ibv_attr);
2548 		}
2549 	}
2550 	if (flow->cs) {
2551 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2552 		flow->cs = NULL;
2553 	}
2554 	TAILQ_REMOVE(list, flow, next);
2555 	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2556 		(void *)flow);
2557 	rte_free(flow);
2558 }
2559 
2560 /**
2561  * Destroy all flows.
2562  *
2563  * @param dev
2564  *   Pointer to Ethernet device.
2565  * @param list
2566  *   Pointer to a TAILQ flow list.
2567  */
2568 void
2569 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2570 {
2571 	while (!TAILQ_EMPTY(list)) {
2572 		struct rte_flow *flow;
2573 
2574 		flow = TAILQ_FIRST(list);
2575 		mlx5_flow_list_destroy(dev, list, flow);
2576 	}
2577 }
2578 
2579 /**
2580  * Create drop queue.
2581  *
2582  * @param dev
2583  *   Pointer to Ethernet device.
2584  *
2585  * @return
2586  *   0 on success, a negative errno value otherwise and rte_errno is set.
2587  */
2588 int
2589 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2590 {
2591 	struct priv *priv = dev->data->dev_private;
2592 	struct mlx5_hrxq_drop *fdq = NULL;
2593 
2594 	assert(priv->pd);
2595 	assert(priv->ctx);
2596 	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2597 	if (!fdq) {
2598 		DRV_LOG(WARNING,
2599 			"port %u cannot allocate memory for drop queue",
2600 			dev->data->port_id);
2601 		rte_errno = ENOMEM;
2602 		return -rte_errno;
2603 	}
2604 	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2605 	if (!fdq->cq) {
2606 		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2607 			dev->data->port_id);
2608 		rte_errno = errno;
2609 		goto error;
2610 	}
2611 	fdq->wq = mlx5_glue->create_wq
2612 		(priv->ctx,
2613 		 &(struct ibv_wq_init_attr){
2614 			.wq_type = IBV_WQT_RQ,
2615 			.max_wr = 1,
2616 			.max_sge = 1,
2617 			.pd = priv->pd,
2618 			.cq = fdq->cq,
2619 		 });
2620 	if (!fdq->wq) {
2621 		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2622 			dev->data->port_id);
2623 		rte_errno = errno;
2624 		goto error;
2625 	}
2626 	fdq->ind_table = mlx5_glue->create_rwq_ind_table
2627 		(priv->ctx,
2628 		 &(struct ibv_rwq_ind_table_init_attr){
2629 			.log_ind_tbl_size = 0,
2630 			.ind_tbl = &fdq->wq,
2631 			.comp_mask = 0,
2632 		 });
2633 	if (!fdq->ind_table) {
2634 		DRV_LOG(WARNING,
2635 			"port %u cannot allocate indirection table for drop"
2636 			" queue",
2637 			dev->data->port_id);
2638 		rte_errno = errno;
2639 		goto error;
2640 	}
2641 	fdq->qp = mlx5_glue->create_qp_ex
2642 		(priv->ctx,
2643 		 &(struct ibv_qp_init_attr_ex){
2644 			.qp_type = IBV_QPT_RAW_PACKET,
2645 			.comp_mask =
2646 				IBV_QP_INIT_ATTR_PD |
2647 				IBV_QP_INIT_ATTR_IND_TABLE |
2648 				IBV_QP_INIT_ATTR_RX_HASH,
2649 			.rx_hash_conf = (struct ibv_rx_hash_conf){
2650 				.rx_hash_function =
2651 					IBV_RX_HASH_FUNC_TOEPLITZ,
2652 				.rx_hash_key_len = rss_hash_default_key_len,
2653 				.rx_hash_key = rss_hash_default_key,
2654 				.rx_hash_fields_mask = 0,
2655 				},
2656 			.rwq_ind_tbl = fdq->ind_table,
2657 			.pd = priv->pd
2658 		 });
2659 	if (!fdq->qp) {
2660 		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2661 			dev->data->port_id);
2662 		rte_errno = errno;
2663 		goto error;
2664 	}
2665 	priv->flow_drop_queue = fdq;
2666 	return 0;
2667 error:
2668 	if (fdq->qp)
2669 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2670 	if (fdq->ind_table)
2671 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2672 	if (fdq->wq)
2673 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2674 	if (fdq->cq)
2675 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2676 	if (fdq)
2677 		rte_free(fdq);
2678 	priv->flow_drop_queue = NULL;
2679 	return -rte_errno;
2680 }
2681 
2682 /**
2683  * Delete drop queue.
2684  *
2685  * @param dev
2686  *   Pointer to Ethernet device.
2687  */
2688 void
2689 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2690 {
2691 	struct priv *priv = dev->data->dev_private;
2692 	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2693 
2694 	if (!fdq)
2695 		return;
2696 	if (fdq->qp)
2697 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2698 	if (fdq->ind_table)
2699 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2700 	if (fdq->wq)
2701 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2702 	if (fdq->cq)
2703 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2704 	rte_free(fdq);
2705 	priv->flow_drop_queue = NULL;
2706 }
2707 
2708 /**
2709  * Remove all flows.
2710  *
2711  * @param dev
2712  *   Pointer to Ethernet device.
2713  * @param list
2714  *   Pointer to a TAILQ flow list.
2715  */
2716 void
2717 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2718 {
2719 	struct priv *priv = dev->data->dev_private;
2720 	struct rte_flow *flow;
2721 	unsigned int i;
2722 
2723 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2724 		struct mlx5_ind_table_ibv *ind_tbl = NULL;
2725 
2726 		if (flow->drop) {
2727 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2728 				continue;
2729 			claim_zero(mlx5_glue->destroy_flow
2730 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2731 			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2732 			DRV_LOG(DEBUG, "port %u flow %p removed",
2733 				dev->data->port_id, (void *)flow);
2734 			/* Next flow. */
2735 			continue;
2736 		}
2737 		/* Verify the flow has not already been cleaned. */
2738 		for (i = 0; i != hash_rxq_init_n; ++i) {
2739 			if (!flow->frxq[i].ibv_flow)
2740 				continue;
2741 			/*
2742 			 * The indirection table may be needed to clear the
2743 			 * mark flag in the Rx queues.
2744 			 * Caching it here speeds up the process by avoiding
2745 			 * another loop.
2746 			 */
2747 			ind_tbl = flow->frxq[i].hrxq->ind_table;
2748 			break;
2749 		}
2750 		if (i == hash_rxq_init_n)
2751 			return;
2752 		if (flow->mark) {
2753 			assert(ind_tbl);
2754 			for (i = 0; i != ind_tbl->queues_n; ++i)
2755 				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2756 		}
2757 		for (i = 0; i != hash_rxq_init_n; ++i) {
2758 			if (!flow->frxq[i].ibv_flow)
2759 				continue;
2760 			claim_zero(mlx5_glue->destroy_flow
2761 				   (flow->frxq[i].ibv_flow));
2762 			flow->frxq[i].ibv_flow = NULL;
2763 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2764 			flow->frxq[i].hrxq = NULL;
2765 		}
2766 		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2767 			(void *)flow);
2768 	}
2769 	/* Cleanup Rx queue tunnel info. */
2770 	for (i = 0; i != priv->rxqs_n; ++i) {
2771 		struct mlx5_rxq_data *q = (*priv->rxqs)[i];
2772 		struct mlx5_rxq_ctrl *rxq_ctrl =
2773 			container_of(q, struct mlx5_rxq_ctrl, rxq);
2774 
2775 		if (!q)
2776 			continue;
2777 		memset((void *)rxq_ctrl->tunnel_types, 0,
2778 		       sizeof(rxq_ctrl->tunnel_types));
2779 		q->tunnel = 0;
2780 	}
2781 }
2782 
2783 /**
2784  * Add all flows.
2785  *
2786  * @param dev
2787  *   Pointer to Ethernet device.
2788  * @param list
2789  *   Pointer to a TAILQ flow list.
2790  *
2791  * @return
2792  *   0 on success, a negative errno value otherwise and rte_errno is set.
2793  */
2794 int
2795 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2796 {
2797 	struct priv *priv = dev->data->dev_private;
2798 	struct rte_flow *flow;
2799 
2800 	TAILQ_FOREACH(flow, list, next) {
2801 		unsigned int i;
2802 
2803 		if (flow->drop) {
2804 			flow->frxq[HASH_RXQ_ETH].ibv_flow =
2805 				mlx5_glue->create_flow
2806 				(priv->flow_drop_queue->qp,
2807 				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2808 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2809 				DRV_LOG(DEBUG,
2810 					"port %u flow %p cannot be applied",
2811 					dev->data->port_id, (void *)flow);
2812 				rte_errno = EINVAL;
2813 				return -rte_errno;
2814 			}
2815 			DRV_LOG(DEBUG, "port %u flow %p applied",
2816 				dev->data->port_id, (void *)flow);
2817 			/* Next flow. */
2818 			continue;
2819 		}
2820 		for (i = 0; i != hash_rxq_init_n; ++i) {
2821 			if (!flow->frxq[i].ibv_attr)
2822 				continue;
2823 			flow->frxq[i].hrxq =
2824 				mlx5_hrxq_get(dev, flow->rss_conf.key,
2825 					      flow->rss_conf.key_len,
2826 					      flow->frxq[i].hash_fields,
2827 					      flow->rss_conf.queue,
2828 					      flow->rss_conf.queue_num,
2829 					      flow->tunnel,
2830 					      flow->rss_conf.level);
2831 			if (flow->frxq[i].hrxq)
2832 				goto flow_create;
2833 			flow->frxq[i].hrxq =
2834 				mlx5_hrxq_new(dev, flow->rss_conf.key,
2835 					      flow->rss_conf.key_len,
2836 					      flow->frxq[i].hash_fields,
2837 					      flow->rss_conf.queue,
2838 					      flow->rss_conf.queue_num,
2839 					      flow->tunnel,
2840 					      flow->rss_conf.level);
2841 			if (!flow->frxq[i].hrxq) {
2842 				DRV_LOG(DEBUG,
2843 					"port %u flow %p cannot create hash"
2844 					" rxq",
2845 					dev->data->port_id, (void *)flow);
2846 				rte_errno = EINVAL;
2847 				return -rte_errno;
2848 			}
2849 flow_create:
2850 			mlx5_flow_dump(dev, flow, i);
2851 			flow->frxq[i].ibv_flow =
2852 				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2853 						       flow->frxq[i].ibv_attr);
2854 			if (!flow->frxq[i].ibv_flow) {
2855 				DRV_LOG(DEBUG,
2856 					"port %u flow %p type %u cannot be"
2857 					" applied",
2858 					dev->data->port_id, (void *)flow, i);
2859 				rte_errno = EINVAL;
2860 				return -rte_errno;
2861 			}
2862 		}
2863 		mlx5_flow_create_update_rxqs(dev, flow);
2864 	}
2865 	return 0;
2866 }
2867 
2868 /**
2869  * Verify the flow list is empty.
2870  *
2871  * @param dev
2872  *   Pointer to Ethernet device.
2873  *
2874  * @return the number of flows not released.
2875  */
2876 int
2877 mlx5_flow_verify(struct rte_eth_dev *dev)
2878 {
2879 	struct priv *priv = dev->data->dev_private;
2880 	struct rte_flow *flow;
2881 	int ret = 0;
2882 
2883 	TAILQ_FOREACH(flow, &priv->flows, next) {
2884 		DRV_LOG(DEBUG, "port %u flow %p still referenced",
2885 			dev->data->port_id, (void *)flow);
2886 		++ret;
2887 	}
2888 	return ret;
2889 }
2890 
2891 /**
2892  * Enable a control flow configured from the control plane.
2893  *
2894  * @param dev
2895  *   Pointer to Ethernet device.
2896  * @param eth_spec
2897  *   An Ethernet flow spec to apply.
2898  * @param eth_mask
2899  *   An Ethernet flow mask to apply.
2900  * @param vlan_spec
2901  *   A VLAN flow spec to apply.
2902  * @param vlan_mask
2903  *   A VLAN flow mask to apply.
2904  *
2905  * @return
2906  *   0 on success, a negative errno value otherwise and rte_errno is set.
2907  */
2908 int
2909 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2910 		    struct rte_flow_item_eth *eth_spec,
2911 		    struct rte_flow_item_eth *eth_mask,
2912 		    struct rte_flow_item_vlan *vlan_spec,
2913 		    struct rte_flow_item_vlan *vlan_mask)
2914 {
2915 	struct priv *priv = dev->data->dev_private;
2916 	const struct rte_flow_attr attr = {
2917 		.ingress = 1,
2918 		.priority = MLX5_CTRL_FLOW_PRIORITY,
2919 	};
2920 	struct rte_flow_item items[] = {
2921 		{
2922 			.type = RTE_FLOW_ITEM_TYPE_ETH,
2923 			.spec = eth_spec,
2924 			.last = NULL,
2925 			.mask = eth_mask,
2926 		},
2927 		{
2928 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2929 				RTE_FLOW_ITEM_TYPE_END,
2930 			.spec = vlan_spec,
2931 			.last = NULL,
2932 			.mask = vlan_mask,
2933 		},
2934 		{
2935 			.type = RTE_FLOW_ITEM_TYPE_END,
2936 		},
2937 	};
2938 	uint16_t queue[priv->reta_idx_n];
2939 	struct rte_flow_action_rss action_rss = {
2940 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2941 		.level = 0,
2942 		.types = priv->rss_conf.rss_hf,
2943 		.key_len = priv->rss_conf.rss_key_len,
2944 		.queue_num = priv->reta_idx_n,
2945 		.key = priv->rss_conf.rss_key,
2946 		.queue = queue,
2947 	};
2948 	struct rte_flow_action actions[] = {
2949 		{
2950 			.type = RTE_FLOW_ACTION_TYPE_RSS,
2951 			.conf = &action_rss,
2952 		},
2953 		{
2954 			.type = RTE_FLOW_ACTION_TYPE_END,
2955 		},
2956 	};
2957 	struct rte_flow *flow;
2958 	struct rte_flow_error error;
2959 	unsigned int i;
2960 
2961 	if (!priv->reta_idx_n) {
2962 		rte_errno = EINVAL;
2963 		return -rte_errno;
2964 	}
2965 	for (i = 0; i != priv->reta_idx_n; ++i)
2966 		queue[i] = (*priv->reta_idx)[i];
2967 	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2968 				     actions, &error);
2969 	if (!flow)
2970 		return -rte_errno;
2971 	return 0;
2972 }
2973 
2974 /**
2975  * Enable a control flow configured from the control plane.
2976  *
2977  * @param dev
2978  *   Pointer to Ethernet device.
2979  * @param eth_spec
2980  *   An Ethernet flow spec to apply.
2981  * @param eth_mask
2982  *   An Ethernet flow mask to apply.
2983  *
2984  * @return
2985  *   0 on success, a negative errno value otherwise and rte_errno is set.
2986  */
2987 int
2988 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2989 	       struct rte_flow_item_eth *eth_spec,
2990 	       struct rte_flow_item_eth *eth_mask)
2991 {
2992 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2993 }
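/*
 * A hedged sketch of how the control-plane helpers above are used, e.g. to
 * receive broadcast traffic, in the spirit of the traffic control code of
 * this driver (values are illustrative):
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *		return -rte_errno;
 *
 * Passing the same structure as spec and mask requests an exact match on the
 * broadcast destination address while leaving the source address and
 * EtherType wildcarded.
 */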
2994 
2995 /**
2996  * Destroy a flow.
2997  *
2998  * @see rte_flow_destroy()
2999  * @see rte_flow_ops
3000  */
3001 int
3002 mlx5_flow_destroy(struct rte_eth_dev *dev,
3003 		  struct rte_flow *flow,
3004 		  struct rte_flow_error *error __rte_unused)
3005 {
3006 	struct priv *priv = dev->data->dev_private;
3007 
3008 	mlx5_flow_list_destroy(dev, &priv->flows, flow);
3009 	return 0;
3010 }
3011 
3012 /**
3013  * Destroy all flows.
3014  *
3015  * @see rte_flow_flush()
3016  * @see rte_flow_ops
3017  */
3018 int
3019 mlx5_flow_flush(struct rte_eth_dev *dev,
3020 		struct rte_flow_error *error __rte_unused)
3021 {
3022 	struct priv *priv = dev->data->dev_private;
3023 
3024 	mlx5_flow_list_flush(dev, &priv->flows);
3025 	return 0;
3026 }
3027 
3028 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
3029 /**
3030  * Query flow counter.
3031  *
3032  * @param cs
3033  *   The counter set to query.
3034  * @param counter_stats
3035  *   Last stored counter values; query_count returns the deltas from them.
3036  *
3037  * @return
3038  *   0 on success, a negative errno value otherwise and rte_errno is set.
3039  */
3040 static int
3041 mlx5_flow_query_count(struct ibv_counter_set *cs,
3042 		      struct mlx5_flow_counter_stats *counter_stats,
3043 		      struct rte_flow_query_count *query_count,
3044 		      struct rte_flow_error *error)
3045 {
3046 	uint64_t counters[2];
3047 	struct ibv_query_counter_set_attr query_cs_attr = {
3048 		.cs = cs,
3049 		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
3050 	};
3051 	struct ibv_counter_set_data query_out = {
3052 		.out = counters,
3053 		.outlen = 2 * sizeof(uint64_t),
3054 	};
3055 	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
3056 
3057 	if (err)
3058 		return rte_flow_error_set(error, err,
3059 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3060 					  NULL,
3061 					  "cannot read counter");
3062 	query_count->hits_set = 1;
3063 	query_count->bytes_set = 1;
3064 	query_count->hits = counters[0] - counter_stats->hits;
3065 	query_count->bytes = counters[1] - counter_stats->bytes;
3066 	if (query_count->reset) {
3067 		counter_stats->hits = counters[0];
3068 		counter_stats->bytes = counters[1];
3069 	}
3070 	return 0;
3071 }
3072 
3073 /**
3074  * Query a flow.
3075  *
3076  * @see rte_flow_query()
3077  * @see rte_flow_ops
3078  */
3079 int
3080 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
3081 		struct rte_flow *flow,
3082 		enum rte_flow_action_type action __rte_unused,
3083 		void *data,
3084 		struct rte_flow_error *error)
3085 {
3086 	if (flow->cs) {
3087 		int ret;
3088 
3089 		ret = mlx5_flow_query_count(flow->cs,
3090 					    &flow->counter_stats,
3091 					    (struct rte_flow_query_count *)data,
3092 					    error);
3093 		if (ret)
3094 			return ret;
3095 	} else {
3096 		return rte_flow_error_set(error, EINVAL,
3097 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3098 					  NULL,
3099 					  "no counter found for flow");
3100 	}
3101 	return 0;
3102 }
3103 #endif
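/*
 * A hedged sketch of how an application reads the COUNT action through the
 * query callback above; it requires the flow to have been created with
 * RTE_FLOW_ACTION_TYPE_COUNT, names are illustrative and the exact
 * rte_flow_query() prototype varies across DPDK releases (here the action is
 * identified by its type, as in the callback above):
 *
 *	struct rte_flow_query_count count = { .reset = 1 };
 *	struct rte_flow_error error;
 *
 *	if (!rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *			    &count, &error)) {
 *		if (count.hits_set)
 *			printf("hits: %" PRIu64 "\n", count.hits);
 *		if (count.bytes_set)
 *			printf("bytes: %" PRIu64 "\n", count.bytes);
 *	}
 *
 * With .reset set, the stored baseline is updated so the next query reports
 * deltas relative to this call, as implemented in mlx5_flow_query_count().
 */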
3104 
3105 /**
3106  * Isolated mode.
3107  *
3108  * @see rte_flow_isolate()
3109  * @see rte_flow_ops
3110  */
3111 int
3112 mlx5_flow_isolate(struct rte_eth_dev *dev,
3113 		  int enable,
3114 		  struct rte_flow_error *error)
3115 {
3116 	struct priv *priv = dev->data->dev_private;
3117 
3118 	if (dev->data->dev_started) {
3119 		rte_flow_error_set(error, EBUSY,
3120 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3121 				   NULL,
3122 				   "port must be stopped first");
3123 		return -rte_errno;
3124 	}
3125 	priv->isolated = !!enable;
3126 	if (enable)
3127 		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
3128 	else
3129 		priv->dev->dev_ops = &mlx5_dev_ops;
3130 	return 0;
3131 }
3132 
3133 /**
3134  * Convert a flow director filter to a generic flow.
3135  *
3136  * @param dev
3137  *   Pointer to Ethernet device.
3138  * @param fdir_filter
3139  *   Flow director filter to add.
3140  * @param attributes
3141  *   Generic flow parameters structure.
3142  *
3143  * @return
3144  *   0 on success, a negative errno value otherwise and rte_errno is set.
3145  */
3146 static int
3147 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
3148 			 const struct rte_eth_fdir_filter *fdir_filter,
3149 			 struct mlx5_fdir *attributes)
3150 {
3151 	struct priv *priv = dev->data->dev_private;
3152 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
3153 	const struct rte_eth_fdir_masks *mask =
3154 		&dev->data->dev_conf.fdir_conf.mask;
3155 
3156 	/* Validate queue number. */
3157 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3158 		DRV_LOG(ERR, "port %u invalid queue number %d",
3159 			dev->data->port_id, fdir_filter->action.rx_queue);
3160 		rte_errno = EINVAL;
3161 		return -rte_errno;
3162 	}
3163 	attributes->attr.ingress = 1;
3164 	attributes->items[0] = (struct rte_flow_item) {
3165 		.type = RTE_FLOW_ITEM_TYPE_ETH,
3166 		.spec = &attributes->l2,
3167 		.mask = &attributes->l2_mask,
3168 	};
3169 	switch (fdir_filter->action.behavior) {
3170 	case RTE_ETH_FDIR_ACCEPT:
3171 		attributes->actions[0] = (struct rte_flow_action){
3172 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
3173 			.conf = &attributes->queue,
3174 		};
3175 		break;
3176 	case RTE_ETH_FDIR_REJECT:
3177 		attributes->actions[0] = (struct rte_flow_action){
3178 			.type = RTE_FLOW_ACTION_TYPE_DROP,
3179 		};
3180 		break;
3181 	default:
3182 		DRV_LOG(ERR, "port %u invalid behavior %d",
3183 			dev->data->port_id,
3184 			fdir_filter->action.behavior);
3185 		rte_errno = ENOTSUP;
3186 		return -rte_errno;
3187 	}
3188 	attributes->queue.index = fdir_filter->action.rx_queue;
3189 	/* Handle L3. */
3190 	switch (fdir_filter->input.flow_type) {
3191 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3192 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3193 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3194 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
3195 			.src_addr = input->flow.ip4_flow.src_ip,
3196 			.dst_addr = input->flow.ip4_flow.dst_ip,
3197 			.time_to_live = input->flow.ip4_flow.ttl,
3198 			.type_of_service = input->flow.ip4_flow.tos,
3199 			.next_proto_id = input->flow.ip4_flow.proto,
3200 		};
3201 		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
3202 			.src_addr = mask->ipv4_mask.src_ip,
3203 			.dst_addr = mask->ipv4_mask.dst_ip,
3204 			.time_to_live = mask->ipv4_mask.ttl,
3205 			.type_of_service = mask->ipv4_mask.tos,
3206 			.next_proto_id = mask->ipv4_mask.proto,
3207 		};
3208 		attributes->items[1] = (struct rte_flow_item){
3209 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
3210 			.spec = &attributes->l3,
3211 			.mask = &attributes->l3_mask,
3212 		};
3213 		break;
3214 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3215 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3216 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3217 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3218 			.hop_limits = input->flow.ipv6_flow.hop_limits,
3219 			.proto = input->flow.ipv6_flow.proto,
3220 		};
3221 
3222 		memcpy(attributes->l3.ipv6.hdr.src_addr,
3223 		       input->flow.ipv6_flow.src_ip,
3224 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3225 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
3226 		       input->flow.ipv6_flow.dst_ip,
3227 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3228 		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3229 		       mask->ipv6_mask.src_ip,
3230 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3231 		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3232 		       mask->ipv6_mask.dst_ip,
3233 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3234 		attributes->items[1] = (struct rte_flow_item){
3235 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
3236 			.spec = &attributes->l3,
3237 			.mask = &attributes->l3_mask,
3238 		};
3239 		break;
3240 	default:
3241 		DRV_LOG(ERR, "port %u invalid flow type %d",
3242 			dev->data->port_id, fdir_filter->input.flow_type);
3243 		rte_errno = ENOTSUP;
3244 		return -rte_errno;
3245 	}
3246 	/* Handle L4. */
3247 	switch (fdir_filter->input.flow_type) {
3248 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3249 		attributes->l4.udp.hdr = (struct udp_hdr){
3250 			.src_port = input->flow.udp4_flow.src_port,
3251 			.dst_port = input->flow.udp4_flow.dst_port,
3252 		};
3253 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
3254 			.src_port = mask->src_port_mask,
3255 			.dst_port = mask->dst_port_mask,
3256 		};
3257 		attributes->items[2] = (struct rte_flow_item){
3258 			.type = RTE_FLOW_ITEM_TYPE_UDP,
3259 			.spec = &attributes->l4,
3260 			.mask = &attributes->l4_mask,
3261 		};
3262 		break;
3263 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3264 		attributes->l4.tcp.hdr = (struct tcp_hdr){
3265 			.src_port = input->flow.tcp4_flow.src_port,
3266 			.dst_port = input->flow.tcp4_flow.dst_port,
3267 		};
3268 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3269 			.src_port = mask->src_port_mask,
3270 			.dst_port = mask->dst_port_mask,
3271 		};
3272 		attributes->items[2] = (struct rte_flow_item){
3273 			.type = RTE_FLOW_ITEM_TYPE_TCP,
3274 			.spec = &attributes->l4,
3275 			.mask = &attributes->l4_mask,
3276 		};
3277 		break;
3278 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3279 		attributes->l4.udp.hdr = (struct udp_hdr){
3280 			.src_port = input->flow.udp6_flow.src_port,
3281 			.dst_port = input->flow.udp6_flow.dst_port,
3282 		};
3283 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
3284 			.src_port = mask->src_port_mask,
3285 			.dst_port = mask->dst_port_mask,
3286 		};
3287 		attributes->items[2] = (struct rte_flow_item){
3288 			.type = RTE_FLOW_ITEM_TYPE_UDP,
3289 			.spec = &attributes->l4,
3290 			.mask = &attributes->l4_mask,
3291 		};
3292 		break;
3293 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3294 		attributes->l4.tcp.hdr = (struct tcp_hdr){
3295 			.src_port = input->flow.tcp6_flow.src_port,
3296 			.dst_port = input->flow.tcp6_flow.dst_port,
3297 		};
3298 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3299 			.src_port = mask->src_port_mask,
3300 			.dst_port = mask->dst_port_mask,
3301 		};
3302 		attributes->items[2] = (struct rte_flow_item){
3303 			.type = RTE_FLOW_ITEM_TYPE_TCP,
3304 			.spec = &attributes->l4,
3305 			.mask = &attributes->l4_mask,
3306 		};
3307 		break;
3308 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3309 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3310 		break;
3311 	default:
3312 		DRV_LOG(ERR, "port %u invalid flow type %d",
3313 			dev->data->port_id, fdir_filter->input.flow_type);
3314 		rte_errno = ENOTSUP;
3315 		return -rte_errno;
3316 	}
3317 	return 0;
3318 }
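/*
 * For example, an RTE_ETH_FLOW_NONFRAG_IPV4_UDP filter accepted to a queue is
 * converted by the function above into the equivalent of the generic flow
 *
 *	pattern: eth / ipv4 src,dst,ttl,tos,proto / udp src,dst
 *	actions: queue index <rx_queue> / end
 *
 * masked with the global fdir_conf masks, so flow director rules go through
 * the very same rte_flow path as rules created with rte_flow_create().
 */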
3319 
3320 /**
3321  * Add new flow director filter and store it in list.
3322  *
3323  * @param dev
3324  *   Pointer to Ethernet device.
3325  * @param fdir_filter
3326  *   Flow director filter to add.
3327  *
3328  * @return
3329  *   0 on success, a negative errno value otherwise and rte_errno is set.
3330  */
3331 static int
3332 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3333 		     const struct rte_eth_fdir_filter *fdir_filter)
3334 {
3335 	struct priv *priv = dev->data->dev_private;
3336 	struct mlx5_fdir attributes = {
3337 		.attr.group = 0,
3338 		.l2_mask = {
3339 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3340 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3341 			.type = 0,
3342 		},
3343 	};
3344 	struct mlx5_flow_parse parser = {
3345 		.layer = HASH_RXQ_ETH,
3346 	};
3347 	struct rte_flow_error error;
3348 	struct rte_flow *flow;
3349 	int ret;
3350 
3351 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3352 	if (ret)
3353 		return ret;
3354 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3355 				attributes.actions, &error, &parser);
3356 	if (ret)
3357 		return ret;
3358 	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3359 				     attributes.items, attributes.actions,
3360 				     &error);
3361 	if (flow) {
3362 		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3363 			(void *)flow);
3364 		return 0;
3365 	}
3366 	return -rte_errno;
3367 }
3368 
3369 /**
3370  * Delete specific filter.
3371  *
3372  * @param dev
3373  *   Pointer to Ethernet device.
3374  * @param fdir_filter
3375  *   Filter to be deleted.
3376  *
3377  * @return
3378  *   0 on success, a negative errno value otherwise and rte_errno is set.
3379  */
3380 static int
3381 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
3382 			const struct rte_eth_fdir_filter *fdir_filter)
3383 {
3384 	struct priv *priv = dev->data->dev_private;
3385 	struct mlx5_fdir attributes = {
3386 		.attr.group = 0,
3387 	};
3388 	struct mlx5_flow_parse parser = {
3389 		.create = 1,
3390 		.layer = HASH_RXQ_ETH,
3391 	};
3392 	struct rte_flow_error error;
3393 	struct rte_flow *flow;
3394 	unsigned int i;
3395 	int ret;
3396 
3397 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3398 	if (ret)
3399 		return ret;
3400 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3401 				attributes.actions, &error, &parser);
3402 	if (ret)
3403 		goto exit;
3404 	/*
3405 	 * Special case for the drop action, which is only written into the
3406 	 * specifications when a flow is actually created.  At this point the
3407 	 * drop specification is therefore missing and must be added by hand.
3408 	 */
3409 	if (parser.drop) {
3410 		struct ibv_flow_spec_action_drop *drop;
3411 
3412 		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
3413 				parser.queue[HASH_RXQ_ETH].offset);
3414 		*drop = (struct ibv_flow_spec_action_drop){
3415 			.type = IBV_FLOW_SPEC_ACTION_DROP,
3416 			.size = sizeof(struct ibv_flow_spec_action_drop),
3417 		};
3418 		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
3419 	}
3420 	TAILQ_FOREACH(flow, &priv->flows, next) {
3421 		struct ibv_flow_attr *attr;
3422 		struct ibv_spec_header *attr_h;
3423 		void *spec;
3424 		struct ibv_flow_attr *flow_attr;
3425 		struct ibv_spec_header *flow_h;
3426 		void *flow_spec;
3427 		unsigned int specs_n;
3428 
3429 		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
3430 		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
3431 		/* Compare the attributes first. */
3432 		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
3433 			continue;
3434 		if (attr->num_of_specs == 0)
3435 			continue;
3436 		spec = (void *)((uintptr_t)attr +
3437 				sizeof(struct ibv_flow_attr));
3438 		flow_spec = (void *)((uintptr_t)flow_attr +
3439 				     sizeof(struct ibv_flow_attr));
3440 		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
3441 		for (i = 0; i != specs_n; ++i) {
3442 			attr_h = spec;
3443 			flow_h = flow_spec;
3444 			if (memcmp(spec, flow_spec,
3445 				   RTE_MIN(attr_h->size, flow_h->size)))
3446 				goto wrong_flow;
3447 			spec = (void *)((uintptr_t)spec + attr_h->size);
3448 			flow_spec = (void *)((uintptr_t)flow_spec +
3449 					     flow_h->size);
3450 		}
3451 		/* At this point, the flows match. */
3452 		break;
3453 wrong_flow:
3454 		/* The flow does not match. */
3455 		continue;
3456 	}
3457 	ret = rte_errno; /* Save rte_errno before cleanup. */
3458 	if (flow)
3459 		mlx5_flow_list_destroy(dev, &priv->flows, flow);
3460 exit:
3461 	for (i = 0; i != hash_rxq_init_n; ++i) {
3462 		if (parser.queue[i].ibv_attr)
3463 			rte_free(parser.queue[i].ibv_attr);
3464 	}
3465 	rte_errno = ret; /* Restore rte_errno. */
3466 	return -rte_errno;
3467 }
3468 
3469 /**
3470  * Update queue for specific filter.
3471  *
3472  * @param dev
3473  *   Pointer to Ethernet device.
3474  * @param fdir_filter
3475  *   Filter to be updated.
3476  *
3477  * @return
3478  *   0 on success, a negative errno value otherwise and rte_errno is set.
3479  */
3480 static int
3481 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3482 			const struct rte_eth_fdir_filter *fdir_filter)
3483 {
3484 	int ret;
3485 
3486 	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3487 	if (ret)
3488 		return ret;
3489 	return mlx5_fdir_filter_add(dev, fdir_filter);
3490 }
3491 
3492 /**
3493  * Flush all filters.
3494  *
3495  * @param dev
3496  *   Pointer to Ethernet device.
3497  */
3498 static void
3499 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3500 {
3501 	struct priv *priv = dev->data->dev_private;
3502 
3503 	mlx5_flow_list_flush(dev, &priv->flows);
3504 }
3505 
3506 /**
3507  * Get flow director information.
3508  *
3509  * @param dev
3510  *   Pointer to Ethernet device.
3511  * @param[out] fdir_info
3512  *   Resulting flow director information.
3513  */
3514 static void
3515 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3516 {
3517 	struct priv *priv = dev->data->dev_private;
3518 	struct rte_eth_fdir_masks *mask =
3519 		&priv->dev->data->dev_conf.fdir_conf.mask;
3520 
3521 	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3522 	fdir_info->guarant_spc = 0;
3523 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3524 	fdir_info->max_flexpayload = 0;
3525 	fdir_info->flow_types_mask[0] = 0;
3526 	fdir_info->flex_payload_unit = 0;
3527 	fdir_info->max_flex_payload_segment_num = 0;
3528 	fdir_info->flex_payload_limit = 0;
3529 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3530 }
3531 
3532 /**
3533  * Deal with flow director operations.
3534  *
3535  * @param dev
3536  *   Pointer to Ethernet device.
3537  * @param filter_op
3538  *   Operation to perform.
3539  * @param arg
3540  *   Pointer to operation-specific structure.
3541  *
3542  * @return
3543  *   0 on success, a negative errno value otherwise and rte_errno is set.
3544  */
3545 static int
3546 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3547 		    void *arg)
3548 {
3549 	struct priv *priv = dev->data->dev_private;
3550 	enum rte_fdir_mode fdir_mode =
3551 		priv->dev->data->dev_conf.fdir_conf.mode;
3552 
3553 	if (filter_op == RTE_ETH_FILTER_NOP)
3554 		return 0;
3555 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3556 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3557 		DRV_LOG(ERR, "port %u flow director mode %d not supported",
3558 			dev->data->port_id, fdir_mode);
3559 		rte_errno = EINVAL;
3560 		return -rte_errno;
3561 	}
3562 	switch (filter_op) {
3563 	case RTE_ETH_FILTER_ADD:
3564 		return mlx5_fdir_filter_add(dev, arg);
3565 	case RTE_ETH_FILTER_UPDATE:
3566 		return mlx5_fdir_filter_update(dev, arg);
3567 	case RTE_ETH_FILTER_DELETE:
3568 		return mlx5_fdir_filter_delete(dev, arg);
3569 	case RTE_ETH_FILTER_FLUSH:
3570 		mlx5_fdir_filter_flush(dev);
3571 		break;
3572 	case RTE_ETH_FILTER_INFO:
3573 		mlx5_fdir_info_get(dev, arg);
3574 		break;
3575 	default:
3576 		DRV_LOG(DEBUG, "port %u unknown operation %u",
3577 			dev->data->port_id, filter_op);
3578 		rte_errno = EINVAL;
3579 		return -rte_errno;
3580 	}
3581 	return 0;
3582 }
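
/*
 * Illustrative sketch, not part of the driver: mlx5_fdir_ctrl_func() is not
 * called directly by applications; it is reached through the generic
 * rte_eth_dev_filter_ctrl() entry point with RTE_ETH_FILTER_FDIR.  A
 * minimal add request could look as follows (field values, the port index
 * and the queue index are examples only):
 *
 *	int ret;
 *	struct rte_eth_fdir_filter f = {
 *		.soft_id = 1,
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip.dst_ip = rte_cpu_to_be_32(0xc0a80001),
 *				.dst_port = rte_cpu_to_be_16(4789),
 *			},
 *		},
 *		.action = {
 *			.rx_queue = 3,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *	};
 *
 *	ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				      RTE_ETH_FILTER_ADD, &f);
 */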
3583 
3584 /**
3585  * Manage filter operations.
3586  *
3587  * @param dev
3588  *   Pointer to Ethernet device structure.
3589  * @param filter_type
3590  *   Filter type.
3591  * @param filter_op
3592  *   Operation to perform.
3593  * @param arg
3594  *   Pointer to operation-specific structure.
3595  *
3596  * @return
3597  *   0 on success, a negative errno value otherwise and rte_errno is set.
3598  */
3599 int
3600 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3601 		     enum rte_filter_type filter_type,
3602 		     enum rte_filter_op filter_op,
3603 		     void *arg)
3604 {
3605 	switch (filter_type) {
3606 	case RTE_ETH_FILTER_GENERIC:
3607 		if (filter_op != RTE_ETH_FILTER_GET) {
3608 			rte_errno = EINVAL;
3609 			return -rte_errno;
3610 		}
3611 		*(const void **)arg = &mlx5_flow_ops;
3612 		return 0;
3613 	case RTE_ETH_FILTER_FDIR:
3614 		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3615 	default:
3616 		DRV_LOG(ERR, "port %u filter type (%d) not supported",
3617 			dev->data->port_id, filter_type);
3618 		rte_errno = ENOTSUP;
3619 		return -rte_errno;
3620 	}
3621 	return 0;
3622 }
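
/*
 * Illustrative sketch, not part of the driver: the RTE_ETH_FILTER_GENERIC
 * branch above is how the rte_flow layer discovers the driver callbacks.
 * rte_flow_ops_get() in rte_flow_driver.h has roughly the same effect as
 * the following (error handling omitted):
 *
 *	int ret;
 *	const struct rte_flow_ops *ops = NULL;
 *
 *	ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				      RTE_ETH_FILTER_GET, &ops);
 *	// On success, ops points to mlx5_flow_ops for mlx5 ports.
 */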
3623 
3624 /**
3625  * Detect the number of supported Verbs flow priorities.
3626  *
3627  * @param dev
3628  *   Pointer to Ethernet device.
3629  *
3630  * @return
3631  *   Number of supported Verbs flow priorities.
3632  */
3633 unsigned int
3634 mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
3635 {
3636 	struct priv *priv = dev->data->dev_private;
3637 	unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
3638 	struct {
3639 		struct ibv_flow_attr attr;
3640 		struct ibv_flow_spec_eth eth;
3641 		struct ibv_flow_spec_action_drop drop;
3642 	} flow_attr = {
3643 		.attr = {
3644 			.num_of_specs = 2,
3645 		},
3646 		.eth = {
3647 			.type = IBV_FLOW_SPEC_ETH,
3648 			.size = sizeof(struct ibv_flow_spec_eth),
3649 		},
3650 		.drop = {
3651 			.size = sizeof(struct ibv_flow_spec_action_drop),
3652 			.type = IBV_FLOW_SPEC_ACTION_DROP,
3653 		},
3654 	};
3655 	struct ibv_flow *flow;
3656 
3657 	do {
3658 		flow_attr.attr.priority = verb_priorities - 1;
3659 		flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
3660 					      &flow_attr.attr);
3661 		if (flow) {
3662 			claim_zero(mlx5_glue->destroy_flow(flow));
3663 			/* Try more priorities. */
3664 			verb_priorities *= 2;
3665 		} else {
3666 			/* Failed, restore the last working value. */
3667 			verb_priorities /= 2;
3668 			break;
3669 		}
3670 	} while (1);
3671 	DRV_LOG(DEBUG, "port %u Verbs flow priorities: %u,"
3672 		" user flow priorities: %d",
3673 		dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
3674 	return verb_priorities;
3675 }
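
/*
 * Illustrative note, not part of the driver: the probe above is an
 * exponential search.  Starting from MLX5_VERBS_FLOW_PRIO_8 it creates a
 * drop flow at priority (n - 1); on success n is doubled, on the first
 * failure n is halved back to the last value that worked and returned.
 * For a device exposing 16 priorities the sequence would be:
 *
 *	n = 8  -> priority 7  -> success, n = 16
 *	n = 16 -> priority 15 -> success, n = 32
 *	n = 32 -> priority 31 -> failure, n = 16 (returned)
 */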
3676