xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision c73a9071877a1b80d01517b9c6205b9b3b503f59)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9 
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19 
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28 
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33 
34 /* Flow priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 1
36 
37 /* Internet Protocol versions. */
38 #define MLX5_IPV4 4
39 #define MLX5_IPV6 6
40 #define MLX5_GRE 47
41 
42 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
43 struct ibv_flow_spec_counter_action {
44 	int dummy;
45 };
46 #endif
47 
48 /* Dev ops structure defined in mlx5.c */
49 extern const struct eth_dev_ops mlx5_dev_ops;
50 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
51 
52 /** Structure given to the conversion functions. */
53 struct mlx5_flow_data {
54 	struct rte_eth_dev *dev; /**< Ethernet device. */
55 	struct mlx5_flow_parse *parser; /**< Parser context. */
56 	struct rte_flow_error *error; /**< Error context. */
57 };
58 
59 static int
60 mlx5_flow_create_eth(const struct rte_flow_item *item,
61 		     const void *default_mask,
62 		     struct mlx5_flow_data *data);
63 
64 static int
65 mlx5_flow_create_vlan(const struct rte_flow_item *item,
66 		      const void *default_mask,
67 		      struct mlx5_flow_data *data);
68 
69 static int
70 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
71 		      const void *default_mask,
72 		      struct mlx5_flow_data *data);
73 
74 static int
75 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
76 		      const void *default_mask,
77 		      struct mlx5_flow_data *data);
78 
79 static int
80 mlx5_flow_create_udp(const struct rte_flow_item *item,
81 		     const void *default_mask,
82 		     struct mlx5_flow_data *data);
83 
84 static int
85 mlx5_flow_create_tcp(const struct rte_flow_item *item,
86 		     const void *default_mask,
87 		     struct mlx5_flow_data *data);
88 
89 static int
90 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
91 		       const void *default_mask,
92 		       struct mlx5_flow_data *data);
93 
94 static int
95 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
96 			   const void *default_mask,
97 			   struct mlx5_flow_data *data);
98 
99 static int
100 mlx5_flow_create_gre(const struct rte_flow_item *item,
101 		     const void *default_mask,
102 		     struct mlx5_flow_data *data);
103 
104 struct mlx5_flow_parse;
105 
106 static void
107 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
108 		      unsigned int size);
109 
110 static int
111 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
112 
113 static int
114 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
115 
116 /* Hash RX queue types. */
117 enum hash_rxq_type {
118 	HASH_RXQ_TCPV4,
119 	HASH_RXQ_UDPV4,
120 	HASH_RXQ_IPV4,
121 	HASH_RXQ_TCPV6,
122 	HASH_RXQ_UDPV6,
123 	HASH_RXQ_IPV6,
124 	HASH_RXQ_ETH,
125 	HASH_RXQ_TUNNEL,
126 };
127 
128 /* Initialization data for hash RX queue. */
129 struct hash_rxq_init {
130 	uint64_t hash_fields; /* Fields that participate in the hash. */
131 	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
132 	unsigned int flow_priority; /* Flow priority to use. */
133 	unsigned int ip_version; /* Internet protocol. */
134 };
135 
136 /* Initialization data for hash RX queues. */
137 const struct hash_rxq_init hash_rxq_init[] = {
138 	[HASH_RXQ_TCPV4] = {
139 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
140 				IBV_RX_HASH_DST_IPV4 |
141 				IBV_RX_HASH_SRC_PORT_TCP |
142 				IBV_RX_HASH_DST_PORT_TCP),
143 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
144 		.flow_priority = 0,
145 		.ip_version = MLX5_IPV4,
146 	},
147 	[HASH_RXQ_UDPV4] = {
148 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
149 				IBV_RX_HASH_DST_IPV4 |
150 				IBV_RX_HASH_SRC_PORT_UDP |
151 				IBV_RX_HASH_DST_PORT_UDP),
152 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
153 		.flow_priority = 0,
154 		.ip_version = MLX5_IPV4,
155 	},
156 	[HASH_RXQ_IPV4] = {
157 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
158 				IBV_RX_HASH_DST_IPV4),
159 		.dpdk_rss_hf = (ETH_RSS_IPV4 |
160 				ETH_RSS_FRAG_IPV4),
161 		.flow_priority = 1,
162 		.ip_version = MLX5_IPV4,
163 	},
164 	[HASH_RXQ_TCPV6] = {
165 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
166 				IBV_RX_HASH_DST_IPV6 |
167 				IBV_RX_HASH_SRC_PORT_TCP |
168 				IBV_RX_HASH_DST_PORT_TCP),
169 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
170 		.flow_priority = 0,
171 		.ip_version = MLX5_IPV6,
172 	},
173 	[HASH_RXQ_UDPV6] = {
174 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
175 				IBV_RX_HASH_DST_IPV6 |
176 				IBV_RX_HASH_SRC_PORT_UDP |
177 				IBV_RX_HASH_DST_PORT_UDP),
178 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
179 		.flow_priority = 0,
180 		.ip_version = MLX5_IPV6,
181 	},
182 	[HASH_RXQ_IPV6] = {
183 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
184 				IBV_RX_HASH_DST_IPV6),
185 		.dpdk_rss_hf = (ETH_RSS_IPV6 |
186 				ETH_RSS_FRAG_IPV6),
187 		.flow_priority = 1,
188 		.ip_version = MLX5_IPV6,
189 	},
190 	[HASH_RXQ_ETH] = {
191 		.hash_fields = 0,
192 		.dpdk_rss_hf = 0,
193 		.flow_priority = 2,
194 	},
195 };
196 
197 /* Number of entries in hash_rxq_init[]. */
198 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
199 
200 /** Structure for holding counter stats. */
201 struct mlx5_flow_counter_stats {
202 	uint64_t hits; /**< Number of packets matched by the rule. */
203 	uint64_t bytes; /**< Number of bytes matched by the rule. */
204 };
205 
206 /** Structure for Drop queue. */
207 struct mlx5_hrxq_drop {
208 	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
209 	struct ibv_qp *qp; /**< Verbs queue pair. */
210 	struct ibv_wq *wq; /**< Verbs work queue. */
211 	struct ibv_cq *cq; /**< Verbs completion queue. */
212 };
213 
214 /* Flows structures. */
215 struct mlx5_flow {
216 	uint64_t hash_fields; /**< Fields that participate in the hash. */
217 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
218 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
219 	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
220 };
221 
222 /* Drop flows structures. */
223 struct mlx5_flow_drop {
224 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
225 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
226 };
227 
228 struct rte_flow {
229 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
230 	uint32_t mark:1; /**< Set if the flow is marked. */
231 	uint32_t drop:1; /**< Drop queue. */
232 	struct rte_flow_action_rss rss_conf; /**< RSS configuration */
233 	uint16_t (*queues)[]; /**< Queues indexes to use. */
234 	uint8_t rss_key[40]; /**< copy of the RSS key. */
235 	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
236 	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
237 	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
238 	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
239 	/**< Flow with Rx queue. */
240 };
241 
242 /** Static initializer for items. */
243 #define ITEMS(...) \
244 	(const enum rte_flow_item_type []){ \
245 		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
246 	}
247 
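
/*
 * For illustration, ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6)
 * expands to the compound literal
 *
 *   (const enum rte_flow_item_type []){
 *           RTE_FLOW_ITEM_TYPE_IPV4,
 *           RTE_FLOW_ITEM_TYPE_IPV6,
 *           RTE_FLOW_ITEM_TYPE_END,
 *   }
 *
 * i.e. an anonymous END-terminated array used as an .items list below.
 */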
248 #define IS_TUNNEL(type) ( \
249 	(type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
250 	(type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
251 	(type) == RTE_FLOW_ITEM_TYPE_GRE)
252 
253 const uint32_t flow_ptype[] = {
254 	[RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
255 	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
256 	[RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
257 };
258 
259 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
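
/*
 * Example, assuming the standard rte_mbuf_ptype.h encoding where the tunnel
 * type occupies bits 12-15 (RTE_PTYPE_TUNNEL_MASK == 0x0000f000):
 *
 *   PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN) == (0x0000f000 & 0x00003000) >> 12 == 3
 *
 * which is the index used for the ptype_ext[] table below.
 */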
260 
261 const uint32_t ptype_ext[] = {
262 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
263 					      RTE_PTYPE_L4_UDP,
264 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)]	= RTE_PTYPE_TUNNEL_VXLAN_GPE |
265 						  RTE_PTYPE_L4_UDP,
266 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
267 };
268 
269 /** Structure to generate a simple graph of layers supported by the NIC. */
270 struct mlx5_flow_items {
271 	/** List of possible actions for these items. */
272 	const enum rte_flow_action_type *const actions;
273 	/** Bit-masks corresponding to the possibilities for the item. */
274 	const void *mask;
275 	/**
276 	 * Default bit-masks to use when item->mask is not provided. When
277 	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
278 	 * used instead.
279 	 */
280 	const void *default_mask;
281 	/** Bit-masks size in bytes. */
282 	const unsigned int mask_sz;
283 	/**
284 	 * Conversion function from rte_flow to NIC specific flow.
285 	 *
286 	 * @param item
287 	 *   rte_flow item to convert.
288 	 * @param default_mask
289 	 *   Default bit-masks to use when item->mask is not provided.
290 	 * @param data
291 	 *   Internal structure to store the conversion.
292 	 *
293 	 * @return
294 	 *   0 on success, a negative errno value otherwise and rte_errno is
295 	 *   set.
296 	 */
297 	int (*convert)(const struct rte_flow_item *item,
298 		       const void *default_mask,
299 		       struct mlx5_flow_data *data);
300 	/** Size in bytes of the destination structure. */
301 	const unsigned int dst_sz;
302 	/** List of possible following items.  */
303 	const enum rte_flow_item_type *const items;
304 };
305 
306 /** Valid actions for this PMD. */
307 static const enum rte_flow_action_type valid_actions[] = {
308 	RTE_FLOW_ACTION_TYPE_DROP,
309 	RTE_FLOW_ACTION_TYPE_QUEUE,
310 	RTE_FLOW_ACTION_TYPE_MARK,
311 	RTE_FLOW_ACTION_TYPE_FLAG,
312 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
313 	RTE_FLOW_ACTION_TYPE_COUNT,
314 #endif
315 	RTE_FLOW_ACTION_TYPE_END,
316 };
317 
318 /** Graph of supported items and associated actions. */
319 static const struct mlx5_flow_items mlx5_flow_items[] = {
320 	[RTE_FLOW_ITEM_TYPE_END] = {
321 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
322 			       RTE_FLOW_ITEM_TYPE_VXLAN,
323 			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
324 			       RTE_FLOW_ITEM_TYPE_GRE),
325 	},
326 	[RTE_FLOW_ITEM_TYPE_ETH] = {
327 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
328 			       RTE_FLOW_ITEM_TYPE_IPV4,
329 			       RTE_FLOW_ITEM_TYPE_IPV6),
330 		.actions = valid_actions,
331 		.mask = &(const struct rte_flow_item_eth){
332 			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
333 			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
334 			.type = -1,
335 		},
336 		.default_mask = &rte_flow_item_eth_mask,
337 		.mask_sz = sizeof(struct rte_flow_item_eth),
338 		.convert = mlx5_flow_create_eth,
339 		.dst_sz = sizeof(struct ibv_flow_spec_eth),
340 	},
341 	[RTE_FLOW_ITEM_TYPE_VLAN] = {
342 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
343 			       RTE_FLOW_ITEM_TYPE_IPV6),
344 		.actions = valid_actions,
345 		.mask = &(const struct rte_flow_item_vlan){
346 			.tci = -1,
347 			.inner_type = -1,
348 		},
349 		.default_mask = &rte_flow_item_vlan_mask,
350 		.mask_sz = sizeof(struct rte_flow_item_vlan),
351 		.convert = mlx5_flow_create_vlan,
352 		.dst_sz = 0,
353 	},
354 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
355 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
356 			       RTE_FLOW_ITEM_TYPE_TCP,
357 			       RTE_FLOW_ITEM_TYPE_GRE),
358 		.actions = valid_actions,
359 		.mask = &(const struct rte_flow_item_ipv4){
360 			.hdr = {
361 				.src_addr = -1,
362 				.dst_addr = -1,
363 				.type_of_service = -1,
364 				.next_proto_id = -1,
365 			},
366 		},
367 		.default_mask = &rte_flow_item_ipv4_mask,
368 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
369 		.convert = mlx5_flow_create_ipv4,
370 		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
371 	},
372 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
373 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
374 			       RTE_FLOW_ITEM_TYPE_TCP,
375 			       RTE_FLOW_ITEM_TYPE_GRE),
376 		.actions = valid_actions,
377 		.mask = &(const struct rte_flow_item_ipv6){
378 			.hdr = {
379 				.src_addr = {
380 					0xff, 0xff, 0xff, 0xff,
381 					0xff, 0xff, 0xff, 0xff,
382 					0xff, 0xff, 0xff, 0xff,
383 					0xff, 0xff, 0xff, 0xff,
384 				},
385 				.dst_addr = {
386 					0xff, 0xff, 0xff, 0xff,
387 					0xff, 0xff, 0xff, 0xff,
388 					0xff, 0xff, 0xff, 0xff,
389 					0xff, 0xff, 0xff, 0xff,
390 				},
391 				.vtc_flow = -1,
392 				.proto = -1,
393 				.hop_limits = -1,
394 			},
395 		},
396 		.default_mask = &rte_flow_item_ipv6_mask,
397 		.mask_sz = sizeof(struct rte_flow_item_ipv6),
398 		.convert = mlx5_flow_create_ipv6,
399 		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
400 	},
401 	[RTE_FLOW_ITEM_TYPE_UDP] = {
402 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
403 			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
404 		.actions = valid_actions,
405 		.mask = &(const struct rte_flow_item_udp){
406 			.hdr = {
407 				.src_port = -1,
408 				.dst_port = -1,
409 			},
410 		},
411 		.default_mask = &rte_flow_item_udp_mask,
412 		.mask_sz = sizeof(struct rte_flow_item_udp),
413 		.convert = mlx5_flow_create_udp,
414 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
415 	},
416 	[RTE_FLOW_ITEM_TYPE_TCP] = {
417 		.actions = valid_actions,
418 		.mask = &(const struct rte_flow_item_tcp){
419 			.hdr = {
420 				.src_port = -1,
421 				.dst_port = -1,
422 			},
423 		},
424 		.default_mask = &rte_flow_item_tcp_mask,
425 		.mask_sz = sizeof(struct rte_flow_item_tcp),
426 		.convert = mlx5_flow_create_tcp,
427 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
428 	},
429 	[RTE_FLOW_ITEM_TYPE_GRE] = {
430 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
431 			       RTE_FLOW_ITEM_TYPE_IPV4,
432 			       RTE_FLOW_ITEM_TYPE_IPV6),
433 		.actions = valid_actions,
434 		.mask = &(const struct rte_flow_item_gre){
435 			.protocol = -1,
436 		},
437 		.default_mask = &rte_flow_item_gre_mask,
438 		.mask_sz = sizeof(struct rte_flow_item_gre),
439 		.convert = mlx5_flow_create_gre,
440 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
441 	},
442 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
443 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
444 			       RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
445 			       RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
446 		.actions = valid_actions,
447 		.mask = &(const struct rte_flow_item_vxlan){
448 			.vni = "\xff\xff\xff",
449 		},
450 		.default_mask = &rte_flow_item_vxlan_mask,
451 		.mask_sz = sizeof(struct rte_flow_item_vxlan),
452 		.convert = mlx5_flow_create_vxlan,
453 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
454 	},
455 	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = {
456 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
457 			       RTE_FLOW_ITEM_TYPE_IPV4,
458 			       RTE_FLOW_ITEM_TYPE_IPV6),
459 		.actions = valid_actions,
460 		.mask = &(const struct rte_flow_item_vxlan_gpe){
461 			.vni = "\xff\xff\xff",
462 		},
463 		.default_mask = &rte_flow_item_vxlan_gpe_mask,
464 		.mask_sz = sizeof(struct rte_flow_item_vxlan_gpe),
465 		.convert = mlx5_flow_create_vxlan_gpe,
466 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
467 	},
468 };
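
/*
 * Walking the graph above, a typical tunnel pattern such as
 *
 *   eth / ipv4 / udp / vxlan / eth / ipv4 / tcp / end
 *
 * is accepted: each item appears in the .items list of its predecessor
 * (starting from the RTE_FLOW_ITEM_TYPE_END entry for the first item) and
 * its .convert callback fills the corresponding Verbs specification.
 */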
469 
470 /** Structure to pass to the conversion function. */
471 struct mlx5_flow_parse {
472 	uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
473 	uint32_t create:1;
474 	/**< Whether resources should remain after a validate. */
475 	uint32_t drop:1; /**< Target is a drop queue. */
476 	uint32_t mark:1; /**< Mark is present in the flow. */
477 	uint32_t count:1; /**< Count is present in the flow. */
478 	uint32_t mark_id; /**< Mark identifier. */
479 	struct rte_flow_action_rss rss_conf; /**< RSS configuration */
480 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
481 	uint8_t rss_key[40]; /**< copy of the RSS key. */
482 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
483 	enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
484 	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
485 	struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
486 	struct {
487 		struct ibv_flow_attr *ibv_attr;
488 		/**< Pointer to Verbs attributes. */
489 		unsigned int offset;
490 		/**< Current position or total size of the attribute. */
491 		uint64_t hash_fields; /**< Verbs hash fields. */
492 	} queue[RTE_DIM(hash_rxq_init)];
493 };
494 
495 static const struct rte_flow_ops mlx5_flow_ops = {
496 	.validate = mlx5_flow_validate,
497 	.create = mlx5_flow_create,
498 	.destroy = mlx5_flow_destroy,
499 	.flush = mlx5_flow_flush,
500 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
501 	.query = mlx5_flow_query,
502 #else
503 	.query = NULL,
504 #endif
505 	.isolate = mlx5_flow_isolate,
506 };
507 
508 /* Convert FDIR request to Generic flow. */
509 struct mlx5_fdir {
510 	struct rte_flow_attr attr;
511 	struct rte_flow_action actions[2];
512 	struct rte_flow_item items[4];
513 	struct rte_flow_item_eth l2;
514 	struct rte_flow_item_eth l2_mask;
515 	union {
516 		struct rte_flow_item_ipv4 ipv4;
517 		struct rte_flow_item_ipv6 ipv6;
518 	} l3;
519 	union {
520 		struct rte_flow_item_ipv4 ipv4;
521 		struct rte_flow_item_ipv6 ipv6;
522 	} l3_mask;
523 	union {
524 		struct rte_flow_item_udp udp;
525 		struct rte_flow_item_tcp tcp;
526 	} l4;
527 	union {
528 		struct rte_flow_item_udp udp;
529 		struct rte_flow_item_tcp tcp;
530 	} l4_mask;
531 	struct rte_flow_action_queue queue;
532 };
533 
534 /* Verbs specification header. */
535 struct ibv_spec_header {
536 	enum ibv_flow_spec_type type;
537 	uint16_t size;
538 };
539 
540 /**
541  * Check that the item is fully supported by the NIC matching capability.
542  *
543  * @param item[in]
544  *   Item specification.
545  * @param mask[in]
546  *   Bit-masks covering supported fields to compare with spec, last and mask in
547  *   \item.
548  * @param size
549  *   Bit-Mask size in bytes.
550  *
551  * @return
552  *   0 on success, a negative errno value otherwise and rte_errno is set.
553  */
554 static int
555 mlx5_flow_item_validate(const struct rte_flow_item *item,
556 			const uint8_t *mask, unsigned int size)
557 {
558 	unsigned int i;
559 	const uint8_t *spec = item->spec;
560 	const uint8_t *last = item->last;
561 	const uint8_t *m = item->mask ? item->mask : mask;
562 
563 	if (!spec && (item->mask || last))
564 		goto error;
565 	if (!spec)
566 		return 0;
567 	/*
568 	 * Single-pass check to make sure that:
569 	 * - item->mask is supported, no bits are set outside mask.
570 	 * - Both masked item->spec and item->last are equal (no range
571 	 *   supported).
572 	 */
573 	for (i = 0; i < size; i++) {
574 		if (!m[i])
575 			continue;
576 		if ((m[i] | mask[i]) != mask[i])
577 			goto error;
578 		if (last && ((spec[i] & m[i]) != (last[i] & m[i])))
579 			goto error;
580 	}
581 	return 0;
582 error:
583 	rte_errno = ENOTSUP;
584 	return -rte_errno;
585 }
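
/*
 * Worked example of the per-byte checks above: with a supported mask byte
 * mask[i] == 0x0f, a user mask byte m[i] == 0x03 passes since
 * (0x03 | 0x0f) == 0x0f, while m[i] == 0x13 is rejected because bit 0x10
 * falls outside the supported mask.  When item->last is provided, spec and
 * last must be identical under m[i], as ranges are not supported.
 */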
586 
587 /**
588  * Validate flow rule attributes.
589  *
590  * @param[in] attr
591  *   Flow rule attributes.
592  * @param[out] error
593  *   Perform verbose error reporting if not NULL.
594  *
595  * @return
596  *   0 on success, a negative errno value otherwise and rte_errno is set.
597  */
598 static int
599 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
600 			     struct rte_flow_error *error)
601 {
602 	if (attr->group) {
603 		rte_flow_error_set(error, ENOTSUP,
604 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
605 				   NULL,
606 				   "groups are not supported");
607 		return -rte_errno;
608 	}
609 	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
610 		rte_flow_error_set(error, ENOTSUP,
611 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
612 				   NULL,
613 				   "priorities are not supported");
614 		return -rte_errno;
615 	}
616 	if (attr->egress) {
617 		rte_flow_error_set(error, ENOTSUP,
618 				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
619 				   NULL,
620 				   "egress is not supported");
621 		return -rte_errno;
622 	}
623 	if (attr->transfer) {
624 		rte_flow_error_set(error, ENOTSUP,
625 				   RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
626 				   NULL,
627 				   "transfer is not supported");
628 		return -rte_errno;
629 	}
630 	if (!attr->ingress) {
631 		rte_flow_error_set(error, ENOTSUP,
632 				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
633 				   NULL,
634 				   "only ingress is supported");
635 		return -rte_errno;
636 	}
637 	return 0;
638 }
639 
640 /**
641  * Extract the requested actions into the parser.
642  *
643  * @param dev
644  *   Pointer to Ethernet device.
645  * @param[in] actions
646  *   Associated actions (list terminated by the END action).
647  * @param[out] error
648  *   Perform verbose error reporting if not NULL.
649  * @param[in, out] parser
650  *   Internal parser structure.
651  *
652  * @return
653  *   0 on success, a negative errno value otherwise and rte_errno is set.
654  */
655 static int
656 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
657 			  const struct rte_flow_action actions[],
658 			  struct rte_flow_error *error,
659 			  struct mlx5_flow_parse *parser)
660 {
661 	enum { FATE = 1, MARK = 2, COUNT = 4, };
662 	uint32_t overlap = 0;
663 	struct priv *priv = dev->data->dev_private;
664 
665 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
666 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
667 			continue;
668 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
669 			if (overlap & FATE)
670 				goto exit_action_overlap;
671 			overlap |= FATE;
672 			parser->drop = 1;
673 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
674 			const struct rte_flow_action_queue *queue =
675 				(const struct rte_flow_action_queue *)
676 				actions->conf;
677 
678 			if (overlap & FATE)
679 				goto exit_action_overlap;
680 			overlap |= FATE;
681 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
682 				goto exit_action_not_supported;
683 			parser->queues[0] = queue->index;
684 			parser->rss_conf = (struct rte_flow_action_rss){
685 				.queue_num = 1,
686 				.queue = parser->queues,
687 			};
688 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
689 			const struct rte_flow_action_rss *rss =
690 				(const struct rte_flow_action_rss *)
691 				actions->conf;
692 			const uint8_t *rss_key;
693 			uint32_t rss_key_len;
694 			uint16_t n;
695 
696 			if (overlap & FATE)
697 				goto exit_action_overlap;
698 			overlap |= FATE;
699 			if (rss->func &&
700 			    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
701 				rte_flow_error_set(error, EINVAL,
702 						   RTE_FLOW_ERROR_TYPE_ACTION,
703 						   actions,
704 						   "the only supported RSS hash"
705 						   " function is Toeplitz");
706 				return -rte_errno;
707 			}
708 #ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
709 			if (parser->rss_conf.level > 1) {
710 				rte_flow_error_set(error, EINVAL,
711 						   RTE_FLOW_ERROR_TYPE_ACTION,
712 						   actions,
713 						   "a nonzero RSS encapsulation"
714 						   " level is not supported");
715 				return -rte_errno;
716 			}
717 #endif
718 			if (parser->rss_conf.level > 2) {
719 				rte_flow_error_set(error, EINVAL,
720 						   RTE_FLOW_ERROR_TYPE_ACTION,
721 						   actions,
722 						   "RSS encapsulation level"
723 						   " > 1 is not supported");
724 				return -rte_errno;
725 			}
726 			if (rss->types & MLX5_RSS_HF_MASK) {
727 				rte_flow_error_set(error, EINVAL,
728 						   RTE_FLOW_ERROR_TYPE_ACTION,
729 						   actions,
730 						   "unsupported RSS type"
731 						   " requested");
732 				return -rte_errno;
733 			}
734 			if (rss->key_len) {
735 				rss_key_len = rss->key_len;
736 				rss_key = rss->key;
737 			} else {
738 				rss_key_len = rss_hash_default_key_len;
739 				rss_key = rss_hash_default_key;
740 			}
741 			if (rss_key_len != RTE_DIM(parser->rss_key)) {
742 				rte_flow_error_set(error, EINVAL,
743 						   RTE_FLOW_ERROR_TYPE_ACTION,
744 						   actions,
745 						   "RSS hash key must be"
746 						   " exactly 40 bytes long");
747 				return -rte_errno;
748 			}
749 			if (!rss->queue_num) {
750 				rte_flow_error_set(error, EINVAL,
751 						   RTE_FLOW_ERROR_TYPE_ACTION,
752 						   actions,
753 						   "no valid queues");
754 				return -rte_errno;
755 			}
756 			if (rss->queue_num > RTE_DIM(parser->queues)) {
757 				rte_flow_error_set(error, EINVAL,
758 						   RTE_FLOW_ERROR_TYPE_ACTION,
759 						   actions,
760 						   "too many queues for RSS"
761 						   " context");
762 				return -rte_errno;
763 			}
764 			for (n = 0; n < rss->queue_num; ++n) {
765 				if (rss->queue[n] >= priv->rxqs_n) {
766 					rte_flow_error_set(error, EINVAL,
767 						   RTE_FLOW_ERROR_TYPE_ACTION,
768 						   actions,
769 						   "queue id > number of"
770 						   " queues");
771 					return -rte_errno;
772 				}
773 			}
774 			parser->rss_conf = (struct rte_flow_action_rss){
775 				.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
776 				.level = rss->level,
777 				.types = rss->types,
778 				.key_len = rss_key_len,
779 				.queue_num = rss->queue_num,
780 				.key = memcpy(parser->rss_key, rss_key,
781 					      sizeof(*rss_key) * rss_key_len),
782 				.queue = memcpy(parser->queues, rss->queue,
783 						sizeof(*rss->queue) *
784 						rss->queue_num),
785 			};
786 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
787 			const struct rte_flow_action_mark *mark =
788 				(const struct rte_flow_action_mark *)
789 				actions->conf;
790 
791 			if (overlap & MARK)
792 				goto exit_action_overlap;
793 			overlap |= MARK;
794 			if (!mark) {
795 				rte_flow_error_set(error, EINVAL,
796 						   RTE_FLOW_ERROR_TYPE_ACTION,
797 						   actions,
798 						   "mark must be defined");
799 				return -rte_errno;
800 			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
801 				rte_flow_error_set(error, ENOTSUP,
802 						   RTE_FLOW_ERROR_TYPE_ACTION,
803 						   actions,
804 						   "mark must be between 0"
805 						   " and 16777199");
806 				return -rte_errno;
807 			}
808 			parser->mark = 1;
809 			parser->mark_id = mark->id;
810 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
811 			if (overlap & MARK)
812 				goto exit_action_overlap;
813 			overlap |= MARK;
814 			parser->mark = 1;
815 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
816 			   priv->config.flow_counter_en) {
817 			if (overlap & COUNT)
818 				goto exit_action_overlap;
819 			overlap |= COUNT;
820 			parser->count = 1;
821 		} else {
822 			goto exit_action_not_supported;
823 		}
824 	}
825 	/* When fate is unknown, drop traffic. */
826 	if (!(overlap & FATE))
827 		parser->drop = 1;
828 	if (parser->drop && parser->mark)
829 		parser->mark = 0;
830 	if (!parser->rss_conf.queue_num && !parser->drop) {
831 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
832 				   NULL, "no valid action");
833 		return -rte_errno;
834 	}
835 	return 0;
836 exit_action_not_supported:
837 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
838 			   actions, "action not supported");
839 	return -rte_errno;
840 exit_action_overlap:
841 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
842 			   actions, "overlapping actions are not supported");
843 	return -rte_errno;
844 }
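
/*
 * For instance (assuming flow counters are enabled), the action list
 * "mark 42 / queue index 3 / count / end" is accepted: one fate action,
 * one mark-class action and one count.  By contrast, "queue index 3 /
 * drop / end" fails with "overlapping actions are not supported" since
 * both actions belong to the FATE class.
 */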
845 
846 /**
847  * Validate items.
848  *
849  * @param[in] items
850  *   Pattern specification (list terminated by the END pattern item).
851  * @param[out] error
852  *   Perform verbose error reporting if not NULL.
853  * @param[in, out] parser
854  *   Internal parser structure.
855  *
856  * @return
857  *   0 on success, a negative errno value otherwise and rte_errno is set.
858  */
859 static int
860 mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
861 				 const struct rte_flow_item items[],
862 				 struct rte_flow_error *error,
863 				 struct mlx5_flow_parse *parser)
864 {
865 	struct priv *priv = dev->data->dev_private;
866 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
867 	unsigned int i;
868 	int ret = 0;
869 
870 	/* Initialise the offsets to start after the verbs attribute. */
871 	for (i = 0; i != hash_rxq_init_n; ++i)
872 		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
873 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
874 		const struct mlx5_flow_items *token = NULL;
875 		unsigned int n;
876 
877 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
878 			continue;
879 		for (i = 0;
880 		     cur_item->items &&
881 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
882 		     ++i) {
883 			if (cur_item->items[i] == items->type) {
884 				token = &mlx5_flow_items[items->type];
885 				break;
886 			}
887 		}
888 		if (!token) {
889 			ret = -ENOTSUP;
890 			goto exit_item_not_supported;
891 		}
892 		cur_item = token;
893 		ret = mlx5_flow_item_validate(items,
894 					      (const uint8_t *)cur_item->mask,
895 					      cur_item->mask_sz);
896 		if (ret)
897 			goto exit_item_not_supported;
898 		if (IS_TUNNEL(items->type)) {
899 			if (parser->tunnel) {
900 				rte_flow_error_set(error, ENOTSUP,
901 						   RTE_FLOW_ERROR_TYPE_ITEM,
902 						   items,
903 						   "Cannot recognize multiple"
904 						   " tunnel encapsulations.");
905 				return -rte_errno;
906 			}
907 			if (!priv->config.tunnel_en &&
908 			    parser->rss_conf.level > 1) {
909 				rte_flow_error_set(error, ENOTSUP,
910 					RTE_FLOW_ERROR_TYPE_ITEM,
911 					items,
912 					"RSS on tunnel is not supported");
913 				return -rte_errno;
914 			}
915 			parser->inner = IBV_FLOW_SPEC_INNER;
916 			parser->tunnel = flow_ptype[items->type];
917 		}
918 		if (parser->drop) {
919 			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
920 		} else {
921 			for (n = 0; n != hash_rxq_init_n; ++n)
922 				parser->queue[n].offset += cur_item->dst_sz;
923 		}
924 	}
925 	if (parser->drop) {
926 		parser->queue[HASH_RXQ_ETH].offset +=
927 			sizeof(struct ibv_flow_spec_action_drop);
928 	}
929 	if (parser->mark) {
930 		for (i = 0; i != hash_rxq_init_n; ++i)
931 			parser->queue[i].offset +=
932 				sizeof(struct ibv_flow_spec_action_tag);
933 	}
934 	if (parser->count) {
935 		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
936 
937 		for (i = 0; i != hash_rxq_init_n; ++i)
938 			parser->queue[i].offset += size;
939 	}
940 	return 0;
941 exit_item_not_supported:
942 	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
943 				  items, "item not supported");
944 }
945 
946 /**
947  * Allocate memory space to store verbs flow attributes.
948  *
949  * @param[in] size
950  *   Number of bytes to allocate.
951  * @param[out] error
952  *   Perform verbose error reporting if not NULL.
953  *
954  * @return
955  *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
956  */
957 static struct ibv_flow_attr *
958 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
959 {
960 	struct ibv_flow_attr *ibv_attr;
961 
962 	ibv_attr = rte_calloc(__func__, 1, size, 0);
963 	if (!ibv_attr) {
964 		rte_flow_error_set(error, ENOMEM,
965 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
966 				   NULL,
967 				   "cannot allocate verbs spec attributes");
968 		return NULL;
969 	}
970 	return ibv_attr;
971 }
972 
973 /**
974  * Give inner packet matching a higher priority than non-inner (outer)
975  * matching.
976  *
977  * @param dev
978  *   Pointer to Ethernet device.
979  * @param[in, out] parser
980  *   Internal parser structure.
981  * @param attr
982  *   User flow attribute.
983  */
984 static void
985 mlx5_flow_update_priority(struct rte_eth_dev *dev,
986 			  struct mlx5_flow_parse *parser,
987 			  const struct rte_flow_attr *attr)
988 {
989 	struct priv *priv = dev->data->dev_private;
990 	unsigned int i;
991 	uint16_t priority;
992 
993 	/*			8 priorities	>= 16 priorities
994 	 * Control flow:	4-7		8-15
995 	 * User normal flow:	1-3		4-7
996 	 * User tunnel flow:	0-2		0-3
997 	 */
998 	priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
999 	if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1000 		priority /= 2;
1001 	/*
1002 	 * Lower the Verbs priority of non-tunnel flows by 1 if only 8 Verbs
1003 	 * priorities are supported, by 4 otherwise.
1004 	 */
1005 	if (!parser->inner) {
1006 		if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1007 			priority += 1;
1008 		else
1009 			priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
1010 	}
1011 	if (parser->drop) {
1012 		parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
1013 				hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1014 		return;
1015 	}
1016 	for (i = 0; i != hash_rxq_init_n; ++i) {
1017 		if (!parser->queue[i].ibv_attr)
1018 			continue;
1019 		parser->queue[i].ibv_attr->priority = priority +
1020 				hash_rxq_init[i].flow_priority;
1021 	}
1022 }
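
/*
 * Numeric example of the mapping above, assuming 16 Verbs priorities and
 * attr->priority == 0: a non-tunnel (outer) flow gets 0 * 8 + 4 = 4, then
 * each hash Rx queue type adds its flow_priority (0..2), giving 4..6,
 * i.e. within the "User normal flow: 4-7" range of the table above.  A
 * tunnel (inner) flow skips the +4 adjustment and lands in 0..3.
 */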
1023 
1024 /**
1025  * Finalise verbs flow attributes.
1026  *
1027  * @param[in, out] parser
1028  *   Internal parser structure.
1029  */
1030 static void
1031 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
1032 {
1033 	unsigned int i;
1034 	uint32_t inner = parser->inner;
1035 
1036 	/* Don't create extra flows for outer RSS. */
1037 	if (parser->tunnel && parser->rss_conf.level < 2)
1038 		return;
1039 	/*
1040 	 * Fill missing layers in verbs specifications, or compute the correct
1041 	 * offset to allocate the memory space for the attributes and
1042 	 * specifications.
1043 	 */
1044 	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1045 		union {
1046 			struct ibv_flow_spec_ipv4_ext ipv4;
1047 			struct ibv_flow_spec_ipv6 ipv6;
1048 			struct ibv_flow_spec_tcp_udp udp_tcp;
1049 			struct ibv_flow_spec_eth eth;
1050 		} specs;
1051 		void *dst;
1052 		uint16_t size;
1053 
1054 		if (i == parser->layer)
1055 			continue;
1056 		if (parser->layer == HASH_RXQ_ETH ||
1057 		    parser->layer == HASH_RXQ_TUNNEL) {
1058 			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1059 				size = sizeof(struct ibv_flow_spec_ipv4_ext);
1060 				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1061 					.type = inner | IBV_FLOW_SPEC_IPV4_EXT,
1062 					.size = size,
1063 				};
1064 			} else {
1065 				size = sizeof(struct ibv_flow_spec_ipv6);
1066 				specs.ipv6 = (struct ibv_flow_spec_ipv6){
1067 					.type = inner | IBV_FLOW_SPEC_IPV6,
1068 					.size = size,
1069 				};
1070 			}
1071 			if (parser->queue[i].ibv_attr) {
1072 				dst = (void *)((uintptr_t)
1073 					       parser->queue[i].ibv_attr +
1074 					       parser->queue[i].offset);
1075 				memcpy(dst, &specs, size);
1076 				++parser->queue[i].ibv_attr->num_of_specs;
1077 			}
1078 			parser->queue[i].offset += size;
1079 		}
1080 		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1081 		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1082 			size = sizeof(struct ibv_flow_spec_tcp_udp);
1083 			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1084 				.type = inner | ((i == HASH_RXQ_UDPV4 ||
1085 					  i == HASH_RXQ_UDPV6) ?
1086 					 IBV_FLOW_SPEC_UDP :
1087 					 IBV_FLOW_SPEC_TCP),
1088 				.size = size,
1089 			};
1090 			if (parser->queue[i].ibv_attr) {
1091 				dst = (void *)((uintptr_t)
1092 					       parser->queue[i].ibv_attr +
1093 					       parser->queue[i].offset);
1094 				memcpy(dst, &specs, size);
1095 				++parser->queue[i].ibv_attr->num_of_specs;
1096 			}
1097 			parser->queue[i].offset += size;
1098 		}
1099 	}
1100 }
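
/*
 * For example, when the pattern stops at eth (layer == HASH_RXQ_ETH), the
 * IPv4/IPv6 hash Rx queue types receive an empty (wildcard) IPv4 or IPv6
 * specification and the TCP/UDP types additionally receive an empty TCP or
 * UDP specification, so that every hash Rx queue type carries a complete
 * stack of layers.
 */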
1101 
1102 /**
1103  * Update flows according to pattern and RSS hash fields.
1104  *
1105  * @param[in, out] parser
1106  *   Internal parser structure.
1107  *
1108  * @return
1109  *   0 on success, a negative errno value otherwise and rte_errno is set.
1110  */
1111 static int
1112 mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
1113 {
1114 	unsigned int i;
1115 	enum hash_rxq_type start;
1116 	enum hash_rxq_type layer;
1117 	int outer = parser->tunnel && parser->rss_conf.level < 2;
1118 	uint64_t rss = parser->rss_conf.types;
1119 
1120 	/* Default to outer RSS. */
1121 	if (!parser->rss_conf.level)
1122 		parser->rss_conf.level = 1;
1123 	layer = outer ? parser->out_layer : parser->layer;
1124 	if (layer == HASH_RXQ_TUNNEL)
1125 		layer = HASH_RXQ_ETH;
1126 	if (outer) {
1127 		/* Only one hash type for outer RSS. */
1128 		if (rss && layer == HASH_RXQ_ETH) {
1129 			start = HASH_RXQ_TCPV4;
1130 		} else if (rss && layer != HASH_RXQ_ETH &&
1131 			   !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
1132 			/* If RSS does not match the L4 pattern, try L3 RSS. */
1133 			if (layer < HASH_RXQ_IPV4)
1134 				layer = HASH_RXQ_IPV4;
1135 			else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
1136 				layer = HASH_RXQ_IPV6;
1137 			start = layer;
1138 		} else {
1139 			start = layer;
1140 		}
1141 		/* Scan first valid hash type. */
1142 		for (i = start; rss && i <= layer; ++i) {
1143 			if (!parser->queue[i].ibv_attr)
1144 				continue;
1145 			if (hash_rxq_init[i].dpdk_rss_hf & rss)
1146 				break;
1147 		}
1148 		if (rss && i <= layer)
1149 			parser->queue[layer].hash_fields =
1150 					hash_rxq_init[i].hash_fields;
1151 		/* Trim unused hash types. */
1152 		for (i = 0; i != hash_rxq_init_n; ++i) {
1153 			if (parser->queue[i].ibv_attr && i != layer) {
1154 				rte_free(parser->queue[i].ibv_attr);
1155 				parser->queue[i].ibv_attr = NULL;
1156 			}
1157 		}
1158 	} else {
1159 		/* Expand for inner or normal RSS. */
1160 		if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
1161 			start = HASH_RXQ_TCPV4;
1162 		else if (rss && layer == HASH_RXQ_IPV6)
1163 			start = HASH_RXQ_TCPV6;
1164 		else
1165 			start = layer;
1166 		/* For an L4 pattern, fall back to L3 RSS if no L4 RSS matches. */
1167 		/* Trim unused hash types. */
1168 		for (i = 0; i != hash_rxq_init_n; ++i) {
1169 			if (!parser->queue[i].ibv_attr)
1170 				continue;
1171 			if (i < start || i > layer) {
1172 				rte_free(parser->queue[i].ibv_attr);
1173 				parser->queue[i].ibv_attr = NULL;
1174 				continue;
1175 			}
1176 			if (!rss)
1177 				continue;
1178 			if (hash_rxq_init[i].dpdk_rss_hf & rss) {
1179 				parser->queue[i].hash_fields =
1180 						hash_rxq_init[i].hash_fields;
1181 			} else if (i != layer) {
1182 				/* Remove unused RSS expansion. */
1183 				rte_free(parser->queue[i].ibv_attr);
1184 				parser->queue[i].ibv_attr = NULL;
1185 			} else if (layer < HASH_RXQ_IPV4 &&
1186 				   (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &
1187 				    rss)) {
1188 				/* Allow IPv4 RSS on L4 pattern. */
1189 				parser->queue[i].hash_fields =
1190 					hash_rxq_init[HASH_RXQ_IPV4]
1191 						.hash_fields;
1192 			} else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
1193 				   (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &
1194 				    rss)) {
1195 				/* Allow IPv6 RSS on L4 pattern. */
1196 				parser->queue[i].hash_fields =
1197 					hash_rxq_init[HASH_RXQ_IPV6]
1198 						.hash_fields;
1199 			}
1200 		}
1201 	}
1202 	return 0;
1203 }
1204 
1205 /**
1206  * Validate and convert a flow supported by the NIC.
1207  *
1208  * @param dev
1209  *   Pointer to Ethernet device.
1210  * @param[in] attr
1211  *   Flow rule attributes.
1212  * @param[in] pattern
1213  *   Pattern specification (list terminated by the END pattern item).
1214  * @param[in] actions
1215  *   Associated actions (list terminated by the END action).
1216  * @param[out] error
1217  *   Perform verbose error reporting if not NULL.
1218  * @param[in, out] parser
1219  *   Internal parser structure.
1220  *
1221  * @return
1222  *   0 on success, a negative errno value otherwise and rte_errno is set.
1223  */
1224 static int
1225 mlx5_flow_convert(struct rte_eth_dev *dev,
1226 		  const struct rte_flow_attr *attr,
1227 		  const struct rte_flow_item items[],
1228 		  const struct rte_flow_action actions[],
1229 		  struct rte_flow_error *error,
1230 		  struct mlx5_flow_parse *parser)
1231 {
1232 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1233 	unsigned int i;
1234 	int ret;
1235 
1236 	/* First step. Validate the attributes, items and actions. */
1237 	*parser = (struct mlx5_flow_parse){
1238 		.create = parser->create,
1239 		.layer = HASH_RXQ_ETH,
1240 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
1241 	};
1242 	ret = mlx5_flow_convert_attributes(attr, error);
1243 	if (ret)
1244 		return ret;
1245 	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1246 	if (ret)
1247 		return ret;
1248 	ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
1249 	if (ret)
1250 		return ret;
1251 	mlx5_flow_convert_finalise(parser);
1252 	/*
1253 	 * Second step.
1254 	 * Allocate the memory space to store verbs specifications.
1255 	 */
1256 	if (parser->drop) {
1257 		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1258 
1259 		parser->queue[HASH_RXQ_ETH].ibv_attr =
1260 			mlx5_flow_convert_allocate(offset, error);
1261 		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1262 			goto exit_enomem;
1263 		parser->queue[HASH_RXQ_ETH].offset =
1264 			sizeof(struct ibv_flow_attr);
1265 	} else {
1266 		for (i = 0; i != hash_rxq_init_n; ++i) {
1267 			unsigned int offset;
1268 
1269 			offset = parser->queue[i].offset;
1270 			parser->queue[i].ibv_attr =
1271 				mlx5_flow_convert_allocate(offset, error);
1272 			if (!parser->queue[i].ibv_attr)
1273 				goto exit_enomem;
1274 			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1275 		}
1276 	}
1277 	/* Third step. Conversion parse, fill the specifications. */
1278 	parser->inner = 0;
1279 	parser->tunnel = 0;
1280 	parser->layer = HASH_RXQ_ETH;
1281 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1282 		struct mlx5_flow_data data = {
1283 			.dev = dev,
1284 			.parser = parser,
1285 			.error = error,
1286 		};
1287 
1288 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1289 			continue;
1290 		cur_item = &mlx5_flow_items[items->type];
1291 		ret = cur_item->convert(items,
1292 					(cur_item->default_mask ?
1293 					 cur_item->default_mask :
1294 					 cur_item->mask),
1295 					 &data);
1296 		if (ret)
1297 			goto exit_free;
1298 	}
1299 	if (!parser->drop) {
1300 		/* RSS check, remove unused hash types. */
1301 		ret = mlx5_flow_convert_rss(parser);
1302 		if (ret)
1303 			goto exit_free;
1304 		/* Complete missing specification. */
1305 		mlx5_flow_convert_finalise(parser);
1306 	}
1307 	mlx5_flow_update_priority(dev, parser, attr);
1308 	if (parser->mark)
1309 		mlx5_flow_create_flag_mark(parser, parser->mark_id);
1310 	if (parser->count && parser->create) {
1311 		mlx5_flow_create_count(dev, parser);
1312 		if (!parser->cs)
1313 			goto exit_count_error;
1314 	}
1315 exit_free:
1316 	/* Only verification is expected, all resources should be released. */
1317 	if (!parser->create) {
1318 		for (i = 0; i != hash_rxq_init_n; ++i) {
1319 			if (parser->queue[i].ibv_attr) {
1320 				rte_free(parser->queue[i].ibv_attr);
1321 				parser->queue[i].ibv_attr = NULL;
1322 			}
1323 		}
1324 	}
1325 	return ret;
1326 exit_enomem:
1327 	for (i = 0; i != hash_rxq_init_n; ++i) {
1328 		if (parser->queue[i].ibv_attr) {
1329 			rte_free(parser->queue[i].ibv_attr);
1330 			parser->queue[i].ibv_attr = NULL;
1331 		}
1332 	}
1333 	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1334 			   NULL, "cannot allocate verbs spec attributes");
1335 	return -rte_errno;
1336 exit_count_error:
1337 	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1338 			   NULL, "cannot create counter");
1339 	return -rte_errno;
1340 }
1341 
1342 /**
1343  * Copy the created specification into the flow.
1344  *
1345  * @param parser
1346  *   Internal parser structure.
1347  * @param src
1348  *   Created specification to copy.
1349  * @param size
1350  *   Size in bytes of the specification to copy.
1351  */
1352 static void
1353 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1354 		      unsigned int size)
1355 {
1356 	unsigned int i;
1357 	void *dst;
1358 
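	/*
	 * parser->queue[i].offset acts as a write cursor: it is reset to
	 * sizeof(struct ibv_flow_attr) once the attribute memory has been
	 * allocated, so each call appends its specification right after the
	 * previous one and bumps num_of_specs accordingly.
	 */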
1359 	for (i = 0; i != hash_rxq_init_n; ++i) {
1360 		if (!parser->queue[i].ibv_attr)
1361 			continue;
1362 		dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1363 				parser->queue[i].offset);
1364 		memcpy(dst, src, size);
1365 		++parser->queue[i].ibv_attr->num_of_specs;
1366 		parser->queue[i].offset += size;
1367 	}
1368 }
1369 
1370 /**
1371  * Convert Ethernet item to Verbs specification.
1372  *
1373  * @param item[in]
1374  *   Item specification.
1375  * @param default_mask[in]
1376  *   Default bit-masks to use when item->mask is not provided.
1377  * @param data[in, out]
1378  *   User structure.
1379  *
1380  * @return
1381  *   0 on success, a negative errno value otherwise and rte_errno is set.
1382  */
1383 static int
1384 mlx5_flow_create_eth(const struct rte_flow_item *item,
1385 		     const void *default_mask,
1386 		     struct mlx5_flow_data *data)
1387 {
1388 	const struct rte_flow_item_eth *spec = item->spec;
1389 	const struct rte_flow_item_eth *mask = item->mask;
1390 	struct mlx5_flow_parse *parser = data->parser;
1391 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1392 	struct ibv_flow_spec_eth eth = {
1393 		.type = parser->inner | IBV_FLOW_SPEC_ETH,
1394 		.size = eth_size,
1395 	};
1396 
1397 	parser->layer = HASH_RXQ_ETH;
1398 	if (spec) {
1399 		unsigned int i;
1400 
1401 		if (!mask)
1402 			mask = default_mask;
1403 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1404 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1405 		eth.val.ether_type = spec->type;
1406 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1407 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1408 		eth.mask.ether_type = mask->type;
1409 		/* Remove unwanted bits from values. */
1410 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1411 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1412 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
1413 		}
1414 		eth.val.ether_type &= eth.mask.ether_type;
1415 	}
1416 	mlx5_flow_create_copy(parser, &eth, eth_size);
1417 	return 0;
1418 }
1419 
1420 /**
1421  * Convert VLAN item to Verbs specification.
1422  *
1423  * @param item[in]
1424  *   Item specification.
1425  * @param default_mask[in]
1426  *   Default bit-masks to use when item->mask is not provided.
1427  * @param data[in, out]
1428  *   User structure.
1429  *
1430  * @return
1431  *   0 on success, a negative errno value otherwise and rte_errno is set.
1432  */
1433 static int
1434 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1435 		      const void *default_mask,
1436 		      struct mlx5_flow_data *data)
1437 {
1438 	const struct rte_flow_item_vlan *spec = item->spec;
1439 	const struct rte_flow_item_vlan *mask = item->mask;
1440 	struct mlx5_flow_parse *parser = data->parser;
1441 	struct ibv_flow_spec_eth *eth;
1442 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1443 	const char *msg = "VLAN cannot be empty";
1444 
1445 	if (spec) {
1446 		unsigned int i;
1447 		if (!mask)
1448 			mask = default_mask;
1449 
1450 		for (i = 0; i != hash_rxq_init_n; ++i) {
1451 			if (!parser->queue[i].ibv_attr)
1452 				continue;
1453 
1454 			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1455 				       parser->queue[i].offset - eth_size);
1456 			eth->val.vlan_tag = spec->tci;
1457 			eth->mask.vlan_tag = mask->tci;
1458 			eth->val.vlan_tag &= eth->mask.vlan_tag;
1459 			/*
1460 			 * From a Verbs perspective, an empty VLAN is
1461 			 * equivalent to a packet without a VLAN layer.
1462 			 */
1463 			if (!eth->mask.vlan_tag)
1464 				goto error;
1465 			/* Outer TPID cannot be matched. */
1466 			if (eth->mask.ether_type) {
1467 				msg = "VLAN TPID matching is not supported";
1468 				goto error;
1469 			}
1470 			eth->val.ether_type = spec->inner_type;
1471 			eth->mask.ether_type = mask->inner_type;
1472 			eth->val.ether_type &= eth->mask.ether_type;
1473 		}
1474 		return 0;
1475 	}
1476 error:
1477 	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1478 				  item, msg);
1479 }
1480 
1481 /**
1482  * Convert IPv4 item to Verbs specification.
1483  *
1484  * @param item[in]
1485  *   Item specification.
1486  * @param default_mask[in]
1487  *   Default bit-masks to use when item->mask is not provided.
1488  * @param data[in, out]
1489  *   User structure.
1490  *
1491  * @return
1492  *   0 on success, a negative errno value otherwise and rte_errno is set.
1493  */
1494 static int
1495 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1496 		      const void *default_mask,
1497 		      struct mlx5_flow_data *data)
1498 {
1499 	struct priv *priv = data->dev->data->dev_private;
1500 	const struct rte_flow_item_ipv4 *spec = item->spec;
1501 	const struct rte_flow_item_ipv4 *mask = item->mask;
1502 	struct mlx5_flow_parse *parser = data->parser;
1503 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1504 	struct ibv_flow_spec_ipv4_ext ipv4 = {
1505 		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1506 		.size = ipv4_size,
1507 	};
1508 
1509 	if (parser->layer == HASH_RXQ_TUNNEL &&
1510 	    parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1511 	    !priv->config.l3_vxlan_en)
1512 		return rte_flow_error_set(data->error, EINVAL,
1513 					  RTE_FLOW_ERROR_TYPE_ITEM,
1514 					  item,
1515 					  "L3 VXLAN not enabled by device"
1516 					  " parameter and/or not configured"
1517 					  " in firmware");
1518 	parser->layer = HASH_RXQ_IPV4;
1519 	if (spec) {
1520 		if (!mask)
1521 			mask = default_mask;
1522 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1523 			.src_ip = spec->hdr.src_addr,
1524 			.dst_ip = spec->hdr.dst_addr,
1525 			.proto = spec->hdr.next_proto_id,
1526 			.tos = spec->hdr.type_of_service,
1527 		};
1528 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1529 			.src_ip = mask->hdr.src_addr,
1530 			.dst_ip = mask->hdr.dst_addr,
1531 			.proto = mask->hdr.next_proto_id,
1532 			.tos = mask->hdr.type_of_service,
1533 		};
1534 		/* Remove unwanted bits from values. */
1535 		ipv4.val.src_ip &= ipv4.mask.src_ip;
1536 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1537 		ipv4.val.proto &= ipv4.mask.proto;
1538 		ipv4.val.tos &= ipv4.mask.tos;
1539 	}
1540 	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1541 	return 0;
1542 }
1543 
1544 /**
1545  * Convert IPv6 item to Verbs specification.
1546  *
1547  * @param item[in]
1548  *   Item specification.
1549  * @param default_mask[in]
1550  *   Default bit-masks to use when item->mask is not provided.
1551  * @param data[in, out]
1552  *   User structure.
1553  *
1554  * @return
1555  *   0 on success, a negative errno value otherwise and rte_errno is set.
1556  */
1557 static int
1558 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1559 		      const void *default_mask,
1560 		      struct mlx5_flow_data *data)
1561 {
1562 	struct priv *priv = data->dev->data->dev_private;
1563 	const struct rte_flow_item_ipv6 *spec = item->spec;
1564 	const struct rte_flow_item_ipv6 *mask = item->mask;
1565 	struct mlx5_flow_parse *parser = data->parser;
1566 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1567 	struct ibv_flow_spec_ipv6 ipv6 = {
1568 		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
1569 		.size = ipv6_size,
1570 	};
1571 
1572 	if (parser->layer == HASH_RXQ_TUNNEL &&
1573 	    parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1574 	    !priv->config.l3_vxlan_en)
1575 		return rte_flow_error_set(data->error, EINVAL,
1576 					  RTE_FLOW_ERROR_TYPE_ITEM,
1577 					  item,
1578 					  "L3 VXLAN not enabled by device"
1579 					  " parameter and/or not configured"
1580 					  " in firmware");
1581 	parser->layer = HASH_RXQ_IPV6;
1582 	if (spec) {
1583 		unsigned int i;
1584 		uint32_t vtc_flow_val;
1585 		uint32_t vtc_flow_mask;
1586 
1587 		if (!mask)
1588 			mask = default_mask;
1589 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1590 		       RTE_DIM(ipv6.val.src_ip));
1591 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1592 		       RTE_DIM(ipv6.val.dst_ip));
1593 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1594 		       RTE_DIM(ipv6.mask.src_ip));
1595 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1596 		       RTE_DIM(ipv6.mask.dst_ip));
1597 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1598 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1599 		ipv6.val.flow_label =
1600 			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1601 					 IPV6_HDR_FL_SHIFT);
1602 		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1603 					 IPV6_HDR_TC_SHIFT;
1604 		ipv6.val.next_hdr = spec->hdr.proto;
1605 		ipv6.val.hop_limit = spec->hdr.hop_limits;
1606 		ipv6.mask.flow_label =
1607 			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1608 					 IPV6_HDR_FL_SHIFT);
1609 		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1610 					  IPV6_HDR_TC_SHIFT;
1611 		ipv6.mask.next_hdr = mask->hdr.proto;
1612 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
1613 		/* Remove unwanted bits from values. */
1614 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1615 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1616 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1617 		}
1618 		ipv6.val.flow_label &= ipv6.mask.flow_label;
1619 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1620 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1621 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1622 	}
1623 	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1624 	return 0;
1625 }
1626 
1627 /**
1628  * Convert UDP item to Verbs specification.
1629  *
1630  * @param item[in]
1631  *   Item specification.
1632  * @param default_mask[in]
1633  *   Default bit-masks to use when item->mask is not provided.
1634  * @param data[in, out]
1635  *   User structure.
1636  *
1637  * @return
1638  *   0 on success, a negative errno value otherwise and rte_errno is set.
1639  */
1640 static int
1641 mlx5_flow_create_udp(const struct rte_flow_item *item,
1642 		     const void *default_mask,
1643 		     struct mlx5_flow_data *data)
1644 {
1645 	const struct rte_flow_item_udp *spec = item->spec;
1646 	const struct rte_flow_item_udp *mask = item->mask;
1647 	struct mlx5_flow_parse *parser = data->parser;
1648 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1649 	struct ibv_flow_spec_tcp_udp udp = {
1650 		.type = parser->inner | IBV_FLOW_SPEC_UDP,
1651 		.size = udp_size,
1652 	};
1653 
1654 	if (parser->layer == HASH_RXQ_IPV4)
1655 		parser->layer = HASH_RXQ_UDPV4;
1656 	else
1657 		parser->layer = HASH_RXQ_UDPV6;
1658 	if (spec) {
1659 		if (!mask)
1660 			mask = default_mask;
1661 		udp.val.dst_port = spec->hdr.dst_port;
1662 		udp.val.src_port = spec->hdr.src_port;
1663 		udp.mask.dst_port = mask->hdr.dst_port;
1664 		udp.mask.src_port = mask->hdr.src_port;
1665 		/* Remove unwanted bits from values. */
1666 		udp.val.src_port &= udp.mask.src_port;
1667 		udp.val.dst_port &= udp.mask.dst_port;
1668 	}
1669 	mlx5_flow_create_copy(parser, &udp, udp_size);
1670 	return 0;
1671 }
1672 
1673 /**
1674  * Convert TCP item to Verbs specification.
1675  *
1676  * @param item[in]
1677  *   Item specification.
1678  * @param default_mask[in]
1679  *   Default bit-masks to use when item->mask is not provided.
1680  * @param data[in, out]
1681  *   User structure.
1682  *
1683  * @return
1684  *   0 on success, a negative errno value otherwise and rte_errno is set.
1685  */
1686 static int
1687 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1688 		     const void *default_mask,
1689 		     struct mlx5_flow_data *data)
1690 {
1691 	const struct rte_flow_item_tcp *spec = item->spec;
1692 	const struct rte_flow_item_tcp *mask = item->mask;
1693 	struct mlx5_flow_parse *parser = data->parser;
1694 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1695 	struct ibv_flow_spec_tcp_udp tcp = {
1696 		.type = parser->inner | IBV_FLOW_SPEC_TCP,
1697 		.size = tcp_size,
1698 	};
1699 
1700 	if (parser->layer == HASH_RXQ_IPV4)
1701 		parser->layer = HASH_RXQ_TCPV4;
1702 	else
1703 		parser->layer = HASH_RXQ_TCPV6;
1704 	if (spec) {
1705 		if (!mask)
1706 			mask = default_mask;
1707 		tcp.val.dst_port = spec->hdr.dst_port;
1708 		tcp.val.src_port = spec->hdr.src_port;
1709 		tcp.mask.dst_port = mask->hdr.dst_port;
1710 		tcp.mask.src_port = mask->hdr.src_port;
1711 		/* Remove unwanted bits from values. */
1712 		tcp.val.src_port &= tcp.mask.src_port;
1713 		tcp.val.dst_port &= tcp.mask.dst_port;
1714 	}
1715 	mlx5_flow_create_copy(parser, &tcp, tcp_size);
1716 	return 0;
1717 }
1718 
1719 /**
1720  * Convert VXLAN item to Verbs specification.
1721  *
1722  * @param item[in]
1723  *   Item specification.
1724  * @param default_mask[in]
1725  *   Default bit-masks to use when item->mask is not provided.
1726  * @param data[in, out]
1727  *   User structure.
1728  *
1729  * @return
1730  *   0 on success, a negative errno value otherwise and rte_errno is set.
1731  */
1732 static int
1733 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1734 		       const void *default_mask,
1735 		       struct mlx5_flow_data *data)
1736 {
1737 	const struct rte_flow_item_vxlan *spec = item->spec;
1738 	const struct rte_flow_item_vxlan *mask = item->mask;
1739 	struct mlx5_flow_parse *parser = data->parser;
1740 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1741 	struct ibv_flow_spec_tunnel vxlan = {
1742 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1743 		.size = size,
1744 	};
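	/*
	 * The VXLAN VNI is only 24 bits wide: byte 0 of the 32-bit tunnel
	 * id is kept zero and the three VNI bytes from the item are copied
	 * into vni[1..3] below.
	 */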
1745 	union vni {
1746 		uint32_t vlan_id;
1747 		uint8_t vni[4];
1748 	} id;
1749 
1750 	id.vni[0] = 0;
1751 	parser->inner = IBV_FLOW_SPEC_INNER;
1752 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
1753 	parser->out_layer = parser->layer;
1754 	parser->layer = HASH_RXQ_TUNNEL;
1755 	/* Default VXLAN to outer RSS. */
1756 	if (!parser->rss_conf.level)
1757 		parser->rss_conf.level = 1;
1758 	if (spec) {
1759 		if (!mask)
1760 			mask = default_mask;
1761 		memcpy(&id.vni[1], spec->vni, 3);
1762 		vxlan.val.tunnel_id = id.vlan_id;
1763 		memcpy(&id.vni[1], mask->vni, 3);
1764 		vxlan.mask.tunnel_id = id.vlan_id;
1765 		/* Remove unwanted bits from values. */
1766 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1767 	}
1768 	/*
1769 	 * Tunnel id 0 is equivalent to not adding a VXLAN layer. If this is
1770 	 * the only layer defined in the Verbs specification, it is taken as
1771 	 * a wildcard and all packets will match the rule. If it follows a
1772 	 * full stack of layers (e.g. eth / ipv4 / udp), all packets matching
1773 	 * those layers will also match the rule.
1774 	 * To avoid such a situation, VNI 0 is currently refused.
1775 	 */
1776 	/* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1777 	if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1778 		return rte_flow_error_set(data->error, EINVAL,
1779 					  RTE_FLOW_ERROR_TYPE_ITEM,
1780 					  item,
1781 					  "VxLAN vni cannot be 0");
1782 	mlx5_flow_create_copy(parser, &vxlan, size);
1783 	return 0;
1784 }
1785 
1786 /**
1787  * Convert VXLAN-GPE item to Verbs specification.
1788  *
1789  * @param[in] item
1790  *   Item specification.
1791  * @param[in] default_mask
1792  *   Default bit-masks to use when item->mask is not provided.
1793  * @param[in, out] data
1794  *   User structure.
1795  *
1796  * @return
1797  *   0 on success, a negative errno value otherwise and rte_errno is set.
1798  */
1799 static int
1800 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
1801 			   const void *default_mask,
1802 			   struct mlx5_flow_data *data)
1803 {
1804 	struct priv *priv = data->dev->data->dev_private;
1805 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1806 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1807 	struct mlx5_flow_parse *parser = data->parser;
1808 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1809 	struct ibv_flow_spec_tunnel vxlan = {
1810 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1811 		.size = size,
1812 	};
1813 	union vni {
1814 		uint32_t vlan_id;
1815 		uint8_t vni[4];
1816 	} id;
1817 
1818 	if (!priv->config.l3_vxlan_en)
1819 		return rte_flow_error_set(data->error, EINVAL,
1820 					  RTE_FLOW_ERROR_TYPE_ITEM,
1821 					  item,
1822 					  "L3 VXLAN not enabled by device"
1823 					  " parameter and/or not configured"
1824 					  " in firmware");
1825 	id.vni[0] = 0;
1826 	parser->inner = IBV_FLOW_SPEC_INNER;
1827 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)];
1828 	parser->out_layer = parser->layer;
1829 	parser->layer = HASH_RXQ_TUNNEL;
1830 	/* Default VXLAN-GPE to outer RSS. */
1831 	if (!parser->rss_conf.level)
1832 		parser->rss_conf.level = 1;
1833 	if (spec) {
1834 		if (!mask)
1835 			mask = default_mask;
1836 		memcpy(&id.vni[1], spec->vni, 3);
1837 		vxlan.val.tunnel_id = id.vlan_id;
1838 		memcpy(&id.vni[1], mask->vni, 3);
1839 		vxlan.mask.tunnel_id = id.vlan_id;
1840 		if (spec->protocol)
1841 			return rte_flow_error_set(data->error, EINVAL,
1842 						  RTE_FLOW_ERROR_TYPE_ITEM,
1843 						  item,
1844 						  "VxLAN-GPE protocol not"
1845 						  " supported");
1846 		/* Remove unwanted bits from values. */
1847 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1848 	}
1849 	/*
1850 	 * Tunnel id 0 is equivalent to not adding a VXLAN layer. If this is
1851 	 * the only layer defined in the Verbs specification, it is taken as
1852 	 * a wildcard and all packets will match the rule. If it follows a
1853 	 * full stack of layers (e.g. eth / ipv4 / udp), all packets matching
1854 	 * those layers will also match the rule.
1855 	 * To avoid such a situation, VNI 0 is currently refused.
1856 	 */
1857 	/* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1858 	if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1859 		return rte_flow_error_set(data->error, EINVAL,
1860 					  RTE_FLOW_ERROR_TYPE_ITEM,
1861 					  item,
1862 					  "VxLAN-GPE vni cannot be 0");
1863 	mlx5_flow_create_copy(parser, &vxlan, size);
1864 	return 0;
1865 }
1866 
1867 /**
1868  * Convert GRE item to Verbs specification.
1869  *
1870  * @param[in] item
1871  *   Item specification.
1872  * @param[in] default_mask
1873  *   Default bit-masks to use when item->mask is not provided.
1874  * @param[in, out] data
1875  *   User structure.
1876  *
1877  * @return
1878  *   0 on success, a negative errno value otherwise and rte_errno is set.
1879  */
1880 static int
1881 mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
1882 		     const void *default_mask __rte_unused,
1883 		     struct mlx5_flow_data *data)
1884 {
1885 	struct mlx5_flow_parse *parser = data->parser;
1886 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
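	/*
	 * No GRE-specific spec is filled here; the generic Verbs tunnel
	 * specification (VXLAN spec type) is reused and the match is
	 * narrowed through the outer IP protocol below.
	 */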
1887 	struct ibv_flow_spec_tunnel tunnel = {
1888 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1889 		.size = size,
1890 	};
1891 	struct ibv_flow_spec_ipv4_ext *ipv4;
1892 	struct ibv_flow_spec_ipv6 *ipv6;
1893 	unsigned int i;
1894 
1895 	parser->inner = IBV_FLOW_SPEC_INNER;
1896 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
1897 	parser->out_layer = parser->layer;
1898 	parser->layer = HASH_RXQ_TUNNEL;
1899 	/* Default GRE to inner RSS. */
1900 	if (!parser->rss_conf.level)
1901 		parser->rss_conf.level = 2;
1902 	/* Update the outer IP layer protocol to GRE (47) in prior IP specs. */
1903 	for (i = 0; i != hash_rxq_init_n; ++i) {
1904 		if (!parser->queue[i].ibv_attr)
1905 			continue;
1906 		if (parser->out_layer == HASH_RXQ_IPV4) {
1907 			ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1908 				parser->queue[i].offset -
1909 				sizeof(struct ibv_flow_spec_ipv4_ext));
1910 			if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
1911 				break;
1912 			ipv4->val.proto = MLX5_GRE;
1913 			ipv4->mask.proto = 0xff;
1914 		} else if (parser->out_layer == HASH_RXQ_IPV6) {
1915 			ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1916 				parser->queue[i].offset -
1917 				sizeof(struct ibv_flow_spec_ipv6));
1918 			if (ipv6->mask.next_hdr &&
1919 			    ipv6->val.next_hdr != MLX5_GRE)
1920 				break;
1921 			ipv6->val.next_hdr = MLX5_GRE;
1922 			ipv6->mask.next_hdr = 0xff;
1923 		}
1924 	}
1925 	if (i != hash_rxq_init_n)
1926 		return rte_flow_error_set(data->error, EINVAL,
1927 					  RTE_FLOW_ERROR_TYPE_ITEM,
1928 					  item,
1929 					  "IP protocol of GRE must be 47");
1930 	mlx5_flow_create_copy(parser, &tunnel, size);
1931 	return 0;
1932 }
1933 
1934 /**
1935  * Convert mark/flag action to Verbs specification.
1936  *
1937  * @param parser
1938  *   Internal parser structure.
1939  * @param mark_id
1940  *   Mark identifier.
1941  *
1942  * @return
1943  *   0 on success, a negative errno value otherwise and rte_errno is set.
1944  */
1945 static int
1946 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1947 {
1948 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1949 	struct ibv_flow_spec_action_tag tag = {
1950 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1951 		.size = size,
1952 		.tag_id = mlx5_flow_mark_set(mark_id),
1953 	};
1954 
1955 	assert(parser->mark);
1956 	mlx5_flow_create_copy(parser, &tag, size);
1957 	return 0;
1958 }
1959 
1960 /**
1961  * Convert count action to Verbs specification.
1962  *
1963  * @param dev
1964  *   Pointer to Ethernet device.
1965  * @param parser
1966  *   Pointer to MLX5 flow parser structure.
1967  *
1968  * @return
1969  *   0 on success, a negative errno value otherwise and rte_errno is set.
1970  */
1971 static int
1972 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1973 		       struct mlx5_flow_parse *parser __rte_unused)
1974 {
1975 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1976 	struct priv *priv = dev->data->dev_private;
1977 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1978 	struct ibv_counter_set_init_attr init_attr = {0};
1979 	struct ibv_flow_spec_counter_action counter = {
1980 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
1981 		.size = size,
1982 		.counter_set_handle = 0,
1983 	};
1984 
1985 	init_attr.counter_set_id = 0;
1986 	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1987 	if (!parser->cs) {
1988 		rte_errno = EINVAL;
1989 		return -rte_errno;
1990 	}
1991 	counter.counter_set_handle = parser->cs->handle;
1992 	mlx5_flow_create_copy(parser, &counter, size);
1993 #endif
1994 	return 0;
1995 }
1996 
1997 /**
1998  * Complete flow rule creation with a drop queue.
1999  *
2000  * @param dev
2001  *   Pointer to Ethernet device.
2002  * @param parser
2003  *   Internal parser structure.
2004  * @param flow
2005  *   Pointer to the rte_flow.
2006  * @param[out] error
2007  *   Perform verbose error reporting if not NULL.
2008  *
2009  * @return
2010  *   0 on success, a negative errno value otherwise and rte_errno is set.
2011  */
2012 static int
2013 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
2014 				   struct mlx5_flow_parse *parser,
2015 				   struct rte_flow *flow,
2016 				   struct rte_flow_error *error)
2017 {
2018 	struct priv *priv = dev->data->dev_private;
2019 	struct ibv_flow_spec_action_drop *drop;
2020 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
2021 
2022 	assert(priv->pd);
2023 	assert(priv->ctx);
2024 	flow->drop = 1;
2025 	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
2026 			parser->queue[HASH_RXQ_ETH].offset);
2027 	*drop = (struct ibv_flow_spec_action_drop){
2028 			.type = IBV_FLOW_SPEC_ACTION_DROP,
2029 			.size = size,
2030 	};
2031 	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
2032 	parser->queue[HASH_RXQ_ETH].offset += size;
2033 	flow->frxq[HASH_RXQ_ETH].ibv_attr =
2034 		parser->queue[HASH_RXQ_ETH].ibv_attr;
2035 	if (parser->count)
2036 		flow->cs = parser->cs;
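	/*
	 * If the port is not started yet, keep only the Verbs attribute;
	 * the flow is instantiated later by mlx5_flow_start().
	 */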
2037 	if (!dev->data->dev_started)
2038 		return 0;
2039 	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
2040 	flow->frxq[HASH_RXQ_ETH].ibv_flow =
2041 		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
2042 				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
2043 	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2044 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
2045 				   NULL, "flow rule creation failure");
2046 		goto error;
2047 	}
2048 	return 0;
2049 error:
2050 	assert(flow);
2051 	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2052 		claim_zero(mlx5_glue->destroy_flow
2053 			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2054 		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2055 	}
2056 	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
2057 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2058 		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
2059 	}
2060 	if (flow->cs) {
2061 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2062 		flow->cs = NULL;
2063 		parser->cs = NULL;
2064 	}
2065 	return -rte_errno;
2066 }
2067 
2068 /**
2069  * Create hash Rx queues when RSS is enabled.
2070  *
2071  * @param dev
2072  *   Pointer to Ethernet device.
2073  * @param parser
2074  *   Internal parser structure.
2075  * @param flow
2076  *   Pointer to the rte_flow.
2077  * @param[out] error
2078  *   Perform verbose error reporting if not NULL.
2079  *
2080  * @return
2081  *   0 on success, a negative errno value otherwise and rte_errno is set.
2082  */
2083 static int
2084 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
2085 				  struct mlx5_flow_parse *parser,
2086 				  struct rte_flow *flow,
2087 				  struct rte_flow_error *error)
2088 {
2089 	unsigned int i;
2090 
2091 	for (i = 0; i != hash_rxq_init_n; ++i) {
2092 		if (!parser->queue[i].ibv_attr)
2093 			continue;
2094 		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
2095 		parser->queue[i].ibv_attr = NULL;
2096 		flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
2097 		if (!dev->data->dev_started)
2098 			continue;
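		/* Reuse an existing hash Rx queue or create a new one. */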
2099 		flow->frxq[i].hrxq =
2100 			mlx5_hrxq_get(dev,
2101 				      parser->rss_conf.key,
2102 				      parser->rss_conf.key_len,
2103 				      flow->frxq[i].hash_fields,
2104 				      parser->rss_conf.queue,
2105 				      parser->rss_conf.queue_num,
2106 				      parser->tunnel,
2107 				      parser->rss_conf.level);
2108 		if (flow->frxq[i].hrxq)
2109 			continue;
2110 		flow->frxq[i].hrxq =
2111 			mlx5_hrxq_new(dev,
2112 				      parser->rss_conf.key,
2113 				      parser->rss_conf.key_len,
2114 				      flow->frxq[i].hash_fields,
2115 				      parser->rss_conf.queue,
2116 				      parser->rss_conf.queue_num,
2117 				      parser->tunnel,
2118 				      parser->rss_conf.level);
2119 		if (!flow->frxq[i].hrxq) {
2120 			return rte_flow_error_set(error, ENOMEM,
2121 						  RTE_FLOW_ERROR_TYPE_HANDLE,
2122 						  NULL,
2123 						  "cannot create hash rxq");
2124 		}
2125 	}
2126 	return 0;
2127 }
2128 
2129 /**
2130  * RXQ update after flow rule creation.
2131  *
2132  * @param dev
2133  *   Pointer to Ethernet device.
2134  * @param flow
2135  *   Pointer to the flow rule.
2136  */
2137 static void
2138 mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
2139 {
2140 	struct priv *priv = dev->data->dev_private;
2141 	unsigned int i;
2142 	unsigned int j;
2143 
2144 	if (!dev->data->dev_started)
2145 		return;
2146 	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2147 		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2148 						 [(*flow->queues)[i]];
2149 		struct mlx5_rxq_ctrl *rxq_ctrl =
2150 			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2151 		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2152 
2153 		rxq_data->mark |= flow->mark;
2154 		if (!tunnel)
2155 			continue;
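		/*
		 * tunnel_types[] counts flows per tunnel type on this Rx
		 * queue; the reported ptype is only set while a single
		 * tunnel type is in use.
		 */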
2156 		rxq_ctrl->tunnel_types[tunnel] += 1;
2157 		/* Clear tunnel type if more than one tunnel type is set. */
2158 		for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
2159 			if (j == tunnel)
2160 				continue;
2161 			if (rxq_ctrl->tunnel_types[j] > 0) {
2162 				rxq_data->tunnel = 0;
2163 				break;
2164 			}
2165 		}
2166 		if (j == RTE_DIM(rxq_ctrl->tunnel_types))
2167 			rxq_data->tunnel = flow->tunnel;
2168 	}
2169 }
2170 
2171 /**
2172  * Dump flow hash RX queue detail.
2173  *
2174  * @param dev
2175  *   Pointer to Ethernet device.
2176  * @param flow
2177  *   Pointer to the rte_flow.
2178  * @param hrxq_idx
2179  *   Hash RX queue index.
2180  */
2181 static void
2182 mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused,
2183 	       struct rte_flow *flow __rte_unused,
2184 	       unsigned int hrxq_idx __rte_unused)
2185 {
2186 #ifndef NDEBUG
2187 	uintptr_t spec_ptr;
2188 	uint16_t j;
2189 	char buf[256];
2190 	uint8_t off;
2191 
2192 	spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1);
2193 	for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs;
2194 	     j++) {
2195 		struct ibv_flow_spec *spec = (void *)spec_ptr;
2196 		off += sprintf(buf + off, " %x(%hu)", spec->hdr.type,
2197 			       spec->hdr.size);
2198 		spec_ptr += spec->hdr.size;
2199 	}
2200 	DRV_LOG(DEBUG,
2201 		"port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p,"
2202 		" hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d,"
2203 		" flags:%x, comp_mask:%x specs:%s",
2204 		dev->data->port_id, (void *)flow, hrxq_idx,
2205 		(void *)flow->frxq[hrxq_idx].hrxq,
2206 		(void *)flow->frxq[hrxq_idx].hrxq->qp,
2207 		(void *)flow->frxq[hrxq_idx].hrxq->ind_table,
2208 		flow->frxq[hrxq_idx].hash_fields |
2209 		(flow->tunnel &&
2210 		 flow->rss_conf.level > 1 ? (uint32_t)IBV_RX_HASH_INNER : 0),
2211 		flow->rss_conf.queue_num,
2212 		flow->frxq[hrxq_idx].ibv_attr->num_of_specs,
2213 		flow->frxq[hrxq_idx].ibv_attr->size,
2214 		flow->frxq[hrxq_idx].ibv_attr->priority,
2215 		flow->frxq[hrxq_idx].ibv_attr->type,
2216 		flow->frxq[hrxq_idx].ibv_attr->flags,
2217 		flow->frxq[hrxq_idx].ibv_attr->comp_mask,
2218 		buf);
2219 #endif
2220 }
2221 
2222 /**
2223  * Complete flow rule creation.
2224  *
2225  * @param dev
2226  *   Pointer to Ethernet device.
2227  * @param parser
2228  *   Internal parser structure.
2229  * @param flow
2230  *   Pointer to the rte_flow.
2231  * @param[out] error
2232  *   Perform verbose error reporting if not NULL.
2233  *
2234  * @return
2235  *   0 on success, a negative errno value otherwise and rte_errno is set.
2236  */
2237 static int
2238 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
2239 			      struct mlx5_flow_parse *parser,
2240 			      struct rte_flow *flow,
2241 			      struct rte_flow_error *error)
2242 {
2243 	struct priv *priv __rte_unused = dev->data->dev_private;
2244 	int ret;
2245 	unsigned int i;
2246 	unsigned int flows_n = 0;
2247 
2248 	assert(priv->pd);
2249 	assert(priv->ctx);
2250 	assert(!parser->drop);
2251 	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
2252 	if (ret)
2253 		goto error;
2254 	if (parser->count)
2255 		flow->cs = parser->cs;
2256 	if (!dev->data->dev_started)
2257 		return 0;
2258 	for (i = 0; i != hash_rxq_init_n; ++i) {
2259 		if (!flow->frxq[i].hrxq)
2260 			continue;
2261 		flow->frxq[i].ibv_flow =
2262 			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2263 					       flow->frxq[i].ibv_attr);
2264 		mlx5_flow_dump(dev, flow, i);
2265 		if (!flow->frxq[i].ibv_flow) {
2266 			rte_flow_error_set(error, ENOMEM,
2267 					   RTE_FLOW_ERROR_TYPE_HANDLE,
2268 					   NULL, "flow rule creation failure");
2269 			goto error;
2270 		}
2271 		++flows_n;
2272 	}
2273 	if (!flows_n) {
2274 		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
2275 				   NULL, "internal error in flow creation");
2276 		goto error;
2277 	}
2278 	mlx5_flow_create_update_rxqs(dev, flow);
2279 	return 0;
2280 error:
2281 	ret = rte_errno; /* Save rte_errno before cleanup. */
2282 	assert(flow);
2283 	for (i = 0; i != hash_rxq_init_n; ++i) {
2284 		if (flow->frxq[i].ibv_flow) {
2285 			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
2286 
2287 			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
2288 		}
2289 		if (flow->frxq[i].hrxq)
2290 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2291 		if (flow->frxq[i].ibv_attr)
2292 			rte_free(flow->frxq[i].ibv_attr);
2293 	}
2294 	if (flow->cs) {
2295 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2296 		flow->cs = NULL;
2297 		parser->cs = NULL;
2298 	}
2299 	rte_errno = ret; /* Restore rte_errno. */
2300 	return -rte_errno;
2301 }
2302 
2303 /**
2304  * Convert a flow.
2305  *
2306  * @param dev
2307  *   Pointer to Ethernet device.
2308  * @param list
2309  *   Pointer to a TAILQ flow list.
2310  * @param[in] attr
2311  *   Flow rule attributes.
2312  * @param[in] pattern
2313  *   Pattern specification (list terminated by the END pattern item).
2314  * @param[in] actions
2315  *   Associated actions (list terminated by the END action).
2316  * @param[out] error
2317  *   Perform verbose error reporting if not NULL.
2318  *
2319  * @return
2320  *   A flow on success, NULL otherwise and rte_errno is set.
2321  */
2322 static struct rte_flow *
2323 mlx5_flow_list_create(struct rte_eth_dev *dev,
2324 		      struct mlx5_flows *list,
2325 		      const struct rte_flow_attr *attr,
2326 		      const struct rte_flow_item items[],
2327 		      const struct rte_flow_action actions[],
2328 		      struct rte_flow_error *error)
2329 {
2330 	struct mlx5_flow_parse parser = { .create = 1, };
2331 	struct rte_flow *flow = NULL;
2332 	unsigned int i;
2333 	int ret;
2334 
2335 	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2336 	if (ret)
2337 		goto exit;
2338 	flow = rte_calloc(__func__, 1,
2339 			  sizeof(*flow) +
2340 			  parser.rss_conf.queue_num * sizeof(uint16_t),
2341 			  0);
2342 	if (!flow) {
2343 		rte_flow_error_set(error, ENOMEM,
2344 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2345 				   NULL,
2346 				   "cannot allocate flow memory");
2347 		return NULL;
2348 	}
2349 	/* Copy configuration. */
2350 	flow->queues = (uint16_t (*)[])(flow + 1);
2351 	flow->tunnel = parser.tunnel;
2352 	flow->rss_conf = (struct rte_flow_action_rss){
2353 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2354 		.level = 0,
2355 		.types = parser.rss_conf.types,
2356 		.key_len = parser.rss_conf.key_len,
2357 		.queue_num = parser.rss_conf.queue_num,
2358 		.key = memcpy(flow->rss_key, parser.rss_conf.key,
2359 			      sizeof(*parser.rss_conf.key) *
2360 			      parser.rss_conf.key_len),
2361 		.queue = memcpy(flow->queues, parser.rss_conf.queue,
2362 				sizeof(*parser.rss_conf.queue) *
2363 				parser.rss_conf.queue_num),
2364 	};
2365 	flow->mark = parser.mark;
2366 	/* Finalize the flow. */
2367 	if (parser.drop)
2368 		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
2369 							 error);
2370 	else
2371 		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
2372 	if (ret)
2373 		goto exit;
2374 	TAILQ_INSERT_TAIL(list, flow, next);
2375 	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
2376 		(void *)flow);
2377 	return flow;
2378 exit:
2379 	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
2380 		error->message);
2381 	for (i = 0; i != hash_rxq_init_n; ++i) {
2382 		if (parser.queue[i].ibv_attr)
2383 			rte_free(parser.queue[i].ibv_attr);
2384 	}
2385 	rte_free(flow);
2386 	return NULL;
2387 }
2388 
2389 /**
2390  * Validate a flow supported by the NIC.
2391  *
2392  * @see rte_flow_validate()
2393  * @see rte_flow_ops
2394  */
2395 int
2396 mlx5_flow_validate(struct rte_eth_dev *dev,
2397 		   const struct rte_flow_attr *attr,
2398 		   const struct rte_flow_item items[],
2399 		   const struct rte_flow_action actions[],
2400 		   struct rte_flow_error *error)
2401 {
2402 	struct mlx5_flow_parse parser = { .create = 0, };
2403 
2404 	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2405 }
2406 
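/*
 * Illustrative usage sketch (not part of the driver): applications reach
 * mlx5_flow_create() through the generic rte_flow API, for instance:
 *
 *	struct rte_flow_error err;
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
 *						actions, &err);
 *
 * rte_flow_create() dispatches here through the mlx5_flow_ops callbacks;
 * port_id stands for whichever port the application configured.
 */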
2407 /**
2408  * Create a flow.
2409  *
2410  * @see rte_flow_create()
2411  * @see rte_flow_ops
2412  */
2413 struct rte_flow *
2414 mlx5_flow_create(struct rte_eth_dev *dev,
2415 		 const struct rte_flow_attr *attr,
2416 		 const struct rte_flow_item items[],
2417 		 const struct rte_flow_action actions[],
2418 		 struct rte_flow_error *error)
2419 {
2420 	struct priv *priv = dev->data->dev_private;
2421 
2422 	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2423 				     error);
2424 }
2425 
2426 /**
2427  * Destroy a flow in a list.
2428  *
2429  * @param dev
2430  *   Pointer to Ethernet device.
2431  * @param list
2432  *   Pointer to a TAILQ flow list.
2433  * @param[in] flow
2434  *   Flow to destroy.
2435  */
2436 static void
2437 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2438 		       struct rte_flow *flow)
2439 {
2440 	struct priv *priv = dev->data->dev_private;
2441 	unsigned int i;
2442 
2443 	if (flow->drop || !dev->data->dev_started)
2444 		goto free;
2445 	for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
2446 		/* Update queue tunnel type. */
2447 		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2448 						 [(*flow->queues)[i]];
2449 		struct mlx5_rxq_ctrl *rxq_ctrl =
2450 			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2451 		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2452 
2453 		assert(rxq_ctrl->tunnel_types[tunnel] > 0);
2454 		rxq_ctrl->tunnel_types[tunnel] -= 1;
2455 		if (!rxq_ctrl->tunnel_types[tunnel]) {
2456 			/* Update tunnel type. */
2457 			uint8_t j;
2458 			uint8_t types = 0;
2459 			uint8_t last;
2460 
2461 			for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
2462 				if (rxq_ctrl->tunnel_types[j]) {
2463 					types += 1;
2464 					last = j;
2465 				}
2466 			/* Keep same if more than one tunnel type is left. */
2467 			if (types == 1)
2468 				rxq_data->tunnel = ptype_ext[last];
2469 			else if (types == 0)
2470 				/* No tunnel type left. */
2471 				rxq_data->tunnel = 0;
2472 		}
2473 	}
2474 	for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
2475 		struct rte_flow *tmp;
2476 		int mark = 0;
2477 
2478 		/*
2479 		 * To remove the mark from the queue, the queue must not be
2480 		 * present in any other marked flow (RSS or not).
2481 		 */
2482 		TAILQ_FOREACH(tmp, list, next) {
2483 			unsigned int j;
2484 			uint16_t *tqs = NULL;
2485 			uint16_t tq_n = 0;
2486 
2487 			if (!tmp->mark)
2488 				continue;
2489 			for (j = 0; j != hash_rxq_init_n; ++j) {
2490 				if (!tmp->frxq[j].hrxq)
2491 					continue;
2492 				tqs = tmp->frxq[j].hrxq->ind_table->queues;
2493 				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2494 			}
2495 			if (!tq_n)
2496 				continue;
2497 			for (j = 0; (j != tq_n) && !mark; j++)
2498 				if (tqs[j] == (*flow->queues)[i])
2499 					mark = 1;
2500 		}
2501 		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2502 	}
2503 free:
2504 	if (flow->drop) {
2505 		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2506 			claim_zero(mlx5_glue->destroy_flow
2507 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2508 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2509 	} else {
2510 		for (i = 0; i != hash_rxq_init_n; ++i) {
2511 			struct mlx5_flow *frxq = &flow->frxq[i];
2512 
2513 			if (frxq->ibv_flow)
2514 				claim_zero(mlx5_glue->destroy_flow
2515 					   (frxq->ibv_flow));
2516 			if (frxq->hrxq)
2517 				mlx5_hrxq_release(dev, frxq->hrxq);
2518 			if (frxq->ibv_attr)
2519 				rte_free(frxq->ibv_attr);
2520 		}
2521 	}
2522 	if (flow->cs) {
2523 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2524 		flow->cs = NULL;
2525 	}
2526 	TAILQ_REMOVE(list, flow, next);
2527 	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2528 		(void *)flow);
2529 	rte_free(flow);
2530 }
2531 
2532 /**
2533  * Destroy all flows.
2534  *
2535  * @param dev
2536  *   Pointer to Ethernet device.
2537  * @param list
2538  *   Pointer to a TAILQ flow list.
2539  */
2540 void
2541 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2542 {
2543 	while (!TAILQ_EMPTY(list)) {
2544 		struct rte_flow *flow;
2545 
2546 		flow = TAILQ_FIRST(list);
2547 		mlx5_flow_list_destroy(dev, list, flow);
2548 	}
2549 }
2550 
2551 /**
2552  * Create drop queue.
2553  *
2554  * @param dev
2555  *   Pointer to Ethernet device.
2556  *
2557  * @return
2558  *   0 on success, a negative errno value otherwise and rte_errno is set.
2559  */
2560 int
2561 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2562 {
2563 	struct priv *priv = dev->data->dev_private;
2564 	struct mlx5_hrxq_drop *fdq = NULL;
2565 
2566 	assert(priv->pd);
2567 	assert(priv->ctx);
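	/*
	 * The drop queue is a minimal Verbs chain (CQ -> WQ -> indirection
	 * table -> hash QP) used only as a sink QP to attach drop rules.
	 */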
2568 	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2569 	if (!fdq) {
2570 		DRV_LOG(WARNING,
2571 			"port %u cannot allocate memory for drop queue",
2572 			dev->data->port_id);
2573 		rte_errno = ENOMEM;
2574 		return -rte_errno;
2575 	}
2576 	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2577 	if (!fdq->cq) {
2578 		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2579 			dev->data->port_id);
2580 		rte_errno = errno;
2581 		goto error;
2582 	}
2583 	fdq->wq = mlx5_glue->create_wq
2584 		(priv->ctx,
2585 		 &(struct ibv_wq_init_attr){
2586 			.wq_type = IBV_WQT_RQ,
2587 			.max_wr = 1,
2588 			.max_sge = 1,
2589 			.pd = priv->pd,
2590 			.cq = fdq->cq,
2591 		 });
2592 	if (!fdq->wq) {
2593 		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2594 			dev->data->port_id);
2595 		rte_errno = errno;
2596 		goto error;
2597 	}
2598 	fdq->ind_table = mlx5_glue->create_rwq_ind_table
2599 		(priv->ctx,
2600 		 &(struct ibv_rwq_ind_table_init_attr){
2601 			.log_ind_tbl_size = 0,
2602 			.ind_tbl = &fdq->wq,
2603 			.comp_mask = 0,
2604 		 });
2605 	if (!fdq->ind_table) {
2606 		DRV_LOG(WARNING,
2607 			"port %u cannot allocate indirection table for drop"
2608 			" queue",
2609 			dev->data->port_id);
2610 		rte_errno = errno;
2611 		goto error;
2612 	}
2613 	fdq->qp = mlx5_glue->create_qp_ex
2614 		(priv->ctx,
2615 		 &(struct ibv_qp_init_attr_ex){
2616 			.qp_type = IBV_QPT_RAW_PACKET,
2617 			.comp_mask =
2618 				IBV_QP_INIT_ATTR_PD |
2619 				IBV_QP_INIT_ATTR_IND_TABLE |
2620 				IBV_QP_INIT_ATTR_RX_HASH,
2621 			.rx_hash_conf = (struct ibv_rx_hash_conf){
2622 				.rx_hash_function =
2623 					IBV_RX_HASH_FUNC_TOEPLITZ,
2624 				.rx_hash_key_len = rss_hash_default_key_len,
2625 				.rx_hash_key = rss_hash_default_key,
2626 				.rx_hash_fields_mask = 0,
2627 				},
2628 			.rwq_ind_tbl = fdq->ind_table,
2629 			.pd = priv->pd
2630 		 });
2631 	if (!fdq->qp) {
2632 		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2633 			dev->data->port_id);
2634 		rte_errno = errno;
2635 		goto error;
2636 	}
2637 	priv->flow_drop_queue = fdq;
2638 	return 0;
2639 error:
2640 	if (fdq->qp)
2641 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2642 	if (fdq->ind_table)
2643 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2644 	if (fdq->wq)
2645 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2646 	if (fdq->cq)
2647 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2648 	if (fdq)
2649 		rte_free(fdq);
2650 	priv->flow_drop_queue = NULL;
2651 	return -rte_errno;
2652 }
2653 
2654 /**
2655  * Delete drop queue.
2656  *
2657  * @param dev
2658  *   Pointer to Ethernet device.
2659  */
2660 void
2661 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2662 {
2663 	struct priv *priv = dev->data->dev_private;
2664 	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2665 
2666 	if (!fdq)
2667 		return;
2668 	if (fdq->qp)
2669 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2670 	if (fdq->ind_table)
2671 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2672 	if (fdq->wq)
2673 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2674 	if (fdq->cq)
2675 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2676 	rte_free(fdq);
2677 	priv->flow_drop_queue = NULL;
2678 }
2679 
2680 /**
2681  * Remove all flows.
2682  *
2683  * @param dev
2684  *   Pointer to Ethernet device.
2685  * @param list
2686  *   Pointer to a TAILQ flow list.
2687  */
2688 void
2689 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2690 {
2691 	struct priv *priv = dev->data->dev_private;
2692 	struct rte_flow *flow;
2693 	unsigned int i;
2694 
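	/*
	 * Verbs flows and hash Rx queues are released here, but the Verbs
	 * attributes are kept so mlx5_flow_start() can re-apply the rules.
	 */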
2695 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2696 		struct mlx5_ind_table_ibv *ind_tbl = NULL;
2697 
2698 		if (flow->drop) {
2699 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2700 				continue;
2701 			claim_zero(mlx5_glue->destroy_flow
2702 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2703 			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2704 			DRV_LOG(DEBUG, "port %u flow %p removed",
2705 				dev->data->port_id, (void *)flow);
2706 			/* Next flow. */
2707 			continue;
2708 		}
2709 		/* Verify the flow has not already been cleaned. */
2710 		for (i = 0; i != hash_rxq_init_n; ++i) {
2711 			if (!flow->frxq[i].ibv_flow)
2712 				continue;
2713 			/*
2714 			 * The indirection table may be necessary to remove
2715 			 * the mark flag from the Rx queues.
2716 			 * Grabbing it here speeds up the process by avoiding
2717 			 * another loop.
2718 			 */
2719 			ind_tbl = flow->frxq[i].hrxq->ind_table;
2720 			break;
2721 		}
2722 		if (i == hash_rxq_init_n)
2723 			return;
2724 		if (flow->mark) {
2725 			assert(ind_tbl);
2726 			for (i = 0; i != ind_tbl->queues_n; ++i)
2727 				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2728 		}
2729 		for (i = 0; i != hash_rxq_init_n; ++i) {
2730 			if (!flow->frxq[i].ibv_flow)
2731 				continue;
2732 			claim_zero(mlx5_glue->destroy_flow
2733 				   (flow->frxq[i].ibv_flow));
2734 			flow->frxq[i].ibv_flow = NULL;
2735 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2736 			flow->frxq[i].hrxq = NULL;
2737 		}
2738 		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2739 			(void *)flow);
2740 	}
2741 	/* Cleanup Rx queue tunnel info. */
2742 	for (i = 0; i != priv->rxqs_n; ++i) {
2743 		struct mlx5_rxq_data *q = (*priv->rxqs)[i];
2744 		struct mlx5_rxq_ctrl *rxq_ctrl =
2745 			container_of(q, struct mlx5_rxq_ctrl, rxq);
2746 
2747 		if (!q)
2748 			continue;
2749 		memset((void *)rxq_ctrl->tunnel_types, 0,
2750 		       sizeof(rxq_ctrl->tunnel_types));
2751 		q->tunnel = 0;
2752 	}
2753 }
2754 
2755 /**
2756  * Add all flows.
2757  *
2758  * @param dev
2759  *   Pointer to Ethernet device.
2760  * @param list
2761  *   Pointer to a TAILQ flow list.
2762  *
2763  * @return
2764  *   0 on success, a negative errno value otherwise and rte_errno is set.
2765  */
2766 int
2767 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2768 {
2769 	struct priv *priv = dev->data->dev_private;
2770 	struct rte_flow *flow;
2771 
2772 	TAILQ_FOREACH(flow, list, next) {
2773 		unsigned int i;
2774 
2775 		if (flow->drop) {
2776 			flow->frxq[HASH_RXQ_ETH].ibv_flow =
2777 				mlx5_glue->create_flow
2778 				(priv->flow_drop_queue->qp,
2779 				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2780 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2781 				DRV_LOG(DEBUG,
2782 					"port %u flow %p cannot be applied",
2783 					dev->data->port_id, (void *)flow);
2784 				rte_errno = EINVAL;
2785 				return -rte_errno;
2786 			}
2787 			DRV_LOG(DEBUG, "port %u flow %p applied",
2788 				dev->data->port_id, (void *)flow);
2789 			/* Next flow. */
2790 			continue;
2791 		}
2792 		for (i = 0; i != hash_rxq_init_n; ++i) {
2793 			if (!flow->frxq[i].ibv_attr)
2794 				continue;
2795 			flow->frxq[i].hrxq =
2796 				mlx5_hrxq_get(dev, flow->rss_conf.key,
2797 					      flow->rss_conf.key_len,
2798 					      flow->frxq[i].hash_fields,
2799 					      flow->rss_conf.queue,
2800 					      flow->rss_conf.queue_num,
2801 					      flow->tunnel,
2802 					      flow->rss_conf.level);
2803 			if (flow->frxq[i].hrxq)
2804 				goto flow_create;
2805 			flow->frxq[i].hrxq =
2806 				mlx5_hrxq_new(dev, flow->rss_conf.key,
2807 					      flow->rss_conf.key_len,
2808 					      flow->frxq[i].hash_fields,
2809 					      flow->rss_conf.queue,
2810 					      flow->rss_conf.queue_num,
2811 					      flow->tunnel,
2812 					      flow->rss_conf.level);
2813 			if (!flow->frxq[i].hrxq) {
2814 				DRV_LOG(DEBUG,
2815 					"port %u flow %p cannot create hash"
2816 					" rxq",
2817 					dev->data->port_id, (void *)flow);
2818 				rte_errno = EINVAL;
2819 				return -rte_errno;
2820 			}
2821 flow_create:
2822 			mlx5_flow_dump(dev, flow, i);
2823 			flow->frxq[i].ibv_flow =
2824 				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2825 						       flow->frxq[i].ibv_attr);
2826 			if (!flow->frxq[i].ibv_flow) {
2827 				DRV_LOG(DEBUG,
2828 					"port %u flow %p type %u cannot be"
2829 					" applied",
2830 					dev->data->port_id, (void *)flow, i);
2831 				rte_errno = EINVAL;
2832 				return -rte_errno;
2833 			}
2834 		}
2835 		mlx5_flow_create_update_rxqs(dev, flow);
2836 	}
2837 	return 0;
2838 }
2839 
2840 /**
2841  * Verify the flow list is empty.
2842  *
2843  * @param dev
2844  *   Pointer to Ethernet device.
2845  *
2846  * @return The number of flows not released.
2847  */
2848 int
2849 mlx5_flow_verify(struct rte_eth_dev *dev)
2850 {
2851 	struct priv *priv = dev->data->dev_private;
2852 	struct rte_flow *flow;
2853 	int ret = 0;
2854 
2855 	TAILQ_FOREACH(flow, &priv->flows, next) {
2856 		DRV_LOG(DEBUG, "port %u flow %p still referenced",
2857 			dev->data->port_id, (void *)flow);
2858 		++ret;
2859 	}
2860 	return ret;
2861 }
2862 
2863 /**
2864  * Enable a control flow configured from the control plane.
2865  *
2866  * @param dev
2867  *   Pointer to Ethernet device.
2868  * @param eth_spec
2869  *   An Ethernet flow spec to apply.
2870  * @param eth_mask
2871  *   An Ethernet flow mask to apply.
2872  * @param vlan_spec
2873  *   A VLAN flow spec to apply.
2874  * @param vlan_mask
2875  *   A VLAN flow mask to apply.
2876  *
2877  * @return
2878  *   0 on success, a negative errno value otherwise and rte_errno is set.
2879  */
2880 int
2881 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2882 		    struct rte_flow_item_eth *eth_spec,
2883 		    struct rte_flow_item_eth *eth_mask,
2884 		    struct rte_flow_item_vlan *vlan_spec,
2885 		    struct rte_flow_item_vlan *vlan_mask)
2886 {
2887 	struct priv *priv = dev->data->dev_private;
2888 	const struct rte_flow_attr attr = {
2889 		.ingress = 1,
2890 		.priority = MLX5_CTRL_FLOW_PRIORITY,
2891 	};
2892 	struct rte_flow_item items[] = {
2893 		{
2894 			.type = RTE_FLOW_ITEM_TYPE_ETH,
2895 			.spec = eth_spec,
2896 			.last = NULL,
2897 			.mask = eth_mask,
2898 		},
2899 		{
2900 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2901 				RTE_FLOW_ITEM_TYPE_END,
2902 			.spec = vlan_spec,
2903 			.last = NULL,
2904 			.mask = vlan_mask,
2905 		},
2906 		{
2907 			.type = RTE_FLOW_ITEM_TYPE_END,
2908 		},
2909 	};
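	/* Spread the control flow over all queues of the redirection table. */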
2910 	uint16_t queue[priv->reta_idx_n];
2911 	struct rte_flow_action_rss action_rss = {
2912 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2913 		.level = 0,
2914 		.types = priv->rss_conf.rss_hf,
2915 		.key_len = priv->rss_conf.rss_key_len,
2916 		.queue_num = priv->reta_idx_n,
2917 		.key = priv->rss_conf.rss_key,
2918 		.queue = queue,
2919 	};
2920 	struct rte_flow_action actions[] = {
2921 		{
2922 			.type = RTE_FLOW_ACTION_TYPE_RSS,
2923 			.conf = &action_rss,
2924 		},
2925 		{
2926 			.type = RTE_FLOW_ACTION_TYPE_END,
2927 		},
2928 	};
2929 	struct rte_flow *flow;
2930 	struct rte_flow_error error;
2931 	unsigned int i;
2932 
2933 	if (!priv->reta_idx_n) {
2934 		rte_errno = EINVAL;
2935 		return -rte_errno;
2936 	}
2937 	for (i = 0; i != priv->reta_idx_n; ++i)
2938 		queue[i] = (*priv->reta_idx)[i];
2939 	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2940 				     actions, &error);
2941 	if (!flow)
2942 		return -rte_errno;
2943 	return 0;
2944 }
2945 
2946 /**
2947  * Enable a control flow configured from the control plane.
2948  *
2949  * @param dev
2950  *   Pointer to Ethernet device.
2951  * @param eth_spec
2952  *   An Ethernet flow spec to apply.
2953  * @param eth_mask
2954  *   An Ethernet flow mask to apply.
2955  *
2956  * @return
2957  *   0 on success, a negative errno value otherwise and rte_errno is set.
2958  */
2959 int
2960 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2961 	       struct rte_flow_item_eth *eth_spec,
2962 	       struct rte_flow_item_eth *eth_mask)
2963 {
2964 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2965 }
2966 
2967 /**
2968  * Destroy a flow.
2969  *
2970  * @see rte_flow_destroy()
2971  * @see rte_flow_ops
2972  */
2973 int
2974 mlx5_flow_destroy(struct rte_eth_dev *dev,
2975 		  struct rte_flow *flow,
2976 		  struct rte_flow_error *error __rte_unused)
2977 {
2978 	struct priv *priv = dev->data->dev_private;
2979 
2980 	mlx5_flow_list_destroy(dev, &priv->flows, flow);
2981 	return 0;
2982 }
2983 
2984 /**
2985  * Destroy all flows.
2986  *
2987  * @see rte_flow_flush()
2988  * @see rte_flow_ops
2989  */
2990 int
2991 mlx5_flow_flush(struct rte_eth_dev *dev,
2992 		struct rte_flow_error *error __rte_unused)
2993 {
2994 	struct priv *priv = dev->data->dev_private;
2995 
2996 	mlx5_flow_list_flush(dev, &priv->flows);
2997 	return 0;
2998 }
2999 
3000 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
3001 /**
3002  * Query flow counter.
3003  *
3004  * @param cs
3005  *   The counter set to query.
3006  * @param counter_stats
3007  *   Stored statistics the returned counters are reported against.
3008  *
3009  * @return
3010  *   0 on success, a negative errno value otherwise and rte_errno is set.
3011  */
3012 static int
3013 mlx5_flow_query_count(struct ibv_counter_set *cs,
3014 		      struct mlx5_flow_counter_stats *counter_stats,
3015 		      struct rte_flow_query_count *query_count,
3016 		      struct rte_flow_error *error)
3017 {
3018 	uint64_t counters[2];
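	/* counters[0] returns the packet count, counters[1] the byte count. */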
3019 	struct ibv_query_counter_set_attr query_cs_attr = {
3020 		.cs = cs,
3021 		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
3022 	};
3023 	struct ibv_counter_set_data query_out = {
3024 		.out = counters,
3025 		.outlen = 2 * sizeof(uint64_t),
3026 	};
3027 	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
3028 
3029 	if (err)
3030 		return rte_flow_error_set(error, err,
3031 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3032 					  NULL,
3033 					  "cannot read counter");
3034 	query_count->hits_set = 1;
3035 	query_count->bytes_set = 1;
3036 	query_count->hits = counters[0] - counter_stats->hits;
3037 	query_count->bytes = counters[1] - counter_stats->bytes;
3038 	if (query_count->reset) {
3039 		counter_stats->hits = counters[0];
3040 		counter_stats->bytes = counters[1];
3041 	}
3042 	return 0;
3043 }
3044 
3045 /**
3046  * Query a flow.
3047  *
3048  * @see rte_flow_query()
3049  * @see rte_flow_ops
3050  */
3051 int
3052 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
3053 		struct rte_flow *flow,
3054 		const struct rte_flow_action *action __rte_unused,
3055 		void *data,
3056 		struct rte_flow_error *error)
3057 {
3058 	if (flow->cs) {
3059 		int ret;
3060 
3061 		ret = mlx5_flow_query_count(flow->cs,
3062 					    &flow->counter_stats,
3063 					    (struct rte_flow_query_count *)data,
3064 					    error);
3065 		if (ret)
3066 			return ret;
3067 	} else {
3068 		return rte_flow_error_set(error, EINVAL,
3069 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3070 					  NULL,
3071 					  "no counter found for flow");
3072 	}
3073 	return 0;
3074 }
3075 #endif
3076 
3077 /**
3078  * Isolated mode.
3079  *
3080  * @see rte_flow_isolate()
3081  * @see rte_flow_ops
3082  */
3083 int
3084 mlx5_flow_isolate(struct rte_eth_dev *dev,
3085 		  int enable,
3086 		  struct rte_flow_error *error)
3087 {
3088 	struct priv *priv = dev->data->dev_private;
3089 
3090 	if (dev->data->dev_started) {
3091 		rte_flow_error_set(error, EBUSY,
3092 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3093 				   NULL,
3094 				   "port must be stopped first");
3095 		return -rte_errno;
3096 	}
3097 	priv->isolated = !!enable;
3098 	if (enable)
3099 		dev->dev_ops = &mlx5_dev_ops_isolate;
3100 	else
3101 		dev->dev_ops = &mlx5_dev_ops;
3102 	return 0;
3103 }
3104 
3105 /**
3106  * Convert a flow director filter to a generic flow.
3107  *
3108  * @param dev
3109  *   Pointer to Ethernet device.
3110  * @param fdir_filter
3111  *   Flow director filter to add.
3112  * @param attributes
3113  *   Generic flow parameters structure.
3114  *
3115  * @return
3116  *   0 on success, a negative errno value otherwise and rte_errno is set.
3117  */
3118 static int
3119 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
3120 			 const struct rte_eth_fdir_filter *fdir_filter,
3121 			 struct mlx5_fdir *attributes)
3122 {
3123 	struct priv *priv = dev->data->dev_private;
3124 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
3125 	const struct rte_eth_fdir_masks *mask =
3126 		&dev->data->dev_conf.fdir_conf.mask;
3127 
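	/*
	 * The flow director filter is rebuilt as a generic rte_flow rule:
	 * an eth / (ipv4|ipv6) [/ (udp|tcp)] pattern with a queue or drop
	 * action, so the regular flow parser can handle it.
	 */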
3128 	/* Validate queue number. */
3129 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3130 		DRV_LOG(ERR, "port %u invalid queue number %d",
3131 			dev->data->port_id, fdir_filter->action.rx_queue);
3132 		rte_errno = EINVAL;
3133 		return -rte_errno;
3134 	}
3135 	attributes->attr.ingress = 1;
3136 	attributes->items[0] = (struct rte_flow_item) {
3137 		.type = RTE_FLOW_ITEM_TYPE_ETH,
3138 		.spec = &attributes->l2,
3139 		.mask = &attributes->l2_mask,
3140 	};
3141 	switch (fdir_filter->action.behavior) {
3142 	case RTE_ETH_FDIR_ACCEPT:
3143 		attributes->actions[0] = (struct rte_flow_action){
3144 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
3145 			.conf = &attributes->queue,
3146 		};
3147 		break;
3148 	case RTE_ETH_FDIR_REJECT:
3149 		attributes->actions[0] = (struct rte_flow_action){
3150 			.type = RTE_FLOW_ACTION_TYPE_DROP,
3151 		};
3152 		break;
3153 	default:
3154 		DRV_LOG(ERR, "port %u invalid behavior %d",
3155 			dev->data->port_id,
3156 			fdir_filter->action.behavior);
3157 		rte_errno = ENOTSUP;
3158 		return -rte_errno;
3159 	}
3160 	attributes->queue.index = fdir_filter->action.rx_queue;
3161 	/* Handle L3. */
3162 	switch (fdir_filter->input.flow_type) {
3163 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3164 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3165 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3166 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
3167 			.src_addr = input->flow.ip4_flow.src_ip,
3168 			.dst_addr = input->flow.ip4_flow.dst_ip,
3169 			.time_to_live = input->flow.ip4_flow.ttl,
3170 			.type_of_service = input->flow.ip4_flow.tos,
3171 			.next_proto_id = input->flow.ip4_flow.proto,
3172 		};
3173 		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
3174 			.src_addr = mask->ipv4_mask.src_ip,
3175 			.dst_addr = mask->ipv4_mask.dst_ip,
3176 			.time_to_live = mask->ipv4_mask.ttl,
3177 			.type_of_service = mask->ipv4_mask.tos,
3178 			.next_proto_id = mask->ipv4_mask.proto,
3179 		};
3180 		attributes->items[1] = (struct rte_flow_item){
3181 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
3182 			.spec = &attributes->l3,
3183 			.mask = &attributes->l3_mask,
3184 		};
3185 		break;
3186 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3187 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3188 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3189 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3190 			.hop_limits = input->flow.ipv6_flow.hop_limits,
3191 			.proto = input->flow.ipv6_flow.proto,
3192 		};
3193 
3194 		memcpy(attributes->l3.ipv6.hdr.src_addr,
3195 		       input->flow.ipv6_flow.src_ip,
3196 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3197 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
3198 		       input->flow.ipv6_flow.dst_ip,
3199 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
3200 		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3201 		       mask->ipv6_mask.src_ip,
3202 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3203 		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3204 		       mask->ipv6_mask.dst_ip,
3205 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
3206 		attributes->items[1] = (struct rte_flow_item){
3207 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
3208 			.spec = &attributes->l3,
3209 			.mask = &attributes->l3_mask,
3210 		};
3211 		break;
3212 	default:
3213 		DRV_LOG(ERR, "port %u invalid flow type %d",
3214 			dev->data->port_id, fdir_filter->input.flow_type);
3215 		rte_errno = ENOTSUP;
3216 		return -rte_errno;
3217 	}
3218 	/* Handle L4. */
3219 	switch (fdir_filter->input.flow_type) {
3220 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3221 		attributes->l4.udp.hdr = (struct udp_hdr){
3222 			.src_port = input->flow.udp4_flow.src_port,
3223 			.dst_port = input->flow.udp4_flow.dst_port,
3224 		};
3225 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
3226 			.src_port = mask->src_port_mask,
3227 			.dst_port = mask->dst_port_mask,
3228 		};
3229 		attributes->items[2] = (struct rte_flow_item){
3230 			.type = RTE_FLOW_ITEM_TYPE_UDP,
3231 			.spec = &attributes->l4,
3232 			.mask = &attributes->l4_mask,
3233 		};
3234 		break;
3235 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3236 		attributes->l4.tcp.hdr = (struct tcp_hdr){
3237 			.src_port = input->flow.tcp4_flow.src_port,
3238 			.dst_port = input->flow.tcp4_flow.dst_port,
3239 		};
3240 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3241 			.src_port = mask->src_port_mask,
3242 			.dst_port = mask->dst_port_mask,
3243 		};
3244 		attributes->items[2] = (struct rte_flow_item){
3245 			.type = RTE_FLOW_ITEM_TYPE_TCP,
3246 			.spec = &attributes->l4,
3247 			.mask = &attributes->l4_mask,
3248 		};
3249 		break;
3250 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3251 		attributes->l4.udp.hdr = (struct udp_hdr){
3252 			.src_port = input->flow.udp6_flow.src_port,
3253 			.dst_port = input->flow.udp6_flow.dst_port,
3254 		};
3255 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
3256 			.src_port = mask->src_port_mask,
3257 			.dst_port = mask->dst_port_mask,
3258 		};
3259 		attributes->items[2] = (struct rte_flow_item){
3260 			.type = RTE_FLOW_ITEM_TYPE_UDP,
3261 			.spec = &attributes->l4,
3262 			.mask = &attributes->l4_mask,
3263 		};
3264 		break;
3265 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3266 		attributes->l4.tcp.hdr = (struct tcp_hdr){
3267 			.src_port = input->flow.tcp6_flow.src_port,
3268 			.dst_port = input->flow.tcp6_flow.dst_port,
3269 		};
3270 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3271 			.src_port = mask->src_port_mask,
3272 			.dst_port = mask->dst_port_mask,
3273 		};
3274 		attributes->items[2] = (struct rte_flow_item){
3275 			.type = RTE_FLOW_ITEM_TYPE_TCP,
3276 			.spec = &attributes->l4,
3277 			.mask = &attributes->l4_mask,
3278 		};
3279 		break;
3280 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3281 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3282 		break;
3283 	default:
3284 		DRV_LOG(ERR, "port %u invalid flow type %d",
3285 			dev->data->port_id, fdir_filter->input.flow_type);
3286 		rte_errno = ENOTSUP;
3287 		return -rte_errno;
3288 	}
3289 	return 0;
3290 }
3291 
3292 /**
3293  * Add a new flow director filter and store it in the list.
3294  *
3295  * @param dev
3296  *   Pointer to Ethernet device.
3297  * @param fdir_filter
3298  *   Flow director filter to add.
3299  *
3300  * @return
3301  *   0 on success, a negative errno value otherwise and rte_errno is set.
3302  */
3303 static int
3304 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3305 		     const struct rte_eth_fdir_filter *fdir_filter)
3306 {
3307 	struct priv *priv = dev->data->dev_private;
3308 	struct mlx5_fdir attributes = {
3309 		.attr.group = 0,
3310 		.l2_mask = {
3311 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3312 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3313 			.type = 0,
3314 		},
3315 	};
3316 	struct mlx5_flow_parse parser = {
3317 		.layer = HASH_RXQ_ETH,
3318 	};
3319 	struct rte_flow_error error;
3320 	struct rte_flow *flow;
3321 	int ret;
3322 
3323 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3324 	if (ret)
3325 		return ret;
3326 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3327 				attributes.actions, &error, &parser);
3328 	if (ret)
3329 		return ret;
3330 	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3331 				     attributes.items, attributes.actions,
3332 				     &error);
3333 	if (flow) {
3334 		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3335 			(void *)flow);
3336 		return 0;
3337 	}
3338 	return -rte_errno;
3339 }
3340 
3341 /**
3342  * Delete specific filter.
3343  *
3344  * @param dev
3345  *   Pointer to Ethernet device.
3346  * @param fdir_filter
3347  *   Filter to be deleted.
3348  *
3349  * @return
3350  *   0 on success, a negative errno value otherwise and rte_errno is set.
3351  */
3352 static int
3353 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
3354 			const struct rte_eth_fdir_filter *fdir_filter)
3355 {
3356 	struct priv *priv = dev->data->dev_private;
3357 	struct mlx5_fdir attributes = {
3358 		.attr.group = 0,
3359 	};
3360 	struct mlx5_flow_parse parser = {
3361 		.create = 1,
3362 		.layer = HASH_RXQ_ETH,
3363 	};
3364 	struct rte_flow_error error;
3365 	struct rte_flow *flow;
3366 	unsigned int i;
3367 	int ret;
3368 
3369 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3370 	if (ret)
3371 		return ret;
3372 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3373 				attributes.actions, &error, &parser);
3374 	if (ret)
3375 		goto exit;
3376 	/*
3377 	 * Special case for the drop action, which is only added to the
3378 	 * specifications when the flow is created. In this situation the
3379 	 * drop specification is missing and has to be appended here.
3380 	 */
3381 	if (parser.drop) {
3382 		struct ibv_flow_spec_action_drop *drop;
3383 
3384 		drop = (void *)((uintptr_t)parser.queue[parser.layer].ibv_attr +
3385 				parser.queue[parser.layer].offset);
3386 		*drop = (struct ibv_flow_spec_action_drop){
3387 			.type = IBV_FLOW_SPEC_ACTION_DROP,
3388 			.size = sizeof(struct ibv_flow_spec_action_drop),
3389 		};
3390 		parser.queue[parser.layer].ibv_attr->num_of_specs++;
3391 	}
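	/*
	 * Look for an existing flow whose Verbs attribute and specifications
	 * byte-compare equal to the freshly converted filter.
	 */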
3392 	TAILQ_FOREACH(flow, &priv->flows, next) {
3393 		struct ibv_flow_attr *attr;
3394 		struct ibv_spec_header *attr_h;
3395 		void *spec;
3396 		struct ibv_flow_attr *flow_attr;
3397 		struct ibv_spec_header *flow_h;
3398 		void *flow_spec;
3399 		unsigned int specs_n;
3400 
3401 		attr = parser.queue[parser.layer].ibv_attr;
3402 		flow_attr = flow->frxq[parser.layer].ibv_attr;
3403 		/* Compare first the attributes. */
3404 		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
3405 			continue;
3406 		if (attr->num_of_specs == 0)
3407 			continue;
3408 		spec = (void *)((uintptr_t)attr +
3409 				sizeof(struct ibv_flow_attr));
3410 		flow_spec = (void *)((uintptr_t)flow_attr +
3411 				     sizeof(struct ibv_flow_attr));
3412 		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
3413 		for (i = 0; i != specs_n; ++i) {
3414 			attr_h = spec;
3415 			flow_h = flow_spec;
3416 			if (memcmp(spec, flow_spec,
3417 				   RTE_MIN(attr_h->size, flow_h->size)))
3418 				goto wrong_flow;
3419 			spec = (void *)((uintptr_t)spec + attr_h->size);
3420 			flow_spec = (void *)((uintptr_t)flow_spec +
3421 					     flow_h->size);
3422 		}
3423 		/* At this point, the flow matches. */
3424 		break;
3425 wrong_flow:
3426 		/* The flow does not match. */
3427 		continue;
3428 	}
3429 	ret = rte_errno; /* Save rte_errno before cleanup. */
3430 	if (flow)
3431 		mlx5_flow_list_destroy(dev, &priv->flows, flow);
3432 exit:
3433 	for (i = 0; i != hash_rxq_init_n; ++i) {
3434 		if (parser.queue[i].ibv_attr)
3435 			rte_free(parser.queue[i].ibv_attr);
3436 	}
3437 	rte_errno = ret; /* Restore rte_errno. */
3438 	return -rte_errno;
3439 }
3440 
3441 /**
3442  * Update queue for specific filter.
3443  *
3444  * @param dev
3445  *   Pointer to Ethernet device.
3446  * @param fdir_filter
3447  *   Filter to be updated.
3448  *
3449  * @return
3450  *   0 on success, a negative errno value otherwise and rte_errno is set.
3451  */
3452 static int
3453 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3454 			const struct rte_eth_fdir_filter *fdir_filter)
3455 {
3456 	int ret;
3457 
3458 	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3459 	if (ret)
3460 		return ret;
3461 	return mlx5_fdir_filter_add(dev, fdir_filter);
3462 }
3463 
3464 /**
3465  * Flush all filters.
3466  *
3467  * @param dev
3468  *   Pointer to Ethernet device.
3469  */
3470 static void
3471 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3472 {
3473 	struct priv *priv = dev->data->dev_private;
3474 
3475 	mlx5_flow_list_flush(dev, &priv->flows);
3476 }
3477 
3478 /**
3479  * Get flow director information.
3480  *
3481  * @param dev
3482  *   Pointer to Ethernet device.
3483  * @param[out] fdir_info
3484  *   Resulting flow director information.
3485  */
3486 static void
3487 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3488 {
3489 	struct rte_eth_fdir_masks *mask =
3490 		&dev->data->dev_conf.fdir_conf.mask;
3491 
3492 	fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
3493 	fdir_info->guarant_spc = 0;
3494 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3495 	fdir_info->max_flexpayload = 0;
3496 	fdir_info->flow_types_mask[0] = 0;
3497 	fdir_info->flex_payload_unit = 0;
3498 	fdir_info->max_flex_payload_segment_num = 0;
3499 	fdir_info->flex_payload_limit = 0;
3500 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3501 }
3502 
3503 /**
3504  * Deal with flow director operations.
3505  *
3506  * @param dev
3507  *   Pointer to Ethernet device.
3508  * @param filter_op
3509  *   Operation to perform.
3510  * @param arg
3511  *   Pointer to operation-specific structure.
3512  *
3513  * @return
3514  *   0 on success, a negative errno value otherwise and rte_errno is set.
3515  */
3516 static int
3517 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3518 		    void *arg)
3519 {
3520 	enum rte_fdir_mode fdir_mode =
3521 		dev->data->dev_conf.fdir_conf.mode;
3522 
3523 	if (filter_op == RTE_ETH_FILTER_NOP)
3524 		return 0;
3525 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3526 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3527 		DRV_LOG(ERR, "port %u flow director mode %d not supported",
3528 			dev->data->port_id, fdir_mode);
3529 		rte_errno = EINVAL;
3530 		return -rte_errno;
3531 	}
3532 	switch (filter_op) {
3533 	case RTE_ETH_FILTER_ADD:
3534 		return mlx5_fdir_filter_add(dev, arg);
3535 	case RTE_ETH_FILTER_UPDATE:
3536 		return mlx5_fdir_filter_update(dev, arg);
3537 	case RTE_ETH_FILTER_DELETE:
3538 		return mlx5_fdir_filter_delete(dev, arg);
3539 	case RTE_ETH_FILTER_FLUSH:
3540 		mlx5_fdir_filter_flush(dev);
3541 		break;
3542 	case RTE_ETH_FILTER_INFO:
3543 		mlx5_fdir_info_get(dev, arg);
3544 		break;
3545 	default:
3546 		DRV_LOG(DEBUG, "port %u unknown operation %u",
3547 			dev->data->port_id, filter_op);
3548 		rte_errno = EINVAL;
3549 		return -rte_errno;
3550 	}
3551 	return 0;
3552 }
3553 
3554 /**
3555  * Manage filter operations.
3556  *
3557  * @param dev
3558  *   Pointer to Ethernet device structure.
3559  * @param filter_type
3560  *   Filter type.
3561  * @param filter_op
3562  *   Operation to perform.
3563  * @param arg
3564  *   Pointer to operation-specific structure.
3565  *
3566  * @return
3567  *   0 on success, a negative errno value otherwise and rte_errno is set.
3568  */
3569 int
3570 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3571 		     enum rte_filter_type filter_type,
3572 		     enum rte_filter_op filter_op,
3573 		     void *arg)
3574 {
3575 	switch (filter_type) {
3576 	case RTE_ETH_FILTER_GENERIC:
3577 		if (filter_op != RTE_ETH_FILTER_GET) {
3578 			rte_errno = EINVAL;
3579 			return -rte_errno;
3580 		}
3581 		*(const void **)arg = &mlx5_flow_ops;
3582 		return 0;
3583 	case RTE_ETH_FILTER_FDIR:
3584 		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3585 	default:
3586 		DRV_LOG(ERR, "port %u filter type (%d) not supported",
3587 			dev->data->port_id, filter_type);
3588 		rte_errno = ENOTSUP;
3589 		return -rte_errno;
3590 	}
3591 	return 0;
3592 }
3593 
3594 /**
3595  * Detect number of Verbs flow priorities supported.
3596  *
3597  * @param dev
3598  *   Pointer to Ethernet device.
3599  *
3600  * @return
3601  *   The number of supported Verbs flow priorities.
3602  */
3603 unsigned int
3604 mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
3605 {
3606 	struct priv *priv = dev->data->dev_private;
3607 	unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
3608 	struct {
3609 		struct ibv_flow_attr attr;
3610 		struct ibv_flow_spec_eth eth;
3611 		struct ibv_flow_spec_action_drop drop;
3612 	} flow_attr = {
3613 		.attr = {
3614 			.num_of_specs = 2,
3615 		},
3616 		.eth = {
3617 			.type = IBV_FLOW_SPEC_ETH,
3618 			.size = sizeof(struct ibv_flow_spec_eth),
3619 		},
3620 		.drop = {
3621 			.size = sizeof(struct ibv_flow_spec_action_drop),
3622 			.type = IBV_FLOW_SPEC_ACTION_DROP,
3623 		},
3624 	};
3625 	struct ibv_flow *flow;
3626 
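	/*
	 * Probe the supported priority count by doubling the tested
	 * priority on the drop queue until flow creation fails, then
	 * fall back to the last value that worked.
	 */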
3627 	do {
3628 		flow_attr.attr.priority = verb_priorities - 1;
3629 		flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
3630 					      &flow_attr.attr);
3631 		if (flow) {
3632 			claim_zero(mlx5_glue->destroy_flow(flow));
3633 			/* Try more priorities. */
3634 			verb_priorities *= 2;
3635 		} else {
3636 			/* Failed, fall back to the last working value. */
3637 			verb_priorities /= 2;
3638 			break;
3639 		}
3640 	} while (1);
3641 	DRV_LOG(DEBUG, "port %u Verbs flow priorities: %d,"
3642 		" user flow priorities: %d",
3643 		dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
3644 	return verb_priorities;
3645 }
3646