xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision 1f106da2bf7b6461a18601abbda36de11920dfcd)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9 
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19 
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28 
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33 
34 /* Flow priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 1
36 
37 /* Internet Protocol versions and the IP protocol number of GRE. */
38 #define MLX5_IPV4 4
39 #define MLX5_IPV6 6
40 #define MLX5_GRE 47
41 
42 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
43 struct ibv_flow_spec_counter_action {
44 	int dummy;
45 };
46 #endif
47 
48 /* Dev ops structure defined in mlx5.c */
49 extern const struct eth_dev_ops mlx5_dev_ops;
50 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
51 
52 /** Structure given to the conversion functions. */
53 struct mlx5_flow_data {
54 	struct rte_eth_dev *dev; /**< Ethernet device. */
55 	struct mlx5_flow_parse *parser; /**< Parser context. */
56 	struct rte_flow_error *error; /**< Error context. */
57 };
58 
59 static int
60 mlx5_flow_create_eth(const struct rte_flow_item *item,
61 		     const void *default_mask,
62 		     struct mlx5_flow_data *data);
63 
64 static int
65 mlx5_flow_create_vlan(const struct rte_flow_item *item,
66 		      const void *default_mask,
67 		      struct mlx5_flow_data *data);
68 
69 static int
70 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
71 		      const void *default_mask,
72 		      struct mlx5_flow_data *data);
73 
74 static int
75 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
76 		      const void *default_mask,
77 		      struct mlx5_flow_data *data);
78 
79 static int
80 mlx5_flow_create_udp(const struct rte_flow_item *item,
81 		     const void *default_mask,
82 		     struct mlx5_flow_data *data);
83 
84 static int
85 mlx5_flow_create_tcp(const struct rte_flow_item *item,
86 		     const void *default_mask,
87 		     struct mlx5_flow_data *data);
88 
89 static int
90 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
91 		       const void *default_mask,
92 		       struct mlx5_flow_data *data);
93 
94 static int
95 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
96 			   const void *default_mask,
97 			   struct mlx5_flow_data *data);
98 
99 static int
100 mlx5_flow_create_gre(const struct rte_flow_item *item,
101 		     const void *default_mask,
102 		     struct mlx5_flow_data *data);
103 
104 static int
105 mlx5_flow_create_mpls(const struct rte_flow_item *item,
106 		      const void *default_mask,
107 		      struct mlx5_flow_data *data);
108 
109 struct mlx5_flow_parse;
110 
111 static void
112 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
113 		      unsigned int size);
114 
115 static int
116 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
117 
118 static int
119 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
120 
121 /* Hash RX queue types. */
122 enum hash_rxq_type {
123 	HASH_RXQ_TCPV4,
124 	HASH_RXQ_UDPV4,
125 	HASH_RXQ_IPV4,
126 	HASH_RXQ_TCPV6,
127 	HASH_RXQ_UDPV6,
128 	HASH_RXQ_IPV6,
129 	HASH_RXQ_ETH,
130 	HASH_RXQ_TUNNEL,
131 };
132 
133 /* Initialization data for hash RX queue. */
134 struct hash_rxq_init {
135 	uint64_t hash_fields; /* Fields that participate in the hash. */
136 	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
137 	unsigned int flow_priority; /* Flow priority to use. */
138 	unsigned int ip_version; /* Internet protocol. */
139 };
140 
141 /* Initialization data for hash RX queues. */
142 const struct hash_rxq_init hash_rxq_init[] = {
143 	[HASH_RXQ_TCPV4] = {
144 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
145 				IBV_RX_HASH_DST_IPV4 |
146 				IBV_RX_HASH_SRC_PORT_TCP |
147 				IBV_RX_HASH_DST_PORT_TCP),
148 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
149 		.flow_priority = 0,
150 		.ip_version = MLX5_IPV4,
151 	},
152 	[HASH_RXQ_UDPV4] = {
153 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
154 				IBV_RX_HASH_DST_IPV4 |
155 				IBV_RX_HASH_SRC_PORT_UDP |
156 				IBV_RX_HASH_DST_PORT_UDP),
157 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
158 		.flow_priority = 0,
159 		.ip_version = MLX5_IPV4,
160 	},
161 	[HASH_RXQ_IPV4] = {
162 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
163 				IBV_RX_HASH_DST_IPV4),
164 		.dpdk_rss_hf = (ETH_RSS_IPV4 |
165 				ETH_RSS_FRAG_IPV4),
166 		.flow_priority = 1,
167 		.ip_version = MLX5_IPV4,
168 	},
169 	[HASH_RXQ_TCPV6] = {
170 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
171 				IBV_RX_HASH_DST_IPV6 |
172 				IBV_RX_HASH_SRC_PORT_TCP |
173 				IBV_RX_HASH_DST_PORT_TCP),
174 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
175 		.flow_priority = 0,
176 		.ip_version = MLX5_IPV6,
177 	},
178 	[HASH_RXQ_UDPV6] = {
179 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
180 				IBV_RX_HASH_DST_IPV6 |
181 				IBV_RX_HASH_SRC_PORT_UDP |
182 				IBV_RX_HASH_DST_PORT_UDP),
183 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
184 		.flow_priority = 0,
185 		.ip_version = MLX5_IPV6,
186 	},
187 	[HASH_RXQ_IPV6] = {
188 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
189 				IBV_RX_HASH_DST_IPV6),
190 		.dpdk_rss_hf = (ETH_RSS_IPV6 |
191 				ETH_RSS_FRAG_IPV6),
192 		.flow_priority = 1,
193 		.ip_version = MLX5_IPV6,
194 	},
195 	[HASH_RXQ_ETH] = {
196 		.hash_fields = 0,
197 		.dpdk_rss_hf = 0,
198 		.flow_priority = 2,
199 	},
200 };
201 
202 /* Number of entries in hash_rxq_init[]. */
203 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
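/*
 * Note on flow_priority above (added for clarity): the value encodes
 * specificity, not importance.  L4-specific hash types (TCP/UDP) use 0,
 * L3-only types use 1 and the L2 catch-all uses 2.
 * mlx5_flow_update_priority() adds this value to the base priority derived
 * from the flow attributes, so a more specific expansion of a rule always
 * gets a numerically lower (i.e. higher precedence) Verbs priority than its
 * less specific fallbacks.
 */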
204 
205 /** Structure for holding counter stats. */
206 struct mlx5_flow_counter_stats {
207 	uint64_t hits; /**< Number of packets matched by the rule. */
208 	uint64_t bytes; /**< Number of bytes matched by the rule. */
209 };
210 
211 /** Structure for Drop queue. */
212 struct mlx5_hrxq_drop {
213 	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
214 	struct ibv_qp *qp; /**< Verbs queue pair. */
215 	struct ibv_wq *wq; /**< Verbs work queue. */
216 	struct ibv_cq *cq; /**< Verbs completion queue. */
217 };
218 
219 /* Flow structures. */
220 struct mlx5_flow {
221 	uint64_t hash_fields; /**< Fields that participate in the hash. */
222 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
223 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
224 	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
225 };
226 
227 /* Drop flow structures. */
228 struct mlx5_flow_drop {
229 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
230 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
231 };
232 
233 struct rte_flow {
234 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
235 	uint32_t mark:1; /**< Set if the flow is marked. */
236 	uint32_t drop:1; /**< Drop queue. */
237 	struct rte_flow_action_rss rss_conf; /**< RSS configuration */
238 	uint16_t (*queues)[]; /**< Queues indexes to use. */
239 	uint8_t rss_key[40]; /**< copy of the RSS key. */
240 	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
241 	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
242 	struct mlx5_flow_counter_stats counter_stats;/**<The counter stats. */
243 	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
244 	/**< Flow with Rx queue. */
245 };
246 
247 /** Static initializer for items. */
248 #define ITEMS(...) \
249 	(const enum rte_flow_item_type []){ \
250 		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
251 	}
252 
253 #define IS_TUNNEL(type) ( \
254 	(type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
255 	(type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
256 	(type) == RTE_FLOW_ITEM_TYPE_GRE || \
257 	(type) == RTE_FLOW_ITEM_TYPE_MPLS)
258 
259 const uint32_t flow_ptype[] = {
260 	[RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
261 	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
262 	[RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
263 	[RTE_FLOW_ITEM_TYPE_MPLS] = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
264 };
265 
266 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
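/*
 * Worked example (assuming the standard rte_mbuf_ptype.h values, where the
 * tunnel type occupies bits 12-15): with RTE_PTYPE_TUNNEL_MASK == 0x0000f000
 * and RTE_PTYPE_TUNNEL_VXLAN == 0x00003000, PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)
 * evaluates to 3, i.e. the tunnel nibble shifted down to a compact index for
 * ptype_ext[] below.
 */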
267 
268 const uint32_t ptype_ext[] = {
269 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
270 					      RTE_PTYPE_L4_UDP,
271 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)]	= RTE_PTYPE_TUNNEL_VXLAN_GPE |
272 						  RTE_PTYPE_L4_UDP,
273 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
274 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)] =
275 		RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
276 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)] =
277 		RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
278 };
279 
280 /** Structure to generate a simple graph of layers supported by the NIC. */
281 struct mlx5_flow_items {
282 	/** List of possible actions for these items. */
283 	const enum rte_flow_action_type *const actions;
284 	/** Bit-masks corresponding to the possibilities for the item. */
285 	const void *mask;
286 	/**
287 	 * Default bit-masks to use when item->mask is not provided. When
288 	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
289 	 * used instead.
290 	 */
291 	const void *default_mask;
292 	/** Bit-masks size in bytes. */
293 	const unsigned int mask_sz;
294 	/**
295 	 * Conversion function from rte_flow to NIC specific flow.
296 	 *
297 	 * @param item
298 	 *   rte_flow item to convert.
299 	 * @param default_mask
300 	 *   Default bit-masks to use when item->mask is not provided.
301 	 * @param data
302 	 *   Internal structure to store the conversion.
303 	 *
304 	 * @return
305 	 *   0 on success, a negative errno value otherwise and rte_errno is
306 	 *   set.
307 	 */
308 	int (*convert)(const struct rte_flow_item *item,
309 		       const void *default_mask,
310 		       struct mlx5_flow_data *data);
311 	/** Size in bytes of the destination structure. */
312 	const unsigned int dst_sz;
313 	/** List of possible following items.  */
314 	const enum rte_flow_item_type *const items;
315 };
316 
317 /** Valid action for this PMD. */
318 static const enum rte_flow_action_type valid_actions[] = {
319 	RTE_FLOW_ACTION_TYPE_DROP,
320 	RTE_FLOW_ACTION_TYPE_QUEUE,
321 	RTE_FLOW_ACTION_TYPE_MARK,
322 	RTE_FLOW_ACTION_TYPE_FLAG,
323 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
324 	RTE_FLOW_ACTION_TYPE_COUNT,
325 #endif
326 	RTE_FLOW_ACTION_TYPE_END,
327 };
328 
329 /** Graph of supported items and associated actions. */
330 static const struct mlx5_flow_items mlx5_flow_items[] = {
331 	[RTE_FLOW_ITEM_TYPE_END] = {
332 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
333 			       RTE_FLOW_ITEM_TYPE_VXLAN,
334 			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
335 			       RTE_FLOW_ITEM_TYPE_GRE),
336 	},
337 	[RTE_FLOW_ITEM_TYPE_ETH] = {
338 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
339 			       RTE_FLOW_ITEM_TYPE_IPV4,
340 			       RTE_FLOW_ITEM_TYPE_IPV6),
341 		.actions = valid_actions,
342 		.mask = &(const struct rte_flow_item_eth){
343 			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
344 			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
345 			.type = -1,
346 		},
347 		.default_mask = &rte_flow_item_eth_mask,
348 		.mask_sz = sizeof(struct rte_flow_item_eth),
349 		.convert = mlx5_flow_create_eth,
350 		.dst_sz = sizeof(struct ibv_flow_spec_eth),
351 	},
352 	[RTE_FLOW_ITEM_TYPE_VLAN] = {
353 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
354 			       RTE_FLOW_ITEM_TYPE_IPV6),
355 		.actions = valid_actions,
356 		.mask = &(const struct rte_flow_item_vlan){
357 			.tci = -1,
358 			.inner_type = -1,
359 		},
360 		.default_mask = &rte_flow_item_vlan_mask,
361 		.mask_sz = sizeof(struct rte_flow_item_vlan),
362 		.convert = mlx5_flow_create_vlan,
363 		.dst_sz = 0,
364 	},
365 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
366 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
367 			       RTE_FLOW_ITEM_TYPE_TCP,
368 			       RTE_FLOW_ITEM_TYPE_GRE),
369 		.actions = valid_actions,
370 		.mask = &(const struct rte_flow_item_ipv4){
371 			.hdr = {
372 				.src_addr = -1,
373 				.dst_addr = -1,
374 				.type_of_service = -1,
375 				.next_proto_id = -1,
376 			},
377 		},
378 		.default_mask = &rte_flow_item_ipv4_mask,
379 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
380 		.convert = mlx5_flow_create_ipv4,
381 		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
382 	},
383 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
384 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
385 			       RTE_FLOW_ITEM_TYPE_TCP,
386 			       RTE_FLOW_ITEM_TYPE_GRE),
387 		.actions = valid_actions,
388 		.mask = &(const struct rte_flow_item_ipv6){
389 			.hdr = {
390 				.src_addr = {
391 					0xff, 0xff, 0xff, 0xff,
392 					0xff, 0xff, 0xff, 0xff,
393 					0xff, 0xff, 0xff, 0xff,
394 					0xff, 0xff, 0xff, 0xff,
395 				},
396 				.dst_addr = {
397 					0xff, 0xff, 0xff, 0xff,
398 					0xff, 0xff, 0xff, 0xff,
399 					0xff, 0xff, 0xff, 0xff,
400 					0xff, 0xff, 0xff, 0xff,
401 				},
402 				.vtc_flow = -1,
403 				.proto = -1,
404 				.hop_limits = -1,
405 			},
406 		},
407 		.default_mask = &rte_flow_item_ipv6_mask,
408 		.mask_sz = sizeof(struct rte_flow_item_ipv6),
409 		.convert = mlx5_flow_create_ipv6,
410 		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
411 	},
412 	[RTE_FLOW_ITEM_TYPE_UDP] = {
413 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
414 			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
415 			       RTE_FLOW_ITEM_TYPE_MPLS),
416 		.actions = valid_actions,
417 		.mask = &(const struct rte_flow_item_udp){
418 			.hdr = {
419 				.src_port = -1,
420 				.dst_port = -1,
421 			},
422 		},
423 		.default_mask = &rte_flow_item_udp_mask,
424 		.mask_sz = sizeof(struct rte_flow_item_udp),
425 		.convert = mlx5_flow_create_udp,
426 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
427 	},
428 	[RTE_FLOW_ITEM_TYPE_TCP] = {
429 		.actions = valid_actions,
430 		.mask = &(const struct rte_flow_item_tcp){
431 			.hdr = {
432 				.src_port = -1,
433 				.dst_port = -1,
434 			},
435 		},
436 		.default_mask = &rte_flow_item_tcp_mask,
437 		.mask_sz = sizeof(struct rte_flow_item_tcp),
438 		.convert = mlx5_flow_create_tcp,
439 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
440 	},
441 	[RTE_FLOW_ITEM_TYPE_GRE] = {
442 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
443 			       RTE_FLOW_ITEM_TYPE_IPV4,
444 			       RTE_FLOW_ITEM_TYPE_IPV6,
445 			       RTE_FLOW_ITEM_TYPE_MPLS),
446 		.actions = valid_actions,
447 		.mask = &(const struct rte_flow_item_gre){
448 			.protocol = -1,
449 		},
450 		.default_mask = &rte_flow_item_gre_mask,
451 		.mask_sz = sizeof(struct rte_flow_item_gre),
452 		.convert = mlx5_flow_create_gre,
453 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
454 		.dst_sz = sizeof(struct ibv_flow_spec_gre),
455 #else
456 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
457 #endif
458 	},
459 	[RTE_FLOW_ITEM_TYPE_MPLS] = {
460 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
461 			       RTE_FLOW_ITEM_TYPE_IPV4,
462 			       RTE_FLOW_ITEM_TYPE_IPV6),
463 		.actions = valid_actions,
464 		.mask = &(const struct rte_flow_item_mpls){
465 			.label_tc_s = "\xff\xff\xf0",
466 		},
467 		.default_mask = &rte_flow_item_mpls_mask,
468 		.mask_sz = sizeof(struct rte_flow_item_mpls),
469 		.convert = mlx5_flow_create_mpls,
470 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
471 		.dst_sz = sizeof(struct ibv_flow_spec_mpls),
472 #endif
473 	},
474 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
475 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
476 			       RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
477 			       RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
478 		.actions = valid_actions,
479 		.mask = &(const struct rte_flow_item_vxlan){
480 			.vni = "\xff\xff\xff",
481 		},
482 		.default_mask = &rte_flow_item_vxlan_mask,
483 		.mask_sz = sizeof(struct rte_flow_item_vxlan),
484 		.convert = mlx5_flow_create_vxlan,
485 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
486 	},
487 	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = {
488 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
489 			       RTE_FLOW_ITEM_TYPE_IPV4,
490 			       RTE_FLOW_ITEM_TYPE_IPV6),
491 		.actions = valid_actions,
492 		.mask = &(const struct rte_flow_item_vxlan_gpe){
493 			.vni = "\xff\xff\xff",
494 		},
495 		.default_mask = &rte_flow_item_vxlan_gpe_mask,
496 		.mask_sz = sizeof(struct rte_flow_item_vxlan_gpe),
497 		.convert = mlx5_flow_create_vxlan_gpe,
498 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
499 	},
500 };
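/*
 * How the graph above is walked (summary): parsing starts at
 * RTE_FLOW_ITEM_TYPE_END, whose .items list names the allowed first items
 * (ETH or a tunnel item).  Each following pattern item must appear in the
 * .items list of the previous one, e.g. eth / ipv4 / udp / vxlan is accepted
 * while eth / udp is not.  For every accepted item, .mask bounds what can be
 * matched, .convert translates it into a Verbs specification and .dst_sz
 * bytes are reserved for that specification in the attribute buffer.
 */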
501 
502 /** Structure to pass to the conversion function. */
503 struct mlx5_flow_parse {
504 	uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
505 	uint32_t create:1;
506 	/**< Whether resources should remain after a validate. */
507 	uint32_t drop:1; /**< Target is a drop queue. */
508 	uint32_t mark:1; /**< Mark is present in the flow. */
509 	uint32_t count:1; /**< Count is present in the flow. */
510 	uint32_t mark_id; /**< Mark identifier. */
511 	struct rte_flow_action_rss rss_conf; /**< RSS configuration */
512 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
513 	uint8_t rss_key[40]; /**< copy of the RSS key. */
514 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
515 	enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
516 	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
517 	struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
518 	struct {
519 		struct ibv_flow_attr *ibv_attr;
520 		/**< Pointer to Verbs attributes. */
521 		unsigned int offset;
522 		/**< Current position or total size of the attribute. */
523 		uint64_t hash_fields; /**< Verbs hash fields. */
524 	} queue[RTE_DIM(hash_rxq_init)];
525 };
526 
527 static const struct rte_flow_ops mlx5_flow_ops = {
528 	.validate = mlx5_flow_validate,
529 	.create = mlx5_flow_create,
530 	.destroy = mlx5_flow_destroy,
531 	.flush = mlx5_flow_flush,
532 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
533 	.query = mlx5_flow_query,
534 #else
535 	.query = NULL,
536 #endif
537 	.isolate = mlx5_flow_isolate,
538 };
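/*
 * These callbacks implement the generic rte_flow API for this PMD.  A
 * minimal usage sketch from the application side (illustrative only,
 * port_id assumed valid, error handling omitted):
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *		  .conf = &(struct rte_flow_action_queue){ .index = 0 } },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error error;
 *	struct rte_flow *flow = NULL;
 *
 *	if (!rte_flow_validate(port_id, &attr, pattern, actions, &error))
 *		flow = rte_flow_create(port_id, &attr, pattern, actions,
 *				       &error);
 */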
539 
540 /* Convert FDIR request to Generic flow. */
541 struct mlx5_fdir {
542 	struct rte_flow_attr attr;
543 	struct rte_flow_action actions[2];
544 	struct rte_flow_item items[4];
545 	struct rte_flow_item_eth l2;
546 	struct rte_flow_item_eth l2_mask;
547 	union {
548 		struct rte_flow_item_ipv4 ipv4;
549 		struct rte_flow_item_ipv6 ipv6;
550 	} l3;
551 	union {
552 		struct rte_flow_item_ipv4 ipv4;
553 		struct rte_flow_item_ipv6 ipv6;
554 	} l3_mask;
555 	union {
556 		struct rte_flow_item_udp udp;
557 		struct rte_flow_item_tcp tcp;
558 	} l4;
559 	union {
560 		struct rte_flow_item_udp udp;
561 		struct rte_flow_item_tcp tcp;
562 	} l4_mask;
563 	struct rte_flow_action_queue queue;
564 };
565 
566 /* Verbs specification header. */
567 struct ibv_spec_header {
568 	enum ibv_flow_spec_type type;
569 	uint16_t size;
570 };
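/*
 * Every Verbs flow specification starts with this { type, size } pair, so
 * the specifications appended after struct ibv_flow_attr can be walked
 * generically by advancing a pointer by ->size bytes at each step.
 */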
571 
572 /**
573  * Check whether an item is fully supported by the NIC matching capability.
574  *
575  * @param item[in]
576  *   Item specification.
577  * @param mask[in]
578  *   Bit-masks covering supported fields to compare with spec, last and mask in
579  *   \item.
580  * @param size
581  *   Bit-mask size in bytes.
582  *
583  * @return
584  *   0 on success, a negative errno value otherwise and rte_errno is set.
585  */
586 static int
587 mlx5_flow_item_validate(const struct rte_flow_item *item,
588 			const uint8_t *mask, unsigned int size)
589 {
590 	unsigned int i;
591 	const uint8_t *spec = item->spec;
592 	const uint8_t *last = item->last;
593 	const uint8_t *m = item->mask ? item->mask : mask;
594 
595 	if (!spec && (item->mask || last))
596 		goto error;
597 	if (!spec)
598 		return 0;
599 	/*
600 	 * Single-pass check to make sure that:
601 	 * - item->mask is supported, no bits are set outside mask.
602 	 * - Both masked item->spec and item->last are equal (no range
603 	 *   supported).
604 	 */
605 	for (i = 0; i < size; i++) {
606 		if (!m[i])
607 			continue;
608 		if ((m[i] | mask[i]) != mask[i])
609 			goto error;
610 		if (last && ((spec[i] & m[i]) != (last[i] & m[i])))
611 			goto error;
612 	}
613 	return 0;
614 error:
615 	rte_errno = ENOTSUP;
616 	return -rte_errno;
617 }
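/*
 * Example of the checks above with the ETH item, whose supported mask
 * covers dst, src and type: a spec matching on dst only passes, while a
 * mask enabling bits outside the supported mask, or an item->last that
 * differs from item->spec under the mask (i.e. a range), is rejected with
 * ENOTSUP.
 */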
618 
619 /**
620  * Validate flow rule attributes.
621  *
622  * @param[in] attr
623  *   Flow rule attributes.
624  * @param[out] error
625  *   Perform verbose error reporting if not NULL.
626  *
627  * @return
628  *   0 on success, a negative errno value otherwise and rte_errno is set.
629  */
630 static int
631 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
632 			     struct rte_flow_error *error)
633 {
634 	if (attr->group) {
635 		rte_flow_error_set(error, ENOTSUP,
636 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
637 				   NULL,
638 				   "groups are not supported");
639 		return -rte_errno;
640 	}
641 	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
642 		rte_flow_error_set(error, ENOTSUP,
643 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
644 				   NULL,
645 				   "priorities are not supported");
646 		return -rte_errno;
647 	}
648 	if (attr->egress) {
649 		rte_flow_error_set(error, ENOTSUP,
650 				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
651 				   NULL,
652 				   "egress is not supported");
653 		return -rte_errno;
654 	}
655 	if (attr->transfer) {
656 		rte_flow_error_set(error, ENOTSUP,
657 				   RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
658 				   NULL,
659 				   "transfer is not supported");
660 		return -rte_errno;
661 	}
662 	if (!attr->ingress) {
663 		rte_flow_error_set(error, ENOTSUP,
664 				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
665 				   NULL,
666 				   "only ingress is supported");
667 		return -rte_errno;
668 	}
669 	return 0;
670 }
671 
672 /**
673  * Extract the requested actions into the parser.
674  *
675  * @param dev
676  *   Pointer to Ethernet device.
677  * @param[in] actions
678  *   Associated actions (list terminated by the END action).
679  * @param[out] error
680  *   Perform verbose error reporting if not NULL.
681  * @param[in, out] parser
682  *   Internal parser structure.
683  *
684  * @return
685  *   0 on success, a negative errno value otherwise and rte_errno is set.
686  */
687 static int
688 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
689 			  const struct rte_flow_action actions[],
690 			  struct rte_flow_error *error,
691 			  struct mlx5_flow_parse *parser)
692 {
693 	enum { FATE = 1, MARK = 2, COUNT = 4, };
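	/*
	 * Every action belongs to one of the classes above: DROP, QUEUE and
	 * RSS are mutually exclusive fates, MARK and FLAG share the MARK
	 * class and COUNT stands alone.  The overlap bit-set rejects a
	 * second action from an already seen class.
	 */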
694 	uint32_t overlap = 0;
695 	struct priv *priv = dev->data->dev_private;
696 
697 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
698 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
699 			continue;
700 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
701 			if (overlap & FATE)
702 				goto exit_action_overlap;
703 			overlap |= FATE;
704 			parser->drop = 1;
705 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
706 			const struct rte_flow_action_queue *queue =
707 				(const struct rte_flow_action_queue *)
708 				actions->conf;
709 
710 			if (overlap & FATE)
711 				goto exit_action_overlap;
712 			overlap |= FATE;
713 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
714 				goto exit_action_not_supported;
715 			parser->queues[0] = queue->index;
716 			parser->rss_conf = (struct rte_flow_action_rss){
717 				.queue_num = 1,
718 				.queue = parser->queues,
719 			};
720 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
721 			const struct rte_flow_action_rss *rss =
722 				(const struct rte_flow_action_rss *)
723 				actions->conf;
724 			const uint8_t *rss_key;
725 			uint32_t rss_key_len;
726 			uint16_t n;
727 
728 			if (overlap & FATE)
729 				goto exit_action_overlap;
730 			overlap |= FATE;
731 			if (rss->func &&
732 			    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
733 				rte_flow_error_set(error, EINVAL,
734 						   RTE_FLOW_ERROR_TYPE_ACTION,
735 						   actions,
736 						   "the only supported RSS hash"
737 						   " function is Toeplitz");
738 				return -rte_errno;
739 			}
740 #ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
741 			if (parser->rss_conf.level > 1) {
742 				rte_flow_error_set(error, EINVAL,
743 						   RTE_FLOW_ERROR_TYPE_ACTION,
744 						   actions,
745 						   "a nonzero RSS encapsulation"
746 						   " level is not supported");
747 				return -rte_errno;
748 			}
749 #endif
750 			if (parser->rss_conf.level > 2) {
751 				rte_flow_error_set(error, EINVAL,
752 						   RTE_FLOW_ERROR_TYPE_ACTION,
753 						   actions,
754 						   "RSS encapsulation level"
755 						   " > 1 is not supported");
756 				return -rte_errno;
757 			}
758 			if (rss->types & MLX5_RSS_HF_MASK) {
759 				rte_flow_error_set(error, EINVAL,
760 						   RTE_FLOW_ERROR_TYPE_ACTION,
761 						   actions,
762 						   "unsupported RSS type"
763 						   " requested");
764 				return -rte_errno;
765 			}
766 			if (rss->key_len) {
767 				rss_key_len = rss->key_len;
768 				rss_key = rss->key;
769 			} else {
770 				rss_key_len = rss_hash_default_key_len;
771 				rss_key = rss_hash_default_key;
772 			}
773 			if (rss_key_len != RTE_DIM(parser->rss_key)) {
774 				rte_flow_error_set(error, EINVAL,
775 						   RTE_FLOW_ERROR_TYPE_ACTION,
776 						   actions,
777 						   "RSS hash key must be"
778 						   " exactly 40 bytes long");
779 				return -rte_errno;
780 			}
781 			if (!rss->queue_num) {
782 				rte_flow_error_set(error, EINVAL,
783 						   RTE_FLOW_ERROR_TYPE_ACTION,
784 						   actions,
785 						   "no valid queues");
786 				return -rte_errno;
787 			}
788 			if (rss->queue_num > RTE_DIM(parser->queues)) {
789 				rte_flow_error_set(error, EINVAL,
790 						   RTE_FLOW_ERROR_TYPE_ACTION,
791 						   actions,
792 						   "too many queues for RSS"
793 						   " context");
794 				return -rte_errno;
795 			}
796 			for (n = 0; n < rss->queue_num; ++n) {
797 				if (rss->queue[n] >= priv->rxqs_n) {
798 					rte_flow_error_set(error, EINVAL,
799 						   RTE_FLOW_ERROR_TYPE_ACTION,
800 						   actions,
801 						   "queue id > number of"
802 						   " queues");
803 					return -rte_errno;
804 				}
805 			}
806 			parser->rss_conf = (struct rte_flow_action_rss){
807 				.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
808 				.level = rss->level,
809 				.types = rss->types,
810 				.key_len = rss_key_len,
811 				.queue_num = rss->queue_num,
812 				.key = memcpy(parser->rss_key, rss_key,
813 					      sizeof(*rss_key) * rss_key_len),
814 				.queue = memcpy(parser->queues, rss->queue,
815 						sizeof(*rss->queue) *
816 						rss->queue_num),
817 			};
818 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
819 			const struct rte_flow_action_mark *mark =
820 				(const struct rte_flow_action_mark *)
821 				actions->conf;
822 
823 			if (overlap & MARK)
824 				goto exit_action_overlap;
825 			overlap |= MARK;
826 			if (!mark) {
827 				rte_flow_error_set(error, EINVAL,
828 						   RTE_FLOW_ERROR_TYPE_ACTION,
829 						   actions,
830 						   "mark must be defined");
831 				return -rte_errno;
832 			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
833 				rte_flow_error_set(error, ENOTSUP,
834 						   RTE_FLOW_ERROR_TYPE_ACTION,
835 						   actions,
836 						   "mark must be between 0"
837 						   " and 16777199");
838 				return -rte_errno;
839 			}
840 			parser->mark = 1;
841 			parser->mark_id = mark->id;
842 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
843 			if (overlap & MARK)
844 				goto exit_action_overlap;
845 			overlap |= MARK;
846 			parser->mark = 1;
847 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
848 			   priv->config.flow_counter_en) {
849 			if (overlap & COUNT)
850 				goto exit_action_overlap;
851 			overlap |= COUNT;
852 			parser->count = 1;
853 		} else {
854 			goto exit_action_not_supported;
855 		}
856 	}
857 	/* When fate is unknown, drop traffic. */
858 	if (!(overlap & FATE))
859 		parser->drop = 1;
860 	if (parser->drop && parser->mark)
861 		parser->mark = 0;
862 	if (!parser->rss_conf.queue_num && !parser->drop) {
863 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
864 				   NULL, "no valid action");
865 		return -rte_errno;
866 	}
867 	return 0;
868 exit_action_not_supported:
869 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
870 			   actions, "action not supported");
871 	return -rte_errno;
872 exit_action_overlap:
873 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
874 			   actions, "overlapping actions are not supported");
875 	return -rte_errno;
876 }
877 
878 /**
879  * Validate items.
880  *
881  * @param[in] items
882  *   Pattern specification (list terminated by the END pattern item).
883  * @param[out] error
884  *   Perform verbose error reporting if not NULL.
885  * @param[in, out] parser
886  *   Internal parser structure.
887  *
888  * @return
889  *   0 on success, a negative errno value otherwise and rte_errno is set.
890  */
891 static int
892 mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
893 				 const struct rte_flow_item items[],
894 				 struct rte_flow_error *error,
895 				 struct mlx5_flow_parse *parser)
896 {
897 	struct priv *priv = dev->data->dev_private;
898 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
899 	unsigned int i;
900 	unsigned int last_voids = 0;
901 	int ret = 0;
902 
903 	/* Initialise the offsets to start after verbs attribute. */
904 	for (i = 0; i != hash_rxq_init_n; ++i)
905 		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
906 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
907 		const struct mlx5_flow_items *token = NULL;
908 		unsigned int n;
909 
910 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID) {
911 			last_voids++;
912 			continue;
913 		}
914 		for (i = 0;
915 		     cur_item->items &&
916 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
917 		     ++i) {
918 			if (cur_item->items[i] == items->type) {
919 				token = &mlx5_flow_items[items->type];
920 				break;
921 			}
922 		}
923 		if (!token) {
924 			ret = -ENOTSUP;
925 			goto exit_item_not_supported;
926 		}
927 		cur_item = token;
928 		ret = mlx5_flow_item_validate(items,
929 					      (const uint8_t *)cur_item->mask,
930 					      cur_item->mask_sz);
931 		if (ret)
932 			goto exit_item_not_supported;
933 		if (IS_TUNNEL(items->type)) {
934 			if (parser->tunnel &&
935 			    !((items - last_voids - 1)->type ==
936 			      RTE_FLOW_ITEM_TYPE_GRE && items->type ==
937 			      RTE_FLOW_ITEM_TYPE_MPLS)) {
938 				rte_flow_error_set(error, ENOTSUP,
939 						   RTE_FLOW_ERROR_TYPE_ITEM,
940 						   items,
941 						   "Cannot recognize multiple"
942 						   " tunnel encapsulations.");
943 				return -rte_errno;
944 			}
945 			if (items->type == RTE_FLOW_ITEM_TYPE_MPLS &&
946 			    !priv->config.mpls_en) {
947 				rte_flow_error_set(error, ENOTSUP,
948 						   RTE_FLOW_ERROR_TYPE_ITEM,
949 						   items,
950 						   "MPLS not supported or"
951 						   " disabled in firmware"
952 						   " configuration.");
953 				return -rte_errno;
954 			}
955 			if (!priv->config.tunnel_en &&
956 			    parser->rss_conf.level > 1) {
957 				rte_flow_error_set(error, ENOTSUP,
958 					RTE_FLOW_ERROR_TYPE_ITEM,
959 					items,
960 					"RSS on tunnel is not supported");
961 				return -rte_errno;
962 			}
963 			parser->inner = IBV_FLOW_SPEC_INNER;
964 			parser->tunnel = flow_ptype[items->type];
965 		}
966 		if (parser->drop) {
967 			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
968 		} else {
969 			for (n = 0; n != hash_rxq_init_n; ++n)
970 				parser->queue[n].offset += cur_item->dst_sz;
971 		}
972 		last_voids = 0;
973 	}
974 	if (parser->drop) {
975 		parser->queue[HASH_RXQ_ETH].offset +=
976 			sizeof(struct ibv_flow_spec_action_drop);
977 	}
978 	if (parser->mark) {
979 		for (i = 0; i != hash_rxq_init_n; ++i)
980 			parser->queue[i].offset +=
981 				sizeof(struct ibv_flow_spec_action_tag);
982 	}
983 	if (parser->count) {
984 		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
985 
986 		for (i = 0; i != hash_rxq_init_n; ++i)
987 			parser->queue[i].offset += size;
988 	}
989 	return 0;
990 exit_item_not_supported:
991 	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
992 				  items, "item not supported");
993 }
994 
995 /**
996  * Allocate memory space to store verbs flow attributes.
997  *
998  * @param[in] size
999  *   Number of bytes to allocate.
1000  * @param[out] error
1001  *   Perform verbose error reporting if not NULL.
1002  *
1003  * @return
1004  *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
1005  */
1006 static struct ibv_flow_attr *
1007 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
1008 {
1009 	struct ibv_flow_attr *ibv_attr;
1010 
1011 	ibv_attr = rte_calloc(__func__, 1, size, 0);
1012 	if (!ibv_attr) {
1013 		rte_flow_error_set(error, ENOMEM,
1014 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1015 				   NULL,
1016 				   "cannot allocate verbs spec attributes");
1017 		return NULL;
1018 	}
1019 	return ibv_attr;
1020 }
1021 
1022 /**
1023  * Give inner packet matching a higher priority than non-inner
1024  * matching.
1025  *
1026  * @param dev
1027  *   Pointer to Ethernet device.
1028  * @param[in, out] parser
1029  *   Internal parser structure.
1030  * @param attr
1031  *   User flow attribute.
1032  */
1033 static void
1034 mlx5_flow_update_priority(struct rte_eth_dev *dev,
1035 			  struct mlx5_flow_parse *parser,
1036 			  const struct rte_flow_attr *attr)
1037 {
1038 	struct priv *priv = dev->data->dev_private;
1039 	unsigned int i;
1040 	uint16_t priority;
1041 
1042 	/*			8 priorities	>= 16 priorities
1043 	 * Control flow:	4-7		8-15
1044 	 * User normal flow:	1-3		4-7
1045 	 * User tunnel flow:	0-2		0-3
1046 	 */
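	/*
	 * Worked example of the table above: a non-tunnel user rule with
	 * attr->priority == 0 on a device exposing 16 Verbs priorities gets
	 * 0 * MLX5_VERBS_FLOW_PRIO_8 + 4 == 4 as its base priority, to which
	 * hash_rxq_init[].flow_priority (0-2) is added per hash type, landing
	 * in the 4-7 band shown for user normal flows.
	 */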
1047 	priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
1048 	if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1049 		priority /= 2;
1050 	/*
1051 	 * Lower non-tunnel flows by one Verbs priority level when only 8
1052 	 * Verbs priorities are supported, by four levels otherwise.
1053 	 */
1054 	if (!parser->inner) {
1055 		if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1056 			priority += 1;
1057 		else
1058 			priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
1059 	}
1060 	if (parser->drop) {
1061 		parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
1062 				hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1063 		return;
1064 	}
1065 	for (i = 0; i != hash_rxq_init_n; ++i) {
1066 		if (!parser->queue[i].ibv_attr)
1067 			continue;
1068 		parser->queue[i].ibv_attr->priority = priority +
1069 				hash_rxq_init[i].flow_priority;
1070 	}
1071 }
1072 
1073 /**
1074  * Finalise verbs flow attributes.
1075  *
1076  * @param[in, out] parser
1077  *   Internal parser structure.
1078  */
1079 static void
1080 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
1081 {
1082 	unsigned int i;
1083 	uint32_t inner = parser->inner;
1084 
1085 	/* Don't create extra flows for outer RSS. */
1086 	if (parser->tunnel && parser->rss_conf.level < 2)
1087 		return;
1088 	/*
1089 	 * Fill missing layers in verbs specifications, or compute the correct
1090 	 * offset to allocate the memory space for the attributes and
1091 	 * specifications.
1092 	 */
1093 	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1094 		union {
1095 			struct ibv_flow_spec_ipv4_ext ipv4;
1096 			struct ibv_flow_spec_ipv6 ipv6;
1097 			struct ibv_flow_spec_tcp_udp udp_tcp;
1098 			struct ibv_flow_spec_eth eth;
1099 		} specs;
1100 		void *dst;
1101 		uint16_t size;
1102 
1103 		if (i == parser->layer)
1104 			continue;
1105 		if (parser->layer == HASH_RXQ_ETH ||
1106 		    parser->layer == HASH_RXQ_TUNNEL) {
1107 			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1108 				size = sizeof(struct ibv_flow_spec_ipv4_ext);
1109 				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1110 					.type = inner | IBV_FLOW_SPEC_IPV4_EXT,
1111 					.size = size,
1112 				};
1113 			} else {
1114 				size = sizeof(struct ibv_flow_spec_ipv6);
1115 				specs.ipv6 = (struct ibv_flow_spec_ipv6){
1116 					.type = inner | IBV_FLOW_SPEC_IPV6,
1117 					.size = size,
1118 				};
1119 			}
1120 			if (parser->queue[i].ibv_attr) {
1121 				dst = (void *)((uintptr_t)
1122 					       parser->queue[i].ibv_attr +
1123 					       parser->queue[i].offset);
1124 				memcpy(dst, &specs, size);
1125 				++parser->queue[i].ibv_attr->num_of_specs;
1126 			}
1127 			parser->queue[i].offset += size;
1128 		}
1129 		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1130 		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1131 			size = sizeof(struct ibv_flow_spec_tcp_udp);
1132 			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1133 				.type = inner | ((i == HASH_RXQ_UDPV4 ||
1134 					  i == HASH_RXQ_UDPV6) ?
1135 					 IBV_FLOW_SPEC_UDP :
1136 					 IBV_FLOW_SPEC_TCP),
1137 				.size = size,
1138 			};
1139 			if (parser->queue[i].ibv_attr) {
1140 				dst = (void *)((uintptr_t)
1141 					       parser->queue[i].ibv_attr +
1142 					       parser->queue[i].offset);
1143 				memcpy(dst, &specs, size);
1144 				++parser->queue[i].ibv_attr->num_of_specs;
1145 			}
1146 			parser->queue[i].offset += size;
1147 		}
1148 	}
1149 }
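/*
 * Example of the completion above: for a pattern that stops at "eth", the
 * HASH_RXQ_TCPV4 attribute still needs IPv4 and TCP specifications to
 * describe a full eth/ipv4/tcp stack, so wildcard IPv4/IPv6 and TCP/UDP
 * specifications are appended here (on the first sizing pass, before the
 * attributes are allocated, only the offsets are grown).
 */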
1150 
1151 /**
1152  * Update flows according to pattern and RSS hash fields.
1153  *
1154  * @param[in, out] parser
1155  *   Internal parser structure.
1156  *
1157  * @return
1158  *   0 on success, a negative errno value otherwise and rte_errno is set.
1159  */
1160 static int
1161 mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
1162 {
1163 	unsigned int i;
1164 	enum hash_rxq_type start;
1165 	enum hash_rxq_type layer;
1166 	int outer = parser->tunnel && parser->rss_conf.level < 2;
1167 	uint64_t rss = parser->rss_conf.types;
1168 
1169 	/* Default to outer RSS. */
1170 	if (!parser->rss_conf.level)
1171 		parser->rss_conf.level = 1;
1172 	layer = outer ? parser->out_layer : parser->layer;
1173 	if (layer == HASH_RXQ_TUNNEL)
1174 		layer = HASH_RXQ_ETH;
1175 	if (outer) {
1176 		/* Only one hash type for outer RSS. */
1177 		if (rss && layer == HASH_RXQ_ETH) {
1178 			start = HASH_RXQ_TCPV4;
1179 		} else if (rss && layer != HASH_RXQ_ETH &&
1180 			   !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
1181 			/* If RSS does not match L4 pattern, try L3 RSS. */
1182 			if (layer < HASH_RXQ_IPV4)
1183 				layer = HASH_RXQ_IPV4;
1184 			else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
1185 				layer = HASH_RXQ_IPV6;
1186 			start = layer;
1187 		} else {
1188 			start = layer;
1189 		}
1190 		/* Scan first valid hash type. */
1191 		for (i = start; rss && i <= layer; ++i) {
1192 			if (!parser->queue[i].ibv_attr)
1193 				continue;
1194 			if (hash_rxq_init[i].dpdk_rss_hf & rss)
1195 				break;
1196 		}
1197 		if (rss && i <= layer)
1198 			parser->queue[layer].hash_fields =
1199 					hash_rxq_init[i].hash_fields;
1200 		/* Trim unused hash types. */
1201 		for (i = 0; i != hash_rxq_init_n; ++i) {
1202 			if (parser->queue[i].ibv_attr && i != layer) {
1203 				rte_free(parser->queue[i].ibv_attr);
1204 				parser->queue[i].ibv_attr = NULL;
1205 			}
1206 		}
1207 	} else {
1208 		/* Expand for inner or normal RSS. */
1209 		if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
1210 			start = HASH_RXQ_TCPV4;
1211 		else if (rss && layer == HASH_RXQ_IPV6)
1212 			start = HASH_RXQ_TCPV6;
1213 		else
1214 			start = layer;
1215 		/* For L4 pattern, try L3 RSS if no L4 RSS. */
1216 		/* Trim unused hash types. */
1217 		for (i = 0; i != hash_rxq_init_n; ++i) {
1218 			if (!parser->queue[i].ibv_attr)
1219 				continue;
1220 			if (i < start || i > layer) {
1221 				rte_free(parser->queue[i].ibv_attr);
1222 				parser->queue[i].ibv_attr = NULL;
1223 				continue;
1224 			}
1225 			if (!rss)
1226 				continue;
1227 			if (hash_rxq_init[i].dpdk_rss_hf & rss) {
1228 				parser->queue[i].hash_fields =
1229 						hash_rxq_init[i].hash_fields;
1230 			} else if (i != layer) {
1231 				/* Remove unused RSS expansion. */
1232 				rte_free(parser->queue[i].ibv_attr);
1233 				parser->queue[i].ibv_attr = NULL;
1234 			} else if (layer < HASH_RXQ_IPV4 &&
1235 				   (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &
1236 				    rss)) {
1237 				/* Allow IPv4 RSS on L4 pattern. */
1238 				parser->queue[i].hash_fields =
1239 					hash_rxq_init[HASH_RXQ_IPV4]
1240 						.hash_fields;
1241 			} else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
1242 				   (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &
1243 				    rss)) {
1244 				/* Allow IPv6 RSS on L4 pattern. */
1245 				parser->queue[i].hash_fields =
1246 					hash_rxq_init[HASH_RXQ_IPV6]
1247 						.hash_fields;
1248 			}
1249 		}
1250 	}
1251 	return 0;
1252 }
1253 
1254 /**
1255  * Validate and convert a flow supported by the NIC.
1256  *
1257  * @param dev
1258  *   Pointer to Ethernet device.
1259  * @param[in] attr
1260  *   Flow rule attributes.
1261  * @param[in] pattern
1262  *   Pattern specification (list terminated by the END pattern item).
1263  * @param[in] actions
1264  *   Associated actions (list terminated by the END action).
1265  * @param[out] error
1266  *   Perform verbose error reporting if not NULL.
1267  * @param[in, out] parser
1268  *   Internal parser structure.
1269  *
1270  * @return
1271  *   0 on success, a negative errno value otherwise and rte_errno is set.
1272  */
1273 static int
1274 mlx5_flow_convert(struct rte_eth_dev *dev,
1275 		  const struct rte_flow_attr *attr,
1276 		  const struct rte_flow_item items[],
1277 		  const struct rte_flow_action actions[],
1278 		  struct rte_flow_error *error,
1279 		  struct mlx5_flow_parse *parser)
1280 {
1281 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1282 	unsigned int i;
1283 	int ret;
1284 
1285 	/* First step. Validate the attributes, items and actions. */
1286 	*parser = (struct mlx5_flow_parse){
1287 		.create = parser->create,
1288 		.layer = HASH_RXQ_ETH,
1289 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
1290 	};
1291 	ret = mlx5_flow_convert_attributes(attr, error);
1292 	if (ret)
1293 		return ret;
1294 	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1295 	if (ret)
1296 		return ret;
1297 	ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
1298 	if (ret)
1299 		return ret;
1300 	mlx5_flow_convert_finalise(parser);
1301 	/*
1302 	 * Second step.
1303 	 * Allocate the memory space to store verbs specifications.
1304 	 */
1305 	if (parser->drop) {
1306 		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1307 
1308 		parser->queue[HASH_RXQ_ETH].ibv_attr =
1309 			mlx5_flow_convert_allocate(offset, error);
1310 		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1311 			goto exit_enomem;
1312 		parser->queue[HASH_RXQ_ETH].offset =
1313 			sizeof(struct ibv_flow_attr);
1314 	} else {
1315 		for (i = 0; i != hash_rxq_init_n; ++i) {
1316 			unsigned int offset;
1317 
1318 			offset = parser->queue[i].offset;
1319 			parser->queue[i].ibv_attr =
1320 				mlx5_flow_convert_allocate(offset, error);
1321 			if (!parser->queue[i].ibv_attr)
1322 				goto exit_enomem;
1323 			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1324 		}
1325 	}
1326 	/* Third step. Conversion parse, fill the specifications. */
1327 	parser->inner = 0;
1328 	parser->tunnel = 0;
1329 	parser->layer = HASH_RXQ_ETH;
1330 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1331 		struct mlx5_flow_data data = {
1332 			.dev = dev,
1333 			.parser = parser,
1334 			.error = error,
1335 		};
1336 
1337 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1338 			continue;
1339 		cur_item = &mlx5_flow_items[items->type];
1340 		ret = cur_item->convert(items,
1341 					(cur_item->default_mask ?
1342 					 cur_item->default_mask :
1343 					 cur_item->mask),
1344 					 &data);
1345 		if (ret)
1346 			goto exit_free;
1347 	}
1348 	if (!parser->drop) {
1349 		/* RSS check, remove unused hash types. */
1350 		ret = mlx5_flow_convert_rss(parser);
1351 		if (ret)
1352 			goto exit_free;
1353 		/* Complete missing specification. */
1354 		mlx5_flow_convert_finalise(parser);
1355 	}
1356 	mlx5_flow_update_priority(dev, parser, attr);
1357 	if (parser->mark)
1358 		mlx5_flow_create_flag_mark(parser, parser->mark_id);
1359 	if (parser->count && parser->create) {
1360 		mlx5_flow_create_count(dev, parser);
1361 		if (!parser->cs)
1362 			goto exit_count_error;
1363 	}
1364 exit_free:
1365 	/* Only verification is expected, all resources should be released. */
1366 	if (!parser->create) {
1367 		for (i = 0; i != hash_rxq_init_n; ++i) {
1368 			if (parser->queue[i].ibv_attr) {
1369 				rte_free(parser->queue[i].ibv_attr);
1370 				parser->queue[i].ibv_attr = NULL;
1371 			}
1372 		}
1373 	}
1374 	return ret;
1375 exit_enomem:
1376 	for (i = 0; i != hash_rxq_init_n; ++i) {
1377 		if (parser->queue[i].ibv_attr) {
1378 			rte_free(parser->queue[i].ibv_attr);
1379 			parser->queue[i].ibv_attr = NULL;
1380 		}
1381 	}
1382 	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1383 			   NULL, "cannot allocate verbs spec attributes");
1384 	return -rte_errno;
1385 exit_count_error:
1386 	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1387 			   NULL, "cannot create counter");
1388 	return -rte_errno;
1389 }
1390 
1391 /**
1392  * Copy the created specification into every allocated Verbs flow attribute.
1393  *
1394  * @param parser
1395  *   Internal parser structure.
1396  * @param src
1397  *   Create specification.
1398  * @param size
1399  *   Size in bytes of the specification to copy.
1400  */
1401 static void
1402 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1403 		      unsigned int size)
1404 {
1405 	unsigned int i;
1406 	void *dst;
1407 
1408 	for (i = 0; i != hash_rxq_init_n; ++i) {
1409 		if (!parser->queue[i].ibv_attr)
1410 			continue;
1411 		dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1412 				parser->queue[i].offset);
1413 		memcpy(dst, src, size);
1414 		++parser->queue[i].ibv_attr->num_of_specs;
1415 		parser->queue[i].offset += size;
1416 	}
1417 }
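/*
 * Most mlx5_flow_create_*() item converters below follow the same shape:
 * build the Verbs specification on the stack, AND the value with the mask
 * so unmatched bits are zeroed, then append it to every pending hash Rx
 * queue attribute through mlx5_flow_create_copy().  The VLAN converter is
 * the exception: it patches the Ethernet specification already appended.
 */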
1418 
1419 /**
1420  * Convert Ethernet item to Verbs specification.
1421  *
1422  * @param item[in]
1423  *   Item specification.
1424  * @param default_mask[in]
1425  *   Default bit-masks to use when item->mask is not provided.
1426  * @param data[in, out]
1427  *   User structure.
1428  *
1429  * @return
1430  *   0 on success, a negative errno value otherwise and rte_errno is set.
1431  */
1432 static int
1433 mlx5_flow_create_eth(const struct rte_flow_item *item,
1434 		     const void *default_mask,
1435 		     struct mlx5_flow_data *data)
1436 {
1437 	const struct rte_flow_item_eth *spec = item->spec;
1438 	const struct rte_flow_item_eth *mask = item->mask;
1439 	struct mlx5_flow_parse *parser = data->parser;
1440 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1441 	struct ibv_flow_spec_eth eth = {
1442 		.type = parser->inner | IBV_FLOW_SPEC_ETH,
1443 		.size = eth_size,
1444 	};
1445 
1446 	parser->layer = HASH_RXQ_ETH;
1447 	if (spec) {
1448 		unsigned int i;
1449 
1450 		if (!mask)
1451 			mask = default_mask;
1452 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1453 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1454 		eth.val.ether_type = spec->type;
1455 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1456 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1457 		eth.mask.ether_type = mask->type;
1458 		/* Remove unwanted bits from values. */
1459 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1460 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1461 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
1462 		}
1463 		eth.val.ether_type &= eth.mask.ether_type;
1464 	}
1465 	mlx5_flow_create_copy(parser, &eth, eth_size);
1466 	return 0;
1467 }
1468 
1469 /**
1470  * Convert VLAN item to Verbs specification.
1471  *
1472  * @param item[in]
1473  *   Item specification.
1474  * @param default_mask[in]
1475  *   Default bit-masks to use when item->mask is not provided.
1476  * @param data[in, out]
1477  *   User structure.
1478  *
1479  * @return
1480  *   0 on success, a negative errno value otherwise and rte_errno is set.
1481  */
1482 static int
1483 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1484 		      const void *default_mask,
1485 		      struct mlx5_flow_data *data)
1486 {
1487 	const struct rte_flow_item_vlan *spec = item->spec;
1488 	const struct rte_flow_item_vlan *mask = item->mask;
1489 	struct mlx5_flow_parse *parser = data->parser;
1490 	struct ibv_flow_spec_eth *eth;
1491 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1492 	const char *msg = "VLAN cannot be empty";
1493 
1494 	if (spec) {
1495 		unsigned int i;
1496 		if (!mask)
1497 			mask = default_mask;
1498 
1499 		for (i = 0; i != hash_rxq_init_n; ++i) {
1500 			if (!parser->queue[i].ibv_attr)
1501 				continue;
1502 
1503 			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1504 				       parser->queue[i].offset - eth_size);
1505 			eth->val.vlan_tag = spec->tci;
1506 			eth->mask.vlan_tag = mask->tci;
1507 			eth->val.vlan_tag &= eth->mask.vlan_tag;
1508 			/*
1509 			 * From the Verbs perspective, an empty VLAN is
1510 			 * equivalent to a packet without a VLAN layer.
1511 			 */
1512 			if (!eth->mask.vlan_tag)
1513 				goto error;
1514 			/* Outer TPID cannot be matched. */
1515 			if (eth->mask.ether_type) {
1516 				msg = "VLAN TPID matching is not supported";
1517 				goto error;
1518 			}
1519 			eth->val.ether_type = spec->inner_type;
1520 			eth->mask.ether_type = mask->inner_type;
1521 			eth->val.ether_type &= eth->mask.ether_type;
1522 		}
1523 		return 0;
1524 	}
1525 error:
1526 	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1527 				  item, msg);
1528 }
1529 
1530 /**
1531  * Convert IPv4 item to Verbs specification.
1532  *
1533  * @param item[in]
1534  *   Item specification.
1535  * @param default_mask[in]
1536  *   Default bit-masks to use when item->mask is not provided.
1537  * @param data[in, out]
1538  *   User structure.
1539  *
1540  * @return
1541  *   0 on success, a negative errno value otherwise and rte_errno is set.
1542  */
1543 static int
1544 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1545 		      const void *default_mask,
1546 		      struct mlx5_flow_data *data)
1547 {
1548 	struct priv *priv = data->dev->data->dev_private;
1549 	const struct rte_flow_item_ipv4 *spec = item->spec;
1550 	const struct rte_flow_item_ipv4 *mask = item->mask;
1551 	struct mlx5_flow_parse *parser = data->parser;
1552 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1553 	struct ibv_flow_spec_ipv4_ext ipv4 = {
1554 		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1555 		.size = ipv4_size,
1556 	};
1557 
1558 	if (parser->layer == HASH_RXQ_TUNNEL &&
1559 	    parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1560 	    !priv->config.l3_vxlan_en)
1561 		return rte_flow_error_set(data->error, EINVAL,
1562 					  RTE_FLOW_ERROR_TYPE_ITEM,
1563 					  item,
1564 					  "L3 VXLAN not enabled by device"
1565 					  " parameter and/or not configured"
1566 					  " in firmware");
1567 	parser->layer = HASH_RXQ_IPV4;
1568 	if (spec) {
1569 		if (!mask)
1570 			mask = default_mask;
1571 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1572 			.src_ip = spec->hdr.src_addr,
1573 			.dst_ip = spec->hdr.dst_addr,
1574 			.proto = spec->hdr.next_proto_id,
1575 			.tos = spec->hdr.type_of_service,
1576 		};
1577 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1578 			.src_ip = mask->hdr.src_addr,
1579 			.dst_ip = mask->hdr.dst_addr,
1580 			.proto = mask->hdr.next_proto_id,
1581 			.tos = mask->hdr.type_of_service,
1582 		};
1583 		/* Remove unwanted bits from values. */
1584 		ipv4.val.src_ip &= ipv4.mask.src_ip;
1585 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1586 		ipv4.val.proto &= ipv4.mask.proto;
1587 		ipv4.val.tos &= ipv4.mask.tos;
1588 	}
1589 	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1590 	return 0;
1591 }
1592 
1593 /**
1594  * Convert IPv6 item to Verbs specification.
1595  *
1596  * @param item[in]
1597  *   Item specification.
1598  * @param default_mask[in]
1599  *   Default bit-masks to use when item->mask is not provided.
1600  * @param data[in, out]
1601  *   User structure.
1602  *
1603  * @return
1604  *   0 on success, a negative errno value otherwise and rte_errno is set.
1605  */
1606 static int
1607 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1608 		      const void *default_mask,
1609 		      struct mlx5_flow_data *data)
1610 {
1611 	struct priv *priv = data->dev->data->dev_private;
1612 	const struct rte_flow_item_ipv6 *spec = item->spec;
1613 	const struct rte_flow_item_ipv6 *mask = item->mask;
1614 	struct mlx5_flow_parse *parser = data->parser;
1615 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1616 	struct ibv_flow_spec_ipv6 ipv6 = {
1617 		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
1618 		.size = ipv6_size,
1619 	};
1620 
1621 	if (parser->layer == HASH_RXQ_TUNNEL &&
1622 	    parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1623 	    !priv->config.l3_vxlan_en)
1624 		return rte_flow_error_set(data->error, EINVAL,
1625 					  RTE_FLOW_ERROR_TYPE_ITEM,
1626 					  item,
1627 					  "L3 VXLAN not enabled by device"
1628 					  " parameter and/or not configured"
1629 					  " in firmware");
1630 	parser->layer = HASH_RXQ_IPV6;
1631 	if (spec) {
1632 		unsigned int i;
1633 		uint32_t vtc_flow_val;
1634 		uint32_t vtc_flow_mask;
1635 
1636 		if (!mask)
1637 			mask = default_mask;
1638 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1639 		       RTE_DIM(ipv6.val.src_ip));
1640 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1641 		       RTE_DIM(ipv6.val.dst_ip));
1642 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1643 		       RTE_DIM(ipv6.mask.src_ip));
1644 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1645 		       RTE_DIM(ipv6.mask.dst_ip));
1646 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1647 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1648 		ipv6.val.flow_label =
1649 			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1650 					 IPV6_HDR_FL_SHIFT);
1651 		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1652 					 IPV6_HDR_TC_SHIFT;
1653 		ipv6.val.next_hdr = spec->hdr.proto;
1654 		ipv6.val.hop_limit = spec->hdr.hop_limits;
1655 		ipv6.mask.flow_label =
1656 			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1657 					 IPV6_HDR_FL_SHIFT);
1658 		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1659 					  IPV6_HDR_TC_SHIFT;
1660 		ipv6.mask.next_hdr = mask->hdr.proto;
1661 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
1662 		/* Remove unwanted bits from values. */
1663 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1664 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1665 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1666 		}
1667 		ipv6.val.flow_label &= ipv6.mask.flow_label;
1668 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1669 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1670 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1671 	}
1672 	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1673 	return 0;
1674 }
1675 
1676 /**
1677  * Convert UDP item to Verbs specification.
1678  *
1679  * @param item[in]
1680  *   Item specification.
1681  * @param default_mask[in]
1682  *   Default bit-masks to use when item->mask is not provided.
1683  * @param data[in, out]
1684  *   User structure.
1685  *
1686  * @return
1687  *   0 on success, a negative errno value otherwise and rte_errno is set.
1688  */
1689 static int
1690 mlx5_flow_create_udp(const struct rte_flow_item *item,
1691 		     const void *default_mask,
1692 		     struct mlx5_flow_data *data)
1693 {
1694 	const struct rte_flow_item_udp *spec = item->spec;
1695 	const struct rte_flow_item_udp *mask = item->mask;
1696 	struct mlx5_flow_parse *parser = data->parser;
1697 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1698 	struct ibv_flow_spec_tcp_udp udp = {
1699 		.type = parser->inner | IBV_FLOW_SPEC_UDP,
1700 		.size = udp_size,
1701 	};
1702 
1703 	if (parser->layer == HASH_RXQ_IPV4)
1704 		parser->layer = HASH_RXQ_UDPV4;
1705 	else
1706 		parser->layer = HASH_RXQ_UDPV6;
1707 	if (spec) {
1708 		if (!mask)
1709 			mask = default_mask;
1710 		udp.val.dst_port = spec->hdr.dst_port;
1711 		udp.val.src_port = spec->hdr.src_port;
1712 		udp.mask.dst_port = mask->hdr.dst_port;
1713 		udp.mask.src_port = mask->hdr.src_port;
1714 		/* Remove unwanted bits from values. */
1715 		udp.val.src_port &= udp.mask.src_port;
1716 		udp.val.dst_port &= udp.mask.dst_port;
1717 	}
1718 	mlx5_flow_create_copy(parser, &udp, udp_size);
1719 	return 0;
1720 }
1721 
1722 /**
1723  * Convert TCP item to Verbs specification.
1724  *
1725  * @param item[in]
1726  *   Item specification.
1727  * @param default_mask[in]
1728  *   Default bit-masks to use when item->mask is not provided.
1729  * @param data[in, out]
1730  *   User structure.
1731  *
1732  * @return
1733  *   0 on success, a negative errno value otherwise and rte_errno is set.
1734  */
1735 static int
1736 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1737 		     const void *default_mask,
1738 		     struct mlx5_flow_data *data)
1739 {
1740 	const struct rte_flow_item_tcp *spec = item->spec;
1741 	const struct rte_flow_item_tcp *mask = item->mask;
1742 	struct mlx5_flow_parse *parser = data->parser;
1743 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1744 	struct ibv_flow_spec_tcp_udp tcp = {
1745 		.type = parser->inner | IBV_FLOW_SPEC_TCP,
1746 		.size = tcp_size,
1747 	};
1748 
1749 	if (parser->layer == HASH_RXQ_IPV4)
1750 		parser->layer = HASH_RXQ_TCPV4;
1751 	else
1752 		parser->layer = HASH_RXQ_TCPV6;
1753 	if (spec) {
1754 		if (!mask)
1755 			mask = default_mask;
1756 		tcp.val.dst_port = spec->hdr.dst_port;
1757 		tcp.val.src_port = spec->hdr.src_port;
1758 		tcp.mask.dst_port = mask->hdr.dst_port;
1759 		tcp.mask.src_port = mask->hdr.src_port;
1760 		/* Remove unwanted bits from values. */
1761 		tcp.val.src_port &= tcp.mask.src_port;
1762 		tcp.val.dst_port &= tcp.mask.dst_port;
1763 	}
1764 	mlx5_flow_create_copy(parser, &tcp, tcp_size);
1765 	return 0;
1766 }
1767 
1768 /**
1769  * Convert VXLAN item to Verbs specification.
1770  *
1771  * @param item[in]
1772  *   Item specification.
1773  * @param default_mask[in]
1774  *   Default bit-masks to use when item->mask is not provided.
1775  * @param data[in, out]
1776  *   User structure.
1777  *
1778  * @return
1779  *   0 on success, a negative errno value otherwise and rte_errno is set.
1780  */
1781 static int
1782 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1783 		       const void *default_mask,
1784 		       struct mlx5_flow_data *data)
1785 {
1786 	const struct rte_flow_item_vxlan *spec = item->spec;
1787 	const struct rte_flow_item_vxlan *mask = item->mask;
1788 	struct mlx5_flow_parse *parser = data->parser;
1789 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1790 	struct ibv_flow_spec_tunnel vxlan = {
1791 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1792 		.size = size,
1793 	};
1794 	union vni {
1795 		uint32_t vlan_id;
1796 		uint8_t vni[4];
1797 	} id;
1798 
1799 	id.vni[0] = 0;
1800 	parser->inner = IBV_FLOW_SPEC_INNER;
1801 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
1802 	parser->out_layer = parser->layer;
1803 	parser->layer = HASH_RXQ_TUNNEL;
1804 	/* Default VXLAN to outer RSS. */
1805 	if (!parser->rss_conf.level)
1806 		parser->rss_conf.level = 1;
1807 	if (spec) {
1808 		if (!mask)
1809 			mask = default_mask;
1810 		memcpy(&id.vni[1], spec->vni, 3);
1811 		vxlan.val.tunnel_id = id.vlan_id;
1812 		memcpy(&id.vni[1], mask->vni, 3);
1813 		vxlan.mask.tunnel_id = id.vlan_id;
1814 		/* Remove unwanted bits from values. */
1815 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1816 	}
1817 	/*
1818 	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1819 	 * layer is defined in the Verbs specification, it is interpreted as a
1820 	 * wildcard and all packets will match this rule; if it follows a full
1821 	 * stack layer (e.g. eth / ipv4 / udp), all packets matching the
1822 	 * preceding layers will also match this rule.
1823 	 * To avoid such a situation, VNI 0 is currently refused.
1824 	 */
1825 	/* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1826 	if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1827 		return rte_flow_error_set(data->error, EINVAL,
1828 					  RTE_FLOW_ERROR_TYPE_ITEM,
1829 					  item,
1830 					  "VxLAN vni cannot be 0");
1831 	mlx5_flow_create_copy(parser, &vxlan, size);
1832 	return 0;
1833 }
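
/*
 * Illustrative sketch (editor's example, not part of the driver): the VNI is
 * a 24-bit field, which the converter above widens to the 32-bit Verbs
 * tunnel_id by placing the three VNI bytes after a zero leading byte. An
 * application would request VNI 42 roughly as follows (names are assumptions):
 *
 *	struct rte_flow_item_vxlan vxlan_spec = {
 *		.vni = { 0x00, 0x00, 0x2a },	// VNI 42, big-endian, 24 bits
 *	};
 *	struct rte_flow_item_vxlan vxlan_mask = {
 *		.vni = { 0xff, 0xff, 0xff },	// match the full VNI
 *	};
 *	struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
 *		.spec = &vxlan_spec,
 *		.mask = &vxlan_mask,
 *	};
 *	// VNI 0 is rejected when no outer L3/L4 spec precedes this item.
 */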
1834 
1835 /**
1836  * Convert VXLAN-GPE item to Verbs specification.
1837  *
1838  * @param item[in]
1839  *   Item specification.
1840  * @param default_mask[in]
1841  *   Default bit-masks to use when item->mask is not provided.
1842  * @param data[in, out]
1843  *   User structure.
1844  *
1845  * @return
1846  *   0 on success, a negative errno value otherwise and rte_errno is set.
1847  */
1848 static int
1849 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
1850 			   const void *default_mask,
1851 			   struct mlx5_flow_data *data)
1852 {
1853 	struct priv *priv = data->dev->data->dev_private;
1854 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1855 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1856 	struct mlx5_flow_parse *parser = data->parser;
1857 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1858 	struct ibv_flow_spec_tunnel vxlan = {
1859 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1860 		.size = size,
1861 	};
1862 	union vni {
1863 		uint32_t vlan_id;
1864 		uint8_t vni[4];
1865 	} id;
1866 
1867 	if (!priv->config.l3_vxlan_en)
1868 		return rte_flow_error_set(data->error, EINVAL,
1869 					  RTE_FLOW_ERROR_TYPE_ITEM,
1870 					  item,
1871 					  "L3 VXLAN not enabled by device"
1872 					  " parameter and/or not configured"
1873 					  " in firmware");
1874 	id.vni[0] = 0;
1875 	parser->inner = IBV_FLOW_SPEC_INNER;
1876 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)];
1877 	parser->out_layer = parser->layer;
1878 	parser->layer = HASH_RXQ_TUNNEL;
1879 	/* Default VXLAN-GPE to outer RSS. */
1880 	if (!parser->rss_conf.level)
1881 		parser->rss_conf.level = 1;
1882 	if (spec) {
1883 		if (!mask)
1884 			mask = default_mask;
1885 		memcpy(&id.vni[1], spec->vni, 3);
1886 		vxlan.val.tunnel_id = id.vlan_id;
1887 		memcpy(&id.vni[1], mask->vni, 3);
1888 		vxlan.mask.tunnel_id = id.vlan_id;
1889 		if (spec->protocol)
1890 			return rte_flow_error_set(data->error, EINVAL,
1891 						  RTE_FLOW_ERROR_TYPE_ITEM,
1892 						  item,
1893 						  "VxLAN-GPE protocol not"
1894 						  " supported");
1895 		/* Remove unwanted bits from values. */
1896 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1897 	}
1898 	/*
1899 	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1900 	 * layer is defined in the Verbs specification, it is interpreted as a
1901 	 * wildcard and all packets will match this rule; if it follows a full
1902 	 * stack layer (e.g. eth / ipv4 / udp), all packets matching the
1903 	 * preceding layers will also match this rule.
1904 	 * To avoid such a situation, VNI 0 is currently refused.
1905 	 */
1906 	/* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1907 	if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1908 		return rte_flow_error_set(data->error, EINVAL,
1909 					  RTE_FLOW_ERROR_TYPE_ITEM,
1910 					  item,
1911 					  "VxLAN-GPE vni cannot be 0");
1912 	mlx5_flow_create_copy(parser, &vxlan, size);
1913 	return 0;
1914 }
1915 
1916 /**
1917  * Convert GRE item to Verbs specification.
1918  *
1919  * @param item[in]
1920  *   Item specification.
1921  * @param default_mask[in]
1922  *   Default bit-masks to use when item->mask is not provided.
1923  * @param data[in, out]
1924  *   User structure.
1925  *
1926  * @return
1927  *   0 on success, a negative errno value otherwise and rte_errno is set.
1928  */
1929 static int
1930 mlx5_flow_create_gre(const struct rte_flow_item *item,
1931 		     const void *default_mask,
1932 		     struct mlx5_flow_data *data)
1933 {
1934 	struct mlx5_flow_parse *parser = data->parser;
1935 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
1936 	(void)default_mask;
1937 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1938 	struct ibv_flow_spec_tunnel tunnel = {
1939 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1940 		.size = size,
1941 	};
1942 #else
1943 	const struct rte_flow_item_gre *spec = item->spec;
1944 	const struct rte_flow_item_gre *mask = item->mask;
1945 	unsigned int size = sizeof(struct ibv_flow_spec_gre);
1946 	struct ibv_flow_spec_gre tunnel = {
1947 		.type = parser->inner | IBV_FLOW_SPEC_GRE,
1948 		.size = size,
1949 	};
1950 #endif
1951 	struct ibv_flow_spec_ipv4_ext *ipv4;
1952 	struct ibv_flow_spec_ipv6 *ipv6;
1953 	unsigned int i;
1954 
1955 	parser->inner = IBV_FLOW_SPEC_INNER;
1956 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
1957 	parser->out_layer = parser->layer;
1958 	parser->layer = HASH_RXQ_TUNNEL;
1959 	/* Default GRE to inner RSS. */
1960 	if (!parser->rss_conf.level)
1961 		parser->rss_conf.level = 2;
1962 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1963 	if (spec) {
1964 		if (!mask)
1965 			mask = default_mask;
1966 		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
1967 		tunnel.val.protocol = spec->protocol;
1968 		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
1969 		tunnel.mask.protocol = mask->protocol;
1970 		/* Remove unwanted bits from values. */
1971 		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
1972 		tunnel.val.protocol &= tunnel.mask.protocol;
1973 		tunnel.val.key &= tunnel.mask.key;
1974 	}
1975 #endif
1976 	/* Update encapsulation IP layer protocol. */
1977 	for (i = 0; i != hash_rxq_init_n; ++i) {
1978 		if (!parser->queue[i].ibv_attr)
1979 			continue;
1980 		if (parser->out_layer == HASH_RXQ_IPV4) {
1981 			ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1982 				parser->queue[i].offset -
1983 				sizeof(struct ibv_flow_spec_ipv4_ext));
1984 			if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
1985 				break;
1986 			ipv4->val.proto = MLX5_GRE;
1987 			ipv4->mask.proto = 0xff;
1988 		} else if (parser->out_layer == HASH_RXQ_IPV6) {
1989 			ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1990 				parser->queue[i].offset -
1991 				sizeof(struct ibv_flow_spec_ipv6));
1992 			if (ipv6->mask.next_hdr &&
1993 			    ipv6->val.next_hdr != MLX5_GRE)
1994 				break;
1995 			ipv6->val.next_hdr = MLX5_GRE;
1996 			ipv6->mask.next_hdr = 0xff;
1997 		}
1998 	}
1999 	if (i != hash_rxq_init_n)
2000 		return rte_flow_error_set(data->error, EINVAL,
2001 					  RTE_FLOW_ERROR_TYPE_ITEM,
2002 					  item,
2003 					  "IP protocol of GRE must be 47");
2004 	mlx5_flow_create_copy(parser, &tunnel, size);
2005 	return 0;
2006 }
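
/*
 * Illustrative sketch (editor's example, not part of the driver): a GRE
 * pattern as an application might submit it. The converter above forces the
 * outer IP protocol to 47 (GRE) unless the pattern already matches a
 * conflicting protocol, in which case the rule is rejected.
 *
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },	// proto forced to 47
 *		{ .type = RTE_FLOW_ITEM_TYPE_GRE },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },	// inner header
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */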
2007 
2008 /**
2009  * Convert MPLS item to Verbs specification.
2010  * MPLS tunnel types currently supported are MPLS-in-GRE and MPLS-in-UDP.
2011  *
2012  * @param item[in]
2013  *   Item specification.
2014  * @param default_mask[in]
2015  *   Default bit-masks to use when item->mask is not provided.
2016  * @param data[in, out]
2017  *   User structure.
2018  *
2019  * @return
2020  *   0 on success, a negative errno value otherwise and rte_errno is set.
2021  */
2022 static int
2023 mlx5_flow_create_mpls(const struct rte_flow_item *item,
2024 		      const void *default_mask,
2025 		      struct mlx5_flow_data *data)
2026 {
2027 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2028 	(void)default_mask;
2029 	return rte_flow_error_set(data->error, ENOTSUP,
2030 				  RTE_FLOW_ERROR_TYPE_ITEM,
2031 				  item,
2032 				  "MPLS is not supported by driver");
2033 #else
2034 	const struct rte_flow_item_mpls *spec = item->spec;
2035 	const struct rte_flow_item_mpls *mask = item->mask;
2036 	struct mlx5_flow_parse *parser = data->parser;
2037 	unsigned int size = sizeof(struct ibv_flow_spec_mpls);
2038 	struct ibv_flow_spec_mpls mpls = {
2039 		.type = IBV_FLOW_SPEC_MPLS,
2040 		.size = size,
2041 	};
2042 
2043 	parser->inner = IBV_FLOW_SPEC_INNER;
2044 	if (parser->layer == HASH_RXQ_UDPV4 ||
2045 	    parser->layer == HASH_RXQ_UDPV6) {
2046 		parser->tunnel =
2047 			ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)];
2048 		parser->out_layer = parser->layer;
2049 	} else {
2050 		parser->tunnel =
2051 			ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)];
2052 		/* For MPLS-in-GRE, out_layer was set by the GRE item. */
2053 	}
2054 	parser->layer = HASH_RXQ_TUNNEL;
2055 	/*
2056 	 * For MPLS-in-GRE, RSS level should have been set.
2057 	 * For MPLS-in-UDP, use outer RSS.
2058 	 */
2059 	if (!parser->rss_conf.level)
2060 		parser->rss_conf.level = 1;
2061 	if (spec) {
2062 		if (!mask)
2063 			mask = default_mask;
2064 		/*
2065 		 * The Verbs label field includes the entire MPLS header:
2066 		 * bits 0:19 - label value field,
2067 		 * bits 20:22 - traffic class field,
2068 		 * bit 23 - bottom of stack bit,
2069 		 * bits 24:31 - TTL field.
2070 		 */
2071 		mpls.val.label = *(const uint32_t *)spec;
2072 		mpls.mask.label = *(const uint32_t *)mask;
2073 		/* Remove unwanted bits from values. */
2074 		mpls.val.label &= mpls.mask.label;
2075 	}
2076 	mlx5_flow_create_copy(parser, &mpls, size);
2077 	return 0;
2078 #endif
2079 }
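
/*
 * Illustrative sketch (editor's example, not part of the driver): the MPLS
 * item stores label/TC/S in three big-endian bytes plus one TTL byte, which
 * the converter above reinterprets as a single 32-bit Verbs label field.
 * For label 100, TC 0, bottom-of-stack 1, TTL 64:
 *
 *	// (100 << 4) | (0 << 1) | 1 = 0x000641
 *	struct rte_flow_item_mpls mpls_spec = {
 *		.label_tc_s = { 0x00, 0x06, 0x41 },
 *		.ttl = 64,
 *	};
 *	// The four bytes 00 06 41 40 are copied verbatim into mpls.val.label.
 */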
2080 
2081 /**
2082  * Convert mark/flag action to Verbs specification.
2083  *
2084  * @param parser
2085  *   Internal parser structure.
2086  * @param mark_id
2087  *   Mark identifier.
2088  *
2089  * @return
2090  *   0 on success, a negative errno value otherwise and rte_errno is set.
2091  */
2092 static int
2093 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
2094 {
2095 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
2096 	struct ibv_flow_spec_action_tag tag = {
2097 		.type = IBV_FLOW_SPEC_ACTION_TAG,
2098 		.size = size,
2099 		.tag_id = mlx5_flow_mark_set(mark_id),
2100 	};
2101 
2102 	assert(parser->mark);
2103 	mlx5_flow_create_copy(parser, &tag, size);
2104 	return 0;
2105 }
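
/*
 * Illustrative sketch (editor's example, not part of the driver): a MARK
 * action as an application would attach it, and how the mark is read back on
 * the Rx side. Variable names are assumptions for the example.
 *
 *	struct rte_flow_action_mark mark = { .id = 0x2a };
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	// On received mbufs matching the rule, PKT_RX_FDIR_ID is set in
 *	// m->ol_flags and the mark id is available in m->hash.fdir.hi.
 */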
2106 
2107 /**
2108  * Convert count action to Verbs specification.
2109  *
2110  * @param dev
2111  *   Pointer to Ethernet device.
2112  * @param parser
2113  *   Pointer to MLX5 flow parser structure.
2114  *
2115  * @return
2116  *   0 on success, a negative errno value otherwise and rte_errno is set.
2117  */
2118 static int
2119 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
2120 		       struct mlx5_flow_parse *parser __rte_unused)
2121 {
2122 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2123 	struct priv *priv = dev->data->dev_private;
2124 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
2125 	struct ibv_counter_set_init_attr init_attr = {0};
2126 	struct ibv_flow_spec_counter_action counter = {
2127 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
2128 		.size = size,
2129 		.counter_set_handle = 0,
2130 	};
2131 
2132 	init_attr.counter_set_id = 0;
2133 	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
2134 	if (!parser->cs) {
2135 		rte_errno = EINVAL;
2136 		return -rte_errno;
2137 	}
2138 	counter.counter_set_handle = parser->cs->handle;
2139 	mlx5_flow_create_copy(parser, &counter, size);
2140 #endif
2141 	return 0;
2142 }
2143 
2144 /**
2145  * Complete flow rule creation with a drop queue.
2146  *
2147  * @param dev
2148  *   Pointer to Ethernet device.
2149  * @param parser
2150  *   Internal parser structure.
2151  * @param flow
2152  *   Pointer to the rte_flow.
2153  * @param[out] error
2154  *   Perform verbose error reporting if not NULL.
2155  *
2156  * @return
2157  *   0 on success, a negative errno value otherwise and rte_errno is set.
2158  */
2159 static int
2160 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
2161 				   struct mlx5_flow_parse *parser,
2162 				   struct rte_flow *flow,
2163 				   struct rte_flow_error *error)
2164 {
2165 	struct priv *priv = dev->data->dev_private;
2166 	struct ibv_flow_spec_action_drop *drop;
2167 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
2168 
2169 	assert(priv->pd);
2170 	assert(priv->ctx);
2171 	flow->drop = 1;
2172 	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
2173 			parser->queue[HASH_RXQ_ETH].offset);
2174 	*drop = (struct ibv_flow_spec_action_drop){
2175 			.type = IBV_FLOW_SPEC_ACTION_DROP,
2176 			.size = size,
2177 	};
2178 	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
2179 	parser->queue[HASH_RXQ_ETH].offset += size;
2180 	flow->frxq[HASH_RXQ_ETH].ibv_attr =
2181 		parser->queue[HASH_RXQ_ETH].ibv_attr;
2182 	if (parser->count)
2183 		flow->cs = parser->cs;
2184 	if (!dev->data->dev_started)
2185 		return 0;
2186 	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
2187 	flow->frxq[HASH_RXQ_ETH].ibv_flow =
2188 		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
2189 				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
2190 	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2191 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
2192 				   NULL, "flow rule creation failure");
2193 		goto error;
2194 	}
2195 	return 0;
2196 error:
2197 	assert(flow);
2198 	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2199 		claim_zero(mlx5_glue->destroy_flow
2200 			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2201 		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2202 	}
2203 	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
2204 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2205 		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
2206 	}
2207 	if (flow->cs) {
2208 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2209 		flow->cs = NULL;
2210 		parser->cs = NULL;
2211 	}
2212 	return -rte_errno;
2213 }
2214 
2215 /**
2216  * Create hash Rx queues when RSS is enabled.
2217  *
2218  * @param dev
2219  *   Pointer to Ethernet device.
2220  * @param parser
2221  *   Internal parser structure.
2222  * @param flow
2223  *   Pointer to the rte_flow.
2224  * @param[out] error
2225  *   Perform verbose error reporting if not NULL.
2226  *
2227  * @return
2228  *   0 on success, a negative errno value otherwise and rte_errno is set.
2229  */
2230 static int
2231 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
2232 				  struct mlx5_flow_parse *parser,
2233 				  struct rte_flow *flow,
2234 				  struct rte_flow_error *error)
2235 {
2236 	unsigned int i;
2237 
2238 	for (i = 0; i != hash_rxq_init_n; ++i) {
2239 		if (!parser->queue[i].ibv_attr)
2240 			continue;
2241 		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
2242 		parser->queue[i].ibv_attr = NULL;
2243 		flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
2244 		if (!dev->data->dev_started)
2245 			continue;
2246 		flow->frxq[i].hrxq =
2247 			mlx5_hrxq_get(dev,
2248 				      parser->rss_conf.key,
2249 				      parser->rss_conf.key_len,
2250 				      flow->frxq[i].hash_fields,
2251 				      parser->rss_conf.queue,
2252 				      parser->rss_conf.queue_num,
2253 				      parser->tunnel,
2254 				      parser->rss_conf.level);
2255 		if (flow->frxq[i].hrxq)
2256 			continue;
2257 		flow->frxq[i].hrxq =
2258 			mlx5_hrxq_new(dev,
2259 				      parser->rss_conf.key,
2260 				      parser->rss_conf.key_len,
2261 				      flow->frxq[i].hash_fields,
2262 				      parser->rss_conf.queue,
2263 				      parser->rss_conf.queue_num,
2264 				      parser->tunnel,
2265 				      parser->rss_conf.level);
2266 		if (!flow->frxq[i].hrxq) {
2267 			return rte_flow_error_set(error, ENOMEM,
2268 						  RTE_FLOW_ERROR_TYPE_HANDLE,
2269 						  NULL,
2270 						  "cannot create hash rxq");
2271 		}
2272 	}
2273 	return 0;
2274 }
2275 
2276 /**
2277  * RXQ update after flow rule creation.
2278  *
2279  * @param dev
2280  *   Pointer to Ethernet device.
2281  * @param flow
2282  *   Pointer to the flow rule.
2283  */
2284 static void
2285 mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
2286 {
2287 	struct priv *priv = dev->data->dev_private;
2288 	unsigned int i;
2289 	unsigned int j;
2290 
2291 	if (!dev->data->dev_started)
2292 		return;
2293 	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2294 		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2295 						 [(*flow->queues)[i]];
2296 		struct mlx5_rxq_ctrl *rxq_ctrl =
2297 			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2298 		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2299 
2300 		rxq_data->mark |= flow->mark;
2301 		if (!tunnel)
2302 			continue;
2303 		rxq_ctrl->tunnel_types[tunnel] += 1;
2304 		/* Clear the tunnel type if more than one type is set. */
2305 		for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
2306 			if (j == tunnel)
2307 				continue;
2308 			if (rxq_ctrl->tunnel_types[j] > 0) {
2309 				rxq_data->tunnel = 0;
2310 				break;
2311 			}
2312 		}
2313 		if (j == RTE_DIM(rxq_ctrl->tunnel_types))
2314 			rxq_data->tunnel = flow->tunnel;
2315 	}
2316 }
2317 
2318 /**
2319  * Dump flow hash RX queue detail.
2320  *
2321  * @param dev
2322  *   Pointer to Ethernet device.
2323  * @param flow
2324  *   Pointer to the rte_flow.
2325  * @param hrxq_idx
2326  *   Hash RX queue index.
2327  */
2328 static void
2329 mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused,
2330 	       struct rte_flow *flow __rte_unused,
2331 	       unsigned int hrxq_idx __rte_unused)
2332 {
2333 #ifndef NDEBUG
2334 	uintptr_t spec_ptr;
2335 	uint16_t j;
2336 	char buf[256];
2337 	uint8_t off;
2338 
2339 	spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1);
2340 	for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs;
2341 	     j++) {
2342 		struct ibv_flow_spec *spec = (void *)spec_ptr;
2343 		off += sprintf(buf + off, " %x(%hu)", spec->hdr.type,
2344 			       spec->hdr.size);
2345 		spec_ptr += spec->hdr.size;
2346 	}
2347 	DRV_LOG(DEBUG,
2348 		"port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p,"
2349 		" hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d,"
2350 		" flags:%x, comp_mask:%x specs:%s",
2351 		dev->data->port_id, (void *)flow, hrxq_idx,
2352 		(void *)flow->frxq[hrxq_idx].hrxq,
2353 		(void *)flow->frxq[hrxq_idx].hrxq->qp,
2354 		(void *)flow->frxq[hrxq_idx].hrxq->ind_table,
2355 		flow->frxq[hrxq_idx].hash_fields |
2356 		(flow->tunnel &&
2357 		 flow->rss_conf.level > 1 ? (uint32_t)IBV_RX_HASH_INNER : 0),
2358 		flow->rss_conf.queue_num,
2359 		flow->frxq[hrxq_idx].ibv_attr->num_of_specs,
2360 		flow->frxq[hrxq_idx].ibv_attr->size,
2361 		flow->frxq[hrxq_idx].ibv_attr->priority,
2362 		flow->frxq[hrxq_idx].ibv_attr->type,
2363 		flow->frxq[hrxq_idx].ibv_attr->flags,
2364 		flow->frxq[hrxq_idx].ibv_attr->comp_mask,
2365 		buf);
2366 #endif
2367 }
2368 
2369 /**
2370  * Complete flow rule creation.
2371  *
2372  * @param dev
2373  *   Pointer to Ethernet device.
2374  * @param parser
2375  *   Internal parser structure.
2376  * @param flow
2377  *   Pointer to the rte_flow.
2378  * @param[out] error
2379  *   Perform verbose error reporting if not NULL.
2380  *
2381  * @return
2382  *   0 on success, a negative errno value otherwise and rte_errno is set.
2383  */
2384 static int
2385 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
2386 			      struct mlx5_flow_parse *parser,
2387 			      struct rte_flow *flow,
2388 			      struct rte_flow_error *error)
2389 {
2390 	struct priv *priv __rte_unused = dev->data->dev_private;
2391 	int ret;
2392 	unsigned int i;
2393 	unsigned int flows_n = 0;
2394 
2395 	assert(priv->pd);
2396 	assert(priv->ctx);
2397 	assert(!parser->drop);
2398 	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
2399 	if (ret)
2400 		goto error;
2401 	if (parser->count)
2402 		flow->cs = parser->cs;
2403 	if (!dev->data->dev_started)
2404 		return 0;
2405 	for (i = 0; i != hash_rxq_init_n; ++i) {
2406 		if (!flow->frxq[i].hrxq)
2407 			continue;
2408 		flow->frxq[i].ibv_flow =
2409 			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2410 					       flow->frxq[i].ibv_attr);
2411 		mlx5_flow_dump(dev, flow, i);
2412 		if (!flow->frxq[i].ibv_flow) {
2413 			rte_flow_error_set(error, ENOMEM,
2414 					   RTE_FLOW_ERROR_TYPE_HANDLE,
2415 					   NULL, "flow rule creation failure");
2416 			goto error;
2417 		}
2418 		++flows_n;
2419 	}
2420 	if (!flows_n) {
2421 		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
2422 				   NULL, "internal error in flow creation");
2423 		goto error;
2424 	}
2425 	mlx5_flow_create_update_rxqs(dev, flow);
2426 	return 0;
2427 error:
2428 	ret = rte_errno; /* Save rte_errno before cleanup. */
2429 	assert(flow);
2430 	for (i = 0; i != hash_rxq_init_n; ++i) {
2431 		if (flow->frxq[i].ibv_flow) {
2432 			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
2433 
2434 			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
2435 		}
2436 		if (flow->frxq[i].hrxq)
2437 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2438 		if (flow->frxq[i].ibv_attr)
2439 			rte_free(flow->frxq[i].ibv_attr);
2440 	}
2441 	if (flow->cs) {
2442 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2443 		flow->cs = NULL;
2444 		parser->cs = NULL;
2445 	}
2446 	rte_errno = ret; /* Restore rte_errno. */
2447 	return -rte_errno;
2448 }
2449 
2450 /**
2451  * Create a flow and register it in the given list.
2452  *
2453  * @param dev
2454  *   Pointer to Ethernet device.
2455  * @param list
2456  *   Pointer to a TAILQ flow list.
2457  * @param[in] attr
2458  *   Flow rule attributes.
2459  * @param[in] pattern
2460  *   Pattern specification (list terminated by the END pattern item).
2461  * @param[in] actions
2462  *   Associated actions (list terminated by the END action).
2463  * @param[out] error
2464  *   Perform verbose error reporting if not NULL.
2465  *
2466  * @return
2467  *   A flow on success, NULL otherwise and rte_errno is set.
2468  */
2469 static struct rte_flow *
2470 mlx5_flow_list_create(struct rte_eth_dev *dev,
2471 		      struct mlx5_flows *list,
2472 		      const struct rte_flow_attr *attr,
2473 		      const struct rte_flow_item items[],
2474 		      const struct rte_flow_action actions[],
2475 		      struct rte_flow_error *error)
2476 {
2477 	struct mlx5_flow_parse parser = { .create = 1, };
2478 	struct rte_flow *flow = NULL;
2479 	unsigned int i;
2480 	int ret;
2481 
2482 	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2483 	if (ret)
2484 		goto exit;
2485 	flow = rte_calloc(__func__, 1,
2486 			  sizeof(*flow) +
2487 			  parser.rss_conf.queue_num * sizeof(uint16_t),
2488 			  0);
2489 	if (!flow) {
2490 		rte_flow_error_set(error, ENOMEM,
2491 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2492 				   NULL,
2493 				   "cannot allocate flow memory");
2494 		return NULL;
2495 	}
2496 	/* Copy configuration. */
2497 	flow->queues = (uint16_t (*)[])(flow + 1);
2498 	flow->tunnel = parser.tunnel;
2499 	flow->rss_conf = (struct rte_flow_action_rss){
2500 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2501 		.level = 0,
2502 		.types = parser.rss_conf.types,
2503 		.key_len = parser.rss_conf.key_len,
2504 		.queue_num = parser.rss_conf.queue_num,
2505 		.key = memcpy(flow->rss_key, parser.rss_conf.key,
2506 			      sizeof(*parser.rss_conf.key) *
2507 			      parser.rss_conf.key_len),
2508 		.queue = memcpy(flow->queues, parser.rss_conf.queue,
2509 				sizeof(*parser.rss_conf.queue) *
2510 				parser.rss_conf.queue_num),
2511 	};
2512 	flow->mark = parser.mark;
2513 	/* Finalize the flow. */
2514 	if (parser.drop)
2515 		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
2516 							 error);
2517 	else
2518 		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
2519 	if (ret)
2520 		goto exit;
2521 	TAILQ_INSERT_TAIL(list, flow, next);
2522 	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
2523 		(void *)flow);
2524 	return flow;
2525 exit:
2526 	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
2527 		error->message);
2528 	for (i = 0; i != hash_rxq_init_n; ++i) {
2529 		if (parser.queue[i].ibv_attr)
2530 			rte_free(parser.queue[i].ibv_attr);
2531 	}
2532 	rte_free(flow);
2533 	return NULL;
2534 }
2535 
2536 /**
2537  * Validate a flow supported by the NIC.
2538  *
2539  * @see rte_flow_validate()
2540  * @see rte_flow_ops
2541  */
2542 int
2543 mlx5_flow_validate(struct rte_eth_dev *dev,
2544 		   const struct rte_flow_attr *attr,
2545 		   const struct rte_flow_item items[],
2546 		   const struct rte_flow_action actions[],
2547 		   struct rte_flow_error *error)
2548 {
2549 	struct mlx5_flow_parse parser = { .create = 0, };
2550 
2551 	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2552 }
2553 
2554 /**
2555  * Create a flow.
2556  *
2557  * @see rte_flow_create()
2558  * @see rte_flow_ops
2559  */
2560 struct rte_flow *
2561 mlx5_flow_create(struct rte_eth_dev *dev,
2562 		 const struct rte_flow_attr *attr,
2563 		 const struct rte_flow_item items[],
2564 		 const struct rte_flow_action actions[],
2565 		 struct rte_flow_error *error)
2566 {
2567 	struct priv *priv = dev->data->dev_private;
2568 
2569 	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2570 				     error);
2571 }
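
/*
 * Illustrative sketch (editor's example, not part of the driver): a complete
 * rule steering UDP port 4789 traffic to queue 3 through the generic API
 * that lands in the function above. "port_id" and the queue index are
 * assumptions for the example.
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item_udp udp_spec = { .hdr.dst_port = RTE_BE16(4789) };
 *	struct rte_flow_item_udp udp_mask = { .hdr.dst_port = RTE_BE16(0xffff) };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP,
 *		  .spec = &udp_spec, .mask = &udp_mask },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 3 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error error;
 *	struct rte_flow *flow = NULL;
 *
 *	if (!rte_flow_validate(port_id, &attr, pattern, actions, &error))
 *		flow = rte_flow_create(port_id, &attr, pattern, actions,
 *				       &error);
 */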
2572 
2573 /**
2574  * Destroy a flow in a list.
2575  *
2576  * @param dev
2577  *   Pointer to Ethernet device.
2578  * @param list
2579  *   Pointer to a TAILQ flow list.
2580  * @param[in] flow
2581  *   Flow to destroy.
2582  */
2583 static void
2584 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2585 		       struct rte_flow *flow)
2586 {
2587 	struct priv *priv = dev->data->dev_private;
2588 	unsigned int i;
2589 
2590 	if (flow->drop || !dev->data->dev_started)
2591 		goto free;
2592 	for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
2593 		/* Update queue tunnel type. */
2594 		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2595 						 [(*flow->queues)[i]];
2596 		struct mlx5_rxq_ctrl *rxq_ctrl =
2597 			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2598 		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2599 
2600 		assert(rxq_ctrl->tunnel_types[tunnel] > 0);
2601 		rxq_ctrl->tunnel_types[tunnel] -= 1;
2602 		if (!rxq_ctrl->tunnel_types[tunnel]) {
2603 			/* Update tunnel type. */
2604 			uint8_t j;
2605 			uint8_t types = 0;
2606 			uint8_t last;
2607 
2608 			for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
2609 				if (rxq_ctrl->tunnel_types[j]) {
2610 					types += 1;
2611 					last = j;
2612 				}
2613 			/* Keep the current tunnel type if more than one is left. */
2614 			if (types == 1)
2615 				rxq_data->tunnel = ptype_ext[last];
2616 			else if (types == 0)
2617 				/* No tunnel type left. */
2618 				rxq_data->tunnel = 0;
2619 		}
2620 	}
2621 	for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
2622 		struct rte_flow *tmp;
2623 		int mark = 0;
2624 
2625 		/*
2626 		 * To remove the mark from the queue, the queue must not be
2627 		 * present in any other marked flow (RSS or not).
2628 		 */
2629 		TAILQ_FOREACH(tmp, list, next) {
2630 			unsigned int j;
2631 			uint16_t *tqs = NULL;
2632 			uint16_t tq_n = 0;
2633 
2634 			if (!tmp->mark)
2635 				continue;
2636 			for (j = 0; j != hash_rxq_init_n; ++j) {
2637 				if (!tmp->frxq[j].hrxq)
2638 					continue;
2639 				tqs = tmp->frxq[j].hrxq->ind_table->queues;
2640 				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2641 			}
2642 			if (!tq_n)
2643 				continue;
2644 			for (j = 0; (j != tq_n) && !mark; j++)
2645 				if (tqs[j] == (*flow->queues)[i])
2646 					mark = 1;
2647 		}
2648 		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2649 	}
2650 free:
2651 	if (flow->drop) {
2652 		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2653 			claim_zero(mlx5_glue->destroy_flow
2654 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2655 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2656 	} else {
2657 		for (i = 0; i != hash_rxq_init_n; ++i) {
2658 			struct mlx5_flow *frxq = &flow->frxq[i];
2659 
2660 			if (frxq->ibv_flow)
2661 				claim_zero(mlx5_glue->destroy_flow
2662 					   (frxq->ibv_flow));
2663 			if (frxq->hrxq)
2664 				mlx5_hrxq_release(dev, frxq->hrxq);
2665 			if (frxq->ibv_attr)
2666 				rte_free(frxq->ibv_attr);
2667 		}
2668 	}
2669 	if (flow->cs) {
2670 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2671 		flow->cs = NULL;
2672 	}
2673 	TAILQ_REMOVE(list, flow, next);
2674 	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2675 		(void *)flow);
2676 	rte_free(flow);
2677 }
2678 
2679 /**
2680  * Destroy all flows.
2681  *
2682  * @param dev
2683  *   Pointer to Ethernet device.
2684  * @param list
2685  *   Pointer to a TAILQ flow list.
2686  */
2687 void
2688 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2689 {
2690 	while (!TAILQ_EMPTY(list)) {
2691 		struct rte_flow *flow;
2692 
2693 		flow = TAILQ_FIRST(list);
2694 		mlx5_flow_list_destroy(dev, list, flow);
2695 	}
2696 }
2697 
2698 /**
2699  * Create drop queue.
2700  *
2701  * @param dev
2702  *   Pointer to Ethernet device.
2703  *
2704  * @return
2705  *   0 on success, a negative errno value otherwise and rte_errno is set.
2706  */
2707 int
2708 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2709 {
2710 	struct priv *priv = dev->data->dev_private;
2711 	struct mlx5_hrxq_drop *fdq = NULL;
2712 
2713 	assert(priv->pd);
2714 	assert(priv->ctx);
2715 	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2716 	if (!fdq) {
2717 		DRV_LOG(WARNING,
2718 			"port %u cannot allocate memory for drop queue",
2719 			dev->data->port_id);
2720 		rte_errno = ENOMEM;
2721 		return -rte_errno;
2722 	}
2723 	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2724 	if (!fdq->cq) {
2725 		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2726 			dev->data->port_id);
2727 		rte_errno = errno;
2728 		goto error;
2729 	}
2730 	fdq->wq = mlx5_glue->create_wq
2731 		(priv->ctx,
2732 		 &(struct ibv_wq_init_attr){
2733 			.wq_type = IBV_WQT_RQ,
2734 			.max_wr = 1,
2735 			.max_sge = 1,
2736 			.pd = priv->pd,
2737 			.cq = fdq->cq,
2738 		 });
2739 	if (!fdq->wq) {
2740 		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2741 			dev->data->port_id);
2742 		rte_errno = errno;
2743 		goto error;
2744 	}
2745 	fdq->ind_table = mlx5_glue->create_rwq_ind_table
2746 		(priv->ctx,
2747 		 &(struct ibv_rwq_ind_table_init_attr){
2748 			.log_ind_tbl_size = 0,
2749 			.ind_tbl = &fdq->wq,
2750 			.comp_mask = 0,
2751 		 });
2752 	if (!fdq->ind_table) {
2753 		DRV_LOG(WARNING,
2754 			"port %u cannot allocate indirection table for drop"
2755 			" queue",
2756 			dev->data->port_id);
2757 		rte_errno = errno;
2758 		goto error;
2759 	}
2760 	fdq->qp = mlx5_glue->create_qp_ex
2761 		(priv->ctx,
2762 		 &(struct ibv_qp_init_attr_ex){
2763 			.qp_type = IBV_QPT_RAW_PACKET,
2764 			.comp_mask =
2765 				IBV_QP_INIT_ATTR_PD |
2766 				IBV_QP_INIT_ATTR_IND_TABLE |
2767 				IBV_QP_INIT_ATTR_RX_HASH,
2768 			.rx_hash_conf = (struct ibv_rx_hash_conf){
2769 				.rx_hash_function =
2770 					IBV_RX_HASH_FUNC_TOEPLITZ,
2771 				.rx_hash_key_len = rss_hash_default_key_len,
2772 				.rx_hash_key = rss_hash_default_key,
2773 				.rx_hash_fields_mask = 0,
2774 				},
2775 			.rwq_ind_tbl = fdq->ind_table,
2776 			.pd = priv->pd
2777 		 });
2778 	if (!fdq->qp) {
2779 		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2780 			dev->data->port_id);
2781 		rte_errno = errno;
2782 		goto error;
2783 	}
2784 	priv->flow_drop_queue = fdq;
2785 	return 0;
2786 error:
2787 	if (fdq->qp)
2788 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2789 	if (fdq->ind_table)
2790 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2791 	if (fdq->wq)
2792 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2793 	if (fdq->cq)
2794 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2795 	if (fdq)
2796 		rte_free(fdq);
2797 	priv->flow_drop_queue = NULL;
2798 	return -rte_errno;
2799 }
2800 
2801 /**
2802  * Delete drop queue.
2803  *
2804  * @param dev
2805  *   Pointer to Ethernet device.
2806  */
2807 void
2808 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2809 {
2810 	struct priv *priv = dev->data->dev_private;
2811 	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2812 
2813 	if (!fdq)
2814 		return;
2815 	if (fdq->qp)
2816 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2817 	if (fdq->ind_table)
2818 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2819 	if (fdq->wq)
2820 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2821 	if (fdq->cq)
2822 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2823 	rte_free(fdq);
2824 	priv->flow_drop_queue = NULL;
2825 }
2826 
2827 /**
2828  * Remove all flows.
2829  *
2830  * @param dev
2831  *   Pointer to Ethernet device.
2832  * @param list
2833  *   Pointer to a TAILQ flow list.
2834  */
2835 void
2836 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2837 {
2838 	struct priv *priv = dev->data->dev_private;
2839 	struct rte_flow *flow;
2840 	unsigned int i;
2841 
2842 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2843 		struct mlx5_ind_table_ibv *ind_tbl = NULL;
2844 
2845 		if (flow->drop) {
2846 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2847 				continue;
2848 			claim_zero(mlx5_glue->destroy_flow
2849 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2850 			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2851 			DRV_LOG(DEBUG, "port %u flow %p removed",
2852 				dev->data->port_id, (void *)flow);
2853 			/* Next flow. */
2854 			continue;
2855 		}
2856 		/* Verify the flow has not already been cleaned. */
2857 		for (i = 0; i != hash_rxq_init_n; ++i) {
2858 			if (!flow->frxq[i].ibv_flow)
2859 				continue;
2860 			/*
2861 			 * The indirection table may be needed to clear the
2862 			 * mark flag in the Rx queues.
2863 			 * Keeping a reference here avoids another loop over
2864 			 * the hash Rx queues later.
2865 			 */
2866 			ind_tbl = flow->frxq[i].hrxq->ind_table;
2867 			break;
2868 		}
2869 		if (i == hash_rxq_init_n)
2870 			return;
2871 		if (flow->mark) {
2872 			assert(ind_tbl);
2873 			for (i = 0; i != ind_tbl->queues_n; ++i)
2874 				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2875 		}
2876 		for (i = 0; i != hash_rxq_init_n; ++i) {
2877 			if (!flow->frxq[i].ibv_flow)
2878 				continue;
2879 			claim_zero(mlx5_glue->destroy_flow
2880 				   (flow->frxq[i].ibv_flow));
2881 			flow->frxq[i].ibv_flow = NULL;
2882 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2883 			flow->frxq[i].hrxq = NULL;
2884 		}
2885 		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2886 			(void *)flow);
2887 	}
2888 	/* Cleanup Rx queue tunnel info. */
2889 	for (i = 0; i != priv->rxqs_n; ++i) {
2890 		struct mlx5_rxq_data *q = (*priv->rxqs)[i];
2891 		struct mlx5_rxq_ctrl *rxq_ctrl =
2892 			container_of(q, struct mlx5_rxq_ctrl, rxq);
2893 
2894 		if (!q)
2895 			continue;
2896 		memset((void *)rxq_ctrl->tunnel_types, 0,
2897 		       sizeof(rxq_ctrl->tunnel_types));
2898 		q->tunnel = 0;
2899 	}
2900 }
2901 
2902 /**
2903  * Add all flows.
2904  *
2905  * @param dev
2906  *   Pointer to Ethernet device.
2907  * @param list
2908  *   Pointer to a TAILQ flow list.
2909  *
2910  * @return
2911  *   0 on success, a negative errno value otherwise and rte_errno is set.
2912  */
2913 int
2914 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2915 {
2916 	struct priv *priv = dev->data->dev_private;
2917 	struct rte_flow *flow;
2918 
2919 	TAILQ_FOREACH(flow, list, next) {
2920 		unsigned int i;
2921 
2922 		if (flow->drop) {
2923 			flow->frxq[HASH_RXQ_ETH].ibv_flow =
2924 				mlx5_glue->create_flow
2925 				(priv->flow_drop_queue->qp,
2926 				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2927 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2928 				DRV_LOG(DEBUG,
2929 					"port %u flow %p cannot be applied",
2930 					dev->data->port_id, (void *)flow);
2931 				rte_errno = EINVAL;
2932 				return -rte_errno;
2933 			}
2934 			DRV_LOG(DEBUG, "port %u flow %p applied",
2935 				dev->data->port_id, (void *)flow);
2936 			/* Next flow. */
2937 			continue;
2938 		}
2939 		for (i = 0; i != hash_rxq_init_n; ++i) {
2940 			if (!flow->frxq[i].ibv_attr)
2941 				continue;
2942 			flow->frxq[i].hrxq =
2943 				mlx5_hrxq_get(dev, flow->rss_conf.key,
2944 					      flow->rss_conf.key_len,
2945 					      flow->frxq[i].hash_fields,
2946 					      flow->rss_conf.queue,
2947 					      flow->rss_conf.queue_num,
2948 					      flow->tunnel,
2949 					      flow->rss_conf.level);
2950 			if (flow->frxq[i].hrxq)
2951 				goto flow_create;
2952 			flow->frxq[i].hrxq =
2953 				mlx5_hrxq_new(dev, flow->rss_conf.key,
2954 					      flow->rss_conf.key_len,
2955 					      flow->frxq[i].hash_fields,
2956 					      flow->rss_conf.queue,
2957 					      flow->rss_conf.queue_num,
2958 					      flow->tunnel,
2959 					      flow->rss_conf.level);
2960 			if (!flow->frxq[i].hrxq) {
2961 				DRV_LOG(DEBUG,
2962 					"port %u flow %p cannot create hash"
2963 					" rxq",
2964 					dev->data->port_id, (void *)flow);
2965 				rte_errno = EINVAL;
2966 				return -rte_errno;
2967 			}
2968 flow_create:
2969 			mlx5_flow_dump(dev, flow, i);
2970 			flow->frxq[i].ibv_flow =
2971 				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2972 						       flow->frxq[i].ibv_attr);
2973 			if (!flow->frxq[i].ibv_flow) {
2974 				DRV_LOG(DEBUG,
2975 					"port %u flow %p type %u cannot be"
2976 					" applied",
2977 					dev->data->port_id, (void *)flow, i);
2978 				rte_errno = EINVAL;
2979 				return -rte_errno;
2980 			}
2981 		}
2982 		mlx5_flow_create_update_rxqs(dev, flow);
2983 	}
2984 	return 0;
2985 }
2986 
2987 /**
2988  * Verify that the flow list is empty.
2989  *
2990  * @param dev
2991  *   Pointer to Ethernet device.
2992  *
2993  * @return The number of flows not released.
2994  */
2995 int
2996 mlx5_flow_verify(struct rte_eth_dev *dev)
2997 {
2998 	struct priv *priv = dev->data->dev_private;
2999 	struct rte_flow *flow;
3000 	int ret = 0;
3001 
3002 	TAILQ_FOREACH(flow, &priv->flows, next) {
3003 		DRV_LOG(DEBUG, "port %u flow %p still referenced",
3004 			dev->data->port_id, (void *)flow);
3005 		++ret;
3006 	}
3007 	return ret;
3008 }
3009 
3010 /**
3011  * Enable a control flow configured from the control plane.
3012  *
3013  * @param dev
3014  *   Pointer to Ethernet device.
3015  * @param eth_spec
3016  *   An Ethernet flow spec to apply.
3017  * @param eth_mask
3018  *   An Ethernet flow mask to apply.
3019  * @param vlan_spec
3020  *   A VLAN flow spec to apply.
3021  * @param vlan_mask
3022  *   A VLAN flow mask to apply.
3023  *
3024  * @return
3025  *   0 on success, a negative errno value otherwise and rte_errno is set.
3026  */
3027 int
3028 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
3029 		    struct rte_flow_item_eth *eth_spec,
3030 		    struct rte_flow_item_eth *eth_mask,
3031 		    struct rte_flow_item_vlan *vlan_spec,
3032 		    struct rte_flow_item_vlan *vlan_mask)
3033 {
3034 	struct priv *priv = dev->data->dev_private;
3035 	const struct rte_flow_attr attr = {
3036 		.ingress = 1,
3037 		.priority = MLX5_CTRL_FLOW_PRIORITY,
3038 	};
3039 	struct rte_flow_item items[] = {
3040 		{
3041 			.type = RTE_FLOW_ITEM_TYPE_ETH,
3042 			.spec = eth_spec,
3043 			.last = NULL,
3044 			.mask = eth_mask,
3045 		},
3046 		{
3047 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
3048 				RTE_FLOW_ITEM_TYPE_END,
3049 			.spec = vlan_spec,
3050 			.last = NULL,
3051 			.mask = vlan_mask,
3052 		},
3053 		{
3054 			.type = RTE_FLOW_ITEM_TYPE_END,
3055 		},
3056 	};
3057 	uint16_t queue[priv->reta_idx_n];
3058 	struct rte_flow_action_rss action_rss = {
3059 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
3060 		.level = 0,
3061 		.types = priv->rss_conf.rss_hf,
3062 		.key_len = priv->rss_conf.rss_key_len,
3063 		.queue_num = priv->reta_idx_n,
3064 		.key = priv->rss_conf.rss_key,
3065 		.queue = queue,
3066 	};
3067 	struct rte_flow_action actions[] = {
3068 		{
3069 			.type = RTE_FLOW_ACTION_TYPE_RSS,
3070 			.conf = &action_rss,
3071 		},
3072 		{
3073 			.type = RTE_FLOW_ACTION_TYPE_END,
3074 		},
3075 	};
3076 	struct rte_flow *flow;
3077 	struct rte_flow_error error;
3078 	unsigned int i;
3079 
3080 	if (!priv->reta_idx_n) {
3081 		rte_errno = EINVAL;
3082 		return -rte_errno;
3083 	}
3084 	for (i = 0; i != priv->reta_idx_n; ++i)
3085 		queue[i] = (*priv->reta_idx)[i];
3086 	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
3087 				     actions, &error);
3088 	if (!flow)
3089 		return -rte_errno;
3090 	return 0;
3091 }
3092 
3093 /**
3094  * Enable a control flow configured from the control plane.
3095  *
3096  * @param dev
3097  *   Pointer to Ethernet device.
3098  * @param eth_spec
3099  *   An Ethernet flow spec to apply.
3100  * @param eth_mask
3101  *   An Ethernet flow mask to apply.
3102  *
3103  * @return
3104  *   0 on success, a negative errno value otherwise and rte_errno is set.
3105  */
3106 int
3107 mlx5_ctrl_flow(struct rte_eth_dev *dev,
3108 	       struct rte_flow_item_eth *eth_spec,
3109 	       struct rte_flow_item_eth *eth_mask)
3110 {
3111 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
3112 }
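
/*
 * Illustrative sketch (editor's example, not part of the driver): how the
 * control-plane code typically uses the helper above, here to accept
 * broadcast traffic (the spec and mask values are the example's assumptions).
 *
 *	struct rte_flow_item_eth bcast_spec = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *	struct rte_flow_item_eth bcast_mask = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast_spec, &bcast_mask))
 *		return -rte_errno;	// rte_errno is set by the helper
 */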
3113 
3114 /**
3115  * Destroy a flow.
3116  *
3117  * @see rte_flow_destroy()
3118  * @see rte_flow_ops
3119  */
3120 int
3121 mlx5_flow_destroy(struct rte_eth_dev *dev,
3122 		  struct rte_flow *flow,
3123 		  struct rte_flow_error *error __rte_unused)
3124 {
3125 	struct priv *priv = dev->data->dev_private;
3126 
3127 	mlx5_flow_list_destroy(dev, &priv->flows, flow);
3128 	return 0;
3129 }
3130 
3131 /**
3132  * Destroy all flows.
3133  *
3134  * @see rte_flow_flush()
3135  * @see rte_flow_ops
3136  */
3137 int
3138 mlx5_flow_flush(struct rte_eth_dev *dev,
3139 		struct rte_flow_error *error __rte_unused)
3140 {
3141 	struct priv *priv = dev->data->dev_private;
3142 
3143 	mlx5_flow_list_flush(dev, &priv->flows);
3144 	return 0;
3145 }
3146 
3147 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
3148 /**
3149  * Query flow counter.
3150  *
3151  * @param cs
3152  *   The counter set to query.
3153  * @param counter_stats
3154  *   Counter statistics from the previous query, used to compute deltas.
3155  *
3156  * @return
3157  *   0 on success, a negative errno value otherwise and rte_errno is set.
3158  */
3159 static int
3160 mlx5_flow_query_count(struct ibv_counter_set *cs,
3161 		      struct mlx5_flow_counter_stats *counter_stats,
3162 		      struct rte_flow_query_count *query_count,
3163 		      struct rte_flow_error *error)
3164 {
3165 	uint64_t counters[2];
3166 	struct ibv_query_counter_set_attr query_cs_attr = {
3167 		.cs = cs,
3168 		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
3169 	};
3170 	struct ibv_counter_set_data query_out = {
3171 		.out = counters,
3172 		.outlen = 2 * sizeof(uint64_t),
3173 	};
3174 	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
3175 
3176 	if (err)
3177 		return rte_flow_error_set(error, err,
3178 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3179 					  NULL,
3180 					  "cannot read counter");
3181 	query_count->hits_set = 1;
3182 	query_count->bytes_set = 1;
3183 	query_count->hits = counters[0] - counter_stats->hits;
3184 	query_count->bytes = counters[1] - counter_stats->bytes;
3185 	if (query_count->reset) {
3186 		counter_stats->hits = counters[0];
3187 		counter_stats->bytes = counters[1];
3188 	}
3189 	return 0;
3190 }
3191 
3192 /**
3193  * Query a flow.
3194  *
3195  * @see rte_flow_query()
3196  * @see rte_flow_ops
3197  */
3198 int
3199 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
3200 		struct rte_flow *flow,
3201 		const struct rte_flow_action *action __rte_unused,
3202 		void *data,
3203 		struct rte_flow_error *error)
3204 {
3205 	if (flow->cs) {
3206 		int ret;
3207 
3208 		ret = mlx5_flow_query_count(flow->cs,
3209 					    &flow->counter_stats,
3210 					    (struct rte_flow_query_count *)data,
3211 					    error);
3212 		if (ret)
3213 			return ret;
3214 	} else {
3215 		return rte_flow_error_set(error, EINVAL,
3216 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3217 					  NULL,
3218 					  "no counter found for flow");
3219 	}
3220 	return 0;
3221 }
3222 #endif
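
/*
 * Illustrative sketch (editor's example, not part of the driver): reading the
 * counter of a flow created with a COUNT action, when counter support is
 * compiled in. "port_id" and "flow" (returned by rte_flow_create()) are
 * assumptions for the example.
 *
 *	struct rte_flow_query_count count = { .reset = 1 };
 *	struct rte_flow_action count_action = {
 *		.type = RTE_FLOW_ACTION_TYPE_COUNT,
 *	};
 *	struct rte_flow_error error;
 *
 *	if (!rte_flow_query(port_id, flow, &count_action, &count, &error))
 *		printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *		       count.hits, count.bytes);
 */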
3223 
3224 /**
3225  * Isolated mode.
3226  *
3227  * @see rte_flow_isolate()
3228  * @see rte_flow_ops
3229  */
3230 int
3231 mlx5_flow_isolate(struct rte_eth_dev *dev,
3232 		  int enable,
3233 		  struct rte_flow_error *error)
3234 {
3235 	struct priv *priv = dev->data->dev_private;
3236 
3237 	if (dev->data->dev_started) {
3238 		rte_flow_error_set(error, EBUSY,
3239 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3240 				   NULL,
3241 				   "port must be stopped first");
3242 		return -rte_errno;
3243 	}
3244 	priv->isolated = !!enable;
3245 	if (enable)
3246 		dev->dev_ops = &mlx5_dev_ops_isolate;
3247 	else
3248 		dev->dev_ops = &mlx5_dev_ops;
3249 	return 0;
3250 }
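
/*
 * Illustrative sketch (editor's example, not part of the driver): isolated
 * mode must be requested while the port is stopped, typically right before
 * starting it. "port_id" is an assumption for the example.
 *
 *	struct rte_flow_error error;
 *
 *	if (rte_flow_isolate(port_id, 1, &error))
 *		printf("cannot enter isolated mode: %s\n", error.message);
 *	else
 *		rte_eth_dev_start(port_id);
 */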
3251 
3252 /**
3253  * Convert a flow director filter to a generic flow.
3254  *
3255  * @param dev
3256  *   Pointer to Ethernet device.
3257  * @param fdir_filter
3258  *   Flow director filter to add.
3259  * @param attributes
3260  *   Generic flow parameters structure.
3261  *
3262  * @return
3263  *   0 on success, a negative errno value otherwise and rte_errno is set.
3264  */
3265 static int
3266 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
3267 			 const struct rte_eth_fdir_filter *fdir_filter,
3268 			 struct mlx5_fdir *attributes)
3269 {
3270 	struct priv *priv = dev->data->dev_private;
3271 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
3272 	const struct rte_eth_fdir_masks *mask =
3273 		&dev->data->dev_conf.fdir_conf.mask;
3274 
3275 	/* Validate queue number. */
3276 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3277 		DRV_LOG(ERR, "port %u invalid queue number %d",
3278 			dev->data->port_id, fdir_filter->action.rx_queue);
3279 		rte_errno = EINVAL;
3280 		return -rte_errno;
3281 	}
3282 	attributes->attr.ingress = 1;
3283 	attributes->items[0] = (struct rte_flow_item) {
3284 		.type = RTE_FLOW_ITEM_TYPE_ETH,
3285 		.spec = &attributes->l2,
3286 		.mask = &attributes->l2_mask,
3287 	};
3288 	switch (fdir_filter->action.behavior) {
3289 	case RTE_ETH_FDIR_ACCEPT:
3290 		attributes->actions[0] = (struct rte_flow_action){
3291 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
3292 			.conf = &attributes->queue,
3293 		};
3294 		break;
3295 	case RTE_ETH_FDIR_REJECT:
3296 		attributes->actions[0] = (struct rte_flow_action){
3297 			.type = RTE_FLOW_ACTION_TYPE_DROP,
3298 		};
3299 		break;
3300 	default:
3301 		DRV_LOG(ERR, "port %u invalid behavior %d",
3302 			dev->data->port_id,
3303 			fdir_filter->action.behavior);
3304 		rte_errno = ENOTSUP;
3305 		return -rte_errno;
3306 	}
3307 	attributes->queue.index = fdir_filter->action.rx_queue;
3308 	/* Handle L3. */
3309 	switch (fdir_filter->input.flow_type) {
3310 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3311 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3312 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3313 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
3314 			.src_addr = input->flow.ip4_flow.src_ip,
3315 			.dst_addr = input->flow.ip4_flow.dst_ip,
3316 			.time_to_live = input->flow.ip4_flow.ttl,
3317 			.type_of_service = input->flow.ip4_flow.tos,
3318 			.next_proto_id = input->flow.ip4_flow.proto,
3319 		};
3320 		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
3321 			.src_addr = mask->ipv4_mask.src_ip,
3322 			.dst_addr = mask->ipv4_mask.dst_ip,
3323 			.time_to_live = mask->ipv4_mask.ttl,
3324 			.type_of_service = mask->ipv4_mask.tos,
3325 			.next_proto_id = mask->ipv4_mask.proto,
3326 		};
3327 		attributes->items[1] = (struct rte_flow_item){
3328 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
3329 			.spec = &attributes->l3,
3330 			.mask = &attributes->l3_mask,
3331 		};
3332 		break;
3333 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3334 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3335 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3336 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3337 			.hop_limits = input->flow.ipv6_flow.hop_limits,
3338 			.proto = input->flow.ipv6_flow.proto,
3339 		};
3340 
3341 		memcpy(attributes->l3.ipv6.hdr.src_addr,
3342 		       input->flow.ipv6_flow.src_ip,
3343 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3344 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
3345 		       input->flow.ipv6_flow.dst_ip,
3346 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
3347 		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3348 		       mask->ipv6_mask.src_ip,
3349 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3350 		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3351 		       mask->ipv6_mask.dst_ip,
3352 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
3353 		attributes->items[1] = (struct rte_flow_item){
3354 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
3355 			.spec = &attributes->l3,
3356 			.mask = &attributes->l3_mask,
3357 		};
3358 		break;
3359 	default:
3360 		DRV_LOG(ERR, "port %u invalid flow type %d",
3361 			dev->data->port_id, fdir_filter->input.flow_type);
3362 		rte_errno = ENOTSUP;
3363 		return -rte_errno;
3364 	}
3365 	/* Handle L4. */
3366 	switch (fdir_filter->input.flow_type) {
3367 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3368 		attributes->l4.udp.hdr = (struct udp_hdr){
3369 			.src_port = input->flow.udp4_flow.src_port,
3370 			.dst_port = input->flow.udp4_flow.dst_port,
3371 		};
3372 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
3373 			.src_port = mask->src_port_mask,
3374 			.dst_port = mask->dst_port_mask,
3375 		};
3376 		attributes->items[2] = (struct rte_flow_item){
3377 			.type = RTE_FLOW_ITEM_TYPE_UDP,
3378 			.spec = &attributes->l4,
3379 			.mask = &attributes->l4_mask,
3380 		};
3381 		break;
3382 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3383 		attributes->l4.tcp.hdr = (struct tcp_hdr){
3384 			.src_port = input->flow.tcp4_flow.src_port,
3385 			.dst_port = input->flow.tcp4_flow.dst_port,
3386 		};
3387 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3388 			.src_port = mask->src_port_mask,
3389 			.dst_port = mask->dst_port_mask,
3390 		};
3391 		attributes->items[2] = (struct rte_flow_item){
3392 			.type = RTE_FLOW_ITEM_TYPE_TCP,
3393 			.spec = &attributes->l4,
3394 			.mask = &attributes->l4_mask,
3395 		};
3396 		break;
3397 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3398 		attributes->l4.udp.hdr = (struct udp_hdr){
3399 			.src_port = input->flow.udp6_flow.src_port,
3400 			.dst_port = input->flow.udp6_flow.dst_port,
3401 		};
3402 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
3403 			.src_port = mask->src_port_mask,
3404 			.dst_port = mask->dst_port_mask,
3405 		};
3406 		attributes->items[2] = (struct rte_flow_item){
3407 			.type = RTE_FLOW_ITEM_TYPE_UDP,
3408 			.spec = &attributes->l4,
3409 			.mask = &attributes->l4_mask,
3410 		};
3411 		break;
3412 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3413 		attributes->l4.tcp.hdr = (struct tcp_hdr){
3414 			.src_port = input->flow.tcp6_flow.src_port,
3415 			.dst_port = input->flow.tcp6_flow.dst_port,
3416 		};
3417 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3418 			.src_port = mask->src_port_mask,
3419 			.dst_port = mask->dst_port_mask,
3420 		};
3421 		attributes->items[2] = (struct rte_flow_item){
3422 			.type = RTE_FLOW_ITEM_TYPE_TCP,
3423 			.spec = &attributes->l4,
3424 			.mask = &attributes->l4_mask,
3425 		};
3426 		break;
3427 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3428 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3429 		break;
3430 	default:
3431 		DRV_LOG(ERR, "port %u invalid flow type %d",
3432 			dev->data->port_id, fdir_filter->input.flow_type);
3433 		rte_errno = ENOTSUP;
3434 		return -rte_errno;
3435 	}
3436 	return 0;
3437 }
3438 
3439 /**
3440  * Add a new flow director filter and store it in the list.
3441  *
3442  * @param dev
3443  *   Pointer to Ethernet device.
3444  * @param fdir_filter
3445  *   Flow director filter to add.
3446  *
3447  * @return
3448  *   0 on success, a negative errno value otherwise and rte_errno is set.
3449  */
3450 static int
3451 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3452 		     const struct rte_eth_fdir_filter *fdir_filter)
3453 {
3454 	struct priv *priv = dev->data->dev_private;
3455 	struct mlx5_fdir attributes = {
3456 		.attr.group = 0,
3457 		.l2_mask = {
3458 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3459 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3460 			.type = 0,
3461 		},
3462 	};
3463 	struct mlx5_flow_parse parser = {
3464 		.layer = HASH_RXQ_ETH,
3465 	};
3466 	struct rte_flow_error error;
3467 	struct rte_flow *flow;
3468 	int ret;
3469 
3470 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3471 	if (ret)
3472 		return ret;
3473 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3474 				attributes.actions, &error, &parser);
3475 	if (ret)
3476 		return ret;
3477 	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3478 				     attributes.items, attributes.actions,
3479 				     &error);
3480 	if (flow) {
3481 		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3482 			(void *)flow);
3483 		return 0;
3484 	}
3485 	return -rte_errno;
3486 }
3487 
3488 /**
3489  * Delete specific filter.
3490  *
3491  * @param dev
3492  *   Pointer to Ethernet device.
3493  * @param fdir_filter
3494  *   Filter to be deleted.
3495  *
3496  * @return
3497  *   0 on success, a negative errno value otherwise and rte_errno is set.
3498  */
3499 static int
3500 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
3501 			const struct rte_eth_fdir_filter *fdir_filter)
3502 {
3503 	struct priv *priv = dev->data->dev_private;
3504 	struct mlx5_fdir attributes = {
3505 		.attr.group = 0,
3506 	};
3507 	struct mlx5_flow_parse parser = {
3508 		.create = 1,
3509 		.layer = HASH_RXQ_ETH,
3510 	};
3511 	struct rte_flow_error error;
3512 	struct rte_flow *flow;
3513 	unsigned int i;
3514 	int ret;
3515 
3516 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3517 	if (ret)
3518 		return ret;
3519 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3520 				attributes.actions, &error, &parser);
3521 	if (ret)
3522 		goto exit;
3523 	/*
3524 	 * Special case for the drop action, which is only added to the
3525 	 * specifications when the flow is created; at this point the drop
3526 	 * specification is therefore missing and must be appended manually.
3527 	 */
3528 	if (parser.drop) {
3529 		struct ibv_flow_spec_action_drop *drop;
3530 
3531 		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
3532 				parser.queue[HASH_RXQ_ETH].offset);
3533 		*drop = (struct ibv_flow_spec_action_drop){
3534 			.type = IBV_FLOW_SPEC_ACTION_DROP,
3535 			.size = sizeof(struct ibv_flow_spec_action_drop),
3536 		};
3537 		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
3538 	}
3539 	TAILQ_FOREACH(flow, &priv->flows, next) {
3540 		struct ibv_flow_attr *attr;
3541 		struct ibv_spec_header *attr_h;
3542 		void *spec;
3543 		struct ibv_flow_attr *flow_attr;
3544 		struct ibv_spec_header *flow_h;
3545 		void *flow_spec;
3546 		unsigned int specs_n;
3547 		unsigned int queue_id = parser.drop ? HASH_RXQ_ETH :
3548 						      parser.layer;
3549 
3550 		attr = parser.queue[queue_id].ibv_attr;
3551 		flow_attr = flow->frxq[queue_id].ibv_attr;
3552 		/* Compare the attributes first. */
3553 		if (!flow_attr ||
3554 		    memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
3555 			continue;
3556 		if (attr->num_of_specs == 0)
3557 			continue;
3558 		spec = (void *)((uintptr_t)attr +
3559 				sizeof(struct ibv_flow_attr));
3560 		flow_spec = (void *)((uintptr_t)flow_attr +
3561 				     sizeof(struct ibv_flow_attr));
3562 		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
3563 		for (i = 0; i != specs_n; ++i) {
3564 			attr_h = spec;
3565 			flow_h = flow_spec;
3566 			if (memcmp(spec, flow_spec,
3567 				   RTE_MIN(attr_h->size, flow_h->size)))
3568 				goto wrong_flow;
3569 			spec = (void *)((uintptr_t)spec + attr_h->size);
3570 			flow_spec = (void *)((uintptr_t)flow_spec +
3571 					     flow_h->size);
3572 		}
3573 		/* At this point, the flows match. */
3574 		break;
3575 wrong_flow:
3576 		/* The flow does not match. */
3577 		continue;
3578 	}
3579 	ret = rte_errno; /* Save rte_errno before cleanup. */
3580 	if (flow)
3581 		mlx5_flow_list_destroy(dev, &priv->flows, flow);
3582 exit:
3583 	for (i = 0; i != hash_rxq_init_n; ++i) {
3584 		if (parser.queue[i].ibv_attr)
3585 			rte_free(parser.queue[i].ibv_attr);
3586 	}
3587 	rte_errno = ret; /* Restore rte_errno. */
3588 	return -rte_errno;
3589 }
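
/*
 * Editor's note (sketch, compiled out): the matching loop above relies on
 * the Verbs memory layout in which an ibv_flow_attr is immediately followed
 * by num_of_specs packed specifications, each beginning with an
 * ibv_spec_header that carries its own size.  A stand-alone restatement of
 * that walk, under the same layout assumption, could look like this:
 */
#if 0
static int
example_specs_equal(const struct ibv_flow_attr *a,
		    const struct ibv_flow_attr *b)
{
	const uint8_t *pa = (const uint8_t *)(a + 1);
	const uint8_t *pb = (const uint8_t *)(b + 1);
	unsigned int n = RTE_MIN(a->num_of_specs, b->num_of_specs);
	unsigned int i;

	for (i = 0; i != n; ++i) {
		const struct ibv_spec_header *ha = (const void *)pa;
		const struct ibv_spec_header *hb = (const void *)pb;

		/* Compare only the common prefix, as the loop above does. */
		if (memcmp(pa, pb, RTE_MIN(ha->size, hb->size)))
			return 0;
		pa += ha->size;
		pb += hb->size;
	}
	return 1;
}
#endif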
3590 
3591 /**
3592  * Update a specific filter: the existing entry is deleted and re-added.
3593  *
3594  * @param dev
3595  *   Pointer to Ethernet device.
3596  * @param fdir_filter
3597  *   Filter to be updated.
3598  *
3599  * @return
3600  *   0 on success, a negative errno value otherwise and rte_errno is set.
3601  */
3602 static int
3603 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3604 			const struct rte_eth_fdir_filter *fdir_filter)
3605 {
3606 	int ret;
3607 
3608 	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3609 	if (ret)
3610 		return ret;
3611 	return mlx5_fdir_filter_add(dev, fdir_filter);
3612 }
3613 
3614 /**
3615  * Flush all filters.
3616  *
3617  * @param dev
3618  *   Pointer to Ethernet device.
3619  */
3620 static void
3621 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3622 {
3623 	struct priv *priv = dev->data->dev_private;
3624 
3625 	mlx5_flow_list_flush(dev, &priv->flows);
3626 }
3627 
3628 /**
3629  * Get flow director information.
3630  *
3631  * @param dev
3632  *   Pointer to Ethernet device.
3633  * @param[out] fdir_info
3634  *   Resulting flow director information.
3635  */
3636 static void
3637 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3638 {
3639 	struct rte_eth_fdir_masks *mask =
3640 		&dev->data->dev_conf.fdir_conf.mask;
3641 
3642 	fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
3643 	fdir_info->guarant_spc = 0;
3644 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3645 	fdir_info->max_flexpayload = 0;
3646 	fdir_info->flow_types_mask[0] = 0;
3647 	fdir_info->flex_payload_unit = 0;
3648 	fdir_info->max_flex_payload_segment_num = 0;
3649 	fdir_info->flex_payload_limit = 0;
3650 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3651 }
3652 
3653 /**
3654  * Deal with flow director operations.
3655  *
3656  * @param dev
3657  *   Pointer to Ethernet device.
3658  * @param filter_op
3659  *   Operation to perform.
3660  * @param arg
3661  *   Pointer to operation-specific structure.
3662  *
3663  * @return
3664  *   0 on success, a negative errno value otherwise and rte_errno is set.
3665  */
3666 static int
3667 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3668 		    void *arg)
3669 {
3670 	enum rte_fdir_mode fdir_mode =
3671 		dev->data->dev_conf.fdir_conf.mode;
3672 
3673 	if (filter_op == RTE_ETH_FILTER_NOP)
3674 		return 0;
3675 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3676 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3677 		DRV_LOG(ERR, "port %u flow director mode %d not supported",
3678 			dev->data->port_id, fdir_mode);
3679 		rte_errno = EINVAL;
3680 		return -rte_errno;
3681 	}
3682 	switch (filter_op) {
3683 	case RTE_ETH_FILTER_ADD:
3684 		return mlx5_fdir_filter_add(dev, arg);
3685 	case RTE_ETH_FILTER_UPDATE:
3686 		return mlx5_fdir_filter_update(dev, arg);
3687 	case RTE_ETH_FILTER_DELETE:
3688 		return mlx5_fdir_filter_delete(dev, arg);
3689 	case RTE_ETH_FILTER_FLUSH:
3690 		mlx5_fdir_filter_flush(dev);
3691 		break;
3692 	case RTE_ETH_FILTER_INFO:
3693 		mlx5_fdir_info_get(dev, arg);
3694 		break;
3695 	default:
3696 		DRV_LOG(DEBUG, "port %u unknown operation %u",
3697 			dev->data->port_id, filter_op);
3698 		rte_errno = EINVAL;
3699 		return -rte_errno;
3700 	}
3701 	return 0;
3702 }
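
/*
 * Editor's sketch (hypothetical, compiled out): the remaining operations
 * dispatched above are driven the same way through rte_eth_dev_filter_ctrl(),
 * e.g. flushing every filter and querying the (mostly static) flow director
 * capabilities reported by mlx5_fdir_info_get().
 */
#if 0
static void
example_fdir_flush_and_info(uint16_t port_id)
{
	struct rte_eth_fdir_info info;

	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
				RTE_ETH_FILTER_FLUSH, NULL);
	memset(&info, 0, sizeof(info));
	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
				RTE_ETH_FILTER_INFO, &info);
}
#endif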
3703 
3704 /**
3705  * Manage filter operations.
3706  *
3707  * @param dev
3708  *   Pointer to Ethernet device structure.
3709  * @param filter_type
3710  *   Filter type.
3711  * @param filter_op
3712  *   Operation to perform.
3713  * @param arg
3714  *   Pointer to operation-specific structure.
3715  *
3716  * @return
3717  *   0 on success, a negative errno value otherwise and rte_errno is set.
3718  */
3719 int
3720 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3721 		     enum rte_filter_type filter_type,
3722 		     enum rte_filter_op filter_op,
3723 		     void *arg)
3724 {
3725 	switch (filter_type) {
3726 	case RTE_ETH_FILTER_GENERIC:
3727 		if (filter_op != RTE_ETH_FILTER_GET) {
3728 			rte_errno = EINVAL;
3729 			return -rte_errno;
3730 		}
3731 		*(const void **)arg = &mlx5_flow_ops;
3732 		return 0;
3733 	case RTE_ETH_FILTER_FDIR:
3734 		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3735 	default:
3736 		DRV_LOG(ERR, "port %u filter type (%d) not supported",
3737 			dev->data->port_id, filter_type);
3738 		rte_errno = ENOTSUP;
3739 		return -rte_errno;
3740 	}
3741 	return 0;
3742 }
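
/*
 * Editor's sketch (compiled out): the RTE_ETH_FILTER_GENERIC branch above
 * is how the rte_flow layer discovers the driver callbacks; fetching them
 * directly would look roughly like this.
 */
#if 0
static const struct rte_flow_ops *
example_get_flow_ops(struct rte_eth_dev *dev)
{
	const struct rte_flow_ops *ops = NULL;

	if (mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
				 RTE_ETH_FILTER_GET, &ops))
		return NULL;
	return ops;
}
#endif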
3743 
3744 /**
3745  * Detect the number of supported Verbs flow priorities.
3746  *
3747  * @param dev
3748  *   Pointer to Ethernet device.
3749  *
3750  * @return
3751  *   number of supported Verbs flow priority.
3752  *   Number of supported Verbs flow priorities.
3753 unsigned int
3754 mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
3755 {
3756 	struct priv *priv = dev->data->dev_private;
3757 	unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
3758 	struct {
3759 		struct ibv_flow_attr attr;
3760 		struct ibv_flow_spec_eth eth;
3761 		struct ibv_flow_spec_action_drop drop;
3762 	} flow_attr = {
3763 		.attr = {
3764 			.num_of_specs = 2,
3765 		},
3766 		.eth = {
3767 			.type = IBV_FLOW_SPEC_ETH,
3768 			.size = sizeof(struct ibv_flow_spec_eth),
3769 		},
3770 		.drop = {
3771 			.size = sizeof(struct ibv_flow_spec_action_drop),
3772 			.type = IBV_FLOW_SPEC_ACTION_DROP,
3773 		},
3774 	};
3775 	struct ibv_flow *flow;
3776 
3777 	do {
3778 		flow_attr.attr.priority = verb_priorities - 1;
3779 		flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
3780 					      &flow_attr.attr);
3781 		if (flow) {
3782 			claim_zero(mlx5_glue->destroy_flow(flow));
3783 			/* Try more priorities. */
3784 			verb_priorities *= 2;
3785 		} else {
3786 			/* Failed, restore last right number. */
3787 			/* Creation failed, fall back to the last working value. */
3788 			break;
3789 		}
3790 	} while (1);
3791 	DRV_LOG(DEBUG, "port %u Verbs flow priorities: %u,"
3792 		" user flow priorities: %d",
3793 		dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
3794 	return verb_priorities;
3795 }
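
/*
 * Editor's note: starting from MLX5_VERBS_FLOW_PRIO_8 (eight levels), a
 * device supporting 16 flow priorities would be probed as follows by the
 * loop above: creating at priority 7 succeeds -> try 16 levels, priority 15
 * succeeds -> try 32 levels, priority 31 fails -> 32 / 2 = 16 is returned.
 */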
3796