xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision b7a7c97a40cf8c513ca4526c4734d7401907692c)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9 
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19 
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28 
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33 
34 /* Flow priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 1
36 
37 /* Internet Protocol versions. */
38 #define MLX5_IPV4 4
39 #define MLX5_IPV6 6
40 #define MLX5_GRE 47
41 
42 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
43 struct ibv_flow_spec_counter_action {
44 	int dummy;
45 };
46 #endif
47 
48 /* Dev ops structures defined in mlx5.c. */
49 extern const struct eth_dev_ops mlx5_dev_ops;
50 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
51 
52 /** Structure given to the conversion functions. */
53 struct mlx5_flow_data {
54 	struct rte_eth_dev *dev; /**< Ethernet device. */
55 	struct mlx5_flow_parse *parser; /**< Parser context. */
56 	struct rte_flow_error *error; /**< Error context. */
57 };
58 
59 static int
60 mlx5_flow_create_eth(const struct rte_flow_item *item,
61 		     const void *default_mask,
62 		     struct mlx5_flow_data *data);
63 
64 static int
65 mlx5_flow_create_vlan(const struct rte_flow_item *item,
66 		      const void *default_mask,
67 		      struct mlx5_flow_data *data);
68 
69 static int
70 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
71 		      const void *default_mask,
72 		      struct mlx5_flow_data *data);
73 
74 static int
75 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
76 		      const void *default_mask,
77 		      struct mlx5_flow_data *data);
78 
79 static int
80 mlx5_flow_create_udp(const struct rte_flow_item *item,
81 		     const void *default_mask,
82 		     struct mlx5_flow_data *data);
83 
84 static int
85 mlx5_flow_create_tcp(const struct rte_flow_item *item,
86 		     const void *default_mask,
87 		     struct mlx5_flow_data *data);
88 
89 static int
90 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
91 		       const void *default_mask,
92 		       struct mlx5_flow_data *data);
93 
94 static int
95 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
96 			   const void *default_mask,
97 			   struct mlx5_flow_data *data);
98 
99 static int
100 mlx5_flow_create_gre(const struct rte_flow_item *item,
101 		     const void *default_mask,
102 		     struct mlx5_flow_data *data);
103 
104 struct mlx5_flow_parse;
105 
106 static void
107 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
108 		      unsigned int size);
109 
110 static int
111 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
112 
113 static int
114 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
115 
116 /* Hash RX queue types. */
117 enum hash_rxq_type {
118 	HASH_RXQ_TCPV4,
119 	HASH_RXQ_UDPV4,
120 	HASH_RXQ_IPV4,
121 	HASH_RXQ_TCPV6,
122 	HASH_RXQ_UDPV6,
123 	HASH_RXQ_IPV6,
124 	HASH_RXQ_ETH,
125 	HASH_RXQ_TUNNEL,
126 };
127 
128 /* Initialization data for hash RX queue. */
129 struct hash_rxq_init {
130 	uint64_t hash_fields; /* Fields that participate in the hash. */
131 	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
132 	unsigned int flow_priority; /* Flow priority to use. */
133 	unsigned int ip_version; /* Internet protocol. */
134 };
135 
136 /* Initialization data for hash RX queues. */
137 const struct hash_rxq_init hash_rxq_init[] = {
138 	[HASH_RXQ_TCPV4] = {
139 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
140 				IBV_RX_HASH_DST_IPV4 |
141 				IBV_RX_HASH_SRC_PORT_TCP |
142 				IBV_RX_HASH_DST_PORT_TCP),
143 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
144 		.flow_priority = 0,
145 		.ip_version = MLX5_IPV4,
146 	},
147 	[HASH_RXQ_UDPV4] = {
148 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
149 				IBV_RX_HASH_DST_IPV4 |
150 				IBV_RX_HASH_SRC_PORT_UDP |
151 				IBV_RX_HASH_DST_PORT_UDP),
152 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
153 		.flow_priority = 0,
154 		.ip_version = MLX5_IPV4,
155 	},
156 	[HASH_RXQ_IPV4] = {
157 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
158 				IBV_RX_HASH_DST_IPV4),
159 		.dpdk_rss_hf = (ETH_RSS_IPV4 |
160 				ETH_RSS_FRAG_IPV4),
161 		.flow_priority = 1,
162 		.ip_version = MLX5_IPV4,
163 	},
164 	[HASH_RXQ_TCPV6] = {
165 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
166 				IBV_RX_HASH_DST_IPV6 |
167 				IBV_RX_HASH_SRC_PORT_TCP |
168 				IBV_RX_HASH_DST_PORT_TCP),
169 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
170 		.flow_priority = 0,
171 		.ip_version = MLX5_IPV6,
172 	},
173 	[HASH_RXQ_UDPV6] = {
174 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
175 				IBV_RX_HASH_DST_IPV6 |
176 				IBV_RX_HASH_SRC_PORT_UDP |
177 				IBV_RX_HASH_DST_PORT_UDP),
178 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
179 		.flow_priority = 0,
180 		.ip_version = MLX5_IPV6,
181 	},
182 	[HASH_RXQ_IPV6] = {
183 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
184 				IBV_RX_HASH_DST_IPV6),
185 		.dpdk_rss_hf = (ETH_RSS_IPV6 |
186 				ETH_RSS_FRAG_IPV6),
187 		.flow_priority = 1,
188 		.ip_version = MLX5_IPV6,
189 	},
190 	[HASH_RXQ_ETH] = {
191 		.hash_fields = 0,
192 		.dpdk_rss_hf = 0,
193 		.flow_priority = 2,
194 	},
195 };
196 
197 /* Number of entries in hash_rxq_init[]. */
198 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
199 
200 /** Structure for holding counter stats. */
201 struct mlx5_flow_counter_stats {
202 	uint64_t hits; /**< Number of packets matched by the rule. */
203 	uint64_t bytes; /**< Number of bytes matched by the rule. */
204 };
205 
206 /** Structure for Drop queue. */
207 struct mlx5_hrxq_drop {
208 	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
209 	struct ibv_qp *qp; /**< Verbs queue pair. */
210 	struct ibv_wq *wq; /**< Verbs work queue. */
211 	struct ibv_cq *cq; /**< Verbs completion queue. */
212 };
213 
214 /* Flow structures. */
215 struct mlx5_flow {
216 	uint64_t hash_fields; /**< Fields that participate in the hash. */
217 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
218 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
219 	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
220 };
221 
222 /* Drop flow structures. */
223 struct mlx5_flow_drop {
224 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
225 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
226 };
227 
228 struct rte_flow {
229 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
230 	uint32_t mark:1; /**< Set if the flow is marked. */
231 	uint32_t drop:1; /**< Drop queue. */
232 	struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
233 	uint16_t (*queues)[]; /**< Queue indexes to use. */
234 	uint8_t rss_key[40]; /**< Copy of the RSS key. */
235 	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
236 	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
237 	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
238 	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
239 	/**< Flow with Rx queue. */
240 };
241 
242 /** Static initializer for items. */
243 #define ITEMS(...) \
244 	(const enum rte_flow_item_type []){ \
245 		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
246 	}
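/*
 * Editorial note (not in the original source): ITEMS() builds an
 * anonymous, END-terminated array of item types, e.g.
 *
 *   ITEMS(RTE_FLOW_ITEM_TYPE_VLAN, RTE_FLOW_ITEM_TYPE_IPV4)
 *
 * expands to the compound literal
 *
 *   (const enum rte_flow_item_type []){
 *           RTE_FLOW_ITEM_TYPE_VLAN,
 *           RTE_FLOW_ITEM_TYPE_IPV4,
 *           RTE_FLOW_ITEM_TYPE_END,
 *   }
 *
 * which is what the .items fields of mlx5_flow_items[] below point to.
 */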
247 
248 #define IS_TUNNEL(type) ( \
249 	(type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
250 	(type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
251 	(type) == RTE_FLOW_ITEM_TYPE_GRE)
252 
253 const uint32_t flow_ptype[] = {
254 	[RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
255 	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
256 	[RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
257 };
258 
259 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
260 
261 const uint32_t ptype_ext[] = {
262 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
263 					      RTE_PTYPE_L4_UDP,
264 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)]	= RTE_PTYPE_TUNNEL_VXLAN_GPE |
265 						  RTE_PTYPE_L4_UDP,
266 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
267 };
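/*
 * Editorial note, assuming the RTE_PTYPE_TUNNEL_* values from
 * rte_mbuf_ptype.h: tunnel packet types occupy bits 12-15 of the mbuf
 * packet type, so PTYPE_IDX() extracts that nibble, e.g.
 *
 *   PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)
 *     == (0x0000f000 & 0x00003000) >> 12 == 3
 *
 * which keeps ptype_ext[] small and directly indexable by tunnel type.
 */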
268 
269 /** Structure to generate a simple graph of layers supported by the NIC. */
270 struct mlx5_flow_items {
271 	/** List of possible actions for these items. */
272 	const enum rte_flow_action_type *const actions;
273 	/** Bit-masks corresponding to the possibilities for the item. */
274 	const void *mask;
275 	/**
276 	 * Default bit-masks to use when item->mask is not provided. When
277 	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
278 	 * used instead.
279 	 */
280 	const void *default_mask;
281 	/** Bit-masks size in bytes. */
282 	const unsigned int mask_sz;
283 	/**
284 	 * Conversion function from rte_flow to NIC specific flow.
285 	 *
286 	 * @param item
287 	 *   rte_flow item to convert.
288 	 * @param default_mask
289 	 *   Default bit-masks to use when item->mask is not provided.
290 	 * @param data
291 	 *   Internal structure to store the conversion.
292 	 *
293 	 * @return
294 	 *   0 on success, a negative errno value otherwise and rte_errno is
295 	 *   set.
296 	 */
297 	int (*convert)(const struct rte_flow_item *item,
298 		       const void *default_mask,
299 		       struct mlx5_flow_data *data);
300 	/** Size in bytes of the destination structure. */
301 	const unsigned int dst_sz;
302 	/** List of possible following items.  */
303 	const enum rte_flow_item_type *const items;
304 };
305 
306 /** Valid actions for this PMD. */
307 static const enum rte_flow_action_type valid_actions[] = {
308 	RTE_FLOW_ACTION_TYPE_DROP,
309 	RTE_FLOW_ACTION_TYPE_QUEUE,
310 	RTE_FLOW_ACTION_TYPE_MARK,
311 	RTE_FLOW_ACTION_TYPE_FLAG,
312 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
313 	RTE_FLOW_ACTION_TYPE_COUNT,
314 #endif
315 	RTE_FLOW_ACTION_TYPE_END,
316 };
317 
318 /** Graph of supported items and associated actions. */
319 static const struct mlx5_flow_items mlx5_flow_items[] = {
320 	[RTE_FLOW_ITEM_TYPE_END] = {
321 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
322 			       RTE_FLOW_ITEM_TYPE_VXLAN,
323 			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
324 			       RTE_FLOW_ITEM_TYPE_GRE),
325 	},
326 	[RTE_FLOW_ITEM_TYPE_ETH] = {
327 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
328 			       RTE_FLOW_ITEM_TYPE_IPV4,
329 			       RTE_FLOW_ITEM_TYPE_IPV6),
330 		.actions = valid_actions,
331 		.mask = &(const struct rte_flow_item_eth){
332 			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
333 			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
334 			.type = -1,
335 		},
336 		.default_mask = &rte_flow_item_eth_mask,
337 		.mask_sz = sizeof(struct rte_flow_item_eth),
338 		.convert = mlx5_flow_create_eth,
339 		.dst_sz = sizeof(struct ibv_flow_spec_eth),
340 	},
341 	[RTE_FLOW_ITEM_TYPE_VLAN] = {
342 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
343 			       RTE_FLOW_ITEM_TYPE_IPV6),
344 		.actions = valid_actions,
345 		.mask = &(const struct rte_flow_item_vlan){
346 			.tci = -1,
347 			.inner_type = -1,
348 		},
349 		.default_mask = &rte_flow_item_vlan_mask,
350 		.mask_sz = sizeof(struct rte_flow_item_vlan),
351 		.convert = mlx5_flow_create_vlan,
352 		.dst_sz = 0,
353 	},
354 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
355 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
356 			       RTE_FLOW_ITEM_TYPE_TCP,
357 			       RTE_FLOW_ITEM_TYPE_GRE),
358 		.actions = valid_actions,
359 		.mask = &(const struct rte_flow_item_ipv4){
360 			.hdr = {
361 				.src_addr = -1,
362 				.dst_addr = -1,
363 				.type_of_service = -1,
364 				.next_proto_id = -1,
365 			},
366 		},
367 		.default_mask = &rte_flow_item_ipv4_mask,
368 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
369 		.convert = mlx5_flow_create_ipv4,
370 		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
371 	},
372 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
373 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
374 			       RTE_FLOW_ITEM_TYPE_TCP,
375 			       RTE_FLOW_ITEM_TYPE_GRE),
376 		.actions = valid_actions,
377 		.mask = &(const struct rte_flow_item_ipv6){
378 			.hdr = {
379 				.src_addr = {
380 					0xff, 0xff, 0xff, 0xff,
381 					0xff, 0xff, 0xff, 0xff,
382 					0xff, 0xff, 0xff, 0xff,
383 					0xff, 0xff, 0xff, 0xff,
384 				},
385 				.dst_addr = {
386 					0xff, 0xff, 0xff, 0xff,
387 					0xff, 0xff, 0xff, 0xff,
388 					0xff, 0xff, 0xff, 0xff,
389 					0xff, 0xff, 0xff, 0xff,
390 				},
391 				.vtc_flow = -1,
392 				.proto = -1,
393 				.hop_limits = -1,
394 			},
395 		},
396 		.default_mask = &rte_flow_item_ipv6_mask,
397 		.mask_sz = sizeof(struct rte_flow_item_ipv6),
398 		.convert = mlx5_flow_create_ipv6,
399 		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
400 	},
401 	[RTE_FLOW_ITEM_TYPE_UDP] = {
402 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
403 			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
404 		.actions = valid_actions,
405 		.mask = &(const struct rte_flow_item_udp){
406 			.hdr = {
407 				.src_port = -1,
408 				.dst_port = -1,
409 			},
410 		},
411 		.default_mask = &rte_flow_item_udp_mask,
412 		.mask_sz = sizeof(struct rte_flow_item_udp),
413 		.convert = mlx5_flow_create_udp,
414 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
415 	},
416 	[RTE_FLOW_ITEM_TYPE_TCP] = {
417 		.actions = valid_actions,
418 		.mask = &(const struct rte_flow_item_tcp){
419 			.hdr = {
420 				.src_port = -1,
421 				.dst_port = -1,
422 			},
423 		},
424 		.default_mask = &rte_flow_item_tcp_mask,
425 		.mask_sz = sizeof(struct rte_flow_item_tcp),
426 		.convert = mlx5_flow_create_tcp,
427 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
428 	},
429 	[RTE_FLOW_ITEM_TYPE_GRE] = {
430 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
431 			       RTE_FLOW_ITEM_TYPE_IPV4,
432 			       RTE_FLOW_ITEM_TYPE_IPV6),
433 		.actions = valid_actions,
434 		.mask = &(const struct rte_flow_item_gre){
435 			.protocol = -1,
436 		},
437 		.default_mask = &rte_flow_item_gre_mask,
438 		.mask_sz = sizeof(struct rte_flow_item_gre),
439 		.convert = mlx5_flow_create_gre,
440 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
441 	},
442 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
443 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
444 			       RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
445 			       RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
446 		.actions = valid_actions,
447 		.mask = &(const struct rte_flow_item_vxlan){
448 			.vni = "\xff\xff\xff",
449 		},
450 		.default_mask = &rte_flow_item_vxlan_mask,
451 		.mask_sz = sizeof(struct rte_flow_item_vxlan),
452 		.convert = mlx5_flow_create_vxlan,
453 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
454 	},
455 	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = {
456 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
457 			       RTE_FLOW_ITEM_TYPE_IPV4,
458 			       RTE_FLOW_ITEM_TYPE_IPV6),
459 		.actions = valid_actions,
460 		.mask = &(const struct rte_flow_item_vxlan_gpe){
461 			.vni = "\xff\xff\xff",
462 		},
463 		.default_mask = &rte_flow_item_vxlan_gpe_mask,
464 		.mask_sz = sizeof(struct rte_flow_item_vxlan_gpe),
465 		.convert = mlx5_flow_create_vxlan_gpe,
466 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
467 	},
468 };
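/*
 * Editorial example (not in the original source): a pattern such as
 *
 *   eth / ipv4 / udp / vxlan / end
 *
 * is accepted because each item appears in the .items list of its
 * predecessor, starting from RTE_FLOW_ITEM_TYPE_END: END -> ETH ->
 * IPV4 -> UDP -> VXLAN. While walking this graph the parser adds each
 * item's .dst_sz to the Verbs attribute size, then calls .convert
 * (mlx5_flow_create_eth(), mlx5_flow_create_ipv4(), ...) to fill the
 * specifications.
 */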
469 
470 /** Structure to pass to the conversion function. */
471 struct mlx5_flow_parse {
472 	uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
473 	uint32_t create:1;
474 	/**< Whether resources should remain after a validate. */
475 	uint32_t drop:1; /**< Target is a drop queue. */
476 	uint32_t mark:1; /**< Mark is present in the flow. */
477 	uint32_t count:1; /**< Count is present in the flow. */
478 	uint32_t mark_id; /**< Mark identifier. */
479 	struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
480 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
481 	uint8_t rss_key[40]; /**< Copy of the RSS key. */
482 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
483 	enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
484 	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
485 	struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
486 	struct {
487 		struct ibv_flow_attr *ibv_attr;
488 		/**< Pointer to Verbs attributes. */
489 		unsigned int offset;
490 		/**< Current position or total size of the attribute. */
491 		uint64_t hash_fields; /**< Verbs hash fields. */
492 	} queue[RTE_DIM(hash_rxq_init)];
493 };
494 
495 static const struct rte_flow_ops mlx5_flow_ops = {
496 	.validate = mlx5_flow_validate,
497 	.create = mlx5_flow_create,
498 	.destroy = mlx5_flow_destroy,
499 	.flush = mlx5_flow_flush,
500 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
501 	.query = mlx5_flow_query,
502 #else
503 	.query = NULL,
504 #endif
505 	.isolate = mlx5_flow_isolate,
506 };
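/*
 * Editorial note (not in the original source): applications never call
 * these handlers directly; in this DPDK revision rte_flow_validate(),
 * rte_flow_create() and friends reach them through the ethdev
 * filter_ctrl hook (RTE_ETH_FILTER_GENERIC), which hands back this
 * mlx5_flow_ops table.
 */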
507 
508 /* Intermediate structure to convert an FDIR request to a generic flow. */
509 struct mlx5_fdir {
510 	struct rte_flow_attr attr;
511 	struct rte_flow_action actions[2];
512 	struct rte_flow_item items[4];
513 	struct rte_flow_item_eth l2;
514 	struct rte_flow_item_eth l2_mask;
515 	union {
516 		struct rte_flow_item_ipv4 ipv4;
517 		struct rte_flow_item_ipv6 ipv6;
518 	} l3;
519 	union {
520 		struct rte_flow_item_ipv4 ipv4;
521 		struct rte_flow_item_ipv6 ipv6;
522 	} l3_mask;
523 	union {
524 		struct rte_flow_item_udp udp;
525 		struct rte_flow_item_tcp tcp;
526 	} l4;
527 	union {
528 		struct rte_flow_item_udp udp;
529 		struct rte_flow_item_tcp tcp;
530 	} l4_mask;
531 	struct rte_flow_action_queue queue;
532 };
533 
534 /* Verbs specification header. */
535 struct ibv_spec_header {
536 	enum ibv_flow_spec_type type;
537 	uint16_t size;
538 };
539 
540 /**
541  * Check whether an item is fully supported by the NIC matching capability.
542  *
543  * @param[in] item
544  *   Item specification.
545  * @param[in] mask
546  *   Bit-masks covering supported fields to compare with spec, last and mask in
547  *   \item.
548  * @param size
549  *   Bit-mask size in bytes.
550  *
551  * @return
552  *   0 on success, a negative errno value otherwise and rte_errno is set.
553  */
554 static int
555 mlx5_flow_item_validate(const struct rte_flow_item *item,
556 			const uint8_t *mask, unsigned int size)
557 {
558 	unsigned int i;
559 	const uint8_t *spec = item->spec;
560 	const uint8_t *last = item->last;
561 	const uint8_t *m = item->mask ? item->mask : mask;
562 
563 	if (!spec && (item->mask || last))
564 		goto error;
565 	if (!spec)
566 		return 0;
567 	/*
568 	 * Single-pass check to make sure that:
569 	 * - item->mask is supported, no bits are set outside mask.
570 	 * - Both masked item->spec and item->last are equal (no range
571 	 *   supported).
572 	 */
573 	for (i = 0; i < size; i++) {
574 		if (!m[i])
575 			continue;
576 		if ((m[i] | mask[i]) != mask[i])
577 			goto error;
578 		if (last && ((spec[i] & m[i]) != (last[i] & m[i])))
579 			goto error;
580 	}
581 	return 0;
582 error:
583 	rte_errno = ENOTSUP;
584 	return -rte_errno;
585 }
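/*
 * Editorial example: the caller passes the per-item supported mask from
 * mlx5_flow_items[]. An item without a spec is accepted as a wildcard
 * (unless it still carries a mask or last field); an item->mask setting
 * bits outside the supported mask, or a spec/last pair describing a
 * real range once masked, fails with ENOTSUP since ranges are not
 * supported.
 */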
586 
587 /**
588  * Validate flow rule attributes.
589  *
590  * @param[in] attr
591  *   Flow rule attributes.
592  * @param[out] error
593  *   Perform verbose error reporting if not NULL.
594  *
595  * @return
596  *   0 on success, a negative errno value otherwise and rte_errno is set.
597  */
598 static int
599 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
600 			     struct rte_flow_error *error)
601 {
602 	if (attr->group) {
603 		rte_flow_error_set(error, ENOTSUP,
604 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
605 				   NULL,
606 				   "groups are not supported");
607 		return -rte_errno;
608 	}
609 	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
610 		rte_flow_error_set(error, ENOTSUP,
611 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
612 				   NULL,
613 				   "priorities are not supported");
614 		return -rte_errno;
615 	}
616 	if (attr->egress) {
617 		rte_flow_error_set(error, ENOTSUP,
618 				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
619 				   NULL,
620 				   "egress is not supported");
621 		return -rte_errno;
622 	}
623 	if (attr->transfer) {
624 		rte_flow_error_set(error, ENOTSUP,
625 				   RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
626 				   NULL,
627 				   "transfer is not supported");
628 		return -rte_errno;
629 	}
630 	if (!attr->ingress) {
631 		rte_flow_error_set(error, ENOTSUP,
632 				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
633 				   NULL,
634 				   "only ingress is supported");
635 		return -rte_errno;
636 	}
637 	return 0;
638 }
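/*
 * Editorial example: only plain ingress rules pass this check, i.e. an
 * attribute set such as
 *
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *
 * (optionally with .priority = MLX5_CTRL_FLOW_PRIORITY) validates,
 * while any group, egress or transfer attribute is rejected with
 * ENOTSUP.
 */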
639 
640 /**
641  * Extract the requested actions into the parser.
642  *
643  * @param dev
644  *   Pointer to Ethernet device.
645  * @param[in] actions
646  *   Associated actions (list terminated by the END action).
647  * @param[out] error
648  *   Perform verbose error reporting if not NULL.
649  * @param[in, out] parser
650  *   Internal parser structure.
651  *
652  * @return
653  *   0 on success, a negative errno value otherwise and rte_errno is set.
654  */
655 static int
656 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
657 			  const struct rte_flow_action actions[],
658 			  struct rte_flow_error *error,
659 			  struct mlx5_flow_parse *parser)
660 {
661 	enum { FATE = 1, MARK = 2, COUNT = 4, };
662 	uint32_t overlap = 0;
663 	struct priv *priv = dev->data->dev_private;
664 
665 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
666 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
667 			continue;
668 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
669 			if (overlap & FATE)
670 				goto exit_action_overlap;
671 			overlap |= FATE;
672 			parser->drop = 1;
673 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
674 			const struct rte_flow_action_queue *queue =
675 				(const struct rte_flow_action_queue *)
676 				actions->conf;
677 
678 			if (overlap & FATE)
679 				goto exit_action_overlap;
680 			overlap |= FATE;
681 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
682 				goto exit_action_not_supported;
683 			parser->queues[0] = queue->index;
684 			parser->rss_conf = (struct rte_flow_action_rss){
685 				.queue_num = 1,
686 				.queue = parser->queues,
687 			};
688 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
689 			const struct rte_flow_action_rss *rss =
690 				(const struct rte_flow_action_rss *)
691 				actions->conf;
692 			const uint8_t *rss_key;
693 			uint32_t rss_key_len;
694 			uint16_t n;
695 
696 			if (overlap & FATE)
697 				goto exit_action_overlap;
698 			overlap |= FATE;
699 			if (rss->func &&
700 			    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
701 				rte_flow_error_set(error, EINVAL,
702 						   RTE_FLOW_ERROR_TYPE_ACTION,
703 						   actions,
704 						   "the only supported RSS hash"
705 						   " function is Toeplitz");
706 				return -rte_errno;
707 			}
708 #ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
709 			if (parser->rss_conf.level > 1) {
710 				rte_flow_error_set(error, EINVAL,
711 						   RTE_FLOW_ERROR_TYPE_ACTION,
712 						   actions,
713 						   "a nonzero RSS encapsulation"
714 						   " level is not supported");
715 				return -rte_errno;
716 			}
717 #endif
718 			if (parser->rss_conf.level > 2) {
719 				rte_flow_error_set(error, EINVAL,
720 						   RTE_FLOW_ERROR_TYPE_ACTION,
721 						   actions,
722 						   "RSS encapsulation level"
723 						   " > 1 is not supported");
724 				return -rte_errno;
725 			}
726 			if (rss->types & MLX5_RSS_HF_MASK) {
727 				rte_flow_error_set(error, EINVAL,
728 						   RTE_FLOW_ERROR_TYPE_ACTION,
729 						   actions,
730 						   "unsupported RSS type"
731 						   " requested");
732 				return -rte_errno;
733 			}
734 			if (rss->key_len) {
735 				rss_key_len = rss->key_len;
736 				rss_key = rss->key;
737 			} else {
738 				rss_key_len = rss_hash_default_key_len;
739 				rss_key = rss_hash_default_key;
740 			}
741 			if (rss_key_len != RTE_DIM(parser->rss_key)) {
742 				rte_flow_error_set(error, EINVAL,
743 						   RTE_FLOW_ERROR_TYPE_ACTION,
744 						   actions,
745 						   "RSS hash key must be"
746 						   " exactly 40 bytes long");
747 				return -rte_errno;
748 			}
749 			if (!rss->queue_num) {
750 				rte_flow_error_set(error, EINVAL,
751 						   RTE_FLOW_ERROR_TYPE_ACTION,
752 						   actions,
753 						   "no valid queues");
754 				return -rte_errno;
755 			}
756 			if (rss->queue_num > RTE_DIM(parser->queues)) {
757 				rte_flow_error_set(error, EINVAL,
758 						   RTE_FLOW_ERROR_TYPE_ACTION,
759 						   actions,
760 						   "too many queues for RSS"
761 						   " context");
762 				return -rte_errno;
763 			}
764 			for (n = 0; n < rss->queue_num; ++n) {
765 				if (rss->queue[n] >= priv->rxqs_n) {
766 					rte_flow_error_set(error, EINVAL,
767 						   RTE_FLOW_ERROR_TYPE_ACTION,
768 						   actions,
769 						   "queue id > number of"
770 						   " queues");
771 					return -rte_errno;
772 				}
773 			}
774 			parser->rss_conf = (struct rte_flow_action_rss){
775 				.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
776 				.level = rss->level,
777 				.types = rss->types,
778 				.key_len = rss_key_len,
779 				.queue_num = rss->queue_num,
780 				.key = memcpy(parser->rss_key, rss_key,
781 					      sizeof(*rss_key) * rss_key_len),
782 				.queue = memcpy(parser->queues, rss->queue,
783 						sizeof(*rss->queue) *
784 						rss->queue_num),
785 			};
786 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
787 			const struct rte_flow_action_mark *mark =
788 				(const struct rte_flow_action_mark *)
789 				actions->conf;
790 
791 			if (overlap & MARK)
792 				goto exit_action_overlap;
793 			overlap |= MARK;
794 			if (!mark) {
795 				rte_flow_error_set(error, EINVAL,
796 						   RTE_FLOW_ERROR_TYPE_ACTION,
797 						   actions,
798 						   "mark must be defined");
799 				return -rte_errno;
800 			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
801 				rte_flow_error_set(error, ENOTSUP,
802 						   RTE_FLOW_ERROR_TYPE_ACTION,
803 						   actions,
804 						   "mark must be between 0"
805 						   " and 16777199");
806 				return -rte_errno;
807 			}
808 			parser->mark = 1;
809 			parser->mark_id = mark->id;
810 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
811 			if (overlap & MARK)
812 				goto exit_action_overlap;
813 			overlap |= MARK;
814 			parser->mark = 1;
815 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
816 			   priv->config.flow_counter_en) {
817 			if (overlap & COUNT)
818 				goto exit_action_overlap;
819 			overlap |= COUNT;
820 			parser->count = 1;
821 		} else {
822 			goto exit_action_not_supported;
823 		}
824 	}
825 	/* When fate is unknown, drop traffic. */
826 	if (!(overlap & FATE))
827 		parser->drop = 1;
828 	if (parser->drop && parser->mark)
829 		parser->mark = 0;
830 	if (!parser->rss_conf.queue_num && !parser->drop) {
831 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
832 				   NULL, "no valid action");
833 		return -rte_errno;
834 	}
835 	return 0;
836 exit_action_not_supported:
837 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
838 			   actions, "action not supported");
839 	return -rte_errno;
840 exit_action_overlap:
841 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
842 			   actions, "overlapping actions are not supported");
843 	return -rte_errno;
844 }
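/*
 * Editorial example (not in the original source): a typical accepted
 * action list is
 *
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_MARK,
 *             .conf = &(struct rte_flow_action_mark){ .id = 42 } },
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *             .conf = &(struct rte_flow_action_queue){ .index = 0 } },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *
 * One fate action (DROP, QUEUE or RSS) may be combined with MARK/FLAG
 * and COUNT; duplicates of the same class are rejected as overlapping,
 * and a rule with no fate action at all defaults to drop.
 */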
845 
846 /**
847  * Validate items.
848  *
849  * @param[in] items
850  *   Pattern specification (list terminated by the END pattern item).
851  * @param[out] error
852  *   Perform verbose error reporting if not NULL.
853  * @param[in, out] parser
854  *   Internal parser structure.
855  *
856  * @return
857  *   0 on success, a negative errno value otherwise and rte_errno is set.
858  */
859 static int
860 mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
861 				 const struct rte_flow_item items[],
862 				 struct rte_flow_error *error,
863 				 struct mlx5_flow_parse *parser)
864 {
865 	struct priv *priv = dev->data->dev_private;
866 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
867 	unsigned int i;
868 	int ret = 0;
869 
870 	/* Initialise the offsets to start after verbs attribute. */
871 	for (i = 0; i != hash_rxq_init_n; ++i)
872 		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
873 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
874 		const struct mlx5_flow_items *token = NULL;
875 		unsigned int n;
876 
877 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
878 			continue;
879 		for (i = 0;
880 		     cur_item->items &&
881 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
882 		     ++i) {
883 			if (cur_item->items[i] == items->type) {
884 				token = &mlx5_flow_items[items->type];
885 				break;
886 			}
887 		}
888 		if (!token) {
889 			ret = -ENOTSUP;
890 			goto exit_item_not_supported;
891 		}
892 		cur_item = token;
893 		ret = mlx5_flow_item_validate(items,
894 					      (const uint8_t *)cur_item->mask,
895 					      cur_item->mask_sz);
896 		if (ret)
897 			goto exit_item_not_supported;
898 		if (IS_TUNNEL(items->type)) {
899 			if (parser->tunnel) {
900 				rte_flow_error_set(error, ENOTSUP,
901 						   RTE_FLOW_ERROR_TYPE_ITEM,
902 						   items,
903 						   "Cannot recognize multiple"
904 						   " tunnel encapsulations.");
905 				return -rte_errno;
906 			}
907 			if (!priv->config.tunnel_en &&
908 			    parser->rss_conf.level > 1) {
909 				rte_flow_error_set(error, ENOTSUP,
910 					RTE_FLOW_ERROR_TYPE_ITEM,
911 					items,
912 					"RSS on tunnel is not supported");
913 				return -rte_errno;
914 			}
915 			parser->inner = IBV_FLOW_SPEC_INNER;
916 			parser->tunnel = flow_ptype[items->type];
917 		}
918 		if (parser->drop) {
919 			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
920 		} else {
921 			for (n = 0; n != hash_rxq_init_n; ++n)
922 				parser->queue[n].offset += cur_item->dst_sz;
923 		}
924 	}
925 	if (parser->drop) {
926 		parser->queue[HASH_RXQ_ETH].offset +=
927 			sizeof(struct ibv_flow_spec_action_drop);
928 	}
929 	if (parser->mark) {
930 		for (i = 0; i != hash_rxq_init_n; ++i)
931 			parser->queue[i].offset +=
932 				sizeof(struct ibv_flow_spec_action_tag);
933 	}
934 	if (parser->count) {
935 		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
936 
937 		for (i = 0; i != hash_rxq_init_n; ++i)
938 			parser->queue[i].offset += size;
939 	}
940 	return 0;
941 exit_item_not_supported:
942 	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
943 				  items, "item not supported");
944 }
945 
946 /**
947  * Allocate memory space to store verbs flow attributes.
948  *
949  * @param[in] size
950  *   Amount of bytes to allocate.
951  * @param[out] error
952  *   Perform verbose error reporting if not NULL.
953  *
954  * @return
955  *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
956  */
957 static struct ibv_flow_attr *
958 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
959 {
960 	struct ibv_flow_attr *ibv_attr;
961 
962 	ibv_attr = rte_calloc(__func__, 1, size, 0);
963 	if (!ibv_attr) {
964 		rte_flow_error_set(error, ENOMEM,
965 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
966 				   NULL,
967 				   "cannot allocate verbs spec attributes");
968 		return NULL;
969 	}
970 	return ibv_attr;
971 }
972 
973 /**
974  * Give inner packet matching a higher priority than non-inner (outer)
975  * matching.
976  *
977  * @param dev
978  *   Pointer to Ethernet device.
979  * @param[in, out] parser
980  *   Internal parser structure.
981  * @param attr
982  *   User flow attribute.
983  */
984 static void
985 mlx5_flow_update_priority(struct rte_eth_dev *dev,
986 			  struct mlx5_flow_parse *parser,
987 			  const struct rte_flow_attr *attr)
988 {
989 	struct priv *priv = dev->data->dev_private;
990 	unsigned int i;
991 	uint16_t priority;
992 
993 	/*			8 priorities	>= 16 priorities
994 	 * Control flow:	4-7		8-15
995 	 * User normal flow:	1-3		4-7
996 	 * User tunnel flow:	0-2		0-3
997 	 */
998 	priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
999 	if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1000 		priority /= 2;
1001 	/*
1002 	 * Lower non-tunnel flows by 1 Verbs priority level if only 8 Verbs
1003 	 * priorities are supported, by 4 levels otherwise.
1004 	 */
1005 	if (!parser->inner) {
1006 		if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1007 			priority += 1;
1008 		else
1009 			priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
1010 	}
1011 	if (parser->drop) {
1012 		parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
1013 				hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1014 		return;
1015 	}
1016 	for (i = 0; i != hash_rxq_init_n; ++i) {
1017 		if (!parser->queue[i].ibv_attr)
1018 			continue;
1019 		parser->queue[i].ibv_attr->priority = priority +
1020 				hash_rxq_init[i].flow_priority;
1021 	}
1022 }
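/*
 * Editorial example of the arithmetic above: on a device exposing 16
 * Verbs priorities, a non-tunnel user rule with attr->priority == 0
 * starts at 0 * 8 = 0, is demoted by 4, and then spreads over Verbs
 * priorities 4-6 depending on hash_rxq_init[].flow_priority (0 for L4,
 * 1 for L3, 2 for the L2 hash type), which matches the
 * "User normal flow: 4-7" row of the table above.
 */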
1023 
1024 /**
1025  * Finalise verbs flow attributes.
1026  *
1027  * @param[in, out] parser
1028  *   Internal parser structure.
1029  */
1030 static void
1031 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
1032 {
1033 	unsigned int i;
1034 	uint32_t inner = parser->inner;
1035 
1036 	/* Don't create extra flows for outer RSS. */
1037 	if (parser->tunnel && parser->rss_conf.level < 2)
1038 		return;
1039 	/*
1040 	 * Fill missing layers in verbs specifications, or compute the correct
1041 	 * offset to allocate the memory space for the attributes and
1042 	 * specifications.
1043 	 */
1044 	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1045 		union {
1046 			struct ibv_flow_spec_ipv4_ext ipv4;
1047 			struct ibv_flow_spec_ipv6 ipv6;
1048 			struct ibv_flow_spec_tcp_udp udp_tcp;
1049 			struct ibv_flow_spec_eth eth;
1050 		} specs;
1051 		void *dst;
1052 		uint16_t size;
1053 
1054 		if (i == parser->layer)
1055 			continue;
1056 		if (parser->layer == HASH_RXQ_ETH ||
1057 		    parser->layer == HASH_RXQ_TUNNEL) {
1058 			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1059 				size = sizeof(struct ibv_flow_spec_ipv4_ext);
1060 				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1061 					.type = inner | IBV_FLOW_SPEC_IPV4_EXT,
1062 					.size = size,
1063 				};
1064 			} else {
1065 				size = sizeof(struct ibv_flow_spec_ipv6);
1066 				specs.ipv6 = (struct ibv_flow_spec_ipv6){
1067 					.type = inner | IBV_FLOW_SPEC_IPV6,
1068 					.size = size,
1069 				};
1070 			}
1071 			if (parser->queue[i].ibv_attr) {
1072 				dst = (void *)((uintptr_t)
1073 					       parser->queue[i].ibv_attr +
1074 					       parser->queue[i].offset);
1075 				memcpy(dst, &specs, size);
1076 				++parser->queue[i].ibv_attr->num_of_specs;
1077 			}
1078 			parser->queue[i].offset += size;
1079 		}
1080 		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1081 		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1082 			size = sizeof(struct ibv_flow_spec_tcp_udp);
1083 			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1084 				.type = inner | ((i == HASH_RXQ_UDPV4 ||
1085 					  i == HASH_RXQ_UDPV6) ?
1086 					 IBV_FLOW_SPEC_UDP :
1087 					 IBV_FLOW_SPEC_TCP),
1088 				.size = size,
1089 			};
1090 			if (parser->queue[i].ibv_attr) {
1091 				dst = (void *)((uintptr_t)
1092 					       parser->queue[i].ibv_attr +
1093 					       parser->queue[i].offset);
1094 				memcpy(dst, &specs, size);
1095 				++parser->queue[i].ibv_attr->num_of_specs;
1096 			}
1097 			parser->queue[i].offset += size;
1098 		}
1099 	}
1100 }
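/*
 * Editorial note: for an "eth / end" pattern (parser->layer ==
 * HASH_RXQ_ETH), the loop above adds wildcard IPv4/IPv6 and TCP/UDP
 * specifications (or, before allocation, only accounts for their
 * sizes) so that every hash Rx queue type except HASH_RXQ_ETH still
 * describes a complete protocol stack.
 */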
1101 
1102 /**
1103  * Update flows according to pattern and RSS hash fields.
1104  *
1105  * @param[in, out] parser
1106  *   Internal parser structure.
1107  *
1108  * @return
1109  *   0 on success, a negative errno value otherwise and rte_errno is set.
1110  */
1111 static int
1112 mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
1113 {
1114 	unsigned int i;
1115 	enum hash_rxq_type start;
1116 	enum hash_rxq_type layer;
1117 	int outer = parser->tunnel && parser->rss_conf.level < 2;
1118 	uint64_t rss = parser->rss_conf.types;
1119 
1120 	/* Default to outer RSS. */
1121 	if (!parser->rss_conf.level)
1122 		parser->rss_conf.level = 1;
1123 	layer = outer ? parser->out_layer : parser->layer;
1124 	if (layer == HASH_RXQ_TUNNEL)
1125 		layer = HASH_RXQ_ETH;
1126 	if (outer) {
1127 		/* Only one hash type for outer RSS. */
1128 		if (rss && layer == HASH_RXQ_ETH) {
1129 			start = HASH_RXQ_TCPV4;
1130 		} else if (rss && layer != HASH_RXQ_ETH &&
1131 			   !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
1132 			/* If RSS does not match the L4 pattern, try L3 RSS. */
1133 			if (layer < HASH_RXQ_IPV4)
1134 				layer = HASH_RXQ_IPV4;
1135 			else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
1136 				layer = HASH_RXQ_IPV6;
1137 			start = layer;
1138 		} else {
1139 			start = layer;
1140 		}
1141 		/* Scan first valid hash type. */
1142 		for (i = start; rss && i <= layer; ++i) {
1143 			if (!parser->queue[i].ibv_attr)
1144 				continue;
1145 			if (hash_rxq_init[i].dpdk_rss_hf & rss)
1146 				break;
1147 		}
1148 		if (rss && i <= layer)
1149 			parser->queue[layer].hash_fields =
1150 					hash_rxq_init[i].hash_fields;
1151 		/* Trim unused hash types. */
1152 		for (i = 0; i != hash_rxq_init_n; ++i) {
1153 			if (parser->queue[i].ibv_attr && i != layer) {
1154 				rte_free(parser->queue[i].ibv_attr);
1155 				parser->queue[i].ibv_attr = NULL;
1156 			}
1157 		}
1158 	} else {
1159 		/* Expand for inner or normal RSS. */
1160 		if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
1161 			start = HASH_RXQ_TCPV4;
1162 		else if (rss && layer == HASH_RXQ_IPV6)
1163 			start = HASH_RXQ_TCPV6;
1164 		else
1165 			start = layer;
1166 		/* For L4 pattern, try L3 RSS if no L4 RSS. */
1167 		/* Trim unused hash types. */
1168 		for (i = 0; i != hash_rxq_init_n; ++i) {
1169 			if (!parser->queue[i].ibv_attr)
1170 				continue;
1171 			if (i < start || i > layer) {
1172 				rte_free(parser->queue[i].ibv_attr);
1173 				parser->queue[i].ibv_attr = NULL;
1174 				continue;
1175 			}
1176 			if (!rss)
1177 				continue;
1178 			if (hash_rxq_init[i].dpdk_rss_hf & rss) {
1179 				parser->queue[i].hash_fields =
1180 						hash_rxq_init[i].hash_fields;
1181 			} else if (i != layer) {
1182 				/* Remove unused RSS expansion. */
1183 				rte_free(parser->queue[i].ibv_attr);
1184 				parser->queue[i].ibv_attr = NULL;
1185 			} else if (layer < HASH_RXQ_IPV4 &&
1186 				   (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &
1187 				    rss)) {
1188 				/* Allow IPv4 RSS on L4 pattern. */
1189 				parser->queue[i].hash_fields =
1190 					hash_rxq_init[HASH_RXQ_IPV4]
1191 						.hash_fields;
1192 			} else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
1193 				   (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &
1194 				    rss)) {
1195 				/* Allow IPv6 RSS on L4 pattern. */
1196 				parser->queue[i].hash_fields =
1197 					hash_rxq_init[HASH_RXQ_IPV6]
1198 						.hash_fields;
1199 			}
1200 		}
1201 	}
1202 	return 0;
1203 }
1204 
1205 /**
1206  * Validate and convert a flow supported by the NIC.
1207  *
1208  * @param dev
1209  *   Pointer to Ethernet device.
1210  * @param[in] attr
1211  *   Flow rule attributes.
1212  * @param[in] pattern
1213  *   Pattern specification (list terminated by the END pattern item).
1214  * @param[in] actions
1215  *   Associated actions (list terminated by the END action).
1216  * @param[out] error
1217  *   Perform verbose error reporting if not NULL.
1218  * @param[in, out] parser
1219  *   Internal parser structure.
1220  *
1221  * @return
1222  *   0 on success, a negative errno value otherwise and rte_errno is set.
1223  */
1224 static int
1225 mlx5_flow_convert(struct rte_eth_dev *dev,
1226 		  const struct rte_flow_attr *attr,
1227 		  const struct rte_flow_item items[],
1228 		  const struct rte_flow_action actions[],
1229 		  struct rte_flow_error *error,
1230 		  struct mlx5_flow_parse *parser)
1231 {
1232 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1233 	unsigned int i;
1234 	int ret;
1235 
1236 	/* First step. Validate the attributes, items and actions. */
1237 	*parser = (struct mlx5_flow_parse){
1238 		.create = parser->create,
1239 		.layer = HASH_RXQ_ETH,
1240 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
1241 	};
1242 	ret = mlx5_flow_convert_attributes(attr, error);
1243 	if (ret)
1244 		return ret;
1245 	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1246 	if (ret)
1247 		return ret;
1248 	ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
1249 	if (ret)
1250 		return ret;
1251 	mlx5_flow_convert_finalise(parser);
1252 	/*
1253 	 * Second step.
1254 	 * Allocate the memory space to store verbs specifications.
1255 	 */
1256 	if (parser->drop) {
1257 		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1258 
1259 		parser->queue[HASH_RXQ_ETH].ibv_attr =
1260 			mlx5_flow_convert_allocate(offset, error);
1261 		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1262 			goto exit_enomem;
1263 		parser->queue[HASH_RXQ_ETH].offset =
1264 			sizeof(struct ibv_flow_attr);
1265 	} else {
1266 		for (i = 0; i != hash_rxq_init_n; ++i) {
1267 			unsigned int offset;
1268 
1269 			offset = parser->queue[i].offset;
1270 			parser->queue[i].ibv_attr =
1271 				mlx5_flow_convert_allocate(offset, error);
1272 			if (!parser->queue[i].ibv_attr)
1273 				goto exit_enomem;
1274 			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1275 		}
1276 	}
1277 	/* Third step. Convert each item and fill the specifications. */
1278 	parser->inner = 0;
1279 	parser->tunnel = 0;
1280 	parser->layer = HASH_RXQ_ETH;
1281 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1282 		struct mlx5_flow_data data = {
1283 			.dev = dev,
1284 			.parser = parser,
1285 			.error = error,
1286 		};
1287 
1288 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1289 			continue;
1290 		cur_item = &mlx5_flow_items[items->type];
1291 		ret = cur_item->convert(items,
1292 					(cur_item->default_mask ?
1293 					 cur_item->default_mask :
1294 					 cur_item->mask),
1295 					 &data);
1296 		if (ret)
1297 			goto exit_free;
1298 	}
1299 	if (!parser->drop) {
1300 		/* RSS check, remove unused hash types. */
1301 		ret = mlx5_flow_convert_rss(parser);
1302 		if (ret)
1303 			goto exit_free;
1304 		/* Complete missing specification. */
1305 		mlx5_flow_convert_finalise(parser);
1306 	}
1307 	mlx5_flow_update_priority(dev, parser, attr);
1308 	if (parser->mark)
1309 		mlx5_flow_create_flag_mark(parser, parser->mark_id);
1310 	if (parser->count && parser->create) {
1311 		mlx5_flow_create_count(dev, parser);
1312 		if (!parser->cs)
1313 			goto exit_count_error;
1314 	}
1315 exit_free:
1316 	/* Only verification is expected, all resources should be released. */
1317 	if (!parser->create) {
1318 		for (i = 0; i != hash_rxq_init_n; ++i) {
1319 			if (parser->queue[i].ibv_attr) {
1320 				rte_free(parser->queue[i].ibv_attr);
1321 				parser->queue[i].ibv_attr = NULL;
1322 			}
1323 		}
1324 	}
1325 	return ret;
1326 exit_enomem:
1327 	for (i = 0; i != hash_rxq_init_n; ++i) {
1328 		if (parser->queue[i].ibv_attr) {
1329 			rte_free(parser->queue[i].ibv_attr);
1330 			parser->queue[i].ibv_attr = NULL;
1331 		}
1332 	}
1333 	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1334 			   NULL, "cannot allocate verbs spec attributes");
1335 	return -rte_errno;
1336 exit_count_error:
1337 	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1338 			   NULL, "cannot create counter");
1339 	return -rte_errno;
1340 }
1341 
1342 /**
1343  * Copy the created specification into every Verbs flow attribute.
1344  *
1345  * @param parser
1346  *   Internal parser structure.
1347  * @param src
1348  *   Specification to copy.
1349  * @param size
1350  *   Size in bytes of the specification to copy.
1351  */
1352 static void
1353 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1354 		      unsigned int size)
1355 {
1356 	unsigned int i;
1357 	void *dst;
1358 
1359 	for (i = 0; i != hash_rxq_init_n; ++i) {
1360 		if (!parser->queue[i].ibv_attr)
1361 			continue;
1362 		dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1363 				parser->queue[i].offset);
1364 		memcpy(dst, src, size);
1365 		++parser->queue[i].ibv_attr->num_of_specs;
1366 		parser->queue[i].offset += size;
1367 	}
1368 }
1369 
1370 /**
1371  * Convert Ethernet item to Verbs specification.
1372  *
1373  * @param[in] item
1374  *   Item specification.
1375  * @param[in] default_mask
1376  *   Default bit-masks to use when item->mask is not provided.
1377  * @param[in, out] data
1378  *   User structure.
1379  *
1380  * @return
1381  *   0 on success, a negative errno value otherwise and rte_errno is set.
1382  */
1383 static int
1384 mlx5_flow_create_eth(const struct rte_flow_item *item,
1385 		     const void *default_mask,
1386 		     struct mlx5_flow_data *data)
1387 {
1388 	const struct rte_flow_item_eth *spec = item->spec;
1389 	const struct rte_flow_item_eth *mask = item->mask;
1390 	struct mlx5_flow_parse *parser = data->parser;
1391 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1392 	struct ibv_flow_spec_eth eth = {
1393 		.type = parser->inner | IBV_FLOW_SPEC_ETH,
1394 		.size = eth_size,
1395 	};
1396 
1397 	parser->layer = HASH_RXQ_ETH;
1398 	if (spec) {
1399 		unsigned int i;
1400 
1401 		if (!mask)
1402 			mask = default_mask;
1403 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1404 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1405 		eth.val.ether_type = spec->type;
1406 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1407 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1408 		eth.mask.ether_type = mask->type;
1409 		/* Remove unwanted bits from values. */
1410 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1411 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1412 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
1413 		}
1414 		eth.val.ether_type &= eth.mask.ether_type;
1415 	}
1416 	mlx5_flow_create_copy(parser, &eth, eth_size);
1417 	return 0;
1418 }
1419 
1420 /**
1421  * Convert VLAN item to Verbs specification.
1422  *
1423  * @param[in] item
1424  *   Item specification.
1425  * @param[in] default_mask
1426  *   Default bit-masks to use when item->mask is not provided.
1427  * @param[in, out] data
1428  *   User structure.
1429  *
1430  * @return
1431  *   0 on success, a negative errno value otherwise and rte_errno is set.
1432  */
1433 static int
1434 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1435 		      const void *default_mask,
1436 		      struct mlx5_flow_data *data)
1437 {
1438 	const struct rte_flow_item_vlan *spec = item->spec;
1439 	const struct rte_flow_item_vlan *mask = item->mask;
1440 	struct mlx5_flow_parse *parser = data->parser;
1441 	struct ibv_flow_spec_eth *eth;
1442 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1443 	const char *msg = "VLAN cannot be empty";
1444 
1445 	if (spec) {
1446 		unsigned int i;
1447 		if (!mask)
1448 			mask = default_mask;
1449 
1450 		for (i = 0; i != hash_rxq_init_n; ++i) {
1451 			if (!parser->queue[i].ibv_attr)
1452 				continue;
1453 
1454 			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1455 				       parser->queue[i].offset - eth_size);
1456 			eth->val.vlan_tag = spec->tci;
1457 			eth->mask.vlan_tag = mask->tci;
1458 			eth->val.vlan_tag &= eth->mask.vlan_tag;
1459 			/*
1460 			 * From the Verbs perspective, an empty VLAN is
1461 			 * equivalent to a packet without a VLAN layer.
1462 			 */
1463 			if (!eth->mask.vlan_tag)
1464 				goto error;
1465 			/* Outer TPID cannot be matched. */
1466 			if (eth->mask.ether_type) {
1467 				msg = "VLAN TPID matching is not supported";
1468 				goto error;
1469 			}
1470 			eth->val.ether_type = spec->inner_type;
1471 			eth->mask.ether_type = mask->inner_type;
1472 			eth->val.ether_type &= eth->mask.ether_type;
1473 		}
1474 		return 0;
1475 	}
1476 error:
1477 	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1478 				  item, msg);
1479 }
1480 
1481 /**
1482  * Convert IPv4 item to Verbs specification.
1483  *
1484  * @param[in] item
1485  *   Item specification.
1486  * @param[in] default_mask
1487  *   Default bit-masks to use when item->mask is not provided.
1488  * @param[in, out] data
1489  *   User structure.
1490  *
1491  * @return
1492  *   0 on success, a negative errno value otherwise and rte_errno is set.
1493  */
1494 static int
1495 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1496 		      const void *default_mask,
1497 		      struct mlx5_flow_data *data)
1498 {
1499 	struct priv *priv = data->dev->data->dev_private;
1500 	const struct rte_flow_item_ipv4 *spec = item->spec;
1501 	const struct rte_flow_item_ipv4 *mask = item->mask;
1502 	struct mlx5_flow_parse *parser = data->parser;
1503 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1504 	struct ibv_flow_spec_ipv4_ext ipv4 = {
1505 		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1506 		.size = ipv4_size,
1507 	};
1508 
1509 	if (parser->layer == HASH_RXQ_TUNNEL &&
1510 	    parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1511 	    !priv->config.l3_vxlan_en)
1512 		return rte_flow_error_set(data->error, EINVAL,
1513 					  RTE_FLOW_ERROR_TYPE_ITEM,
1514 					  item,
1515 					  "L3 VXLAN not enabled by device"
1516 					  " parameter and/or not configured"
1517 					  " in firmware");
1518 	parser->layer = HASH_RXQ_IPV4;
1519 	if (spec) {
1520 		if (!mask)
1521 			mask = default_mask;
1522 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1523 			.src_ip = spec->hdr.src_addr,
1524 			.dst_ip = spec->hdr.dst_addr,
1525 			.proto = spec->hdr.next_proto_id,
1526 			.tos = spec->hdr.type_of_service,
1527 		};
1528 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1529 			.src_ip = mask->hdr.src_addr,
1530 			.dst_ip = mask->hdr.dst_addr,
1531 			.proto = mask->hdr.next_proto_id,
1532 			.tos = mask->hdr.type_of_service,
1533 		};
1534 		/* Remove unwanted bits from values. */
1535 		ipv4.val.src_ip &= ipv4.mask.src_ip;
1536 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1537 		ipv4.val.proto &= ipv4.mask.proto;
1538 		ipv4.val.tos &= ipv4.mask.tos;
1539 	}
1540 	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1541 	return 0;
1542 }
1543 
1544 /**
1545  * Convert IPv6 item to Verbs specification.
1546  *
1547  * @param[in] item
1548  *   Item specification.
1549  * @param[in] default_mask
1550  *   Default bit-masks to use when item->mask is not provided.
1551  * @param[in, out] data
1552  *   User structure.
1553  *
1554  * @return
1555  *   0 on success, a negative errno value otherwise and rte_errno is set.
1556  */
1557 static int
1558 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1559 		      const void *default_mask,
1560 		      struct mlx5_flow_data *data)
1561 {
1562 	struct priv *priv = data->dev->data->dev_private;
1563 	const struct rte_flow_item_ipv6 *spec = item->spec;
1564 	const struct rte_flow_item_ipv6 *mask = item->mask;
1565 	struct mlx5_flow_parse *parser = data->parser;
1566 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1567 	struct ibv_flow_spec_ipv6 ipv6 = {
1568 		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
1569 		.size = ipv6_size,
1570 	};
1571 
1572 	if (parser->layer == HASH_RXQ_TUNNEL &&
1573 	    parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1574 	    !priv->config.l3_vxlan_en)
1575 		return rte_flow_error_set(data->error, EINVAL,
1576 					  RTE_FLOW_ERROR_TYPE_ITEM,
1577 					  item,
1578 					  "L3 VXLAN not enabled by device"
1579 					  " parameter and/or not configured"
1580 					  " in firmware");
1581 	parser->layer = HASH_RXQ_IPV6;
1582 	if (spec) {
1583 		unsigned int i;
1584 		uint32_t vtc_flow_val;
1585 		uint32_t vtc_flow_mask;
1586 
1587 		if (!mask)
1588 			mask = default_mask;
1589 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1590 		       RTE_DIM(ipv6.val.src_ip));
1591 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1592 		       RTE_DIM(ipv6.val.dst_ip));
1593 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1594 		       RTE_DIM(ipv6.mask.src_ip));
1595 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1596 		       RTE_DIM(ipv6.mask.dst_ip));
1597 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1598 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1599 		ipv6.val.flow_label =
1600 			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1601 					 IPV6_HDR_FL_SHIFT);
1602 		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1603 					 IPV6_HDR_TC_SHIFT;
1604 		ipv6.val.next_hdr = spec->hdr.proto;
1605 		ipv6.val.hop_limit = spec->hdr.hop_limits;
1606 		ipv6.mask.flow_label =
1607 			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1608 					 IPV6_HDR_FL_SHIFT);
1609 		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1610 					  IPV6_HDR_TC_SHIFT;
1611 		ipv6.mask.next_hdr = mask->hdr.proto;
1612 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
1613 		/* Remove unwanted bits from values. */
1614 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1615 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1616 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1617 		}
1618 		ipv6.val.flow_label &= ipv6.mask.flow_label;
1619 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1620 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1621 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1622 	}
1623 	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1624 	return 0;
1625 }
1626 
1627 /**
1628  * Convert UDP item to Verbs specification.
1629  *
1630  * @param[in] item
1631  *   Item specification.
1632  * @param[in] default_mask
1633  *   Default bit-masks to use when item->mask is not provided.
1634  * @param[in, out] data
1635  *   User structure.
1636  *
1637  * @return
1638  *   0 on success, a negative errno value otherwise and rte_errno is set.
1639  */
1640 static int
1641 mlx5_flow_create_udp(const struct rte_flow_item *item,
1642 		     const void *default_mask,
1643 		     struct mlx5_flow_data *data)
1644 {
1645 	const struct rte_flow_item_udp *spec = item->spec;
1646 	const struct rte_flow_item_udp *mask = item->mask;
1647 	struct mlx5_flow_parse *parser = data->parser;
1648 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1649 	struct ibv_flow_spec_tcp_udp udp = {
1650 		.type = parser->inner | IBV_FLOW_SPEC_UDP,
1651 		.size = udp_size,
1652 	};
1653 
1654 	if (parser->layer == HASH_RXQ_IPV4)
1655 		parser->layer = HASH_RXQ_UDPV4;
1656 	else
1657 		parser->layer = HASH_RXQ_UDPV6;
1658 	if (spec) {
1659 		if (!mask)
1660 			mask = default_mask;
1661 		udp.val.dst_port = spec->hdr.dst_port;
1662 		udp.val.src_port = spec->hdr.src_port;
1663 		udp.mask.dst_port = mask->hdr.dst_port;
1664 		udp.mask.src_port = mask->hdr.src_port;
1665 		/* Remove unwanted bits from values. */
1666 		udp.val.src_port &= udp.mask.src_port;
1667 		udp.val.dst_port &= udp.mask.dst_port;
1668 	}
1669 	mlx5_flow_create_copy(parser, &udp, udp_size);
1670 	return 0;
1671 }
1672 
1673 /**
1674  * Convert TCP item to Verbs specification.
1675  *
1676  * @param[in] item
1677  *   Item specification.
1678  * @param[in] default_mask
1679  *   Default bit-masks to use when item->mask is not provided.
1680  * @param[in, out] data
1681  *   User structure.
1682  *
1683  * @return
1684  *   0 on success, a negative errno value otherwise and rte_errno is set.
1685  */
1686 static int
1687 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1688 		     const void *default_mask,
1689 		     struct mlx5_flow_data *data)
1690 {
1691 	const struct rte_flow_item_tcp *spec = item->spec;
1692 	const struct rte_flow_item_tcp *mask = item->mask;
1693 	struct mlx5_flow_parse *parser = data->parser;
1694 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1695 	struct ibv_flow_spec_tcp_udp tcp = {
1696 		.type = parser->inner | IBV_FLOW_SPEC_TCP,
1697 		.size = tcp_size,
1698 	};
1699 
1700 	if (parser->layer == HASH_RXQ_IPV4)
1701 		parser->layer = HASH_RXQ_TCPV4;
1702 	else
1703 		parser->layer = HASH_RXQ_TCPV6;
1704 	if (spec) {
1705 		if (!mask)
1706 			mask = default_mask;
1707 		tcp.val.dst_port = spec->hdr.dst_port;
1708 		tcp.val.src_port = spec->hdr.src_port;
1709 		tcp.mask.dst_port = mask->hdr.dst_port;
1710 		tcp.mask.src_port = mask->hdr.src_port;
1711 		/* Remove unwanted bits from values. */
1712 		tcp.val.src_port &= tcp.mask.src_port;
1713 		tcp.val.dst_port &= tcp.mask.dst_port;
1714 	}
1715 	mlx5_flow_create_copy(parser, &tcp, tcp_size);
1716 	return 0;
1717 }
1718 
1719 /**
1720  * Convert VXLAN item to Verbs specification.
1721  *
1722  * @param[in] item
1723  *   Item specification.
1724  * @param[in] default_mask
1725  *   Default bit-masks to use when item->mask is not provided.
1726  * @param[in, out] data
1727  *   User structure.
1728  *
1729  * @return
1730  *   0 on success, a negative errno value otherwise and rte_errno is set.
1731  */
1732 static int
1733 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1734 		       const void *default_mask,
1735 		       struct mlx5_flow_data *data)
1736 {
1737 	const struct rte_flow_item_vxlan *spec = item->spec;
1738 	const struct rte_flow_item_vxlan *mask = item->mask;
1739 	struct mlx5_flow_parse *parser = data->parser;
1740 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1741 	struct ibv_flow_spec_tunnel vxlan = {
1742 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1743 		.size = size,
1744 	};
1745 	union vni {
1746 		uint32_t vlan_id;
1747 		uint8_t vni[4];
1748 	} id;
1749 
1750 	id.vni[0] = 0;
1751 	parser->inner = IBV_FLOW_SPEC_INNER;
1752 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
1753 	parser->out_layer = parser->layer;
1754 	parser->layer = HASH_RXQ_TUNNEL;
1755 	/* Default VXLAN to outer RSS. */
1756 	if (!parser->rss_conf.level)
1757 		parser->rss_conf.level = 1;
1758 	if (spec) {
1759 		if (!mask)
1760 			mask = default_mask;
1761 		memcpy(&id.vni[1], spec->vni, 3);
1762 		vxlan.val.tunnel_id = id.vlan_id;
1763 		memcpy(&id.vni[1], mask->vni, 3);
1764 		vxlan.mask.tunnel_id = id.vlan_id;
1765 		/* Remove unwanted bits from values. */
1766 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1767 	}
1768 	/*
1769 	 * Tunnel ID 0 is equivalent to not adding a VXLAN layer: if only this
1770 	 * layer is defined in the Verbs specification, it is interpreted as a
1771 	 * wildcard and all packets will match this rule; if it follows a full
1772 	 * stack layer (e.g. eth / ipv4 / udp), all packets matching the
1773 	 * preceding layers will also match this rule.
1774 	 * To avoid such a situation, VNI 0 is currently refused.
1775 	 */
1776 	/* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1777 	if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1778 		return rte_flow_error_set(data->error, EINVAL,
1779 					  RTE_FLOW_ERROR_TYPE_ITEM,
1780 					  item,
1781 					  "VxLAN vni cannot be 0");
1782 	mlx5_flow_create_copy(parser, &vxlan, size);
1783 	return 0;
1784 }
1785 
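/*
 * Editor's note: sketch of the VNI packing done above.  The 24-bit VNI
 * fills bytes 1..3 of the union while byte 0 stays zero, so the aliased
 * 32-bit value can be written verbatim to tunnel_id.  The VNI value is
 * hypothetical.
 */
#if 0
	const uint8_t item_vni[3] = { 0x00, 0x00, 0x2a }; /* VNI 42. */
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} ex = { .vlan_id = 0 };

	memcpy(&ex.vni[1], item_vni, 3);
	/* ex.vlan_id now holds what ends up in vxlan.val.tunnel_id. */
#endif
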
1786 /**
1787  * Convert VXLAN-GPE item to Verbs specification.
1788  *
1789  * @param[in] item
1790  *   Item specification.
1791  * @param[in] default_mask
1792  *   Default bit-masks to use when item->mask is not provided.
1793  * @param[in, out] data
1794  *   User structure.
1795  *
1796  * @return
1797  *   0 on success, a negative errno value otherwise and rte_errno is set.
1798  */
1799 static int
1800 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
1801 			   const void *default_mask,
1802 			   struct mlx5_flow_data *data)
1803 {
1804 	struct priv *priv = data->dev->data->dev_private;
1805 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1806 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1807 	struct mlx5_flow_parse *parser = data->parser;
1808 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1809 	struct ibv_flow_spec_tunnel vxlan = {
1810 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1811 		.size = size,
1812 	};
1813 	union vni {
1814 		uint32_t vlan_id;
1815 		uint8_t vni[4];
1816 	} id;
1817 
1818 	if (!priv->config.l3_vxlan_en)
1819 		return rte_flow_error_set(data->error, EINVAL,
1820 					  RTE_FLOW_ERROR_TYPE_ITEM,
1821 					  item,
1822 					  "L3 VXLAN not enabled by device"
1823 					  " parameter and/or not configured"
1824 					  " in firmware");
1825 	id.vni[0] = 0;
1826 	parser->inner = IBV_FLOW_SPEC_INNER;
1827 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)];
1828 	parser->out_layer = parser->layer;
1829 	parser->layer = HASH_RXQ_TUNNEL;
1830 	/* Default VXLAN-GPE to outer RSS. */
1831 	if (!parser->rss_conf.level)
1832 		parser->rss_conf.level = 1;
1833 	if (spec) {
1834 		if (!mask)
1835 			mask = default_mask;
1836 		memcpy(&id.vni[1], spec->vni, 3);
1837 		vxlan.val.tunnel_id = id.vlan_id;
1838 		memcpy(&id.vni[1], mask->vni, 3);
1839 		vxlan.mask.tunnel_id = id.vlan_id;
1840 		if (spec->protocol)
1841 			return rte_flow_error_set(data->error, EINVAL,
1842 						  RTE_FLOW_ERROR_TYPE_ITEM,
1843 						  item,
1844 						  "VxLAN-GPE protocol not"
1845 						  " supported");
1846 		/* Remove unwanted bits from values. */
1847 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1848 	}
1849 	/*
1850 	 * Tunnel ID 0 is equivalent to not adding a VXLAN layer: if only this
1851 	 * layer is defined in the Verbs specification, it is interpreted as a
1852 	 * wildcard and all packets will match this rule; if it follows a full
1853 	 * stack layer (e.g. eth / ipv4 / udp), all packets matching the
1854 	 * preceding layers will also match this rule.
1855 	 * To avoid such a situation, VNI 0 is currently refused.
1856 	 */
1857 	/* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1858 	if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1859 		return rte_flow_error_set(data->error, EINVAL,
1860 					  RTE_FLOW_ERROR_TYPE_ITEM,
1861 					  item,
1862 					  "VxLAN-GPE vni cannot be 0");
1863 	mlx5_flow_create_copy(parser, &vxlan, size);
1864 	return 0;
1865 }
1866 
1867 /**
1868  * Convert GRE item to Verbs specification.
1869  *
1870  * @param[in] item
1871  *   Item specification.
1872  * @param[in] default_mask
1873  *   Default bit-masks to use when item->mask is not provided.
1874  * @param[in, out] data
1875  *   User structure.
1876  *
1877  * @return
1878  *   0 on success, a negative errno value otherwise and rte_errno is set.
1879  */
1880 static int
1881 mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
1882 		     const void *default_mask __rte_unused,
1883 		     struct mlx5_flow_data *data)
1884 {
1885 	struct mlx5_flow_parse *parser = data->parser;
1886 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1887 	struct ibv_flow_spec_tunnel tunnel = {
1888 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1889 		.size = size,
1890 	};
1891 	struct ibv_flow_spec_ipv4_ext *ipv4;
1892 	struct ibv_flow_spec_ipv6 *ipv6;
1893 	unsigned int i;
1894 
1895 	parser->inner = IBV_FLOW_SPEC_INNER;
1896 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
1897 	parser->out_layer = parser->layer;
1898 	parser->layer = HASH_RXQ_TUNNEL;
1899 	/* Default GRE to inner RSS. */
1900 	if (!parser->rss_conf.level)
1901 		parser->rss_conf.level = 2;
1902 	/* Update encapsulation IP layer protocol. */
1903 	for (i = 0; i != hash_rxq_init_n; ++i) {
1904 		if (!parser->queue[i].ibv_attr)
1905 			continue;
1906 		if (parser->out_layer == HASH_RXQ_IPV4) {
1907 			ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1908 				parser->queue[i].offset -
1909 				sizeof(struct ibv_flow_spec_ipv4_ext));
1910 			if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
1911 				break;
1912 			ipv4->val.proto = MLX5_GRE;
1913 			ipv4->mask.proto = 0xff;
1914 		} else if (parser->out_layer == HASH_RXQ_IPV6) {
1915 			ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1916 				parser->queue[i].offset -
1917 				sizeof(struct ibv_flow_spec_ipv6));
1918 			if (ipv6->mask.next_hdr &&
1919 			    ipv6->val.next_hdr != MLX5_GRE)
1920 				break;
1921 			ipv6->val.next_hdr = MLX5_GRE;
1922 			ipv6->mask.next_hdr = 0xff;
1923 		}
1924 	}
1925 	if (i != hash_rxq_init_n)
1926 		return rte_flow_error_set(data->error, EINVAL,
1927 					  RTE_FLOW_ERROR_TYPE_ITEM,
1928 					  item,
1929 					  "IP protocol of GRE must be 47");
1930 	mlx5_flow_create_copy(parser, &tunnel, size);
1931 	return 0;
1932 }
1933 
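/*
 * Editor's illustration: a pattern reaching mlx5_flow_create_gre() above.
 * No GRE spec/mask is required; the converter only forces the outer IP
 * protocol to 47 (MLX5_GRE) in the already-built L3 specification.
 */
#if 0
	const struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_GRE },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
#endif
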
1934 /**
1935  * Convert mark/flag action to Verbs specification.
1936  *
1937  * @param parser
1938  *   Internal parser structure.
1939  * @param mark_id
1940  *   Mark identifier.
1941  *
1942  * @return
1943  *   0 on success, a negative errno value otherwise and rte_errno is set.
1944  */
1945 static int
1946 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1947 {
1948 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1949 	struct ibv_flow_spec_action_tag tag = {
1950 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1951 		.size = size,
1952 		.tag_id = mlx5_flow_mark_set(mark_id),
1953 	};
1954 
1955 	assert(parser->mark);
1956 	mlx5_flow_create_copy(parser, &tag, size);
1957 	return 0;
1958 }
1959 
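/*
 * Editor's illustration: the MARK action whose identifier ends up in
 * mlx5_flow_create_flag_mark() above (the FLAG action goes through the
 * same helper with a default identifier).  The mark value is
 * hypothetical; matching packets report it in mbuf->hash.fdir.hi with
 * the PKT_RX_FDIR_ID flag set.
 */
#if 0
	const struct rte_flow_action_mark mark = { .id = 42 };
	const struct rte_flow_action action = {
		.type = RTE_FLOW_ACTION_TYPE_MARK,
		.conf = &mark,
	};
#endif
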
1960 /**
1961  * Convert count action to Verbs specification.
1962  *
1963  * @param dev
1964  *   Pointer to Ethernet device.
1965  * @param parser
1966  *   Pointer to MLX5 flow parser structure.
1967  *
1968  * @return
1969  *   0 on success, a negative errno value otherwise and rte_errno is set.
1970  */
1971 static int
1972 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1973 		       struct mlx5_flow_parse *parser __rte_unused)
1974 {
1975 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1976 	struct priv *priv = dev->data->dev_private;
1977 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1978 	struct ibv_counter_set_init_attr init_attr = {0};
1979 	struct ibv_flow_spec_counter_action counter = {
1980 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
1981 		.size = size,
1982 		.counter_set_handle = 0,
1983 	};
1984 
1985 	init_attr.counter_set_id = 0;
1986 	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1987 	if (!parser->cs) {
1988 		rte_errno = EINVAL;
1989 		return -rte_errno;
1990 	}
1991 	counter.counter_set_handle = parser->cs->handle;
1992 	mlx5_flow_create_copy(parser, &counter, size);
1993 #endif
1994 	return 0;
1995 }
1996 
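/*
 * Editor's illustration: the COUNT action that makes the parser invoke
 * mlx5_flow_create_count() above (effective only when Verbs counter-set
 * support is compiled in).  The counter can later be read back through
 * rte_flow_query().  The queue index is hypothetical.
 */
#if 0
	const struct rte_flow_action_queue queue_conf = { .index = 0 };
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_COUNT },
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue_conf },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
#endif
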
1997 /**
1998  * Complete flow rule creation with a drop queue.
1999  *
2000  * @param dev
2001  *   Pointer to Ethernet device.
2002  * @param parser
2003  *   Internal parser structure.
2004  * @param flow
2005  *   Pointer to the rte_flow.
2006  * @param[out] error
2007  *   Perform verbose error reporting if not NULL.
2008  *
2009  * @return
2010  *   0 on success, a negative errno value otherwise and rte_errno is set.
2011  */
2012 static int
2013 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
2014 				   struct mlx5_flow_parse *parser,
2015 				   struct rte_flow *flow,
2016 				   struct rte_flow_error *error)
2017 {
2018 	struct priv *priv = dev->data->dev_private;
2019 	struct ibv_flow_spec_action_drop *drop;
2020 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
2021 
2022 	assert(priv->pd);
2023 	assert(priv->ctx);
2024 	flow->drop = 1;
2025 	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
2026 			parser->queue[HASH_RXQ_ETH].offset);
2027 	*drop = (struct ibv_flow_spec_action_drop){
2028 			.type = IBV_FLOW_SPEC_ACTION_DROP,
2029 			.size = size,
2030 	};
2031 	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
2032 	parser->queue[HASH_RXQ_ETH].offset += size;
2033 	flow->frxq[HASH_RXQ_ETH].ibv_attr =
2034 		parser->queue[HASH_RXQ_ETH].ibv_attr;
2035 	if (parser->count)
2036 		flow->cs = parser->cs;
2037 	if (!priv->dev->data->dev_started)
2038 		return 0;
2039 	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
2040 	flow->frxq[HASH_RXQ_ETH].ibv_flow =
2041 		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
2042 				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
2043 	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2044 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
2045 				   NULL, "flow rule creation failure");
2046 		goto error;
2047 	}
2048 	return 0;
2049 error:
2050 	assert(flow);
2051 	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2052 		claim_zero(mlx5_glue->destroy_flow
2053 			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2054 		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2055 	}
2056 	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
2057 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2058 		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
2059 	}
2060 	if (flow->cs) {
2061 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2062 		flow->cs = NULL;
2063 		parser->cs = NULL;
2064 	}
2065 	return -rte_errno;
2066 }
2067 
2068 /**
2069  * Create hash Rx queues when RSS is enabled.
2070  *
2071  * @param dev
2072  *   Pointer to Ethernet device.
2073  * @param parser
2074  *   Internal parser structure.
2075  * @param flow
2076  *   Pointer to the rte_flow.
2077  * @param[out] error
2078  *   Perform verbose error reporting if not NULL.
2079  *
2080  * @return
2081  *   0 on success, a negative errno value otherwise and rte_errno is set.
2082  */
2083 static int
2084 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
2085 				  struct mlx5_flow_parse *parser,
2086 				  struct rte_flow *flow,
2087 				  struct rte_flow_error *error)
2088 {
2089 	struct priv *priv = dev->data->dev_private;
2090 	unsigned int i;
2091 
2092 	for (i = 0; i != hash_rxq_init_n; ++i) {
2093 		if (!parser->queue[i].ibv_attr)
2094 			continue;
2095 		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
2096 		parser->queue[i].ibv_attr = NULL;
2097 		flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
2098 		if (!priv->dev->data->dev_started)
2099 			continue;
2100 		flow->frxq[i].hrxq =
2101 			mlx5_hrxq_get(dev,
2102 				      parser->rss_conf.key,
2103 				      parser->rss_conf.key_len,
2104 				      flow->frxq[i].hash_fields,
2105 				      parser->rss_conf.queue,
2106 				      parser->rss_conf.queue_num,
2107 				      parser->tunnel,
2108 				      parser->rss_conf.level);
2109 		if (flow->frxq[i].hrxq)
2110 			continue;
2111 		flow->frxq[i].hrxq =
2112 			mlx5_hrxq_new(dev,
2113 				      parser->rss_conf.key,
2114 				      parser->rss_conf.key_len,
2115 				      flow->frxq[i].hash_fields,
2116 				      parser->rss_conf.queue,
2117 				      parser->rss_conf.queue_num,
2118 				      parser->tunnel,
2119 				      parser->rss_conf.level);
2120 		if (!flow->frxq[i].hrxq) {
2121 			return rte_flow_error_set(error, ENOMEM,
2122 						  RTE_FLOW_ERROR_TYPE_HANDLE,
2123 						  NULL,
2124 						  "cannot create hash rxq");
2125 		}
2126 	}
2127 	return 0;
2128 }
2129 
2130 /**
2131  * RXQ update after flow rule creation.
2132  *
2133  * @param dev
2134  *   Pointer to Ethernet device.
2135  * @param flow
2136  *   Pointer to the flow rule.
2137  */
2138 static void
2139 mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
2140 {
2141 	struct priv *priv = dev->data->dev_private;
2142 	unsigned int i;
2143 	unsigned int j;
2144 
2145 	if (!dev->data->dev_started)
2146 		return;
2147 	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2148 		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2149 						 [(*flow->queues)[i]];
2150 		struct mlx5_rxq_ctrl *rxq_ctrl =
2151 			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2152 		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2153 
2154 		rxq_data->mark |= flow->mark;
2155 		if (!tunnel)
2156 			continue;
2157 		rxq_ctrl->tunnel_types[tunnel] += 1;
2158 		/* Clear tunnel type if more than one tunnel type is set. */
2159 		for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
2160 			if (j == tunnel)
2161 				continue;
2162 			if (rxq_ctrl->tunnel_types[j] > 0) {
2163 				rxq_data->tunnel = 0;
2164 				break;
2165 			}
2166 		}
2167 		if (j == RTE_DIM(rxq_ctrl->tunnel_types))
2168 			rxq_data->tunnel = flow->tunnel;
2169 	}
2170 }
2171 
2172 /**
2173  * Dump flow hash RX queue detail.
2174  *
2175  * @param dev
2176  *   Pointer to Ethernet device.
2177  * @param flow
2178  *   Pointer to the rte_flow.
2179  * @param hrxq_idx
2180  *   Hash RX queue index.
2181  */
2182 static void
2183 mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused,
2184 	       struct rte_flow *flow __rte_unused,
2185 	       unsigned int hrxq_idx __rte_unused)
2186 {
2187 #ifndef NDEBUG
2188 	uintptr_t spec_ptr;
2189 	uint16_t j;
2190 	char buf[256];
2191 	uint8_t off;
2192 
2193 	spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1);
2194 	for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs;
2195 	     j++) {
2196 		struct ibv_flow_spec *spec = (void *)spec_ptr;
2197 		off += sprintf(buf + off, " %x(%hu)", spec->hdr.type,
2198 			       spec->hdr.size);
2199 		spec_ptr += spec->hdr.size;
2200 	}
2201 	DRV_LOG(DEBUG,
2202 		"port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p,"
2203 		" hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d,"
2204 		" flags:%x, comp_mask:%x specs:%s",
2205 		dev->data->port_id, (void *)flow, hrxq_idx,
2206 		(void *)flow->frxq[hrxq_idx].hrxq,
2207 		(void *)flow->frxq[hrxq_idx].hrxq->qp,
2208 		(void *)flow->frxq[hrxq_idx].hrxq->ind_table,
2209 		flow->frxq[hrxq_idx].hash_fields |
2210 		(flow->tunnel &&
2211 		 flow->rss_conf.level > 1 ? (uint32_t)IBV_RX_HASH_INNER : 0),
2212 		flow->rss_conf.queue_num,
2213 		flow->frxq[hrxq_idx].ibv_attr->num_of_specs,
2214 		flow->frxq[hrxq_idx].ibv_attr->size,
2215 		flow->frxq[hrxq_idx].ibv_attr->priority,
2216 		flow->frxq[hrxq_idx].ibv_attr->type,
2217 		flow->frxq[hrxq_idx].ibv_attr->flags,
2218 		flow->frxq[hrxq_idx].ibv_attr->comp_mask,
2219 		buf);
2220 #endif
2221 }
2222 
2223 /**
2224  * Complete flow rule creation.
2225  *
2226  * @param dev
2227  *   Pointer to Ethernet device.
2228  * @param parser
2229  *   Internal parser structure.
2230  * @param flow
2231  *   Pointer to the rte_flow.
2232  * @param[out] error
2233  *   Perform verbose error reporting if not NULL.
2234  *
2235  * @return
2236  *   0 on success, a negative errno value otherwise and rte_errno is set.
2237  */
2238 static int
2239 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
2240 			      struct mlx5_flow_parse *parser,
2241 			      struct rte_flow *flow,
2242 			      struct rte_flow_error *error)
2243 {
2244 	struct priv *priv = dev->data->dev_private;
2245 	int ret;
2246 	unsigned int i;
2247 	unsigned int flows_n = 0;
2248 
2249 	assert(priv->pd);
2250 	assert(priv->ctx);
2251 	assert(!parser->drop);
2252 	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
2253 	if (ret)
2254 		goto error;
2255 	if (parser->count)
2256 		flow->cs = parser->cs;
2257 	if (!priv->dev->data->dev_started)
2258 		return 0;
2259 	for (i = 0; i != hash_rxq_init_n; ++i) {
2260 		if (!flow->frxq[i].hrxq)
2261 			continue;
2262 		flow->frxq[i].ibv_flow =
2263 			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2264 					       flow->frxq[i].ibv_attr);
2265 		mlx5_flow_dump(dev, flow, i);
2266 		if (!flow->frxq[i].ibv_flow) {
2267 			rte_flow_error_set(error, ENOMEM,
2268 					   RTE_FLOW_ERROR_TYPE_HANDLE,
2269 					   NULL, "flow rule creation failure");
2270 			goto error;
2271 		}
2272 		++flows_n;
2273 	}
2274 	if (!flows_n) {
2275 		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
2276 				   NULL, "internal error in flow creation");
2277 		goto error;
2278 	}
2279 	mlx5_flow_create_update_rxqs(dev, flow);
2280 	return 0;
2281 error:
2282 	ret = rte_errno; /* Save rte_errno before cleanup. */
2283 	assert(flow);
2284 	for (i = 0; i != hash_rxq_init_n; ++i) {
2285 		if (flow->frxq[i].ibv_flow) {
2286 			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
2287 
2288 			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
2289 		}
2290 		if (flow->frxq[i].hrxq)
2291 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2292 		if (flow->frxq[i].ibv_attr)
2293 			rte_free(flow->frxq[i].ibv_attr);
2294 	}
2295 	if (flow->cs) {
2296 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2297 		flow->cs = NULL;
2298 		parser->cs = NULL;
2299 	}
2300 	rte_errno = ret; /* Restore rte_errno. */
2301 	return -rte_errno;
2302 }
2303 
2304 /**
2305  * Create a flow and register it in the given list.
2306  *
2307  * @param dev
2308  *   Pointer to Ethernet device.
2309  * @param list
2310  *   Pointer to a TAILQ flow list.
2311  * @param[in] attr
2312  *   Flow rule attributes.
2313  * @param[in] pattern
2314  *   Pattern specification (list terminated by the END pattern item).
2315  * @param[in] actions
2316  *   Associated actions (list terminated by the END action).
2317  * @param[out] error
2318  *   Perform verbose error reporting if not NULL.
2319  *
2320  * @return
2321  *   A flow on success, NULL otherwise and rte_errno is set.
2322  */
2323 static struct rte_flow *
2324 mlx5_flow_list_create(struct rte_eth_dev *dev,
2325 		      struct mlx5_flows *list,
2326 		      const struct rte_flow_attr *attr,
2327 		      const struct rte_flow_item items[],
2328 		      const struct rte_flow_action actions[],
2329 		      struct rte_flow_error *error)
2330 {
2331 	struct mlx5_flow_parse parser = { .create = 1, };
2332 	struct rte_flow *flow = NULL;
2333 	unsigned int i;
2334 	int ret;
2335 
2336 	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2337 	if (ret)
2338 		goto exit;
2339 	flow = rte_calloc(__func__, 1,
2340 			  sizeof(*flow) +
2341 			  parser.rss_conf.queue_num * sizeof(uint16_t),
2342 			  0);
2343 	if (!flow) {
2344 		rte_flow_error_set(error, ENOMEM,
2345 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2346 				   NULL,
2347 				   "cannot allocate flow memory");
2348 		return NULL;
2349 	}
2350 	/* Copy configuration. */
2351 	flow->queues = (uint16_t (*)[])(flow + 1);
2352 	flow->tunnel = parser.tunnel;
2353 	flow->rss_conf = (struct rte_flow_action_rss){
2354 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2355 		.level = 0,
2356 		.types = parser.rss_conf.types,
2357 		.key_len = parser.rss_conf.key_len,
2358 		.queue_num = parser.rss_conf.queue_num,
2359 		.key = memcpy(flow->rss_key, parser.rss_conf.key,
2360 			      sizeof(*parser.rss_conf.key) *
2361 			      parser.rss_conf.key_len),
2362 		.queue = memcpy(flow->queues, parser.rss_conf.queue,
2363 				sizeof(*parser.rss_conf.queue) *
2364 				parser.rss_conf.queue_num),
2365 	};
2366 	flow->mark = parser.mark;
2367 	/* Finalize the flow. */
2368 	if (parser.drop)
2369 		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
2370 							 error);
2371 	else
2372 		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
2373 	if (ret)
2374 		goto exit;
2375 	TAILQ_INSERT_TAIL(list, flow, next);
2376 	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
2377 		(void *)flow);
2378 	return flow;
2379 exit:
2380 	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
2381 		error->message);
2382 	for (i = 0; i != hash_rxq_init_n; ++i) {
2383 		if (parser.queue[i].ibv_attr)
2384 			rte_free(parser.queue[i].ibv_attr);
2385 	}
2386 	rte_free(flow);
2387 	return NULL;
2388 }
2389 
2390 /**
2391  * Validate a flow supported by the NIC.
2392  *
2393  * @see rte_flow_validate()
2394  * @see rte_flow_ops
2395  */
2396 int
2397 mlx5_flow_validate(struct rte_eth_dev *dev,
2398 		   const struct rte_flow_attr *attr,
2399 		   const struct rte_flow_item items[],
2400 		   const struct rte_flow_action actions[],
2401 		   struct rte_flow_error *error)
2402 {
2403 	struct mlx5_flow_parse parser = { .create = 0, };
2404 
2405 	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2406 }
2407 
2408 /**
2409  * Create a flow.
2410  *
2411  * @see rte_flow_create()
2412  * @see rte_flow_ops
2413  */
2414 struct rte_flow *
2415 mlx5_flow_create(struct rte_eth_dev *dev,
2416 		 const struct rte_flow_attr *attr,
2417 		 const struct rte_flow_item items[],
2418 		 const struct rte_flow_action actions[],
2419 		 struct rte_flow_error *error)
2420 {
2421 	struct priv *priv = dev->data->dev_private;
2422 
2423 	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2424 				     error);
2425 }
2426 
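/*
 * Editor's illustration: how an application typically exercises the
 * entry point above through the generic rte_flow API.  Port and queue
 * numbers are hypothetical.
 */
#if 0
	struct rte_flow_error err;
	const struct rte_flow_attr attr = { .ingress = 1 };
	const struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	const struct rte_flow_action_queue queue = { .index = 1 };
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow *f;

	f = rte_flow_create(0, &attr, pattern, actions, &err);
	/* On failure f is NULL, rte_errno is set and err.message (when
	 * provided) describes the offending object. */
#endif
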
2427 /**
2428  * Destroy a flow in a list.
2429  *
2430  * @param dev
2431  *   Pointer to Ethernet device.
2432  * @param list
2433  *   Pointer to a TAILQ flow list.
2434  * @param[in] flow
2435  *   Flow to destroy.
2436  */
2437 static void
2438 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2439 		       struct rte_flow *flow)
2440 {
2441 	struct priv *priv = dev->data->dev_private;
2442 	unsigned int i;
2443 
2444 	if (flow->drop || !dev->data->dev_started)
2445 		goto free;
2446 	for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
2447 		/* Update queue tunnel type. */
2448 		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2449 						 [(*flow->queues)[i]];
2450 		struct mlx5_rxq_ctrl *rxq_ctrl =
2451 			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2452 		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2453 
2454 		assert(rxq_ctrl->tunnel_types[tunnel] > 0);
2455 		rxq_ctrl->tunnel_types[tunnel] -= 1;
2456 		if (!rxq_ctrl->tunnel_types[tunnel]) {
2457 			/* Update tunnel type. */
2458 			uint8_t j;
2459 			uint8_t types = 0;
2460 			uint8_t last;
2461 
2462 			for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
2463 				if (rxq_ctrl->tunnel_types[j]) {
2464 					types += 1;
2465 					last = j;
2466 				}
2467 			/* Keep it unchanged if more than one tunnel type is left. */
2468 			if (types == 1)
2469 				rxq_data->tunnel = ptype_ext[last];
2470 			else if (types == 0)
2471 				/* No tunnel type left. */
2472 				rxq_data->tunnel = 0;
2473 		}
2474 	}
2475 	for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
2476 		struct rte_flow *tmp;
2477 		int mark = 0;
2478 
2479 		/*
2480 		 * To remove the mark from the queue, the queue must not be
2481 		 * present in any other marked flow (RSS or not).
2482 		 */
2483 		TAILQ_FOREACH(tmp, list, next) {
2484 			unsigned int j;
2485 			uint16_t *tqs = NULL;
2486 			uint16_t tq_n = 0;
2487 
2488 			if (!tmp->mark)
2489 				continue;
2490 			for (j = 0; j != hash_rxq_init_n; ++j) {
2491 				if (!tmp->frxq[j].hrxq)
2492 					continue;
2493 				tqs = tmp->frxq[j].hrxq->ind_table->queues;
2494 				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2495 			}
2496 			if (!tq_n)
2497 				continue;
2498 			for (j = 0; (j != tq_n) && !mark; j++)
2499 				if (tqs[j] == (*flow->queues)[i])
2500 					mark = 1;
2501 		}
2502 		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2503 	}
2504 free:
2505 	if (flow->drop) {
2506 		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2507 			claim_zero(mlx5_glue->destroy_flow
2508 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2509 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2510 	} else {
2511 		for (i = 0; i != hash_rxq_init_n; ++i) {
2512 			struct mlx5_flow *frxq = &flow->frxq[i];
2513 
2514 			if (frxq->ibv_flow)
2515 				claim_zero(mlx5_glue->destroy_flow
2516 					   (frxq->ibv_flow));
2517 			if (frxq->hrxq)
2518 				mlx5_hrxq_release(dev, frxq->hrxq);
2519 			if (frxq->ibv_attr)
2520 				rte_free(frxq->ibv_attr);
2521 		}
2522 	}
2523 	if (flow->cs) {
2524 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2525 		flow->cs = NULL;
2526 	}
2527 	TAILQ_REMOVE(list, flow, next);
2528 	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2529 		(void *)flow);
2530 	rte_free(flow);
2531 }
2532 
2533 /**
2534  * Destroy all flows.
2535  *
2536  * @param dev
2537  *   Pointer to Ethernet device.
2538  * @param list
2539  *   Pointer to a TAILQ flow list.
2540  */
2541 void
2542 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2543 {
2544 	while (!TAILQ_EMPTY(list)) {
2545 		struct rte_flow *flow;
2546 
2547 		flow = TAILQ_FIRST(list);
2548 		mlx5_flow_list_destroy(dev, list, flow);
2549 	}
2550 }
2551 
2552 /**
2553  * Create drop queue.
2554  *
2555  * @param dev
2556  *   Pointer to Ethernet device.
2557  *
2558  * @return
2559  *   0 on success, a negative errno value otherwise and rte_errno is set.
2560  */
2561 int
2562 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2563 {
2564 	struct priv *priv = dev->data->dev_private;
2565 	struct mlx5_hrxq_drop *fdq = NULL;
2566 
2567 	assert(priv->pd);
2568 	assert(priv->ctx);
2569 	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2570 	if (!fdq) {
2571 		DRV_LOG(WARNING,
2572 			"port %u cannot allocate memory for drop queue",
2573 			dev->data->port_id);
2574 		rte_errno = ENOMEM;
2575 		return -rte_errno;
2576 	}
2577 	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2578 	if (!fdq->cq) {
2579 		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2580 			dev->data->port_id);
2581 		rte_errno = errno;
2582 		goto error;
2583 	}
2584 	fdq->wq = mlx5_glue->create_wq
2585 		(priv->ctx,
2586 		 &(struct ibv_wq_init_attr){
2587 			.wq_type = IBV_WQT_RQ,
2588 			.max_wr = 1,
2589 			.max_sge = 1,
2590 			.pd = priv->pd,
2591 			.cq = fdq->cq,
2592 		 });
2593 	if (!fdq->wq) {
2594 		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2595 			dev->data->port_id);
2596 		rte_errno = errno;
2597 		goto error;
2598 	}
2599 	fdq->ind_table = mlx5_glue->create_rwq_ind_table
2600 		(priv->ctx,
2601 		 &(struct ibv_rwq_ind_table_init_attr){
2602 			.log_ind_tbl_size = 0,
2603 			.ind_tbl = &fdq->wq,
2604 			.comp_mask = 0,
2605 		 });
2606 	if (!fdq->ind_table) {
2607 		DRV_LOG(WARNING,
2608 			"port %u cannot allocate indirection table for drop"
2609 			" queue",
2610 			dev->data->port_id);
2611 		rte_errno = errno;
2612 		goto error;
2613 	}
2614 	fdq->qp = mlx5_glue->create_qp_ex
2615 		(priv->ctx,
2616 		 &(struct ibv_qp_init_attr_ex){
2617 			.qp_type = IBV_QPT_RAW_PACKET,
2618 			.comp_mask =
2619 				IBV_QP_INIT_ATTR_PD |
2620 				IBV_QP_INIT_ATTR_IND_TABLE |
2621 				IBV_QP_INIT_ATTR_RX_HASH,
2622 			.rx_hash_conf = (struct ibv_rx_hash_conf){
2623 				.rx_hash_function =
2624 					IBV_RX_HASH_FUNC_TOEPLITZ,
2625 				.rx_hash_key_len = rss_hash_default_key_len,
2626 				.rx_hash_key = rss_hash_default_key,
2627 				.rx_hash_fields_mask = 0,
2628 				},
2629 			.rwq_ind_tbl = fdq->ind_table,
2630 			.pd = priv->pd
2631 		 });
2632 	if (!fdq->qp) {
2633 		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2634 			dev->data->port_id);
2635 		rte_errno = errno;
2636 		goto error;
2637 	}
2638 	priv->flow_drop_queue = fdq;
2639 	return 0;
2640 error:
2641 	if (fdq->qp)
2642 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2643 	if (fdq->ind_table)
2644 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2645 	if (fdq->wq)
2646 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2647 	if (fdq->cq)
2648 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2649 	if (fdq)
2650 		rte_free(fdq);
2651 	priv->flow_drop_queue = NULL;
2652 	return -rte_errno;
2653 }
2654 
2655 /**
2656  * Delete drop queue.
2657  *
2658  * @param dev
2659  *   Pointer to Ethernet device.
2660  */
2661 void
2662 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2663 {
2664 	struct priv *priv = dev->data->dev_private;
2665 	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2666 
2667 	if (!fdq)
2668 		return;
2669 	if (fdq->qp)
2670 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2671 	if (fdq->ind_table)
2672 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2673 	if (fdq->wq)
2674 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2675 	if (fdq->cq)
2676 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2677 	rte_free(fdq);
2678 	priv->flow_drop_queue = NULL;
2679 }
2680 
2681 /**
2682  * Remove all flows.
2683  *
2684  * @param dev
2685  *   Pointer to Ethernet device.
2686  * @param list
2687  *   Pointer to a TAILQ flow list.
2688  */
2689 void
2690 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2691 {
2692 	struct priv *priv = dev->data->dev_private;
2693 	struct rte_flow *flow;
2694 	unsigned int i;
2695 
2696 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2697 		struct mlx5_ind_table_ibv *ind_tbl = NULL;
2698 
2699 		if (flow->drop) {
2700 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2701 				continue;
2702 			claim_zero(mlx5_glue->destroy_flow
2703 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2704 			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2705 			DRV_LOG(DEBUG, "port %u flow %p removed",
2706 				dev->data->port_id, (void *)flow);
2707 			/* Next flow. */
2708 			continue;
2709 		}
2710 		/* Verify the flow has not already been cleaned. */
2711 		for (i = 0; i != hash_rxq_init_n; ++i) {
2712 			if (!flow->frxq[i].ibv_flow)
2713 				continue;
2714 			/*
2715 			 * Indirection table may be necessary to remove the
2716 			 * flags in the Rx queues.
2717 			 * This helps to speed up the process by avoiding
2718 			 * another loop.
2719 			 */
2720 			ind_tbl = flow->frxq[i].hrxq->ind_table;
2721 			break;
2722 		}
2723 		if (i == hash_rxq_init_n)
2724 			return;
2725 		if (flow->mark) {
2726 			assert(ind_tbl);
2727 			for (i = 0; i != ind_tbl->queues_n; ++i)
2728 				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2729 		}
2730 		for (i = 0; i != hash_rxq_init_n; ++i) {
2731 			if (!flow->frxq[i].ibv_flow)
2732 				continue;
2733 			claim_zero(mlx5_glue->destroy_flow
2734 				   (flow->frxq[i].ibv_flow));
2735 			flow->frxq[i].ibv_flow = NULL;
2736 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2737 			flow->frxq[i].hrxq = NULL;
2738 		}
2739 		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2740 			(void *)flow);
2741 	}
2742 	/* Cleanup Rx queue tunnel info. */
2743 	for (i = 0; i != priv->rxqs_n; ++i) {
2744 		struct mlx5_rxq_data *q = (*priv->rxqs)[i];
2745 		struct mlx5_rxq_ctrl *rxq_ctrl =
2746 			container_of(q, struct mlx5_rxq_ctrl, rxq);
2747 
2748 		if (!q)
2749 			continue;
2750 		memset((void *)rxq_ctrl->tunnel_types, 0,
2751 		       sizeof(rxq_ctrl->tunnel_types));
2752 		q->tunnel = 0;
2753 	}
2754 }
2755 
2756 /**
2757  * Add all flows.
2758  *
2759  * @param dev
2760  *   Pointer to Ethernet device.
2761  * @param list
2762  *   Pointer to a TAILQ flow list.
2763  *
2764  * @return
2765  *   0 on success, a negative errno value otherwise and rte_errno is set.
2766  */
2767 int
2768 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2769 {
2770 	struct priv *priv = dev->data->dev_private;
2771 	struct rte_flow *flow;
2772 
2773 	TAILQ_FOREACH(flow, list, next) {
2774 		unsigned int i;
2775 
2776 		if (flow->drop) {
2777 			flow->frxq[HASH_RXQ_ETH].ibv_flow =
2778 				mlx5_glue->create_flow
2779 				(priv->flow_drop_queue->qp,
2780 				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2781 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2782 				DRV_LOG(DEBUG,
2783 					"port %u flow %p cannot be applied",
2784 					dev->data->port_id, (void *)flow);
2785 				rte_errno = EINVAL;
2786 				return -rte_errno;
2787 			}
2788 			DRV_LOG(DEBUG, "port %u flow %p applied",
2789 				dev->data->port_id, (void *)flow);
2790 			/* Next flow. */
2791 			continue;
2792 		}
2793 		for (i = 0; i != hash_rxq_init_n; ++i) {
2794 			if (!flow->frxq[i].ibv_attr)
2795 				continue;
2796 			flow->frxq[i].hrxq =
2797 				mlx5_hrxq_get(dev, flow->rss_conf.key,
2798 					      flow->rss_conf.key_len,
2799 					      flow->frxq[i].hash_fields,
2800 					      flow->rss_conf.queue,
2801 					      flow->rss_conf.queue_num,
2802 					      flow->tunnel,
2803 					      flow->rss_conf.level);
2804 			if (flow->frxq[i].hrxq)
2805 				goto flow_create;
2806 			flow->frxq[i].hrxq =
2807 				mlx5_hrxq_new(dev, flow->rss_conf.key,
2808 					      flow->rss_conf.key_len,
2809 					      flow->frxq[i].hash_fields,
2810 					      flow->rss_conf.queue,
2811 					      flow->rss_conf.queue_num,
2812 					      flow->tunnel,
2813 					      flow->rss_conf.level);
2814 			if (!flow->frxq[i].hrxq) {
2815 				DRV_LOG(DEBUG,
2816 					"port %u flow %p cannot create hash"
2817 					" rxq",
2818 					dev->data->port_id, (void *)flow);
2819 				rte_errno = EINVAL;
2820 				return -rte_errno;
2821 			}
2822 flow_create:
2823 			mlx5_flow_dump(dev, flow, i);
2824 			flow->frxq[i].ibv_flow =
2825 				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2826 						       flow->frxq[i].ibv_attr);
2827 			if (!flow->frxq[i].ibv_flow) {
2828 				DRV_LOG(DEBUG,
2829 					"port %u flow %p type %u cannot be"
2830 					" applied",
2831 					dev->data->port_id, (void *)flow, i);
2832 				rte_errno = EINVAL;
2833 				return -rte_errno;
2834 			}
2835 		}
2836 		mlx5_flow_create_update_rxqs(dev, flow);
2837 	}
2838 	return 0;
2839 }
2840 
2841 /**
2842  * Verify the flow list is empty.
2843  *
2844  * @param dev
2845  *  Pointer to Ethernet device.
2846  *
2847  * @return the number of flows not released.
2848  */
2849 int
2850 mlx5_flow_verify(struct rte_eth_dev *dev)
2851 {
2852 	struct priv *priv = dev->data->dev_private;
2853 	struct rte_flow *flow;
2854 	int ret = 0;
2855 
2856 	TAILQ_FOREACH(flow, &priv->flows, next) {
2857 		DRV_LOG(DEBUG, "port %u flow %p still referenced",
2858 			dev->data->port_id, (void *)flow);
2859 		++ret;
2860 	}
2861 	return ret;
2862 }
2863 
2864 /**
2865  * Enable a control flow configured from the control plane.
2866  *
2867  * @param dev
2868  *   Pointer to Ethernet device.
2869  * @param eth_spec
2870  *   An Ethernet flow spec to apply.
2871  * @param eth_mask
2872  *   An Ethernet flow mask to apply.
2873  * @param vlan_spec
2874  *   A VLAN flow spec to apply.
2875  * @param vlan_mask
2876  *   A VLAN flow mask to apply.
2877  *
2878  * @return
2879  *   0 on success, a negative errno value otherwise and rte_errno is set.
2880  */
2881 int
2882 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2883 		    struct rte_flow_item_eth *eth_spec,
2884 		    struct rte_flow_item_eth *eth_mask,
2885 		    struct rte_flow_item_vlan *vlan_spec,
2886 		    struct rte_flow_item_vlan *vlan_mask)
2887 {
2888 	struct priv *priv = dev->data->dev_private;
2889 	const struct rte_flow_attr attr = {
2890 		.ingress = 1,
2891 		.priority = MLX5_CTRL_FLOW_PRIORITY,
2892 	};
2893 	struct rte_flow_item items[] = {
2894 		{
2895 			.type = RTE_FLOW_ITEM_TYPE_ETH,
2896 			.spec = eth_spec,
2897 			.last = NULL,
2898 			.mask = eth_mask,
2899 		},
2900 		{
2901 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2902 				RTE_FLOW_ITEM_TYPE_END,
2903 			.spec = vlan_spec,
2904 			.last = NULL,
2905 			.mask = vlan_mask,
2906 		},
2907 		{
2908 			.type = RTE_FLOW_ITEM_TYPE_END,
2909 		},
2910 	};
2911 	uint16_t queue[priv->reta_idx_n];
2912 	struct rte_flow_action_rss action_rss = {
2913 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2914 		.level = 0,
2915 		.types = priv->rss_conf.rss_hf,
2916 		.key_len = priv->rss_conf.rss_key_len,
2917 		.queue_num = priv->reta_idx_n,
2918 		.key = priv->rss_conf.rss_key,
2919 		.queue = queue,
2920 	};
2921 	struct rte_flow_action actions[] = {
2922 		{
2923 			.type = RTE_FLOW_ACTION_TYPE_RSS,
2924 			.conf = &action_rss,
2925 		},
2926 		{
2927 			.type = RTE_FLOW_ACTION_TYPE_END,
2928 		},
2929 	};
2930 	struct rte_flow *flow;
2931 	struct rte_flow_error error;
2932 	unsigned int i;
2933 
2934 	if (!priv->reta_idx_n) {
2935 		rte_errno = EINVAL;
2936 		return -rte_errno;
2937 	}
2938 	for (i = 0; i != priv->reta_idx_n; ++i)
2939 		queue[i] = (*priv->reta_idx)[i];
2940 	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2941 				     actions, &error);
2942 	if (!flow)
2943 		return -rte_errno;
2944 	return 0;
2945 }
2946 
2947 /**
2948  * Enable a control flow configured from the control plane.
2949  *
2950  * @param dev
2951  *   Pointer to Ethernet device.
2952  * @param eth_spec
2953  *   An Ethernet flow spec to apply.
2954  * @param eth_mask
2955  *   An Ethernet flow mask to apply.
2956  *
2957  * @return
2958  *   0 on success, a negative errno value otherwise and rte_errno is set.
2959  */
2960 int
2961 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2962 	       struct rte_flow_item_eth *eth_spec,
2963 	       struct rte_flow_item_eth *eth_mask)
2964 {
2965 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2966 }
2967 
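/*
 * Editor's illustration: a typical invocation of mlx5_ctrl_flow(), e.g.
 * to let broadcast traffic through, assuming a struct rte_eth_dev *dev
 * is in scope.  The chosen address is only an example.
 */
#if 0
	struct rte_flow_item_eth bcast_spec = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_eth bcast_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};

	if (mlx5_ctrl_flow(dev, &bcast_spec, &bcast_mask))
		DRV_LOG(WARNING, "port %u cannot enable broadcast traffic",
			dev->data->port_id);
#endif
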
2968 /**
2969  * Destroy a flow.
2970  *
2971  * @see rte_flow_destroy()
2972  * @see rte_flow_ops
2973  */
2974 int
2975 mlx5_flow_destroy(struct rte_eth_dev *dev,
2976 		  struct rte_flow *flow,
2977 		  struct rte_flow_error *error __rte_unused)
2978 {
2979 	struct priv *priv = dev->data->dev_private;
2980 
2981 	mlx5_flow_list_destroy(dev, &priv->flows, flow);
2982 	return 0;
2983 }
2984 
2985 /**
2986  * Destroy all flows.
2987  *
2988  * @see rte_flow_flush()
2989  * @see rte_flow_ops
2990  */
2991 int
2992 mlx5_flow_flush(struct rte_eth_dev *dev,
2993 		struct rte_flow_error *error __rte_unused)
2994 {
2995 	struct priv *priv = dev->data->dev_private;
2996 
2997 	mlx5_flow_list_flush(dev, &priv->flows);
2998 	return 0;
2999 }
3000 
3001 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
3002 /**
3003  * Query flow counter.
3004  *
3005  * @param cs
3006  * @param cs
3007  *   The counter set to query.
3008  * @param counter_stats
3009  *   Accumulated statistics of the counter, used to compute deltas.
3010  * @return
3011  *   0 on success, a negative errno value otherwise and rte_errno is set.
3012  */
3013 static int
3014 mlx5_flow_query_count(struct ibv_counter_set *cs,
3015 		      struct mlx5_flow_counter_stats *counter_stats,
3016 		      struct rte_flow_query_count *query_count,
3017 		      struct rte_flow_error *error)
3018 {
3019 	uint64_t counters[2];
3020 	struct ibv_query_counter_set_attr query_cs_attr = {
3021 		.cs = cs,
3022 		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
3023 	};
3024 	struct ibv_counter_set_data query_out = {
3025 		.out = counters,
3026 		.outlen = 2 * sizeof(uint64_t),
3027 	};
3028 	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
3029 
3030 	if (err)
3031 		return rte_flow_error_set(error, err,
3032 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3033 					  NULL,
3034 					  "cannot read counter");
3035 	query_count->hits_set = 1;
3036 	query_count->bytes_set = 1;
3037 	query_count->hits = counters[0] - counter_stats->hits;
3038 	query_count->bytes = counters[1] - counter_stats->bytes;
3039 	if (query_count->reset) {
3040 		counter_stats->hits = counters[0];
3041 		counter_stats->bytes = counters[1];
3042 	}
3043 	return 0;
3044 }
3045 
3046 /**
3047  * Query a flow.
3048  *
3049  * @see rte_flow_query()
3050  * @see rte_flow_ops
3051  */
3052 int
3053 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
3054 		struct rte_flow *flow,
3055 		const struct rte_flow_action *action __rte_unused,
3056 		void *data,
3057 		struct rte_flow_error *error)
3058 {
3059 	if (flow->cs) {
3060 		int ret;
3061 
3062 		ret = mlx5_flow_query_count(flow->cs,
3063 					    &flow->counter_stats,
3064 					    (struct rte_flow_query_count *)data,
3065 					    error);
3066 		if (ret)
3067 			return ret;
3068 	} else {
3069 		return rte_flow_error_set(error, EINVAL,
3070 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3071 					  NULL,
3072 					  "no counter found for flow");
3073 	}
3074 	return 0;
3075 }
3076 #endif
3077 
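/*
 * Editor's illustration: reading the counter attached to a flow from an
 * application, assuming "flow" was created on port 0 with a COUNT
 * action.  The action argument and field names follow the rte_flow API
 * of this DPDK revision.
 */
#if 0
	struct rte_flow_query_count counters = { .reset = 1 };
	const struct rte_flow_action count_action = {
		.type = RTE_FLOW_ACTION_TYPE_COUNT,
	};
	struct rte_flow_error err;

	if (!rte_flow_query(0, flow, &count_action, &counters, &err) &&
	    counters.hits_set && counters.bytes_set) {
		/* counters.hits / counters.bytes hold the packet and byte
		 * totals accumulated since the last reset. */
	}
#endif
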
3078 /**
3079  * Isolated mode.
3080  *
3081  * @see rte_flow_isolate()
3082  * @see rte_flow_ops
3083  */
3084 int
3085 mlx5_flow_isolate(struct rte_eth_dev *dev,
3086 		  int enable,
3087 		  struct rte_flow_error *error)
3088 {
3089 	struct priv *priv = dev->data->dev_private;
3090 
3091 	if (dev->data->dev_started) {
3092 		rte_flow_error_set(error, EBUSY,
3093 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3094 				   NULL,
3095 				   "port must be stopped first");
3096 		return -rte_errno;
3097 	}
3098 	priv->isolated = !!enable;
3099 	if (enable)
3100 		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
3101 	else
3102 		priv->dev->dev_ops = &mlx5_dev_ops;
3103 	return 0;
3104 }
3105 
3106 /**
3107  * Convert a flow director filter to a generic flow.
3108  *
3109  * @param dev
3110  *   Pointer to Ethernet device.
3111  * @param fdir_filter
3112  *   Flow director filter to add.
3113  * @param attributes
3114  *   Generic flow parameters structure.
3115  *
3116  * @return
3117  *   0 on success, a negative errno value otherwise and rte_errno is set.
3118  */
3119 static int
3120 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
3121 			 const struct rte_eth_fdir_filter *fdir_filter,
3122 			 struct mlx5_fdir *attributes)
3123 {
3124 	struct priv *priv = dev->data->dev_private;
3125 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
3126 	const struct rte_eth_fdir_masks *mask =
3127 		&dev->data->dev_conf.fdir_conf.mask;
3128 
3129 	/* Validate queue number. */
3130 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3131 		DRV_LOG(ERR, "port %u invalid queue number %d",
3132 			dev->data->port_id, fdir_filter->action.rx_queue);
3133 		rte_errno = EINVAL;
3134 		return -rte_errno;
3135 	}
3136 	attributes->attr.ingress = 1;
3137 	attributes->items[0] = (struct rte_flow_item) {
3138 		.type = RTE_FLOW_ITEM_TYPE_ETH,
3139 		.spec = &attributes->l2,
3140 		.mask = &attributes->l2_mask,
3141 	};
3142 	switch (fdir_filter->action.behavior) {
3143 	case RTE_ETH_FDIR_ACCEPT:
3144 		attributes->actions[0] = (struct rte_flow_action){
3145 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
3146 			.conf = &attributes->queue,
3147 		};
3148 		break;
3149 	case RTE_ETH_FDIR_REJECT:
3150 		attributes->actions[0] = (struct rte_flow_action){
3151 			.type = RTE_FLOW_ACTION_TYPE_DROP,
3152 		};
3153 		break;
3154 	default:
3155 		DRV_LOG(ERR, "port %u invalid behavior %d",
3156 			dev->data->port_id,
3157 			fdir_filter->action.behavior);
3158 		rte_errno = ENOTSUP;
3159 		return -rte_errno;
3160 	}
3161 	attributes->queue.index = fdir_filter->action.rx_queue;
3162 	/* Handle L3. */
3163 	switch (fdir_filter->input.flow_type) {
3164 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3165 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3166 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3167 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
3168 			.src_addr = input->flow.ip4_flow.src_ip,
3169 			.dst_addr = input->flow.ip4_flow.dst_ip,
3170 			.time_to_live = input->flow.ip4_flow.ttl,
3171 			.type_of_service = input->flow.ip4_flow.tos,
3172 			.next_proto_id = input->flow.ip4_flow.proto,
3173 		};
3174 		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
3175 			.src_addr = mask->ipv4_mask.src_ip,
3176 			.dst_addr = mask->ipv4_mask.dst_ip,
3177 			.time_to_live = mask->ipv4_mask.ttl,
3178 			.type_of_service = mask->ipv4_mask.tos,
3179 			.next_proto_id = mask->ipv4_mask.proto,
3180 		};
3181 		attributes->items[1] = (struct rte_flow_item){
3182 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
3183 			.spec = &attributes->l3,
3184 			.mask = &attributes->l3_mask,
3185 		};
3186 		break;
3187 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3188 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3189 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3190 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3191 			.hop_limits = input->flow.ipv6_flow.hop_limits,
3192 			.proto = input->flow.ipv6_flow.proto,
3193 		};
3194 
3195 		memcpy(attributes->l3.ipv6.hdr.src_addr,
3196 		       input->flow.ipv6_flow.src_ip,
3197 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3198 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
3199 		       input->flow.ipv6_flow.dst_ip,
3200 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
3201 		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3202 		       mask->ipv6_mask.src_ip,
3203 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3204 		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3205 		       mask->ipv6_mask.dst_ip,
3206 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
3207 		attributes->items[1] = (struct rte_flow_item){
3208 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
3209 			.spec = &attributes->l3,
3210 			.mask = &attributes->l3_mask,
3211 		};
3212 		break;
3213 	default:
3214 		DRV_LOG(ERR, "port %u invalid flow type %d",
3215 			dev->data->port_id, fdir_filter->input.flow_type);
3216 		rte_errno = ENOTSUP;
3217 		return -rte_errno;
3218 	}
3219 	/* Handle L4. */
3220 	switch (fdir_filter->input.flow_type) {
3221 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3222 		attributes->l4.udp.hdr = (struct udp_hdr){
3223 			.src_port = input->flow.udp4_flow.src_port,
3224 			.dst_port = input->flow.udp4_flow.dst_port,
3225 		};
3226 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
3227 			.src_port = mask->src_port_mask,
3228 			.dst_port = mask->dst_port_mask,
3229 		};
3230 		attributes->items[2] = (struct rte_flow_item){
3231 			.type = RTE_FLOW_ITEM_TYPE_UDP,
3232 			.spec = &attributes->l4,
3233 			.mask = &attributes->l4_mask,
3234 		};
3235 		break;
3236 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3237 		attributes->l4.tcp.hdr = (struct tcp_hdr){
3238 			.src_port = input->flow.tcp4_flow.src_port,
3239 			.dst_port = input->flow.tcp4_flow.dst_port,
3240 		};
3241 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3242 			.src_port = mask->src_port_mask,
3243 			.dst_port = mask->dst_port_mask,
3244 		};
3245 		attributes->items[2] = (struct rte_flow_item){
3246 			.type = RTE_FLOW_ITEM_TYPE_TCP,
3247 			.spec = &attributes->l4,
3248 			.mask = &attributes->l4_mask,
3249 		};
3250 		break;
3251 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3252 		attributes->l4.udp.hdr = (struct udp_hdr){
3253 			.src_port = input->flow.udp6_flow.src_port,
3254 			.dst_port = input->flow.udp6_flow.dst_port,
3255 		};
3256 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
3257 			.src_port = mask->src_port_mask,
3258 			.dst_port = mask->dst_port_mask,
3259 		};
3260 		attributes->items[2] = (struct rte_flow_item){
3261 			.type = RTE_FLOW_ITEM_TYPE_UDP,
3262 			.spec = &attributes->l4,
3263 			.mask = &attributes->l4_mask,
3264 		};
3265 		break;
3266 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3267 		attributes->l4.tcp.hdr = (struct tcp_hdr){
3268 			.src_port = input->flow.tcp6_flow.src_port,
3269 			.dst_port = input->flow.tcp6_flow.dst_port,
3270 		};
3271 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3272 			.src_port = mask->src_port_mask,
3273 			.dst_port = mask->dst_port_mask,
3274 		};
3275 		attributes->items[2] = (struct rte_flow_item){
3276 			.type = RTE_FLOW_ITEM_TYPE_TCP,
3277 			.spec = &attributes->l4,
3278 			.mask = &attributes->l4_mask,
3279 		};
3280 		break;
3281 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3282 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3283 		break;
3284 	default:
3285 		DRV_LOG(ERR, "port %u invalid flow type %d",
3286 			dev->data->port_id, fdir_filter->input.flow_type);
3287 		rte_errno = ENOTSUP;
3288 		return -rte_errno;
3289 	}
3290 	return 0;
3291 }
3292 
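/*
 * Editor's illustration: a flow director filter as consumed by
 * mlx5_fdir_filter_convert() above, matching hypothetical UDP traffic to
 * 10.0.0.1:4000 and steering it to Rx queue 1.  Address and port are
 * given in network byte order, matching the verbatim copy performed by
 * the converter.
 */
#if 0
	const struct rte_eth_fdir_filter filter = {
		.input = {
			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
			.flow.udp4_flow = {
				.ip.dst_ip = rte_cpu_to_be_32(0x0a000001),
				.dst_port = rte_cpu_to_be_16(4000),
			},
		},
		.action = {
			.behavior = RTE_ETH_FDIR_ACCEPT,
			.rx_queue = 1,
		},
	};
#endif
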
3293 /**
3294  * Add new flow director filter and store it in list.
3295  *
3296  * @param dev
3297  *   Pointer to Ethernet device.
3298  * @param fdir_filter
3299  *   Flow director filter to add.
3300  *
3301  * @return
3302  *   0 on success, a negative errno value otherwise and rte_errno is set.
3303  */
3304 static int
3305 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3306 		     const struct rte_eth_fdir_filter *fdir_filter)
3307 {
3308 	struct priv *priv = dev->data->dev_private;
3309 	struct mlx5_fdir attributes = {
3310 		.attr.group = 0,
3311 		.l2_mask = {
3312 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3313 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3314 			.type = 0,
3315 		},
3316 	};
3317 	struct mlx5_flow_parse parser = {
3318 		.layer = HASH_RXQ_ETH,
3319 	};
3320 	struct rte_flow_error error;
3321 	struct rte_flow *flow;
3322 	int ret;
3323 
3324 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3325 	if (ret)
3326 		return ret;
3327 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3328 				attributes.actions, &error, &parser);
3329 	if (ret)
3330 		return ret;
3331 	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3332 				     attributes.items, attributes.actions,
3333 				     &error);
3334 	if (flow) {
3335 		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3336 			(void *)flow);
3337 		return 0;
3338 	}
3339 	return -rte_errno;
3340 }
3341 
3342 /**
3343  * Delete specific filter.
3344  *
3345  * @param dev
3346  *   Pointer to Ethernet device.
3347  * @param fdir_filter
3348  *   Filter to be deleted.
3349  *
3350  * @return
3351  *   0 on success, a negative errno value otherwise and rte_errno is set.
3352  */
3353 static int
3354 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
3355 			const struct rte_eth_fdir_filter *fdir_filter)
3356 {
3357 	struct priv *priv = dev->data->dev_private;
3358 	struct mlx5_fdir attributes = {
3359 		.attr.group = 0,
3360 	};
3361 	struct mlx5_flow_parse parser = {
3362 		.create = 1,
3363 		.layer = HASH_RXQ_ETH,
3364 	};
3365 	struct rte_flow_error error;
3366 	struct rte_flow *flow;
3367 	unsigned int i;
3368 	int ret;
3369 
3370 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3371 	if (ret)
3372 		return ret;
3373 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3374 				attributes.actions, &error, &parser);
3375 	if (ret)
3376 		goto exit;
3377 	/*
3378 	 * Special case for drop action which is only set in the
3379 	 * specifications when the flow is created.  In this situation the
3380 	 * drop specification is missing.
3381 	 */
3382 	if (parser.drop) {
3383 		struct ibv_flow_spec_action_drop *drop;
3384 
3385 		drop = (void *)((uintptr_t)parser.queue[parser.layer].ibv_attr +
3386 				parser.queue[parser.layer].offset);
3387 		*drop = (struct ibv_flow_spec_action_drop){
3388 			.type = IBV_FLOW_SPEC_ACTION_DROP,
3389 			.size = sizeof(struct ibv_flow_spec_action_drop),
3390 		};
3391 		parser.queue[parser.layer].ibv_attr->num_of_specs++;
3392 	}
3393 	TAILQ_FOREACH(flow, &priv->flows, next) {
3394 		struct ibv_flow_attr *attr;
3395 		struct ibv_spec_header *attr_h;
3396 		void *spec;
3397 		struct ibv_flow_attr *flow_attr;
3398 		struct ibv_spec_header *flow_h;
3399 		void *flow_spec;
3400 		unsigned int specs_n;
3401 
3402 		attr = parser.queue[parser.layer].ibv_attr;
3403 		flow_attr = flow->frxq[parser.layer].ibv_attr;
3404 		/* Compare the attributes first. */
3405 		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
3406 			continue;
3407 		if (attr->num_of_specs == 0)
3408 			continue;
3409 		spec = (void *)((uintptr_t)attr +
3410 				sizeof(struct ibv_flow_attr));
3411 		flow_spec = (void *)((uintptr_t)flow_attr +
3412 				     sizeof(struct ibv_flow_attr));
3413 		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
3414 		for (i = 0; i != specs_n; ++i) {
3415 			attr_h = spec;
3416 			flow_h = flow_spec;
3417 			if (memcmp(spec, flow_spec,
3418 				   RTE_MIN(attr_h->size, flow_h->size)))
3419 				goto wrong_flow;
3420 			spec = (void *)((uintptr_t)spec + attr_h->size);
3421 			flow_spec = (void *)((uintptr_t)flow_spec +
3422 					     flow_h->size);
3423 		}
3424 		/* At this point, the flow matches. */
3425 		break;
3426 wrong_flow:
3427 		/* The flow does not match. */
3428 		continue;
3429 	}
3430 	ret = rte_errno; /* Save rte_errno before cleanup. */
3431 	if (flow)
3432 		mlx5_flow_list_destroy(dev, &priv->flows, flow);
3433 exit:
3434 	for (i = 0; i != hash_rxq_init_n; ++i) {
3435 		if (parser.queue[i].ibv_attr)
3436 			rte_free(parser.queue[i].ibv_attr);
3437 	}
3438 	rte_errno = ret; /* Restore rte_errno. */
3439 	return -rte_errno;
3440 }
3441 
3442 /**
3443  * Update queue for specific filter.
3444  *
3445  * @param dev
3446  *   Pointer to Ethernet device.
3447  * @param fdir_filter
3448  *   Filter to be updated.
3449  *
3450  * @return
3451  *   0 on success, a negative errno value otherwise and rte_errno is set.
3452  */
3453 static int
3454 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3455 			const struct rte_eth_fdir_filter *fdir_filter)
3456 {
3457 	int ret;
3458 
3459 	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3460 	if (ret)
3461 		return ret;
3462 	return mlx5_fdir_filter_add(dev, fdir_filter);
3463 }
3464 
3465 /**
3466  * Flush all filters.
3467  *
3468  * @param dev
3469  *   Pointer to Ethernet device.
3470  */
3471 static void
3472 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3473 {
3474 	struct priv *priv = dev->data->dev_private;
3475 
3476 	mlx5_flow_list_flush(dev, &priv->flows);
3477 }
3478 
3479 /**
3480  * Get flow director information.
3481  *
3482  * @param dev
3483  *   Pointer to Ethernet device.
3484  * @param[out] fdir_info
3485  *   Resulting flow director information.
3486  */
3487 static void
3488 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3489 {
3490 	struct priv *priv = dev->data->dev_private;
3491 	struct rte_eth_fdir_masks *mask =
3492 		&priv->dev->data->dev_conf.fdir_conf.mask;
3493 
3494 	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3495 	fdir_info->guarant_spc = 0;
3496 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3497 	fdir_info->max_flexpayload = 0;
3498 	fdir_info->flow_types_mask[0] = 0;
3499 	fdir_info->flex_payload_unit = 0;
3500 	fdir_info->max_flex_payload_segment_num = 0;
3501 	fdir_info->flex_payload_limit = 0;
3502 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3503 }
3504 
3505 /**
3506  * Deal with flow director operations.
3507  *
3508  * @param dev
3509  *   Pointer to Ethernet device.
3510  * @param filter_op
3511  *   Operation to perform.
3512  * @param arg
3513  *   Pointer to operation-specific structure.
3514  *
3515  * @return
3516  *   0 on success, a negative errno value otherwise and rte_errno is set.
3517  */
3518 static int
3519 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3520 		    void *arg)
3521 {
3522 	struct priv *priv = dev->data->dev_private;
3523 	enum rte_fdir_mode fdir_mode =
3524 		priv->dev->data->dev_conf.fdir_conf.mode;
3525 
3526 	if (filter_op == RTE_ETH_FILTER_NOP)
3527 		return 0;
3528 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3529 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3530 		DRV_LOG(ERR, "port %u flow director mode %d not supported",
3531 			dev->data->port_id, fdir_mode);
3532 		rte_errno = EINVAL;
3533 		return -rte_errno;
3534 	}
3535 	switch (filter_op) {
3536 	case RTE_ETH_FILTER_ADD:
3537 		return mlx5_fdir_filter_add(dev, arg);
3538 	case RTE_ETH_FILTER_UPDATE:
3539 		return mlx5_fdir_filter_update(dev, arg);
3540 	case RTE_ETH_FILTER_DELETE:
3541 		return mlx5_fdir_filter_delete(dev, arg);
3542 	case RTE_ETH_FILTER_FLUSH:
3543 		mlx5_fdir_filter_flush(dev);
3544 		break;
3545 	case RTE_ETH_FILTER_INFO:
3546 		mlx5_fdir_info_get(dev, arg);
3547 		break;
3548 	default:
3549 		DRV_LOG(DEBUG, "port %u unknown operation %u",
3550 			dev->data->port_id, filter_op);
3551 		rte_errno = EINVAL;
3552 		return -rte_errno;
3553 	}
3554 	return 0;
3555 }
3556 
3557 /**
3558  * Manage filter operations.
3559  *
3560  * @param dev
3561  *   Pointer to Ethernet device structure.
3562  * @param filter_type
3563  *   Filter type.
3564  * @param filter_op
3565  *   Operation to perform.
3566  * @param arg
3567  *   Pointer to operation-specific structure.
3568  *
3569  * @return
3570  *   0 on success, a negative errno value otherwise and rte_errno is set.
3571  */
3572 int
3573 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3574 		     enum rte_filter_type filter_type,
3575 		     enum rte_filter_op filter_op,
3576 		     void *arg)
3577 {
3578 	switch (filter_type) {
3579 	case RTE_ETH_FILTER_GENERIC:
3580 		if (filter_op != RTE_ETH_FILTER_GET) {
3581 			rte_errno = EINVAL;
3582 			return -rte_errno;
3583 		}
3584 		*(const void **)arg = &mlx5_flow_ops;
3585 		return 0;
3586 	case RTE_ETH_FILTER_FDIR:
3587 		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3588 	default:
3589 		DRV_LOG(ERR, "port %u filter type (%d) not supported",
3590 			dev->data->port_id, filter_type);
3591 		rte_errno = ENOTSUP;
3592 		return -rte_errno;
3593 	}
3594 	return 0;
3595 }
3596 
3597 /**
3598  * Detect number of Verbs flow priorities supported.
3599  *
3600  * @param dev
3601  *   Pointer to Ethernet device.
3602  *
3603  * @return
3604  *   Number of supported Verbs flow priorities.
3605  */
3606 unsigned int
3607 mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
3608 {
3609 	struct priv *priv = dev->data->dev_private;
3610 	unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
3611 	struct {
3612 		struct ibv_flow_attr attr;
3613 		struct ibv_flow_spec_eth eth;
3614 		struct ibv_flow_spec_action_drop drop;
3615 	} flow_attr = {
3616 		.attr = {
3617 			.num_of_specs = 2,
3618 		},
3619 		.eth = {
3620 			.type = IBV_FLOW_SPEC_ETH,
3621 			.size = sizeof(struct ibv_flow_spec_eth),
3622 		},
3623 		.drop = {
3624 			.size = sizeof(struct ibv_flow_spec_action_drop),
3625 			.type = IBV_FLOW_SPEC_ACTION_DROP,
3626 		},
3627 	};
3628 	struct ibv_flow *flow;
3629 
3630 	do {
3631 		flow_attr.attr.priority = verb_priorities - 1;
3632 		flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
3633 					      &flow_attr.attr);
3634 		if (flow) {
3635 			claim_zero(mlx5_glue->destroy_flow(flow));
3636 			/* Try more priorities. */
3637 			verb_priorities *= 2;
3638 		} else {
3639 			/* Failed, restore the last working value. */
3640 			verb_priorities /= 2;
3641 			break;
3642 		}
3643 	} while (1);
3644 	DRV_LOG(DEBUG, "port %u Verbs flow priorities: %d,"
3645 		" user flow priorities: %d",
3646 		dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
3647 	return verb_priorities;
3648 }
3649
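/*
 * Editor's note: worked example for the probe above.  Starting from
 * MLX5_VERBS_FLOW_PRIO_8, a dummy drop rule is created at priority
 * (verb_priorities - 1) and the count is doubled while creation keeps
 * succeeding.  On a device exposing 16 priorities the attempts are at
 * priority 7 (ok), 15 (ok) and 31 (fail), after which the count is
 * halved back and 16 is returned.
 */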