xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision 0f4203fe9d18339a3dc75c606481e005670c4ad4)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright 2016 6WIND S.A.
5  *   Copyright 2016 Mellanox.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/queue.h>
35 #include <string.h>
36 
37 /* Verbs header. */
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
39 #ifdef PEDANTIC
40 #pragma GCC diagnostic ignored "-Wpedantic"
41 #endif
42 #include <infiniband/verbs.h>
43 #ifdef PEDANTIC
44 #pragma GCC diagnostic error "-Wpedantic"
45 #endif
46 
47 #include <rte_ethdev.h>
48 #include <rte_flow.h>
49 #include <rte_flow_driver.h>
50 #include <rte_malloc.h>
51 
52 #include "mlx5.h"
53 #include "mlx5_prm.h"
54 
55 /* Define minimal priority for control plane flows. */
56 #define MLX5_CTRL_FLOW_PRIORITY 4
57 
58 /* Internet Protocol versions. */
59 #define MLX5_IPV4 4
60 #define MLX5_IPV6 6
61 
62 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
63 struct ibv_counter_set_init_attr {
64 	int dummy;
65 };
66 struct ibv_flow_spec_counter_action {
67 	int dummy;
68 };
69 struct ibv_counter_set {
70 	int dummy;
71 };
72 
73 static inline int
74 ibv_destroy_counter_set(struct ibv_counter_set *cs)
75 {
76 	(void)cs;
77 	return -ENOTSUP;
78 }
79 #endif
80 
81 /* Dev ops structures defined in mlx5.c. */
82 extern const struct eth_dev_ops mlx5_dev_ops;
83 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
84 
85 static int
86 mlx5_flow_create_eth(const struct rte_flow_item *item,
87 		     const void *default_mask,
88 		     void *data);
89 
90 static int
91 mlx5_flow_create_vlan(const struct rte_flow_item *item,
92 		      const void *default_mask,
93 		      void *data);
94 
95 static int
96 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
97 		      const void *default_mask,
98 		      void *data);
99 
100 static int
101 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
102 		      const void *default_mask,
103 		      void *data);
104 
105 static int
106 mlx5_flow_create_udp(const struct rte_flow_item *item,
107 		     const void *default_mask,
108 		     void *data);
109 
110 static int
111 mlx5_flow_create_tcp(const struct rte_flow_item *item,
112 		     const void *default_mask,
113 		     void *data);
114 
115 static int
116 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
117 		       const void *default_mask,
118 		       void *data);
119 
120 struct mlx5_flow_parse;
121 
122 static void
123 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
124 		      unsigned int size);
125 
126 static int
127 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
128 
129 static int
130 mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);
131 
132 /* Hash RX queue types. */
133 enum hash_rxq_type {
134 	HASH_RXQ_TCPV4,
135 	HASH_RXQ_UDPV4,
136 	HASH_RXQ_IPV4,
137 	HASH_RXQ_TCPV6,
138 	HASH_RXQ_UDPV6,
139 	HASH_RXQ_IPV6,
140 	HASH_RXQ_ETH,
141 };
142 
143 /* Initialization data for hash RX queue. */
144 struct hash_rxq_init {
145 	uint64_t hash_fields; /* Fields that participate in the hash. */
146 	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
147 	unsigned int flow_priority; /* Flow priority to use. */
148 	unsigned int ip_version; /* Internet protocol. */
149 };
150 
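/*
 * Note: flow_priority is a relative offset added to the rule priority when
 * the Verbs attribute is allocated; lower values take precedence, so the
 * L4-specific entries (0) are matched before L3 (1) and plain Ethernet (2).
 */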
151 /* Initialization data for hash RX queues. */
152 const struct hash_rxq_init hash_rxq_init[] = {
153 	[HASH_RXQ_TCPV4] = {
154 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
155 				IBV_RX_HASH_DST_IPV4 |
156 				IBV_RX_HASH_SRC_PORT_TCP |
157 				IBV_RX_HASH_DST_PORT_TCP),
158 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
159 		.flow_priority = 0,
160 		.ip_version = MLX5_IPV4,
161 	},
162 	[HASH_RXQ_UDPV4] = {
163 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
164 				IBV_RX_HASH_DST_IPV4 |
165 				IBV_RX_HASH_SRC_PORT_UDP |
166 				IBV_RX_HASH_DST_PORT_UDP),
167 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
168 		.flow_priority = 0,
169 		.ip_version = MLX5_IPV4,
170 	},
171 	[HASH_RXQ_IPV4] = {
172 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
173 				IBV_RX_HASH_DST_IPV4),
174 		.dpdk_rss_hf = (ETH_RSS_IPV4 |
175 				ETH_RSS_FRAG_IPV4),
176 		.flow_priority = 1,
177 		.ip_version = MLX5_IPV4,
178 	},
179 	[HASH_RXQ_TCPV6] = {
180 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
181 				IBV_RX_HASH_DST_IPV6 |
182 				IBV_RX_HASH_SRC_PORT_TCP |
183 				IBV_RX_HASH_DST_PORT_TCP),
184 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
185 		.flow_priority = 0,
186 		.ip_version = MLX5_IPV6,
187 	},
188 	[HASH_RXQ_UDPV6] = {
189 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
190 				IBV_RX_HASH_DST_IPV6 |
191 				IBV_RX_HASH_SRC_PORT_UDP |
192 				IBV_RX_HASH_DST_PORT_UDP),
193 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
194 		.flow_priority = 0,
195 		.ip_version = MLX5_IPV6,
196 	},
197 	[HASH_RXQ_IPV6] = {
198 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
199 				IBV_RX_HASH_DST_IPV6),
200 		.dpdk_rss_hf = (ETH_RSS_IPV6 |
201 				ETH_RSS_FRAG_IPV6),
202 		.flow_priority = 1,
203 		.ip_version = MLX5_IPV6,
204 	},
205 	[HASH_RXQ_ETH] = {
206 		.hash_fields = 0,
207 		.dpdk_rss_hf = 0,
208 		.flow_priority = 2,
209 	},
210 };
211 
212 /* Number of entries in hash_rxq_init[]. */
213 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
214 
215 /** Structure for holding counter stats. */
216 struct mlx5_flow_counter_stats {
217 	uint64_t hits; /**< Number of packets matched by the rule. */
218 	uint64_t bytes; /**< Number of bytes matched by the rule. */
219 };
220 
221 /** Structure for Drop queue. */
222 struct mlx5_hrxq_drop {
223 	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
224 	struct ibv_qp *qp; /**< Verbs queue pair. */
225 	struct ibv_wq *wq; /**< Verbs work queue. */
226 	struct ibv_cq *cq; /**< Verbs completion queue. */
227 };
228 
229 /* Flow structures. */
230 struct mlx5_flow {
231 	uint64_t hash_fields; /**< Fields that participate in the hash. */
232 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
233 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
234 	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
235 };
236 
237 /* Drop flow structures. */
238 struct mlx5_flow_drop {
239 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
240 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
241 };
242 
243 struct rte_flow {
244 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
245 	uint32_t mark:1; /**< Set if the flow is marked. */
246 	uint32_t drop:1; /**< Drop queue. */
247 	uint16_t queues_n; /**< Number of entries in queues[]. */
248 	uint16_t (*queues)[]; /**< Queue indexes to use. */
249 	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
250 	uint8_t rss_key[40]; /**< Copy of the RSS key. */
251 	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
252 	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
253 	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
254 	/**< Flow with Rx queue. */
255 };
256 
257 /** Static initializer for items. */
258 #define ITEMS(...) \
259 	(const enum rte_flow_item_type []){ \
260 		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
261 	}
262 
263 /** Structure to generate a simple graph of layers supported by the NIC. */
264 struct mlx5_flow_items {
265 	/** List of possible actions for these items. */
266 	const enum rte_flow_action_type *const actions;
267 	/** Bit-masks corresponding to the possibilities for the item. */
268 	const void *mask;
269 	/**
270 	 * Default bit-masks to use when item->mask is not provided. When
271 	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
272 	 * used instead.
273 	 */
274 	const void *default_mask;
275 	/** Bit-masks size in bytes. */
276 	const unsigned int mask_sz;
277 	/**
278 	 * Conversion function from rte_flow to NIC specific flow.
279 	 *
280 	 * @param item
281 	 *   rte_flow item to convert.
282 	 * @param default_mask
283 	 *   Default bit-masks to use when item->mask is not provided.
284 	 * @param data
285 	 *   Internal structure to store the conversion.
286 	 *
287 	 * @return
288 	 *   0 on success, negative value otherwise.
289 	 */
290 	int (*convert)(const struct rte_flow_item *item,
291 		       const void *default_mask,
292 		       void *data);
293 	/** Size in bytes of the destination structure. */
294 	const unsigned int dst_sz;
295 	/** List of possible following items.  */
296 	const enum rte_flow_item_type *const items;
297 };
298 
299 /** Valid actions for this PMD. */
300 static const enum rte_flow_action_type valid_actions[] = {
301 	RTE_FLOW_ACTION_TYPE_DROP,
302 	RTE_FLOW_ACTION_TYPE_QUEUE,
303 	RTE_FLOW_ACTION_TYPE_MARK,
304 	RTE_FLOW_ACTION_TYPE_FLAG,
305 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
306 	RTE_FLOW_ACTION_TYPE_COUNT,
307 #endif
308 	RTE_FLOW_ACTION_TYPE_END,
309 };
310 
311 /** Graph of supported items and associated actions. */
312 static const struct mlx5_flow_items mlx5_flow_items[] = {
313 	[RTE_FLOW_ITEM_TYPE_END] = {
314 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
315 			       RTE_FLOW_ITEM_TYPE_VXLAN),
316 	},
317 	[RTE_FLOW_ITEM_TYPE_ETH] = {
318 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
319 			       RTE_FLOW_ITEM_TYPE_IPV4,
320 			       RTE_FLOW_ITEM_TYPE_IPV6),
321 		.actions = valid_actions,
322 		.mask = &(const struct rte_flow_item_eth){
323 			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
324 			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
325 			.type = -1,
326 		},
327 		.default_mask = &rte_flow_item_eth_mask,
328 		.mask_sz = sizeof(struct rte_flow_item_eth),
329 		.convert = mlx5_flow_create_eth,
330 		.dst_sz = sizeof(struct ibv_flow_spec_eth),
331 	},
332 	[RTE_FLOW_ITEM_TYPE_VLAN] = {
333 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
334 			       RTE_FLOW_ITEM_TYPE_IPV6),
335 		.actions = valid_actions,
336 		.mask = &(const struct rte_flow_item_vlan){
337 			.tci = -1,
338 		},
339 		.default_mask = &rte_flow_item_vlan_mask,
340 		.mask_sz = sizeof(struct rte_flow_item_vlan),
341 		.convert = mlx5_flow_create_vlan,
342 		.dst_sz = 0,
343 	},
344 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
345 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
346 			       RTE_FLOW_ITEM_TYPE_TCP),
347 		.actions = valid_actions,
348 		.mask = &(const struct rte_flow_item_ipv4){
349 			.hdr = {
350 				.src_addr = -1,
351 				.dst_addr = -1,
352 				.type_of_service = -1,
353 				.next_proto_id = -1,
354 			},
355 		},
356 		.default_mask = &rte_flow_item_ipv4_mask,
357 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
358 		.convert = mlx5_flow_create_ipv4,
359 		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
360 	},
361 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
362 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
363 			       RTE_FLOW_ITEM_TYPE_TCP),
364 		.actions = valid_actions,
365 		.mask = &(const struct rte_flow_item_ipv6){
366 			.hdr = {
367 				.src_addr = {
368 					0xff, 0xff, 0xff, 0xff,
369 					0xff, 0xff, 0xff, 0xff,
370 					0xff, 0xff, 0xff, 0xff,
371 					0xff, 0xff, 0xff, 0xff,
372 				},
373 				.dst_addr = {
374 					0xff, 0xff, 0xff, 0xff,
375 					0xff, 0xff, 0xff, 0xff,
376 					0xff, 0xff, 0xff, 0xff,
377 					0xff, 0xff, 0xff, 0xff,
378 				},
379 				.vtc_flow = -1,
380 				.proto = -1,
381 				.hop_limits = -1,
382 			},
383 		},
384 		.default_mask = &rte_flow_item_ipv6_mask,
385 		.mask_sz = sizeof(struct rte_flow_item_ipv6),
386 		.convert = mlx5_flow_create_ipv6,
387 		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
388 	},
389 	[RTE_FLOW_ITEM_TYPE_UDP] = {
390 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
391 		.actions = valid_actions,
392 		.mask = &(const struct rte_flow_item_udp){
393 			.hdr = {
394 				.src_port = -1,
395 				.dst_port = -1,
396 			},
397 		},
398 		.default_mask = &rte_flow_item_udp_mask,
399 		.mask_sz = sizeof(struct rte_flow_item_udp),
400 		.convert = mlx5_flow_create_udp,
401 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
402 	},
403 	[RTE_FLOW_ITEM_TYPE_TCP] = {
404 		.actions = valid_actions,
405 		.mask = &(const struct rte_flow_item_tcp){
406 			.hdr = {
407 				.src_port = -1,
408 				.dst_port = -1,
409 			},
410 		},
411 		.default_mask = &rte_flow_item_tcp_mask,
412 		.mask_sz = sizeof(struct rte_flow_item_tcp),
413 		.convert = mlx5_flow_create_tcp,
414 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
415 	},
416 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
417 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
418 		.actions = valid_actions,
419 		.mask = &(const struct rte_flow_item_vxlan){
420 			.vni = "\xff\xff\xff",
421 		},
422 		.default_mask = &rte_flow_item_vxlan_mask,
423 		.mask_sz = sizeof(struct rte_flow_item_vxlan),
424 		.convert = mlx5_flow_create_vxlan,
425 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
426 	},
427 };
428 
429 /** Structure to pass to the conversion function. */
430 struct mlx5_flow_parse {
431 	uint32_t inner; /**< Set once VXLAN is encountered. */
432 	uint32_t create:1;
433 	/**< Whether resources should remain after a validate. */
434 	uint32_t drop:1; /**< Target is a drop queue. */
435 	uint32_t mark:1; /**< Mark is present in the flow. */
436 	uint32_t count:1; /**< Count is present in the flow. */
437 	uint32_t mark_id; /**< Mark identifier. */
438 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
439 	uint16_t queues_n; /**< Number of entries in queues[]. */
440 	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
441 	uint8_t rss_key[40]; /**< Copy of the RSS key. */
442 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
443 	struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
444 	struct {
445 		struct ibv_flow_attr *ibv_attr;
446 		/**< Pointer to Verbs attributes. */
447 		unsigned int offset;
448 		/**< Current position or total size of the attribute. */
449 	} queue[RTE_DIM(hash_rxq_init)];
450 };
451 
452 static const struct rte_flow_ops mlx5_flow_ops = {
453 	.validate = mlx5_flow_validate,
454 	.create = mlx5_flow_create,
455 	.destroy = mlx5_flow_destroy,
456 	.flush = mlx5_flow_flush,
457 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
458 	.query = mlx5_flow_query,
459 #else
460 	.query = NULL,
461 #endif
462 	.isolate = mlx5_flow_isolate,
463 };
464 
465 /* Convert FDIR request to Generic flow. */
466 struct mlx5_fdir {
467 	struct rte_flow_attr attr;
468 	struct rte_flow_action actions[2];
469 	struct rte_flow_item items[4];
470 	struct rte_flow_item_eth l2;
471 	struct rte_flow_item_eth l2_mask;
472 	union {
473 		struct rte_flow_item_ipv4 ipv4;
474 		struct rte_flow_item_ipv6 ipv6;
475 	} l3;
476 	union {
477 		struct rte_flow_item_udp udp;
478 		struct rte_flow_item_tcp tcp;
479 	} l4;
480 	struct rte_flow_action_queue queue;
481 };
482 
483 /* Verbs specification header. */
484 struct ibv_spec_header {
485 	enum ibv_flow_spec_type type;
486 	uint16_t size;
487 };
488 
489 /**
490  * Check support for a given item.
491  *
492  * @param item[in]
493  *   Item specification.
494  * @param mask[in]
495  *   Bit-masks covering supported fields to compare with spec, last and mask in
496  *   \item.
497  * @param size
498  *   Bit-mask size in bytes.
499  *
500  * @return
501  *   0 on success, a non-zero value otherwise.
502  */
503 static int
504 mlx5_flow_item_validate(const struct rte_flow_item *item,
505 			const uint8_t *mask, unsigned int size)
506 {
507 	int ret = 0;
508 
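	/* A mask or last value without a spec cannot be checked. */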
509 	if (!item->spec && (item->mask || item->last))
510 		return -1;
511 	if (item->spec && !item->mask) {
512 		unsigned int i;
513 		const uint8_t *spec = item->spec;
514 
515 		for (i = 0; i < size; ++i)
516 			if ((spec[i] | mask[i]) != mask[i])
517 				return -1;
518 	}
519 	if (item->last && !item->mask) {
520 		unsigned int i;
521 		const uint8_t *spec = item->last;
522 
523 		for (i = 0; i < size; ++i)
524 			if ((spec[i] | mask[i]) != mask[i])
525 				return -1;
526 	}
527 	if (item->mask) {
528 		unsigned int i;
529 		const uint8_t *spec = item->mask;
530 
531 		for (i = 0; i < size; ++i)
532 			if ((spec[i] | mask[i]) != mask[i])
533 				return -1;
534 	}
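	/*
	 * Ranges are not supported: once the mask is applied, last must be
	 * identical to spec.
	 */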
535 	if (item->spec && item->last) {
536 		uint8_t spec[size];
537 		uint8_t last[size];
538 		const uint8_t *apply = mask;
539 		unsigned int i;
540 
541 		if (item->mask)
542 			apply = item->mask;
543 		for (i = 0; i < size; ++i) {
544 			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
545 			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
546 		}
547 		ret = memcmp(spec, last, size);
548 	}
549 	return ret;
550 }
551 
552 /**
553  * Copy the RSS configuration provided by the user.
554  *
555  * @param priv
556  *   Pointer to private structure.
557  * @param parser
558  *   Internal parser structure.
559  * @param rss_conf
560  *   User RSS configuration to save.
561  *
562  * @return
563  *   0 on success, errno value on failure.
564  */
565 static int
566 priv_flow_convert_rss_conf(struct priv *priv,
567 			   struct mlx5_flow_parse *parser,
568 			   const struct rte_eth_rss_conf *rss_conf)
569 {
570 	const struct rte_eth_rss_conf *rss =
571 		rss_conf ? rss_conf : &priv->rss_conf;
572 
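	/* The RSS key is at most 40 bytes long (size of parser->rss_key). */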
573 	if (rss->rss_key_len > 40)
574 		return EINVAL;
575 	parser->rss_conf.rss_key_len = rss->rss_key_len;
576 	parser->rss_conf.rss_hf = rss->rss_hf;
577 	memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
578 	parser->rss_conf.rss_key = parser->rss_key;
579 	return 0;
580 }
581 
582 /**
583  * Validate flow rule attributes.
584  *
585  * @param priv
586  *   Pointer to private structure.
587  * @param[in] attr
588  *   Flow rule attributes.
589  * @param[out] error
590  *   Perform verbose error reporting if not NULL.
591  * @param[in, out] parser
592  *   Internal parser structure.
593  *
594  * @return
595  *   0 on success, a negative errno value otherwise and rte_errno is set.
596  */
597 static int
598 priv_flow_convert_attributes(struct priv *priv,
599 			     const struct rte_flow_attr *attr,
600 			     struct rte_flow_error *error,
601 			     struct mlx5_flow_parse *parser)
602 {
603 	(void)priv;
604 	(void)parser;
605 	if (attr->group) {
606 		rte_flow_error_set(error, ENOTSUP,
607 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
608 				   NULL,
609 				   "groups are not supported");
610 		return -rte_errno;
611 	}
612 	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
613 		rte_flow_error_set(error, ENOTSUP,
614 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
615 				   NULL,
616 				   "priorities are not supported");
617 		return -rte_errno;
618 	}
619 	if (attr->egress) {
620 		rte_flow_error_set(error, ENOTSUP,
621 				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
622 				   NULL,
623 				   "egress is not supported");
624 		return -rte_errno;
625 	}
626 	if (!attr->ingress) {
627 		rte_flow_error_set(error, ENOTSUP,
628 				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
629 				   NULL,
630 				   "only ingress is supported");
631 		return -rte_errno;
632 	}
633 	return 0;
634 }
635 
636 /**
637  * Extract the actions from the flow rule into the parser.
638  *
639  * @param priv
640  *   Pointer to private structure.
641  * @param[in] actions
642  *   Associated actions (list terminated by the END action).
643  * @param[out] error
644  *   Perform verbose error reporting if not NULL.
645  * @param[in, out] parser
646  *   Internal parser structure.
647  *
648  * @return
649  *   0 on success, a negative errno value otherwise and rte_errno is set.
650  */
651 static int
652 priv_flow_convert_actions(struct priv *priv,
653 			  const struct rte_flow_action actions[],
654 			  struct rte_flow_error *error,
655 			  struct mlx5_flow_parse *parser)
656 {
657 	/*
658 	 * Add the default RSS configuration: Verbs needs it to create the QP
659 	 * even when no RSS is requested.
660 	 */
661 	priv_flow_convert_rss_conf(priv, parser,
662 				   (const struct rte_eth_rss_conf *)
663 				   &priv->rss_conf);
664 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
665 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
666 			continue;
667 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
668 			parser->drop = 1;
669 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
670 			const struct rte_flow_action_queue *queue =
671 				(const struct rte_flow_action_queue *)
672 				actions->conf;
673 			uint16_t n;
674 			uint16_t found = 0;
675 
676 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
677 				goto exit_action_not_supported;
678 			for (n = 0; n < parser->queues_n; ++n) {
679 				if (parser->queues[n] == queue->index) {
680 					found = 1;
681 					break;
682 				}
683 			}
684 			if (parser->queues_n > 1 && !found) {
685 				rte_flow_error_set(error, ENOTSUP,
686 					   RTE_FLOW_ERROR_TYPE_ACTION,
687 					   actions,
688 					   "queue action not in RSS queues");
689 				return -rte_errno;
690 			}
691 			if (!found) {
692 				parser->queues_n = 1;
693 				parser->queues[0] = queue->index;
694 			}
695 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
696 			const struct rte_flow_action_rss *rss =
697 				(const struct rte_flow_action_rss *)
698 				actions->conf;
699 			uint16_t n;
700 
701 			if (!rss || !rss->num) {
702 				rte_flow_error_set(error, EINVAL,
703 						   RTE_FLOW_ERROR_TYPE_ACTION,
704 						   actions,
705 						   "no valid queues");
706 				return -rte_errno;
707 			}
708 			if (parser->queues_n == 1) {
709 				uint16_t found = 0;
710 
711 				assert(parser->queues_n);
712 				for (n = 0; n < rss->num; ++n) {
713 					if (parser->queues[0] ==
714 					    rss->queue[n]) {
715 						found = 1;
716 						break;
717 					}
718 				}
719 				if (!found) {
720 					rte_flow_error_set(error, ENOTSUP,
721 						   RTE_FLOW_ERROR_TYPE_ACTION,
722 						   actions,
723 						   "queue action not in RSS"
724 						   " queues");
725 					return -rte_errno;
726 				}
727 			}
728 			for (n = 0; n < rss->num; ++n) {
729 				if (rss->queue[n] >= priv->rxqs_n) {
730 					rte_flow_error_set(error, EINVAL,
731 						   RTE_FLOW_ERROR_TYPE_ACTION,
732 						   actions,
733 						   "queue id > number of"
734 						   " queues");
735 					return -rte_errno;
736 				}
737 			}
738 			for (n = 0; n < rss->num; ++n)
739 				parser->queues[n] = rss->queue[n];
740 			parser->queues_n = rss->num;
741 			if (priv_flow_convert_rss_conf(priv, parser,
742 						       rss->rss_conf)) {
743 				rte_flow_error_set(error, EINVAL,
744 						   RTE_FLOW_ERROR_TYPE_ACTION,
745 						   actions,
746 						   "wrong RSS configuration");
747 				return -rte_errno;
748 			}
749 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
750 			const struct rte_flow_action_mark *mark =
751 				(const struct rte_flow_action_mark *)
752 				actions->conf;
753 
754 			if (!mark) {
755 				rte_flow_error_set(error, EINVAL,
756 						   RTE_FLOW_ERROR_TYPE_ACTION,
757 						   actions,
758 						   "mark must be defined");
759 				return -rte_errno;
760 			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
761 				rte_flow_error_set(error, ENOTSUP,
762 						   RTE_FLOW_ERROR_TYPE_ACTION,
763 						   actions,
764 						   "mark must be between 0"
765 						   " and 16777199");
766 				return -rte_errno;
767 			}
768 			parser->mark = 1;
769 			parser->mark_id = mark->id;
770 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
771 			parser->mark = 1;
772 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
773 			   priv->counter_set_supported) {
774 			parser->count = 1;
775 		} else {
776 			goto exit_action_not_supported;
777 		}
778 	}
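	/* A mark has no effect on packets that are dropped, ignore it. */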
779 	if (parser->drop && parser->mark)
780 		parser->mark = 0;
781 	if (!parser->queues_n && !parser->drop) {
782 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
783 				   NULL, "no valid action");
784 		return -rte_errno;
785 	}
786 	return 0;
787 exit_action_not_supported:
788 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
789 			   actions, "action not supported");
790 	return -rte_errno;
791 }
792 
793 /**
794  * Validate items.
795  *
796  * @param priv
797  *   Pointer to private structure.
798  * @param[in] items
799  *   Pattern specification (list terminated by the END pattern item).
800  * @param[out] error
801  *   Perform verbose error reporting if not NULL.
802  * @param[in, out] parser
803  *   Internal parser structure.
804  *
805  * @return
806  *   0 on success, a negative errno value otherwise and rte_errno is set.
807  */
808 static int
809 priv_flow_convert_items_validate(struct priv *priv,
810 				 const struct rte_flow_item items[],
811 				 struct rte_flow_error *error,
812 				 struct mlx5_flow_parse *parser)
813 {
814 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
815 	unsigned int i;
816 
817 	(void)priv;
818 	/* Initialise the offsets to start after the Verbs attribute. */
819 	for (i = 0; i != hash_rxq_init_n; ++i)
820 		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
821 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
822 		const struct mlx5_flow_items *token = NULL;
823 		unsigned int n;
824 		int err;
825 
826 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
827 			continue;
828 		for (i = 0;
829 		     cur_item->items &&
830 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
831 		     ++i) {
832 			if (cur_item->items[i] == items->type) {
833 				token = &mlx5_flow_items[items->type];
834 				break;
835 			}
836 		}
837 		if (!token)
838 			goto exit_item_not_supported;
839 		cur_item = token;
840 		err = mlx5_flow_item_validate(items,
841 					      (const uint8_t *)cur_item->mask,
842 					      cur_item->mask_sz);
843 		if (err)
844 			goto exit_item_not_supported;
845 		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
846 			if (parser->inner) {
847 				rte_flow_error_set(error, ENOTSUP,
848 						   RTE_FLOW_ERROR_TYPE_ITEM,
849 						   items,
850 						   "cannot recognize multiple"
851 						   " VXLAN encapsulations");
852 				return -rte_errno;
853 			}
854 			parser->inner = IBV_FLOW_SPEC_INNER;
855 		}
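		/*
		 * Account for the specification size: drop and single-queue
		 * flows only use the generic L2 attribute, otherwise every
		 * hash Rx queue type gets its own attribute.
		 */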
856 		if (parser->drop || parser->queues_n == 1) {
857 			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
858 		} else {
859 			for (n = 0; n != hash_rxq_init_n; ++n)
860 				parser->queue[n].offset += cur_item->dst_sz;
861 		}
862 	}
863 	if (parser->mark) {
864 		for (i = 0; i != hash_rxq_init_n; ++i)
865 			parser->queue[i].offset +=
866 				sizeof(struct ibv_flow_spec_action_tag);
867 	}
868 	if (parser->count) {
869 		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
870 
871 		for (i = 0; i != hash_rxq_init_n; ++i)
872 			parser->queue[i].offset += size;
873 	}
874 	return 0;
875 exit_item_not_supported:
876 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
877 			   items, "item not supported");
878 	return -rte_errno;
879 }
880 
881 /**
882  * Allocate memory space to store Verbs flow attributes.
883  *
884  * @param priv
885  *   Pointer to private structure.
886  * @param[in] priority
887  *   Flow priority.
888  * @param[in] size
889  *   Number of bytes to allocate.
890  * @param[out] error
891  *   Perform verbose error reporting if not NULL.
892  *
893  * @return
894  *   A Verbs flow attribute on success, NULL otherwise.
895  */
896 static struct ibv_flow_attr*
897 priv_flow_convert_allocate(struct priv *priv,
898 			   unsigned int priority,
899 			   unsigned int size,
900 			   struct rte_flow_error *error)
901 {
902 	struct ibv_flow_attr *ibv_attr;
903 
904 	(void)priv;
905 	ibv_attr = rte_calloc(__func__, 1, size, 0);
906 	if (!ibv_attr) {
907 		rte_flow_error_set(error, ENOMEM,
908 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
909 				   NULL,
910 				   "cannot allocate verbs spec attributes.");
911 		return NULL;
912 	}
913 	ibv_attr->priority = priority;
914 	return ibv_attr;
915 }
916 
917 /**
918  * Finalise Verbs flow attributes.
919  *
920  * @param priv
921  *   Pointer to private structure.
922  * @param[in, out] parser
923  *   Internal parser structure.
924  */
925 static void
926 priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
927 {
928 	const unsigned int ipv4 =
929 		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
930 	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
931 	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
932 	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
933 	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
934 	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
935 	unsigned int i;
936 
937 	(void)priv;
938 	if (parser->layer == HASH_RXQ_ETH) {
939 		goto fill;
940 	} else {
941 		/*
942 		 * The generic L2 attribute becomes useless as the pattern
943 		 * defines more specific layers.
944 		 */
945 		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
946 		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
947 	}
948 	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
949 	for (i = ohmin; i != (ohmax + 1); ++i) {
950 		if (!parser->queue[i].ibv_attr)
951 			continue;
952 		rte_free(parser->queue[i].ibv_attr);
953 		parser->queue[i].ibv_attr = NULL;
954 	}
955 	/* Remove impossible flows according to the RSS configuration. */
956 	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
957 	    parser->rss_conf.rss_hf) {
958 		/* Remove any other flow. */
959 		for (i = hmin; i != (hmax + 1); ++i) {
960 			if ((i == parser->layer) ||
961 			     (!parser->queue[i].ibv_attr))
962 				continue;
963 			rte_free(parser->queue[i].ibv_attr);
964 			parser->queue[i].ibv_attr = NULL;
965 		}
966 	} else if (!parser->queue[ip].ibv_attr) {
967 		/* No RSS is possible with the current configuration. */
968 		parser->queues_n = 1;
969 		return;
970 	}
971 fill:
972 	/*
973 	 * Fill missing layers in verbs specifications, or compute the correct
974 	 * offset to allocate the memory space for the attributes and
975 	 * specifications.
976 	 */
977 	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
978 		union {
979 			struct ibv_flow_spec_ipv4_ext ipv4;
980 			struct ibv_flow_spec_ipv6 ipv6;
981 			struct ibv_flow_spec_tcp_udp udp_tcp;
982 		} specs;
983 		void *dst;
984 		uint16_t size;
985 
986 		if (i == parser->layer)
987 			continue;
988 		if (parser->layer == HASH_RXQ_ETH) {
989 			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
990 				size = sizeof(struct ibv_flow_spec_ipv4_ext);
991 				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
992 					.type = IBV_FLOW_SPEC_IPV4_EXT,
993 					.size = size,
994 				};
995 			} else {
996 				size = sizeof(struct ibv_flow_spec_ipv6);
997 				specs.ipv6 = (struct ibv_flow_spec_ipv6){
998 					.type = IBV_FLOW_SPEC_IPV6,
999 					.size = size,
1000 				};
1001 			}
1002 			if (parser->queue[i].ibv_attr) {
1003 				dst = (void *)((uintptr_t)
1004 					       parser->queue[i].ibv_attr +
1005 					       parser->queue[i].offset);
1006 				memcpy(dst, &specs, size);
1007 				++parser->queue[i].ibv_attr->num_of_specs;
1008 			}
1009 			parser->queue[i].offset += size;
1010 		}
1011 		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1012 		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1013 			size = sizeof(struct ibv_flow_spec_tcp_udp);
1014 			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1015 				.type = ((i == HASH_RXQ_UDPV4 ||
1016 					  i == HASH_RXQ_UDPV6) ?
1017 					 IBV_FLOW_SPEC_UDP :
1018 					 IBV_FLOW_SPEC_TCP),
1019 				.size = size,
1020 			};
1021 			if (parser->queue[i].ibv_attr) {
1022 				dst = (void *)((uintptr_t)
1023 					       parser->queue[i].ibv_attr +
1024 					       parser->queue[i].offset);
1025 				memcpy(dst, &specs, size);
1026 				++parser->queue[i].ibv_attr->num_of_specs;
1027 			}
1028 			parser->queue[i].offset += size;
1029 		}
1030 	}
1031 }
1032 
1033 /**
1034  * Validate and convert a flow supported by the NIC.
1035  *
1036  * @param priv
1037  *   Pointer to private structure.
1038  * @param[in] attr
1039  *   Flow rule attributes.
1040  * @param[in] pattern
1041  *   Pattern specification (list terminated by the END pattern item).
1042  * @param[in] actions
1043  *   Associated actions (list terminated by the END action).
1044  * @param[out] error
1045  *   Perform verbose error reporting if not NULL.
1046  * @param[in, out] parser
1047  *   Internal parser structure.
1048  *
1049  * @return
1050  *   0 on success, a negative errno value otherwise and rte_errno is set.
1051  */
1052 static int
1053 priv_flow_convert(struct priv *priv,
1054 		  const struct rte_flow_attr *attr,
1055 		  const struct rte_flow_item items[],
1056 		  const struct rte_flow_action actions[],
1057 		  struct rte_flow_error *error,
1058 		  struct mlx5_flow_parse *parser)
1059 {
1060 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1061 	unsigned int i;
1062 	int ret;
1063 
1064 	/* First step. Validate the attributes, items and actions. */
1065 	*parser = (struct mlx5_flow_parse){
1066 		.create = parser->create,
1067 		.layer = HASH_RXQ_ETH,
1068 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
1069 	};
1070 	ret = priv_flow_convert_attributes(priv, attr, error, parser);
1071 	if (ret)
1072 		return ret;
1073 	ret = priv_flow_convert_actions(priv, actions, error, parser);
1074 	if (ret)
1075 		return ret;
1076 	ret = priv_flow_convert_items_validate(priv, items, error, parser);
1077 	if (ret)
1078 		return ret;
1079 	priv_flow_convert_finalise(priv, parser);
1080 	/*
1081 	 * Second step.
1082 	 * Allocate the memory space to store verbs specifications.
1083 	 */
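	/* Drop and single-queue flows only need the generic L2 attribute. */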
1084 	if (parser->drop || parser->queues_n == 1) {
1085 		unsigned int priority =
1086 			attr->priority +
1087 			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1088 		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1089 
1090 		parser->queue[HASH_RXQ_ETH].ibv_attr =
1091 			priv_flow_convert_allocate(priv, priority,
1092 						   offset, error);
1093 		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1094 			return ENOMEM;
1095 		parser->queue[HASH_RXQ_ETH].offset =
1096 			sizeof(struct ibv_flow_attr);
1097 	} else {
1098 		for (i = 0; i != hash_rxq_init_n; ++i) {
1099 			unsigned int priority =
1100 				attr->priority +
1101 				hash_rxq_init[i].flow_priority;
1102 			unsigned int offset;
1103 
1104 			if (!(parser->rss_conf.rss_hf &
1105 			      hash_rxq_init[i].dpdk_rss_hf) &&
1106 			    (i != HASH_RXQ_ETH))
1107 				continue;
1108 			offset = parser->queue[i].offset;
1109 			parser->queue[i].ibv_attr =
1110 				priv_flow_convert_allocate(priv, priority,
1111 							   offset, error);
1112 			if (!parser->queue[i].ibv_attr)
1113 				goto exit_enomem;
1114 			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1115 		}
1116 	}
1117 	/* Third step. Conversion parse, fill the specifications. */
1118 	parser->inner = 0;
1119 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1120 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1121 			continue;
1122 		cur_item = &mlx5_flow_items[items->type];
1123 		ret = cur_item->convert(items,
1124 					(cur_item->default_mask ?
1125 					 cur_item->default_mask :
1126 					 cur_item->mask),
1127 					parser);
1128 		if (ret) {
1129 			rte_flow_error_set(error, ret,
1130 					   RTE_FLOW_ERROR_TYPE_ITEM,
1131 					   items, "item not supported");
1132 			goto exit_free;
1133 		}
1134 	}
1135 	if (parser->mark)
1136 		mlx5_flow_create_flag_mark(parser, parser->mark_id);
1137 	if (parser->count && parser->create) {
1138 		mlx5_flow_create_count(priv, parser);
1139 		if (!parser->cs)
1140 			goto exit_count_error;
1141 	}
1142 	/*
1143 	 * Last step. Complete the missing specifications to reach the RSS
1144 	 * configuration.
1145 	 */
1146 	if (parser->queues_n > 1) {
1147 		priv_flow_convert_finalise(priv, parser);
1148 	} else {
1149 		/*
1150 		 * The attribute was allocated with the Ethernet flow
1151 		 * priority; it needs to be adjusted to the priority of the
1152 		 * most specific layer found in the pattern.
1153 		 */
1154 		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
1155 			attr->priority +
1156 			hash_rxq_init[parser->layer].flow_priority;
1157 	}
1158 exit_free:
1159 	/* Only verification is expected, all resources should be released. */
1160 	if (!parser->create) {
1161 		for (i = 0; i != hash_rxq_init_n; ++i) {
1162 			if (parser->queue[i].ibv_attr) {
1163 				rte_free(parser->queue[i].ibv_attr);
1164 				parser->queue[i].ibv_attr = NULL;
1165 			}
1166 		}
1167 	}
1168 	return ret;
1169 exit_enomem:
1170 	for (i = 0; i != hash_rxq_init_n; ++i) {
1171 		if (parser->queue[i].ibv_attr) {
1172 			rte_free(parser->queue[i].ibv_attr);
1173 			parser->queue[i].ibv_attr = NULL;
1174 		}
1175 	}
1176 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1177 			   NULL, "cannot allocate verbs spec attributes.");
1178 	return ret;
1179 exit_count_error:
1180 	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1181 			   NULL, "cannot create counter.");
1182 	return rte_errno;
1183 }
1184 
1185 /**
1186  * Copy the created specification to each Verbs flow attribute.
1187  *
1188  * @param parser
1189  *   Internal parser structure.
1190  * @param src
1191  *   Created specification.
1192  * @param size
1193  *   Size in bytes of the specification to copy.
1194  */
1195 static void
1196 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1197 		      unsigned int size)
1198 {
1199 	unsigned int i;
1200 	void *dst;
1201 
1202 	for (i = 0; i != hash_rxq_init_n; ++i) {
1203 		if (!parser->queue[i].ibv_attr)
1204 			continue;
1205 		/* Specification must be the same L3 type or none. */
1206 		if (parser->layer == HASH_RXQ_ETH ||
1207 		    (hash_rxq_init[parser->layer].ip_version ==
1208 		     hash_rxq_init[i].ip_version) ||
1209 		    (hash_rxq_init[i].ip_version == 0)) {
1210 			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1211 					parser->queue[i].offset);
1212 			memcpy(dst, src, size);
1213 			++parser->queue[i].ibv_attr->num_of_specs;
1214 			parser->queue[i].offset += size;
1215 		}
1216 	}
1217 }
1218 
1219 /**
1220  * Convert Ethernet item to Verbs specification.
1221  *
1222  * @param item[in]
1223  *   Item specification.
1224  * @param default_mask[in]
1225  *   Default bit-masks to use when item->mask is not provided.
1226  * @param data[in, out]
1227  *   User structure.
1228  */
1229 static int
1230 mlx5_flow_create_eth(const struct rte_flow_item *item,
1231 		     const void *default_mask,
1232 		     void *data)
1233 {
1234 	const struct rte_flow_item_eth *spec = item->spec;
1235 	const struct rte_flow_item_eth *mask = item->mask;
1236 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1237 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1238 	struct ibv_flow_spec_eth eth = {
1239 		.type = parser->inner | IBV_FLOW_SPEC_ETH,
1240 		.size = eth_size,
1241 	};
1242 
1243 	/* Don't update layer for the inner pattern. */
1244 	if (!parser->inner)
1245 		parser->layer = HASH_RXQ_ETH;
1246 	if (spec) {
1247 		unsigned int i;
1248 
1249 		if (!mask)
1250 			mask = default_mask;
1251 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1252 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1253 		eth.val.ether_type = spec->type;
1254 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1255 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1256 		eth.mask.ether_type = mask->type;
1257 		/* Remove unwanted bits from values. */
1258 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1259 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1260 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
1261 		}
1262 		eth.val.ether_type &= eth.mask.ether_type;
1263 	}
1264 	mlx5_flow_create_copy(parser, &eth, eth_size);
1265 	return 0;
1266 }
1267 
1268 /**
1269  * Convert VLAN item to Verbs specification.
1270  *
1271  * @param item[in]
1272  *   Item specification.
1273  * @param default_mask[in]
1274  *   Default bit-masks to use when item->mask is not provided.
1275  * @param data[in, out]
1276  *   User structure.
1277  */
1278 static int
1279 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1280 		      const void *default_mask,
1281 		      void *data)
1282 {
1283 	const struct rte_flow_item_vlan *spec = item->spec;
1284 	const struct rte_flow_item_vlan *mask = item->mask;
1285 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1286 	struct ibv_flow_spec_eth *eth;
1287 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1288 
1289 	if (spec) {
1290 		unsigned int i;
1291 		if (!mask)
1292 			mask = default_mask;
1293 
1294 		for (i = 0; i != hash_rxq_init_n; ++i) {
1295 			if (!parser->queue[i].ibv_attr)
1296 				continue;
1297 
1298 			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1299 				       parser->queue[i].offset - eth_size);
1300 			eth->val.vlan_tag = spec->tci;
1301 			eth->mask.vlan_tag = mask->tci;
1302 			eth->val.vlan_tag &= eth->mask.vlan_tag;
1303 		}
1304 	}
1305 	return 0;
1306 }
1307 
1308 /**
1309  * Convert IPv4 item to Verbs specification.
1310  *
1311  * @param item[in]
1312  *   Item specification.
1313  * @param default_mask[in]
1314  *   Default bit-masks to use when item->mask is not provided.
1315  * @param data[in, out]
1316  *   User structure.
1317  */
1318 static int
1319 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1320 		      const void *default_mask,
1321 		      void *data)
1322 {
1323 	const struct rte_flow_item_ipv4 *spec = item->spec;
1324 	const struct rte_flow_item_ipv4 *mask = item->mask;
1325 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1326 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1327 	struct ibv_flow_spec_ipv4_ext ipv4 = {
1328 		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1329 		.size = ipv4_size,
1330 	};
1331 
1332 	/* Don't update layer for the inner pattern. */
1333 	if (!parser->inner)
1334 		parser->layer = HASH_RXQ_IPV4;
1335 	if (spec) {
1336 		if (!mask)
1337 			mask = default_mask;
1338 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1339 			.src_ip = spec->hdr.src_addr,
1340 			.dst_ip = spec->hdr.dst_addr,
1341 			.proto = spec->hdr.next_proto_id,
1342 			.tos = spec->hdr.type_of_service,
1343 		};
1344 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1345 			.src_ip = mask->hdr.src_addr,
1346 			.dst_ip = mask->hdr.dst_addr,
1347 			.proto = mask->hdr.next_proto_id,
1348 			.tos = mask->hdr.type_of_service,
1349 		};
1350 		/* Remove unwanted bits from values. */
1351 		ipv4.val.src_ip &= ipv4.mask.src_ip;
1352 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1353 		ipv4.val.proto &= ipv4.mask.proto;
1354 		ipv4.val.tos &= ipv4.mask.tos;
1355 	}
1356 	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1357 	return 0;
1358 }
1359 
1360 /**
1361  * Convert IPv6 item to Verbs specification.
1362  *
1363  * @param item[in]
1364  *   Item specification.
1365  * @param default_mask[in]
1366  *   Default bit-masks to use when item->mask is not provided.
1367  * @param data[in, out]
1368  *   User structure.
1369  */
1370 static int
1371 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1372 		      const void *default_mask,
1373 		      void *data)
1374 {
1375 	const struct rte_flow_item_ipv6 *spec = item->spec;
1376 	const struct rte_flow_item_ipv6 *mask = item->mask;
1377 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1378 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1379 	struct ibv_flow_spec_ipv6 ipv6 = {
1380 		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
1381 		.size = ipv6_size,
1382 	};
1383 
1384 	/* Don't update layer for the inner pattern. */
1385 	if (!parser->inner)
1386 		parser->layer = HASH_RXQ_IPV6;
1387 	if (spec) {
1388 		unsigned int i;
1389 
1390 		if (!mask)
1391 			mask = default_mask;
1392 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1393 		       RTE_DIM(ipv6.val.src_ip));
1394 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1395 		       RTE_DIM(ipv6.val.dst_ip));
1396 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1397 		       RTE_DIM(ipv6.mask.src_ip));
1398 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1399 		       RTE_DIM(ipv6.mask.dst_ip));
1400 		ipv6.mask.flow_label = mask->hdr.vtc_flow;
1401 		ipv6.mask.next_hdr = mask->hdr.proto;
1402 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
1403 		/* Remove unwanted bits from values. */
1404 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1405 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1406 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1407 		}
1408 		ipv6.val.flow_label &= ipv6.mask.flow_label;
1409 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1410 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1411 	}
1412 	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1413 	return 0;
1414 }
1415 
1416 /**
1417  * Convert UDP item to Verbs specification.
1418  *
1419  * @param item[in]
1420  *   Item specification.
1421  * @param default_mask[in]
1422  *   Default bit-masks to use when item->mask is not provided.
1423  * @param data[in, out]
1424  *   User structure.
1425  */
1426 static int
1427 mlx5_flow_create_udp(const struct rte_flow_item *item,
1428 		     const void *default_mask,
1429 		     void *data)
1430 {
1431 	const struct rte_flow_item_udp *spec = item->spec;
1432 	const struct rte_flow_item_udp *mask = item->mask;
1433 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1434 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1435 	struct ibv_flow_spec_tcp_udp udp = {
1436 		.type = parser->inner | IBV_FLOW_SPEC_UDP,
1437 		.size = udp_size,
1438 	};
1439 
1440 	/* Don't update layer for the inner pattern. */
1441 	if (!parser->inner) {
1442 		if (parser->layer == HASH_RXQ_IPV4)
1443 			parser->layer = HASH_RXQ_UDPV4;
1444 		else
1445 			parser->layer = HASH_RXQ_UDPV6;
1446 	}
1447 	if (spec) {
1448 		if (!mask)
1449 			mask = default_mask;
1450 		udp.val.dst_port = spec->hdr.dst_port;
1451 		udp.val.src_port = spec->hdr.src_port;
1452 		udp.mask.dst_port = mask->hdr.dst_port;
1453 		udp.mask.src_port = mask->hdr.src_port;
1454 		/* Remove unwanted bits from values. */
1455 		udp.val.src_port &= udp.mask.src_port;
1456 		udp.val.dst_port &= udp.mask.dst_port;
1457 	}
1458 	mlx5_flow_create_copy(parser, &udp, udp_size);
1459 	return 0;
1460 }
1461 
1462 /**
1463  * Convert TCP item to Verbs specification.
1464  *
1465  * @param item[in]
1466  *   Item specification.
1467  * @param default_mask[in]
1468  *   Default bit-masks to use when item->mask is not provided.
1469  * @param data[in, out]
1470  *   User structure.
1471  */
1472 static int
1473 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1474 		     const void *default_mask,
1475 		     void *data)
1476 {
1477 	const struct rte_flow_item_tcp *spec = item->spec;
1478 	const struct rte_flow_item_tcp *mask = item->mask;
1479 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1480 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1481 	struct ibv_flow_spec_tcp_udp tcp = {
1482 		.type = parser->inner | IBV_FLOW_SPEC_TCP,
1483 		.size = tcp_size,
1484 	};
1485 
1486 	/* Don't update layer for the inner pattern. */
1487 	if (!parser->inner) {
1488 		if (parser->layer == HASH_RXQ_IPV4)
1489 			parser->layer = HASH_RXQ_TCPV4;
1490 		else
1491 			parser->layer = HASH_RXQ_TCPV6;
1492 	}
1493 	if (spec) {
1494 		if (!mask)
1495 			mask = default_mask;
1496 		tcp.val.dst_port = spec->hdr.dst_port;
1497 		tcp.val.src_port = spec->hdr.src_port;
1498 		tcp.mask.dst_port = mask->hdr.dst_port;
1499 		tcp.mask.src_port = mask->hdr.src_port;
1500 		/* Remove unwanted bits from values. */
1501 		tcp.val.src_port &= tcp.mask.src_port;
1502 		tcp.val.dst_port &= tcp.mask.dst_port;
1503 	}
1504 	mlx5_flow_create_copy(parser, &tcp, tcp_size);
1505 	return 0;
1506 }
1507 
1508 /**
1509  * Convert VXLAN item to Verbs specification.
1510  *
1511  * @param item[in]
1512  *   Item specification.
1513  * @param default_mask[in]
1514  *   Default bit-masks to use when item->mask is not provided.
1515  * @param data[in, out]
1516  *   User structure.
1517  */
1518 static int
1519 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1520 		       const void *default_mask,
1521 		       void *data)
1522 {
1523 	const struct rte_flow_item_vxlan *spec = item->spec;
1524 	const struct rte_flow_item_vxlan *mask = item->mask;
1525 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1526 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1527 	struct ibv_flow_spec_tunnel vxlan = {
1528 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1529 		.size = size,
1530 	};
1531 	union vni {
1532 		uint32_t vlan_id;
1533 		uint8_t vni[4];
1534 	} id;
1535 
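	/*
	 * Build the 32-bit tunnel id from the 24-bit VNI: byte 0 is cleared
	 * and the VNI is copied into bytes 1-3.
	 */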
1536 	id.vni[0] = 0;
1537 	parser->inner = IBV_FLOW_SPEC_INNER;
1538 	if (spec) {
1539 		if (!mask)
1540 			mask = default_mask;
1541 		memcpy(&id.vni[1], spec->vni, 3);
1542 		vxlan.val.tunnel_id = id.vlan_id;
1543 		memcpy(&id.vni[1], mask->vni, 3);
1544 		vxlan.mask.tunnel_id = id.vlan_id;
1545 		/* Remove unwanted bits from values. */
1546 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1547 	}
1548 	/*
1549 	 * A tunnel id of 0 is equivalent to not adding a VXLAN layer: if it
1550 	 * is the only layer defined in the Verbs specification it is
1551 	 * interpreted as a wildcard and all packets will match this rule; if
1552 	 * it follows a full stack (e.g. eth / ipv4 / udp), all packets
1553 	 * matching the preceding layers will also match this rule.
1554 	 * To avoid such a situation, VNI 0 is currently refused.
1555 	 */
1556 	if (!vxlan.val.tunnel_id)
1557 		return EINVAL;
1558 	mlx5_flow_create_copy(parser, &vxlan, size);
1559 	return 0;
1560 }
1561 
1562 /**
1563  * Convert mark/flag action to Verbs specification.
1564  *
1565  * @param parser
1566  *   Internal parser structure.
1567  * @param mark_id
1568  *   Mark identifier.
1569  */
1570 static int
1571 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1572 {
1573 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1574 	struct ibv_flow_spec_action_tag tag = {
1575 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1576 		.size = size,
1577 		.tag_id = mlx5_flow_mark_set(mark_id),
1578 	};
1579 
1580 	assert(parser->mark);
1581 	mlx5_flow_create_copy(parser, &tag, size);
1582 	return 0;
1583 }
1584 
1585 /**
1586  * Convert count action to Verbs specification.
1587  *
1588  * @param priv
1589  *   Pointer to private structure.
1590  * @param parser
1591  *   Pointer to MLX5 flow parser structure.
1592  *
1593  * @return
1594  *   0 on success, errno value on failure.
1595  */
1596 static int
1597 mlx5_flow_create_count(struct priv *priv __rte_unused,
1598 		       struct mlx5_flow_parse *parser __rte_unused)
1599 {
1600 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1601 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1602 	struct ibv_counter_set_init_attr init_attr = {0};
1603 	struct ibv_flow_spec_counter_action counter = {
1604 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
1605 		.size = size,
1606 		.counter_set_handle = 0,
1607 	};
1608 
1609 	init_attr.counter_set_id = 0;
1610 	parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
1611 	if (!parser->cs)
1612 		return EINVAL;
1613 	counter.counter_set_handle = parser->cs->handle;
1614 	mlx5_flow_create_copy(parser, &counter, size);
1615 #endif
1616 	return 0;
1617 }
1618 
1619 /**
1620  * Complete flow rule creation with a drop queue.
1621  *
1622  * @param priv
1623  *   Pointer to private structure.
1624  * @param parser
1625  *   Internal parser structure.
1626  * @param flow
1627  *   Pointer to the rte_flow.
1628  * @param[out] error
1629  *   Perform verbose error reporting if not NULL.
1630  *
1631  * @return
1632  *   0 on success, errno value on failure.
1633  */
1634 static int
1635 priv_flow_create_action_queue_drop(struct priv *priv,
1636 				   struct mlx5_flow_parse *parser,
1637 				   struct rte_flow *flow,
1638 				   struct rte_flow_error *error)
1639 {
1640 	struct ibv_flow_spec_action_drop *drop;
1641 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1642 	int err = 0;
1643 
1644 	assert(priv->pd);
1645 	assert(priv->ctx);
1646 	flow->drop = 1;
1647 	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1648 			parser->queue[HASH_RXQ_ETH].offset);
1649 	*drop = (struct ibv_flow_spec_action_drop){
1650 			.type = IBV_FLOW_SPEC_ACTION_DROP,
1651 			.size = size,
1652 	};
1653 	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1654 	parser->queue[HASH_RXQ_ETH].offset += size;
1655 	flow->frxq[HASH_RXQ_ETH].ibv_attr =
1656 		parser->queue[HASH_RXQ_ETH].ibv_attr;
1657 	if (parser->count)
1658 		flow->cs = parser->cs;
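	/*
	 * When the port is not started yet only the attribute is kept, the
	 * Verbs flow itself is created once the device is started.
	 */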
1659 	if (!priv->dev->data->dev_started)
1660 		return 0;
1661 	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1662 	flow->frxq[HASH_RXQ_ETH].ibv_flow =
1663 		ibv_create_flow(priv->flow_drop_queue->qp,
1664 				flow->frxq[HASH_RXQ_ETH].ibv_attr);
1665 	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1666 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1667 				   NULL, "flow rule creation failure");
1668 		err = ENOMEM;
1669 		goto error;
1670 	}
1671 	return 0;
1672 error:
1673 	assert(flow);
1674 	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1675 		claim_zero(ibv_destroy_flow(flow->frxq[HASH_RXQ_ETH].ibv_flow));
1676 		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1677 	}
1678 	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1679 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1680 		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1681 	}
1682 	if (flow->cs) {
1683 		claim_zero(ibv_destroy_counter_set(flow->cs));
1684 		flow->cs = NULL;
1685 		parser->cs = NULL;
1686 	}
1687 	return err;
1688 }
1689 
1690 /**
1691  * Create hash Rx queues when RSS is enabled.
1692  *
1693  * @param priv
1694  *   Pointer to private structure.
1695  * @param parser
1696  *   Internal parser structure.
1697  * @param flow
1698  *   Pointer to the rte_flow.
1699  * @param[out] error
1700  *   Perform verbose error reporting if not NULL.
1701  *
1702  * @return
1703  *   0 on success, an errno value otherwise and rte_errno is set.
1704  */
1705 static int
1706 priv_flow_create_action_queue_rss(struct priv *priv,
1707 				  struct mlx5_flow_parse *parser,
1708 				  struct rte_flow *flow,
1709 				  struct rte_flow_error *error)
1710 {
1711 	unsigned int i;
1712 
1713 	for (i = 0; i != hash_rxq_init_n; ++i) {
1714 		uint64_t hash_fields;
1715 
1716 		if (!parser->queue[i].ibv_attr)
1717 			continue;
1718 		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1719 		parser->queue[i].ibv_attr = NULL;
1720 		hash_fields = hash_rxq_init[i].hash_fields;
1721 		if (!priv->dev->data->dev_started)
1722 			continue;
1723 		flow->frxq[i].hrxq =
1724 			mlx5_priv_hrxq_get(priv,
1725 					   parser->rss_conf.rss_key,
1726 					   parser->rss_conf.rss_key_len,
1727 					   hash_fields,
1728 					   parser->queues,
1729 					   parser->queues_n);
1730 		if (flow->frxq[i].hrxq)
1731 			continue;
1732 		flow->frxq[i].hrxq =
1733 			mlx5_priv_hrxq_new(priv,
1734 					   parser->rss_conf.rss_key,
1735 					   parser->rss_conf.rss_key_len,
1736 					   hash_fields,
1737 					   parser->queues,
1738 					   parser->queues_n);
1739 		if (!flow->frxq[i].hrxq) {
1740 			rte_flow_error_set(error, ENOMEM,
1741 					   RTE_FLOW_ERROR_TYPE_HANDLE,
1742 					   NULL, "cannot create hash rxq");
1743 			return ENOMEM;
1744 		}
1745 	}
1746 	return 0;
1747 }
1748 
1749 /**
1750  * Complete flow rule creation.
1751  *
1752  * @param priv
1753  *   Pointer to private structure.
1754  * @param parser
1755  *   Internal parser structure.
1756  * @param flow
1757  *   Pointer to the rte_flow.
1758  * @param[out] error
1759  *   Perform verbose error reporting if not NULL.
1760  *
1761  * @return
1762  *   0 on success, an errno value otherwise and rte_errno is set.
1763  */
1764 static int
1765 priv_flow_create_action_queue(struct priv *priv,
1766 			      struct mlx5_flow_parse *parser,
1767 			      struct rte_flow *flow,
1768 			      struct rte_flow_error *error)
1769 {
1770 	int err = 0;
1771 	unsigned int i;
1772 
1773 	assert(priv->pd);
1774 	assert(priv->ctx);
1775 	assert(!parser->drop);
1776 	err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
1777 	if (err)
1778 		goto error;
1779 	if (parser->count)
1780 		flow->cs = parser->cs;
1781 	if (!priv->dev->data->dev_started)
1782 		return 0;
1783 	for (i = 0; i != hash_rxq_init_n; ++i) {
1784 		if (!flow->frxq[i].hrxq)
1785 			continue;
1786 		flow->frxq[i].ibv_flow =
1787 			ibv_create_flow(flow->frxq[i].hrxq->qp,
1788 					flow->frxq[i].ibv_attr);
1789 		if (!flow->frxq[i].ibv_flow) {
1790 			rte_flow_error_set(error, ENOMEM,
1791 					   RTE_FLOW_ERROR_TYPE_HANDLE,
1792 					   NULL, "flow rule creation failure");
1793 			err = ENOMEM;
1794 			goto error;
1795 		}
1796 		DEBUG("%p type %d QP %p ibv_flow %p",
1797 		      (void *)flow, i,
1798 		      (void *)flow->frxq[i].hrxq,
1799 		      (void *)flow->frxq[i].ibv_flow);
1800 	}
1801 	for (i = 0; i != parser->queues_n; ++i) {
1802 		struct mlx5_rxq_data *q =
1803 			(*priv->rxqs)[parser->queues[i]];
1804 
1805 		q->mark |= parser->mark;
1806 	}
1807 	return 0;
1808 error:
1809 	assert(flow);
1810 	for (i = 0; i != hash_rxq_init_n; ++i) {
1811 		if (flow->frxq[i].ibv_flow) {
1812 			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1813 
1814 			claim_zero(ibv_destroy_flow(ibv_flow));
1815 		}
1816 		if (flow->frxq[i].hrxq)
1817 			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
1818 		if (flow->frxq[i].ibv_attr)
1819 			rte_free(flow->frxq[i].ibv_attr);
1820 	}
1821 	if (flow->cs) {
1822 		claim_zero(ibv_destroy_counter_set(flow->cs));
1823 		flow->cs = NULL;
1824 		parser->cs = NULL;
1825 	}
1826 	return err;
1827 }
1828 
1829 /**
1830  * Convert a flow.
1831  *
1832  * @param priv
1833  *   Pointer to private structure.
1834  * @param list
1835  *   Pointer to a TAILQ flow list.
1836  * @param[in] attr
1837  *   Flow rule attributes.
1838  * @param[in] pattern
1839  *   Pattern specification (list terminated by the END pattern item).
1840  * @param[in] actions
1841  *   Associated actions (list terminated by the END action).
1842  * @param[out] error
1843  *   Perform verbose error reporting if not NULL.
1844  *
1845  * @return
1846  *   A flow on success, NULL otherwise.
1847  */
1848 static struct rte_flow *
1849 priv_flow_create(struct priv *priv,
1850 		 struct mlx5_flows *list,
1851 		 const struct rte_flow_attr *attr,
1852 		 const struct rte_flow_item items[],
1853 		 const struct rte_flow_action actions[],
1854 		 struct rte_flow_error *error)
1855 {
1856 	struct mlx5_flow_parse parser = { .create = 1, };
1857 	struct rte_flow *flow = NULL;
1858 	unsigned int i;
1859 	int err;
1860 
1861 	err = priv_flow_convert(priv, attr, items, actions, error, &parser);
1862 	if (err)
1863 		goto exit;
1864 	flow = rte_calloc(__func__, 1,
1865 			  sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1866 			  0);
1867 	if (!flow) {
1868 		rte_flow_error_set(error, ENOMEM,
1869 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1870 				   NULL,
1871 				   "cannot allocate flow memory");
1872 		return NULL;
1873 	}
1874 	/* Copy queues configuration. */
1875 	flow->queues = (uint16_t (*)[])(flow + 1);
1876 	memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1877 	flow->queues_n = parser.queues_n;
1878 	flow->mark = parser.mark;
1879 	/* Copy RSS configuration. */
1880 	flow->rss_conf = parser.rss_conf;
1881 	flow->rss_conf.rss_key = flow->rss_key;
1882 	memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1883 	/* Finalize the flow. */
1884 	if (parser.drop)
1885 		err = priv_flow_create_action_queue_drop(priv, &parser, flow,
1886 							 error);
1887 	else
1888 		err = priv_flow_create_action_queue(priv, &parser, flow, error);
1889 	if (err)
1890 		goto exit;
1891 	TAILQ_INSERT_TAIL(list, flow, next);
1892 	DEBUG("Flow created %p", (void *)flow);
1893 	return flow;
1894 exit:
1895 	for (i = 0; i != hash_rxq_init_n; ++i) {
1896 		if (parser.queue[i].ibv_attr)
1897 			rte_free(parser.queue[i].ibv_attr);
1898 	}
1899 	rte_free(flow);
1900 	return NULL;
1901 }
1902 
1903 /**
1904  * Validate a flow supported by the NIC.
1905  *
1906  * @see rte_flow_validate()
1907  * @see rte_flow_ops
1908  */
1909 int
1910 mlx5_flow_validate(struct rte_eth_dev *dev,
1911 		   const struct rte_flow_attr *attr,
1912 		   const struct rte_flow_item items[],
1913 		   const struct rte_flow_action actions[],
1914 		   struct rte_flow_error *error)
1915 {
1916 	struct priv *priv = dev->data->dev_private;
1917 	int ret;
1918 	struct mlx5_flow_parse parser = { .create = 0, };
1919 
1920 	priv_lock(priv);
1921 	ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
1922 	priv_unlock(priv);
1923 	return ret;
1924 }
1925 
1926 /**
1927  * Create a flow.
1928  *
1929  * @see rte_flow_create()
1930  * @see rte_flow_ops
1931  */
1932 struct rte_flow *
1933 mlx5_flow_create(struct rte_eth_dev *dev,
1934 		 const struct rte_flow_attr *attr,
1935 		 const struct rte_flow_item items[],
1936 		 const struct rte_flow_action actions[],
1937 		 struct rte_flow_error *error)
1938 {
1939 	struct priv *priv = dev->data->dev_private;
1940 	struct rte_flow *flow;
1941 
1942 	priv_lock(priv);
1943 	flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
1944 				error);
1945 	priv_unlock(priv);
1946 	return flow;
1947 }
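
/*
 * Illustrative sketch (not part of the driver): a complete call that reaches
 * mlx5_flow_create() through the generic rte_flow API, steering all ingress
 * Ethernet traffic to Rx queue 0:
 *
 * @code
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error error;
 *	struct rte_flow *flow;
 *
 *	flow = rte_flow_create(port_id, &attr, pattern, actions, &error);
 *	if (!flow)
 *		printf("flow creation failed: %s\n",
 *		       error.message ? error.message : "(no message)");
 * @endcode
 *
 * port_id is an assumed application variable.
 */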
1948 
1949 /**
1950  * Destroy a flow.
1951  *
1952  * @param priv
1953  *   Pointer to private structure.
1954  * @param list
1955  *   Pointer to a TAILQ flow list.
1956  * @param[in] flow
1957  *   Flow to destroy.
1958  */
1959 static void
1960 priv_flow_destroy(struct priv *priv,
1961 		  struct mlx5_flows *list,
1962 		  struct rte_flow *flow)
1963 {
1964 	unsigned int i;
1965 
1966 	if (flow->drop || !flow->mark)
1967 		goto free;
1968 	for (i = 0; i != flow->queues_n; ++i) {
1969 		struct rte_flow *tmp;
1970 		int mark = 0;
1971 
1972 		/*
1973 		 * To remove the mark from the queue, the queue must not be
1974 		 * present in any other marked flow (RSS or not).
1975 		 */
1976 		TAILQ_FOREACH(tmp, list, next) {
1977 			unsigned int j;
1978 			uint16_t *tqs = NULL;
1979 			uint16_t tq_n = 0;
1980 
1981 			if (!tmp->mark)
1982 				continue;
1983 			for (j = 0; j != hash_rxq_init_n; ++j) {
1984 				if (!tmp->frxq[j].hrxq)
1985 					continue;
1986 				tqs = tmp->frxq[j].hrxq->ind_table->queues;
1987 				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
1988 			}
1989 			if (!tq_n)
1990 				continue;
1991 			for (j = 0; (j != tq_n) && !mark; j++)
1992 				if (tqs[j] == (*flow->queues)[i])
1993 					mark = 1;
1994 		}
1995 		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
1996 	}
1997 free:
1998 	if (flow->drop) {
1999 		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2000 			claim_zero(ibv_destroy_flow
2001 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2002 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2003 	} else {
2004 		for (i = 0; i != hash_rxq_init_n; ++i) {
2005 			struct mlx5_flow *frxq = &flow->frxq[i];
2006 
2007 			if (frxq->ibv_flow)
2008 				claim_zero(ibv_destroy_flow(frxq->ibv_flow));
2009 			if (frxq->hrxq)
2010 				mlx5_priv_hrxq_release(priv, frxq->hrxq);
2011 			if (frxq->ibv_attr)
2012 				rte_free(frxq->ibv_attr);
2013 		}
2014 	}
2015 	if (flow->cs) {
2016 		claim_zero(ibv_destroy_counter_set(flow->cs));
2017 		flow->cs = NULL;
2018 	}
2019 	TAILQ_REMOVE(list, flow, next);
2020 	DEBUG("Flow destroyed %p", (void *)flow);
2021 	rte_free(flow);
2022 }
2023 
2024 /**
2025  * Destroy all flows.
2026  *
2027  * @param priv
2028  *   Pointer to private structure.
2029  * @param list
2030  *   Pointer to a TAILQ flow list.
2031  */
2032 void
2033 priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
2034 {
2035 	while (!TAILQ_EMPTY(list)) {
2036 		struct rte_flow *flow;
2037 
2038 		flow = TAILQ_FIRST(list);
2039 		priv_flow_destroy(priv, list, flow);
2040 	}
2041 }
2042 
2043 /**
2044  * Create drop queue.
2045  *
2046  * @param priv
2047  *   Pointer to private structure.
2048  *
2049  * @return
2050  *   0 on success, -1 on error.
2051  */
2052 int
2053 priv_flow_create_drop_queue(struct priv *priv)
2054 {
2055 	struct mlx5_hrxq_drop *fdq = NULL;
2056 
2057 	assert(priv->pd);
2058 	assert(priv->ctx);
2059 	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2060 	if (!fdq) {
2061 		WARN("cannot allocate memory for drop queue");
2062 		goto error;
2063 	}
2064 	fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
2065 	if (!fdq->cq) {
2066 		WARN("cannot allocate CQ for drop queue");
2067 		goto error;
2068 	}
2069 	fdq->wq = ibv_create_wq(priv->ctx,
2070 			&(struct ibv_wq_init_attr){
2071 			.wq_type = IBV_WQT_RQ,
2072 			.max_wr = 1,
2073 			.max_sge = 1,
2074 			.pd = priv->pd,
2075 			.cq = fdq->cq,
2076 			});
2077 	if (!fdq->wq) {
2078 		WARN("cannot allocate WQ for drop queue");
2079 		goto error;
2080 	}
2081 	fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
2082 			&(struct ibv_rwq_ind_table_init_attr){
2083 			.log_ind_tbl_size = 0,
2084 			.ind_tbl = &fdq->wq,
2085 			.comp_mask = 0,
2086 			});
2087 	if (!fdq->ind_table) {
2088 		WARN("cannot allocate indirection table for drop queue");
2089 		goto error;
2090 	}
2091 	fdq->qp = ibv_create_qp_ex(priv->ctx,
2092 		&(struct ibv_qp_init_attr_ex){
2093 			.qp_type = IBV_QPT_RAW_PACKET,
2094 			.comp_mask =
2095 				IBV_QP_INIT_ATTR_PD |
2096 				IBV_QP_INIT_ATTR_IND_TABLE |
2097 				IBV_QP_INIT_ATTR_RX_HASH,
2098 			.rx_hash_conf = (struct ibv_rx_hash_conf){
2099 				.rx_hash_function =
2100 					IBV_RX_HASH_FUNC_TOEPLITZ,
2101 				.rx_hash_key_len = rss_hash_default_key_len,
2102 				.rx_hash_key = rss_hash_default_key,
2103 				.rx_hash_fields_mask = 0,
2104 				},
2105 			.rwq_ind_tbl = fdq->ind_table,
2106 			.pd = priv->pd
2107 		});
2108 	if (!fdq->qp) {
2109 		WARN("cannot allocate QP for drop queue");
2110 		goto error;
2111 	}
2112 	priv->flow_drop_queue = fdq;
2113 	return 0;
2114 error:
2115 	if (fdq && fdq->qp)
2116 		claim_zero(ibv_destroy_qp(fdq->qp));
2117 	if (fdq && fdq->ind_table)
2118 		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2119 	if (fdq && fdq->wq)
2120 		claim_zero(ibv_destroy_wq(fdq->wq));
2121 	if (fdq && fdq->cq)
2122 		claim_zero(ibv_destroy_cq(fdq->cq));
2123 	if (fdq)
2124 		rte_free(fdq);
2125 	priv->flow_drop_queue = NULL;
2126 	return -1;
2127 }
2128 
2129 /**
2130  * Delete drop queue.
2131  *
2132  * @param priv
2133  *   Pointer to private structure.
2134  */
2135 void
2136 priv_flow_delete_drop_queue(struct priv *priv)
2137 {
2138 	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2139 
2140 	if (!fdq)
2141 		return;
2142 	if (fdq->qp)
2143 		claim_zero(ibv_destroy_qp(fdq->qp));
2144 	if (fdq->ind_table)
2145 		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2146 	if (fdq->wq)
2147 		claim_zero(ibv_destroy_wq(fdq->wq));
2148 	if (fdq->cq)
2149 		claim_zero(ibv_destroy_cq(fdq->cq));
2150 	rte_free(fdq);
2151 	priv->flow_drop_queue = NULL;
2152 }
2153 
2154 /**
2155  * Remove all flows.
2156  *
2157  * @param priv
2158  *   Pointer to private structure.
2159  * @param list
2160  *   Pointer to a TAILQ flow list.
2161  */
2162 void
2163 priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
2164 {
2165 	struct rte_flow *flow;
2166 
2167 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2168 		unsigned int i;
2169 
2170 		if (flow->drop) {
2171 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2172 				continue;
2173 			claim_zero(ibv_destroy_flow
2174 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2175 			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2176 			/* Next flow. */
2177 			continue;
2178 		}
2179 		if (flow->mark) {
2180 			struct mlx5_ind_table_ibv *ind_tbl = NULL;
2181 
2182 			for (i = 0; i != hash_rxq_init_n; ++i) {
2183 				if (!flow->frxq[i].hrxq)
2184 					continue;
2185 				ind_tbl = flow->frxq[i].hrxq->ind_table;
2186 			}
2187 			assert(ind_tbl);
2188 			for (i = 0; i != ind_tbl->queues_n; ++i)
2189 				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2190 		}
2191 		for (i = 0; i != hash_rxq_init_n; ++i) {
2192 			if (!flow->frxq[i].ibv_flow)
2193 				continue;
2194 			claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
2195 			flow->frxq[i].ibv_flow = NULL;
2196 			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
2197 			flow->frxq[i].hrxq = NULL;
2198 		}
2199 		DEBUG("Flow %p removed", (void *)flow);
2200 	}
2201 }
2202 
2203 /**
2204  * Add all flows.
2205  *
2206  * @param priv
2207  *   Pointer to private structure.
2208  * @param list
2209  *   Pointer to a TAILQ flow list.
2210  *
2211  * @return
2212  *   0 on success, a errno value otherwise and rte_errno is set.
2213  */
2214 int
2215 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
2216 {
2217 	struct rte_flow *flow;
2218 
2219 	TAILQ_FOREACH(flow, list, next) {
2220 		unsigned int i;
2221 
2222 		if (flow->drop) {
2223 			flow->frxq[HASH_RXQ_ETH].ibv_flow =
2224 				ibv_create_flow
2225 				(priv->flow_drop_queue->qp,
2226 				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2227 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2228 				DEBUG("Flow %p cannot be applied",
2229 				      (void *)flow);
2230 				rte_errno = EINVAL;
2231 				return rte_errno;
2232 			}
2233 			DEBUG("Flow %p applied", (void *)flow);
2234 			/* Next flow. */
2235 			continue;
2236 		}
2237 		for (i = 0; i != hash_rxq_init_n; ++i) {
2238 			if (!flow->frxq[i].ibv_attr)
2239 				continue;
2240 			flow->frxq[i].hrxq =
2241 				mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
2242 						   flow->rss_conf.rss_key_len,
2243 						   hash_rxq_init[i].hash_fields,
2244 						   (*flow->queues),
2245 						   flow->queues_n);
2246 			if (flow->frxq[i].hrxq)
2247 				goto flow_create;
2248 			flow->frxq[i].hrxq =
2249 				mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
2250 						   flow->rss_conf.rss_key_len,
2251 						   hash_rxq_init[i].hash_fields,
2252 						   (*flow->queues),
2253 						   flow->queues_n);
2254 			if (!flow->frxq[i].hrxq) {
2255 				DEBUG("Flow %p cannot be applied",
2256 				      (void *)flow);
2257 				rte_errno = EINVAL;
2258 				return rte_errno;
2259 			}
2260 flow_create:
2261 			flow->frxq[i].ibv_flow =
2262 				ibv_create_flow(flow->frxq[i].hrxq->qp,
2263 						flow->frxq[i].ibv_attr);
2264 			if (!flow->frxq[i].ibv_flow) {
2265 				DEBUG("Flow %p cannot be applied",
2266 				      (void *)flow);
2267 				rte_errno = EINVAL;
2268 				return rte_errno;
2269 			}
2270 			DEBUG("Flow %p applied", (void *)flow);
2271 		}
2272 		if (!flow->mark)
2273 			continue;
2274 		for (i = 0; i != flow->queues_n; ++i)
2275 			(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2276 	}
2277 	return 0;
2278 }
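
/*
 * Usage sketch (assumption about the start/stop path, not verbatim driver
 * code): priv_flow_stop() and priv_flow_start() are expected to be called in
 * pairs around a port restart, on the same flow list:
 *
 * @code
 *	priv_flow_stop(priv, &priv->flows);
 *	... (device reconfiguration) ...
 *	if (priv_flow_start(priv, &priv->flows))
 *		ERROR("cannot re-apply flow rules");
 * @endcode
 */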
2279 
2280 /**
2281  * Verify the flow list is empty.
2282  *
2283  * @param priv
2284  *   Pointer to private structure.
2285  *
2286  * @return the number of flows not released.
2287  */
2288 int
2289 priv_flow_verify(struct priv *priv)
2290 {
2291 	struct rte_flow *flow;
2292 	int ret = 0;
2293 
2294 	TAILQ_FOREACH(flow, &priv->flows, next) {
2295 		DEBUG("%p: flow %p still referenced", (void *)priv,
2296 		      (void *)flow);
2297 		++ret;
2298 	}
2299 	return ret;
2300 }
2301 
2302 /**
2303  * Enable a control flow configured from the control plane.
2304  *
2305  * @param dev
2306  *   Pointer to Ethernet device.
2307  * @param eth_spec
2308  *   An Ethernet flow spec to apply.
2309  * @param eth_mask
2310  *   An Ethernet flow mask to apply.
2311  * @param vlan_spec
2312  *   A VLAN flow spec to apply.
2313  * @param vlan_mask
2314  *   A VLAN flow mask to apply.
2315  *
2316  * @return
2317  *   0 on success, an errno value otherwise.
2318  */
2319 int
2320 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2321 		    struct rte_flow_item_eth *eth_spec,
2322 		    struct rte_flow_item_eth *eth_mask,
2323 		    struct rte_flow_item_vlan *vlan_spec,
2324 		    struct rte_flow_item_vlan *vlan_mask)
2325 {
2326 	struct priv *priv = dev->data->dev_private;
2327 	const struct rte_flow_attr attr = {
2328 		.ingress = 1,
2329 		.priority = MLX5_CTRL_FLOW_PRIORITY,
2330 	};
2331 	struct rte_flow_item items[] = {
2332 		{
2333 			.type = RTE_FLOW_ITEM_TYPE_ETH,
2334 			.spec = eth_spec,
2335 			.last = NULL,
2336 			.mask = eth_mask,
2337 		},
2338 		{
2339 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2340 				RTE_FLOW_ITEM_TYPE_END,
2341 			.spec = vlan_spec,
2342 			.last = NULL,
2343 			.mask = vlan_mask,
2344 		},
2345 		{
2346 			.type = RTE_FLOW_ITEM_TYPE_END,
2347 		},
2348 	};
2349 	struct rte_flow_action actions[] = {
2350 		{
2351 			.type = RTE_FLOW_ACTION_TYPE_RSS,
2352 		},
2353 		{
2354 			.type = RTE_FLOW_ACTION_TYPE_END,
2355 		},
2356 	};
2357 	struct rte_flow *flow;
2358 	struct rte_flow_error error;
2359 	unsigned int i;
2360 	union {
2361 		struct rte_flow_action_rss rss;
2362 		struct {
2363 			const struct rte_eth_rss_conf *rss_conf;
2364 			uint16_t num;
2365 			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2366 		} local;
2367 	} action_rss;
2368 
2369 	if (!priv->reta_idx_n)
2370 		return EINVAL;
2371 	for (i = 0; i != priv->reta_idx_n; ++i)
2372 		action_rss.local.queue[i] = (*priv->reta_idx)[i];
2373 	action_rss.local.rss_conf = &priv->rss_conf;
2374 	action_rss.local.num = priv->reta_idx_n;
2375 	actions[0].conf = (const void *)&action_rss.rss;
2376 	flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
2377 				&error);
2378 	if (!flow)
2379 		return rte_errno;
2380 	return 0;
2381 }
2382 
2383 /**
2384  * Enable a control flow configured from the control plane.
2385  *
2386  * @param dev
2387  *   Pointer to Ethernet device.
2388  * @param eth_spec
2389  *   An Ethernet flow spec to apply.
2390  * @param eth_mask
2391  *   An Ethernet flow mask to apply.
2392  *
2393  * @return
2394  *   0 on success, an errno value otherwise.
2395  */
2396 int
2397 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2398 	       struct rte_flow_item_eth *eth_spec,
2399 	       struct rte_flow_item_eth *eth_mask)
2400 {
2401 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2402 }
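
/*
 * Usage sketch (assumption, not verbatim driver code): control flows are
 * installed with identical spec and mask, e.g. to keep receiving broadcast
 * frames:
 *
 * @code
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *		ERROR("cannot enable broadcast control flow");
 * @endcode
 */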
2403 
2404 /**
2405  * Destroy a flow.
2406  *
2407  * @see rte_flow_destroy()
2408  * @see rte_flow_ops
2409  */
2410 int
2411 mlx5_flow_destroy(struct rte_eth_dev *dev,
2412 		  struct rte_flow *flow,
2413 		  struct rte_flow_error *error)
2414 {
2415 	struct priv *priv = dev->data->dev_private;
2416 
2417 	(void)error;
2418 	priv_lock(priv);
2419 	priv_flow_destroy(priv, &priv->flows, flow);
2420 	priv_unlock(priv);
2421 	return 0;
2422 }
2423 
2424 /**
2425  * Destroy all flows.
2426  *
2427  * @see rte_flow_flush()
2428  * @see rte_flow_ops
2429  */
2430 int
2431 mlx5_flow_flush(struct rte_eth_dev *dev,
2432 		struct rte_flow_error *error)
2433 {
2434 	struct priv *priv = dev->data->dev_private;
2435 
2436 	(void)error;
2437 	priv_lock(priv);
2438 	priv_flow_flush(priv, &priv->flows);
2439 	priv_unlock(priv);
2440 	return 0;
2441 }
2442 
2443 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2444 /**
2445  * Query flow counter.
2446  *
2447  * @param cs
2448  *   The counter set to query.
2449  * @param[out] query_count
2450  *   Returned data from the counter.
2451  *
2452  * @return
2453  *   0 on success, an errno value otherwise and rte_errno is set.
2454  */
2455 static int
2456 priv_flow_query_count(struct ibv_counter_set *cs,
2457 		      struct mlx5_flow_counter_stats *counter_stats,
2458 		      struct rte_flow_query_count *query_count,
2459 		      struct rte_flow_error *error)
2460 {
2461 	uint64_t counters[2];
2462 	struct ibv_query_counter_set_attr query_cs_attr = {
2463 		.cs = cs,
2464 		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2465 	};
2466 	struct ibv_counter_set_data query_out = {
2467 		.out = counters,
2468 		.outlen = 2 * sizeof(uint64_t),
2469 	};
2470 	int res = ibv_query_counter_set(&query_cs_attr, &query_out);
2471 
2472 	if (res) {
2473 		rte_flow_error_set(error, -res,
2474 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2475 				   NULL,
2476 				   "cannot read counter");
2477 		return -res;
2478 	}
2479 	query_count->hits_set = 1;
2480 	query_count->bytes_set = 1;
2481 	query_count->hits = counters[0] - counter_stats->hits;
2482 	query_count->bytes = counters[1] - counter_stats->bytes;
2483 	if (query_count->reset) {
2484 		counter_stats->hits = counters[0];
2485 		counter_stats->bytes = counters[1];
2486 	}
2487 	return 0;
2488 }
2489 
2490 /**
2491  * Query a flow.
2492  *
2493  * @see rte_flow_query()
2494  * @see rte_flow_ops
2495  */
2496 int
2497 mlx5_flow_query(struct rte_eth_dev *dev,
2498 		struct rte_flow *flow,
2499 		enum rte_flow_action_type action __rte_unused,
2500 		void *data,
2501 		struct rte_flow_error *error)
2502 {
2503 	struct priv *priv = dev->data->dev_private;
2504 	int res = EINVAL;
2505 
2506 	priv_lock(priv);
2507 	if (flow->cs) {
2508 		res = priv_flow_query_count(flow->cs,
2509 					&flow->counter_stats,
2510 					(struct rte_flow_query_count *)data,
2511 					error);
2512 	} else {
2513 		rte_flow_error_set(error, res,
2514 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2515 				   NULL,
2516 				   "no counter found for flow");
2517 	}
2518 	priv_unlock(priv);
2519 	return -res;
2520 }
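
/*
 * Illustrative sketch (not part of the driver): reading the COUNT action of
 * a flow through the generic API, which lands in mlx5_flow_query() above:
 *
 * @code
 *	struct rte_flow_query_count count = { .reset = 1 };
 *	struct rte_flow_error error;
 *
 *	if (!rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *			    &count, &error) && count.hits_set)
 *		printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *		       count.hits, count.bytes);
 * @endcode
 *
 * port_id and flow are assumed application variables.
 */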
2521 #endif
2522 
2523 /**
2524  * Isolated mode.
2525  *
2526  * @see rte_flow_isolate()
2527  * @see rte_flow_ops
2528  */
2529 int
2530 mlx5_flow_isolate(struct rte_eth_dev *dev,
2531 		  int enable,
2532 		  struct rte_flow_error *error)
2533 {
2534 	struct priv *priv = dev->data->dev_private;
2535 
2536 	priv_lock(priv);
2537 	if (dev->data->dev_started) {
2538 		rte_flow_error_set(error, EBUSY,
2539 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2540 				   NULL,
2541 				   "port must be stopped first");
2542 		priv_unlock(priv);
2543 		return -rte_errno;
2544 	}
2545 	priv->isolated = !!enable;
2546 	if (enable)
2547 		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2548 	else
2549 		priv->dev->dev_ops = &mlx5_dev_ops;
2550 	priv_unlock(priv);
2551 	return 0;
2552 }
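
/*
 * Illustrative sketch (not part of the driver): since the port must be
 * stopped, applications select isolated mode before starting it, e.g.:
 *
 * @code
 *	struct rte_flow_error error;
 *
 *	if (rte_flow_isolate(port_id, 1, &error))
 *		rte_exit(EXIT_FAILURE, "cannot enter isolated mode: %s\n",
 *			 error.message ? error.message : "(no message)");
 *	if (rte_eth_dev_start(port_id))
 *		rte_exit(EXIT_FAILURE, "cannot start port\n");
 * @endcode
 *
 * port_id is an assumed application variable.
 */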
2553 
2554 /**
2555  * Convert a flow director filter to a generic flow.
2556  *
2557  * @param priv
2558  *   Private structure.
2559  * @param fdir_filter
2560  *   Flow director filter to add.
2561  * @param attributes
2562  *   Generic flow parameters structure.
2563  *
2564  * @return
2565  *   0 on success, an errno value on error.
2566  */
2567 static int
2568 priv_fdir_filter_convert(struct priv *priv,
2569 			 const struct rte_eth_fdir_filter *fdir_filter,
2570 			 struct mlx5_fdir *attributes)
2571 {
2572 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
2573 
2574 	/* Validate queue number. */
2575 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2576 		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
2577 		return EINVAL;
2578 	}
2579 	attributes->attr.ingress = 1;
2580 	attributes->items[0] = (struct rte_flow_item) {
2581 		.type = RTE_FLOW_ITEM_TYPE_ETH,
2582 		.spec = &attributes->l2,
2583 		.mask = &attributes->l2_mask,
2584 	};
2585 	switch (fdir_filter->action.behavior) {
2586 	case RTE_ETH_FDIR_ACCEPT:
2587 		attributes->actions[0] = (struct rte_flow_action){
2588 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
2589 			.conf = &attributes->queue,
2590 		};
2591 		break;
2592 	case RTE_ETH_FDIR_REJECT:
2593 		attributes->actions[0] = (struct rte_flow_action){
2594 			.type = RTE_FLOW_ACTION_TYPE_DROP,
2595 		};
2596 		break;
2597 	default:
2598 		ERROR("invalid behavior %d", fdir_filter->action.behavior);
2599 		return ENOTSUP;
2600 	}
2601 	attributes->queue.index = fdir_filter->action.rx_queue;
2602 	switch (fdir_filter->input.flow_type) {
2603 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2604 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2605 			.src_addr = input->flow.udp4_flow.ip.src_ip,
2606 			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
2607 			.time_to_live = input->flow.udp4_flow.ip.ttl,
2608 			.type_of_service = input->flow.udp4_flow.ip.tos,
2609 			.next_proto_id = input->flow.udp4_flow.ip.proto,
2610 		};
2611 		attributes->l4.udp.hdr = (struct udp_hdr){
2612 			.src_port = input->flow.udp4_flow.src_port,
2613 			.dst_port = input->flow.udp4_flow.dst_port,
2614 		};
2615 		attributes->items[1] = (struct rte_flow_item){
2616 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2617 			.spec = &attributes->l3,
2618 		};
2619 		attributes->items[2] = (struct rte_flow_item){
2620 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2621 			.spec = &attributes->l4,
2622 		};
2623 		break;
2624 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2625 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2626 			.src_addr = input->flow.tcp4_flow.ip.src_ip,
2627 			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2628 			.time_to_live = input->flow.tcp4_flow.ip.ttl,
2629 			.type_of_service = input->flow.tcp4_flow.ip.tos,
2630 			.next_proto_id = input->flow.tcp4_flow.ip.proto,
2631 		};
2632 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2633 			.src_port = input->flow.tcp4_flow.src_port,
2634 			.dst_port = input->flow.tcp4_flow.dst_port,
2635 		};
2636 		attributes->items[1] = (struct rte_flow_item){
2637 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2638 			.spec = &attributes->l3,
2639 		};
2640 		attributes->items[2] = (struct rte_flow_item){
2641 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2642 			.spec = &attributes->l4,
2643 		};
2644 		break;
2645 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2646 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2647 			.src_addr = input->flow.ip4_flow.src_ip,
2648 			.dst_addr = input->flow.ip4_flow.dst_ip,
2649 			.time_to_live = input->flow.ip4_flow.ttl,
2650 			.type_of_service = input->flow.ip4_flow.tos,
2651 			.next_proto_id = input->flow.ip4_flow.proto,
2652 		};
2653 		attributes->items[1] = (struct rte_flow_item){
2654 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2655 			.spec = &attributes->l3,
2656 		};
2657 		break;
2658 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2659 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2660 			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
2661 			.proto = input->flow.udp6_flow.ip.proto,
2662 		};
2663 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2664 		       input->flow.udp6_flow.ip.src_ip,
2665 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2666 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2667 		       input->flow.udp6_flow.ip.dst_ip,
2668 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2669 		attributes->l4.udp.hdr = (struct udp_hdr){
2670 			.src_port = input->flow.udp6_flow.src_port,
2671 			.dst_port = input->flow.udp6_flow.dst_port,
2672 		};
2673 		attributes->items[1] = (struct rte_flow_item){
2674 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2675 			.spec = &attributes->l3,
2676 		};
2677 		attributes->items[2] = (struct rte_flow_item){
2678 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2679 			.spec = &attributes->l4,
2680 		};
2681 		break;
2682 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2683 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2684 			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2685 			.proto = input->flow.tcp6_flow.ip.proto,
2686 		};
2687 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2688 		       input->flow.tcp6_flow.ip.src_ip,
2689 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2690 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2691 		       input->flow.tcp6_flow.ip.dst_ip,
2692 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2693 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2694 			.src_port = input->flow.tcp6_flow.src_port,
2695 			.dst_port = input->flow.tcp6_flow.dst_port,
2696 		};
2697 		attributes->items[1] = (struct rte_flow_item){
2698 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2699 			.spec = &attributes->l3,
2700 		};
2701 		attributes->items[2] = (struct rte_flow_item){
2702 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2703 			.spec = &attributes->l4,
2704 		};
2705 		break;
2706 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2707 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2708 			.hop_limits = input->flow.ipv6_flow.hop_limits,
2709 			.proto = input->flow.ipv6_flow.proto,
2710 		};
2711 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2712 		       input->flow.ipv6_flow.src_ip,
2713 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2714 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2715 		       input->flow.ipv6_flow.dst_ip,
2716 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2717 		attributes->items[1] = (struct rte_flow_item){
2718 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2719 			.spec = &attributes->l3,
2720 		};
2721 		break;
2722 	default:
2723 		ERROR("invalid flow type %d",
2724 		      fdir_filter->input.flow_type);
2725 		return ENOTSUP;
2726 	}
2727 	return 0;
2728 }
2729 
2730 /**
2731  * Add new flow director filter and store it in list.
2732  *
2733  * @param priv
2734  *   Private structure.
2735  * @param fdir_filter
2736  *   Flow director filter to add.
2737  *
2738  * @return
2739  *   0 on success, errno value on failure.
2740  */
2741 static int
2742 priv_fdir_filter_add(struct priv *priv,
2743 		     const struct rte_eth_fdir_filter *fdir_filter)
2744 {
2745 	struct mlx5_fdir attributes = {
2746 		.attr.group = 0,
2747 		.l2_mask = {
2748 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2749 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2750 			.type = 0,
2751 		},
2752 	};
2753 	struct mlx5_flow_parse parser = {
2754 		.layer = HASH_RXQ_ETH,
2755 	};
2756 	struct rte_flow_error error;
2757 	struct rte_flow *flow;
2758 	int ret;
2759 
2760 	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2761 	if (ret)
2762 		return -ret;
2763 	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2764 				attributes.actions, &error, &parser);
2765 	if (ret)
2766 		return -ret;
2767 	flow = priv_flow_create(priv,
2768 				&priv->flows,
2769 				&attributes.attr,
2770 				attributes.items,
2771 				attributes.actions,
2772 				&error);
2773 	if (flow) {
2774 		DEBUG("FDIR created %p", (void *)flow);
2775 		return 0;
2776 	}
2777 	return ENOTSUP;
2778 }
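
/*
 * Illustrative sketch (not part of the driver): a legacy flow director
 * request that ends up in priv_fdir_filter_add(), steering IPv4/UDP packets
 * for an arbitrary address/port pair to Rx queue 1:
 *
 * @code
 *	struct rte_eth_fdir_filter fdir = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip.dst_ip = rte_cpu_to_be_32(IPv4(192, 168, 0, 1)),
 *				.dst_port = rte_cpu_to_be_16(4789),
 *			},
 *		},
 *		.action = {
 *			.rx_queue = 1,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *	};
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_ADD, &fdir);
 * @endcode
 *
 * port_id is an assumed application variable; the device must be configured
 * with fdir_conf.mode set to a supported perfect mode (see
 * priv_fdir_ctrl_func() below).
 */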
2779 
2780 /**
2781  * Delete specific filter.
2782  *
2783  * @param priv
2784  *   Private structure.
2785  * @param fdir_filter
2786  *   Filter to be deleted.
2787  *
2788  * @return
2789  *   0 on success, errno value on failure.
2790  */
2791 static int
2792 priv_fdir_filter_delete(struct priv *priv,
2793 			const struct rte_eth_fdir_filter *fdir_filter)
2794 {
2795 	struct mlx5_fdir attributes = {
2796 		.attr.group = 0,
2797 	};
2798 	struct mlx5_flow_parse parser = {
2799 		.create = 1,
2800 		.layer = HASH_RXQ_ETH,
2801 	};
2802 	struct rte_flow_error error;
2803 	struct rte_flow *flow;
2804 	unsigned int i;
2805 	int ret;
2806 
2807 	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2808 	if (ret)
2809 		return -ret;
2810 	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2811 				attributes.actions, &error, &parser);
2812 	if (ret)
2813 		goto exit;
2814 	/*
2815 	 * Special case for drop action which is only set in the
2816 	 * specifications when the flow is created.  In this situation the
2817 	 * drop specification is missing.
2818 	 */
2819 	if (parser.drop) {
2820 		struct ibv_flow_spec_action_drop *drop;
2821 
2822 		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2823 				parser.queue[HASH_RXQ_ETH].offset);
2824 		*drop = (struct ibv_flow_spec_action_drop){
2825 			.type = IBV_FLOW_SPEC_ACTION_DROP,
2826 			.size = sizeof(struct ibv_flow_spec_action_drop),
2827 		};
2828 		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2829 	}
2830 	TAILQ_FOREACH(flow, &priv->flows, next) {
2831 		struct ibv_flow_attr *attr;
2832 		struct ibv_spec_header *attr_h;
2833 		void *spec;
2834 		struct ibv_flow_attr *flow_attr;
2835 		struct ibv_spec_header *flow_h;
2836 		void *flow_spec;
2837 		unsigned int specs_n;
2838 
2839 		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2840 		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2841 		/* Compare the attributes first. */
2842 		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2843 			continue;
2844 		if (attr->num_of_specs == 0)
2845 			continue;
2846 		spec = (void *)((uintptr_t)attr +
2847 				sizeof(struct ibv_flow_attr));
2848 		flow_spec = (void *)((uintptr_t)flow_attr +
2849 				     sizeof(struct ibv_flow_attr));
2850 		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2851 		for (i = 0; i != specs_n; ++i) {
2852 			attr_h = spec;
2853 			flow_h = flow_spec;
2854 			if (memcmp(spec, flow_spec,
2855 				   RTE_MIN(attr_h->size, flow_h->size)))
2856 				goto wrong_flow;
2857 			spec = (void *)((uintptr_t)spec + attr_h->size);
2858 			flow_spec = (void *)((uintptr_t)flow_spec +
2859 					     flow_h->size);
2860 		}
2861 		/* At this point, the flow match. */
2862 		/* At this point, the flow matches. */
2863 wrong_flow:
2864 		/* The flow does not match. */
2865 		continue;
2866 	}
2867 	if (flow)
2868 		priv_flow_destroy(priv, &priv->flows, flow);
2869 exit:
2870 	for (i = 0; i != hash_rxq_init_n; ++i) {
2871 		if (parser.queue[i].ibv_attr)
2872 			rte_free(parser.queue[i].ibv_attr);
2873 	}
2874 	return -ret;
2875 }
2876 
2877 /**
2878  * Update queue for specific filter.
2879  *
2880  * @param priv
2881  *   Private structure.
2882  * @param fdir_filter
2883  *   Filter to be updated.
2884  *
2885  * @return
2886  *   0 on success, errno value on failure.
2887  */
2888 static int
2889 priv_fdir_filter_update(struct priv *priv,
2890 			const struct rte_eth_fdir_filter *fdir_filter)
2891 {
2892 	int ret;
2893 
2894 	ret = priv_fdir_filter_delete(priv, fdir_filter);
2895 	if (ret)
2896 		return ret;
2897 	ret = priv_fdir_filter_add(priv, fdir_filter);
2898 	return ret;
2899 }
2900 
2901 /**
2902  * Flush all filters.
2903  *
2904  * @param priv
2905  *   Private structure.
2906  */
2907 static void
2908 priv_fdir_filter_flush(struct priv *priv)
2909 {
2910 	priv_flow_flush(priv, &priv->flows);
2911 }
2912 
2913 /**
2914  * Get flow director information.
2915  *
2916  * @param priv
2917  *   Private structure.
2918  * @param[out] fdir_info
2919  *   Resulting flow director information.
2920  */
2921 static void
2922 priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
2923 {
2924 	struct rte_eth_fdir_masks *mask =
2925 		&priv->dev->data->dev_conf.fdir_conf.mask;
2926 
2927 	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
2928 	fdir_info->guarant_spc = 0;
2929 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
2930 	fdir_info->max_flexpayload = 0;
2931 	fdir_info->flow_types_mask[0] = 0;
2932 	fdir_info->flex_payload_unit = 0;
2933 	fdir_info->max_flex_payload_segment_num = 0;
2934 	fdir_info->flex_payload_limit = 0;
2935 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2936 }
2937 
2938 /**
2939  * Deal with flow director operations.
2940  *
2941  * @param priv
2942  *   Pointer to private structure.
2943  * @param filter_op
2944  *   Operation to perform.
2945  * @param arg
2946  *   Pointer to operation-specific structure.
2947  *
2948  * @return
2949  *   0 on success, errno value on failure.
2950  */
2951 static int
2952 priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
2953 {
2954 	enum rte_fdir_mode fdir_mode =
2955 		priv->dev->data->dev_conf.fdir_conf.mode;
2956 	int ret = 0;
2957 
2958 	if (filter_op == RTE_ETH_FILTER_NOP)
2959 		return 0;
2960 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
2961 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
2962 		ERROR("%p: flow director mode %d not supported",
2963 		      (void *)priv, fdir_mode);
2964 		return EINVAL;
2965 	}
2966 	switch (filter_op) {
2967 	case RTE_ETH_FILTER_ADD:
2968 		ret = priv_fdir_filter_add(priv, arg);
2969 		break;
2970 	case RTE_ETH_FILTER_UPDATE:
2971 		ret = priv_fdir_filter_update(priv, arg);
2972 		break;
2973 	case RTE_ETH_FILTER_DELETE:
2974 		ret = priv_fdir_filter_delete(priv, arg);
2975 		break;
2976 	case RTE_ETH_FILTER_FLUSH:
2977 		priv_fdir_filter_flush(priv);
2978 		break;
2979 	case RTE_ETH_FILTER_INFO:
2980 		priv_fdir_info_get(priv, arg);
2981 		break;
2982 	default:
2983 		DEBUG("%p: unknown operation %u", (void *)priv,
2984 		      filter_op);
2985 		ret = EINVAL;
2986 		break;
2987 	}
2988 	return ret;
2989 }
2990 
2991 /**
2992  * Manage filter operations.
2993  *
2994  * @param dev
2995  *   Pointer to Ethernet device structure.
2996  * @param filter_type
2997  *   Filter type.
2998  * @param filter_op
2999  *   Operation to perform.
3000  * @param arg
3001  *   Pointer to operation-specific structure.
3002  *
3003  * @return
3004  *   0 on success, negative errno value on failure.
3005  */
3006 int
3007 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3008 		     enum rte_filter_type filter_type,
3009 		     enum rte_filter_op filter_op,
3010 		     void *arg)
3011 {
3012 	int ret = EINVAL;
3013 	struct priv *priv = dev->data->dev_private;
3014 
3015 	switch (filter_type) {
3016 	case RTE_ETH_FILTER_GENERIC:
3017 		if (filter_op != RTE_ETH_FILTER_GET)
3018 			return -EINVAL;
3019 		*(const void **)arg = &mlx5_flow_ops;
3020 		return 0;
3021 	case RTE_ETH_FILTER_FDIR:
3022 		priv_lock(priv);
3023 		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
3024 		priv_unlock(priv);
3025 		break;
3026 	default:
3027 		ERROR("%p: filter type (%d) not supported",
3028 		      (void *)dev, filter_type);
3029 		break;
3030 	}
3031 	return -ret;
3032 }
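
/*
 * Illustrative sketch (not part of the driver): the RTE_ETH_FILTER_GENERIC
 * branch above is how the rte_flow layer retrieves this PMD's flow
 * operations; an equivalent direct call looks like:
 *
 * @code
 *	const struct rte_flow_ops *ops = NULL;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				RTE_ETH_FILTER_GET, &ops);
 * @endcode
 *
 * port_id is an assumed application variable.
 */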
3033