xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision d429cc0b53735cc7b1e304ec1d0f35ae06ace7d0)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox.
4  */
5 
6 #include <sys/queue.h>
7 #include <string.h>
8 
9 /* Verbs header. */
10 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
11 #ifdef PEDANTIC
12 #pragma GCC diagnostic ignored "-Wpedantic"
13 #endif
14 #include <infiniband/verbs.h>
15 #ifdef PEDANTIC
16 #pragma GCC diagnostic error "-Wpedantic"
17 #endif
18 
19 #include <rte_ethdev_driver.h>
20 #include <rte_flow.h>
21 #include <rte_flow_driver.h>
22 #include <rte_malloc.h>
23 #include <rte_ip.h>
24 
25 #include "mlx5.h"
26 #include "mlx5_defs.h"
27 #include "mlx5_prm.h"
28 #include "mlx5_glue.h"
29 
30 /* Define minimal priority for control plane flows. */
31 #define MLX5_CTRL_FLOW_PRIORITY 4
32 
33 /* Internet Protocol versions. */
34 #define MLX5_IPV4 4
35 #define MLX5_IPV6 6
36 
37 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
38 struct ibv_flow_spec_counter_action {
39 	int dummy;
40 };
41 #endif
42 
43 /* Dev ops structure defined in mlx5.c */
44 extern const struct eth_dev_ops mlx5_dev_ops;
45 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
46 
47 static int
48 mlx5_flow_create_eth(const struct rte_flow_item *item,
49 		     const void *default_mask,
50 		     void *data);
51 
52 static int
53 mlx5_flow_create_vlan(const struct rte_flow_item *item,
54 		      const void *default_mask,
55 		      void *data);
56 
57 static int
58 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
59 		      const void *default_mask,
60 		      void *data);
61 
62 static int
63 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
64 		      const void *default_mask,
65 		      void *data);
66 
67 static int
68 mlx5_flow_create_udp(const struct rte_flow_item *item,
69 		     const void *default_mask,
70 		     void *data);
71 
72 static int
73 mlx5_flow_create_tcp(const struct rte_flow_item *item,
74 		     const void *default_mask,
75 		     void *data);
76 
77 static int
78 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
79 		       const void *default_mask,
80 		       void *data);
81 
82 struct mlx5_flow_parse;
83 
84 static void
85 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
86 		      unsigned int size);
87 
88 static int
89 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
90 
91 static int
92 mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);
93 
94 /* Hash RX queue types. */
95 enum hash_rxq_type {
96 	HASH_RXQ_TCPV4,
97 	HASH_RXQ_UDPV4,
98 	HASH_RXQ_IPV4,
99 	HASH_RXQ_TCPV6,
100 	HASH_RXQ_UDPV6,
101 	HASH_RXQ_IPV6,
102 	HASH_RXQ_ETH,
103 };
104 
105 /* Initialization data for hash RX queue. */
106 struct hash_rxq_init {
107 	uint64_t hash_fields; /* Fields that participate in the hash. */
108 	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
109 	unsigned int flow_priority; /* Flow priority to use. */
110 	unsigned int ip_version; /* Internet protocol. */
111 };
112 
113 /* Initialization data for hash RX queues. */
114 const struct hash_rxq_init hash_rxq_init[] = {
115 	[HASH_RXQ_TCPV4] = {
116 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
117 				IBV_RX_HASH_DST_IPV4 |
118 				IBV_RX_HASH_SRC_PORT_TCP |
119 				IBV_RX_HASH_DST_PORT_TCP),
120 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
121 		.flow_priority = 0,
122 		.ip_version = MLX5_IPV4,
123 	},
124 	[HASH_RXQ_UDPV4] = {
125 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
126 				IBV_RX_HASH_DST_IPV4 |
127 				IBV_RX_HASH_SRC_PORT_UDP |
128 				IBV_RX_HASH_DST_PORT_UDP),
129 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
130 		.flow_priority = 0,
131 		.ip_version = MLX5_IPV4,
132 	},
133 	[HASH_RXQ_IPV4] = {
134 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
135 				IBV_RX_HASH_DST_IPV4),
136 		.dpdk_rss_hf = (ETH_RSS_IPV4 |
137 				ETH_RSS_FRAG_IPV4),
138 		.flow_priority = 1,
139 		.ip_version = MLX5_IPV4,
140 	},
141 	[HASH_RXQ_TCPV6] = {
142 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
143 				IBV_RX_HASH_DST_IPV6 |
144 				IBV_RX_HASH_SRC_PORT_TCP |
145 				IBV_RX_HASH_DST_PORT_TCP),
146 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
147 		.flow_priority = 0,
148 		.ip_version = MLX5_IPV6,
149 	},
150 	[HASH_RXQ_UDPV6] = {
151 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
152 				IBV_RX_HASH_DST_IPV6 |
153 				IBV_RX_HASH_SRC_PORT_UDP |
154 				IBV_RX_HASH_DST_PORT_UDP),
155 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
156 		.flow_priority = 0,
157 		.ip_version = MLX5_IPV6,
158 	},
159 	[HASH_RXQ_IPV6] = {
160 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
161 				IBV_RX_HASH_DST_IPV6),
162 		.dpdk_rss_hf = (ETH_RSS_IPV6 |
163 				ETH_RSS_FRAG_IPV6),
164 		.flow_priority = 1,
165 		.ip_version = MLX5_IPV6,
166 	},
167 	[HASH_RXQ_ETH] = {
168 		.hash_fields = 0,
169 		.dpdk_rss_hf = 0,
170 		.flow_priority = 2,
171 	},
172 };
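/*
 * Note: flow_priority reflects how specific each hash Rx queue type is:
 * L4 types (TCP/UDP) use offset 0, L3-only types use 1 and the Ethernet
 * catch-all uses 2. priv_flow_convert() adds this offset to the rule's
 * attr->priority so that, for a given flow, the most specific Verbs flows
 * take precedence over the less specific ones.
 */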
173 
174 /* Number of entries in hash_rxq_init[]. */
175 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
176 
177 /** Structure for holding counter stats. */
178 struct mlx5_flow_counter_stats {
179 	uint64_t hits; /**< Number of packets matched by the rule. */
180 	uint64_t bytes; /**< Number of bytes matched by the rule. */
181 };
182 
183 /** Structure for Drop queue. */
184 struct mlx5_hrxq_drop {
185 	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
186 	struct ibv_qp *qp; /**< Verbs queue pair. */
187 	struct ibv_wq *wq; /**< Verbs work queue. */
188 	struct ibv_cq *cq; /**< Verbs completion queue. */
189 };
190 
191 /* Flows structures. */
192 struct mlx5_flow {
193 	uint64_t hash_fields; /**< Fields that participate in the hash. */
194 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
195 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
196 	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
197 };
198 
199 /* Drop flows structures. */
200 struct mlx5_flow_drop {
201 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
202 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
203 };
204 
205 struct rte_flow {
206 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
207 	uint32_t mark:1; /**< Set if the flow is marked. */
208 	uint32_t drop:1; /**< Drop queue. */
209 	uint16_t queues_n; /**< Number of entries in queue[]. */
210 	uint16_t (*queues)[]; /**< Queues indexes to use. */
211 	struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
212 	uint8_t rss_key[40]; /**< copy of the RSS key. */
213 	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
214 	struct mlx5_flow_counter_stats counter_stats;/**<The counter stats. */
215 	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
216 	/**< Flow with Rx queue. */
217 };
218 
219 /** Static initializer for items. */
220 #define ITEMS(...) \
221 	(const enum rte_flow_item_type []){ \
222 		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
223 	}
224 
225 /** Structure to generate a simple graph of layers supported by the NIC. */
226 struct mlx5_flow_items {
227 	/** List of possible actions for these items. */
228 	const enum rte_flow_action_type *const actions;
229 	/** Bit-masks corresponding to the possibilities for the item. */
230 	const void *mask;
231 	/**
232 	 * Default bit-masks to use when item->mask is not provided. When
233 	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
234 	 * used instead.
235 	 */
236 	const void *default_mask;
237 	/** Bit-masks size in bytes. */
238 	const unsigned int mask_sz;
239 	/**
240 	 * Conversion function from rte_flow to NIC specific flow.
241 	 *
242 	 * @param item
243 	 *   rte_flow item to convert.
244 	 * @param default_mask
245 	 *   Default bit-masks to use when item->mask is not provided.
246 	 * @param data
247 	 *   Internal structure to store the conversion.
248 	 *
249 	 * @return
250 	 *   0 on success, negative value otherwise.
251 	 */
252 	int (*convert)(const struct rte_flow_item *item,
253 		       const void *default_mask,
254 		       void *data);
255 	/** Size in bytes of the destination structure. */
256 	const unsigned int dst_sz;
257 	/** List of possible following items.  */
258 	const enum rte_flow_item_type *const items;
259 };
260 
261 /** Valid actions for this PMD. */
262 static const enum rte_flow_action_type valid_actions[] = {
263 	RTE_FLOW_ACTION_TYPE_DROP,
264 	RTE_FLOW_ACTION_TYPE_QUEUE,
265 	RTE_FLOW_ACTION_TYPE_MARK,
266 	RTE_FLOW_ACTION_TYPE_FLAG,
267 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
268 	RTE_FLOW_ACTION_TYPE_COUNT,
269 #endif
270 	RTE_FLOW_ACTION_TYPE_END,
271 };
272 
273 /** Graph of supported items and associated actions. */
274 static const struct mlx5_flow_items mlx5_flow_items[] = {
275 	[RTE_FLOW_ITEM_TYPE_END] = {
276 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
277 			       RTE_FLOW_ITEM_TYPE_VXLAN),
278 	},
279 	[RTE_FLOW_ITEM_TYPE_ETH] = {
280 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
281 			       RTE_FLOW_ITEM_TYPE_IPV4,
282 			       RTE_FLOW_ITEM_TYPE_IPV6),
283 		.actions = valid_actions,
284 		.mask = &(const struct rte_flow_item_eth){
285 			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
286 			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
287 			.type = -1,
288 		},
289 		.default_mask = &rte_flow_item_eth_mask,
290 		.mask_sz = sizeof(struct rte_flow_item_eth),
291 		.convert = mlx5_flow_create_eth,
292 		.dst_sz = sizeof(struct ibv_flow_spec_eth),
293 	},
294 	[RTE_FLOW_ITEM_TYPE_VLAN] = {
295 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
296 			       RTE_FLOW_ITEM_TYPE_IPV6),
297 		.actions = valid_actions,
298 		.mask = &(const struct rte_flow_item_vlan){
299 			.tci = -1,
300 		},
301 		.default_mask = &rte_flow_item_vlan_mask,
302 		.mask_sz = sizeof(struct rte_flow_item_vlan),
303 		.convert = mlx5_flow_create_vlan,
304 		.dst_sz = 0,
305 	},
306 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
307 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
308 			       RTE_FLOW_ITEM_TYPE_TCP),
309 		.actions = valid_actions,
310 		.mask = &(const struct rte_flow_item_ipv4){
311 			.hdr = {
312 				.src_addr = -1,
313 				.dst_addr = -1,
314 				.type_of_service = -1,
315 				.next_proto_id = -1,
316 			},
317 		},
318 		.default_mask = &rte_flow_item_ipv4_mask,
319 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
320 		.convert = mlx5_flow_create_ipv4,
321 		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
322 	},
323 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
324 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
325 			       RTE_FLOW_ITEM_TYPE_TCP),
326 		.actions = valid_actions,
327 		.mask = &(const struct rte_flow_item_ipv6){
328 			.hdr = {
329 				.src_addr = {
330 					0xff, 0xff, 0xff, 0xff,
331 					0xff, 0xff, 0xff, 0xff,
332 					0xff, 0xff, 0xff, 0xff,
333 					0xff, 0xff, 0xff, 0xff,
334 				},
335 				.dst_addr = {
336 					0xff, 0xff, 0xff, 0xff,
337 					0xff, 0xff, 0xff, 0xff,
338 					0xff, 0xff, 0xff, 0xff,
339 					0xff, 0xff, 0xff, 0xff,
340 				},
341 				.vtc_flow = -1,
342 				.proto = -1,
343 				.hop_limits = -1,
344 			},
345 		},
346 		.default_mask = &rte_flow_item_ipv6_mask,
347 		.mask_sz = sizeof(struct rte_flow_item_ipv6),
348 		.convert = mlx5_flow_create_ipv6,
349 		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
350 	},
351 	[RTE_FLOW_ITEM_TYPE_UDP] = {
352 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
353 		.actions = valid_actions,
354 		.mask = &(const struct rte_flow_item_udp){
355 			.hdr = {
356 				.src_port = -1,
357 				.dst_port = -1,
358 			},
359 		},
360 		.default_mask = &rte_flow_item_udp_mask,
361 		.mask_sz = sizeof(struct rte_flow_item_udp),
362 		.convert = mlx5_flow_create_udp,
363 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
364 	},
365 	[RTE_FLOW_ITEM_TYPE_TCP] = {
366 		.actions = valid_actions,
367 		.mask = &(const struct rte_flow_item_tcp){
368 			.hdr = {
369 				.src_port = -1,
370 				.dst_port = -1,
371 			},
372 		},
373 		.default_mask = &rte_flow_item_tcp_mask,
374 		.mask_sz = sizeof(struct rte_flow_item_tcp),
375 		.convert = mlx5_flow_create_tcp,
376 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
377 	},
378 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
379 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
380 		.actions = valid_actions,
381 		.mask = &(const struct rte_flow_item_vxlan){
382 			.vni = "\xff\xff\xff",
383 		},
384 		.default_mask = &rte_flow_item_vxlan_mask,
385 		.mask_sz = sizeof(struct rte_flow_item_vxlan),
386 		.convert = mlx5_flow_create_vxlan,
387 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
388 	},
389 };
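/*
 * The graph above is walked by priv_flow_convert_items_validate(): every
 * item of a pattern must be listed in the .items array of the item that
 * precedes it, starting from the RTE_FLOW_ITEM_TYPE_END entry. For
 * illustration, a hypothetical application pattern such as
 *
 *   eth / ipv4 / udp / vxlan / eth / ipv4
 *
 * is accepted, whereas "eth / tcp" is rejected since TCP may only follow
 * an IPv4 or IPv6 item.
 */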
390 
391 /** Structure to pass to the conversion function. */
392 struct mlx5_flow_parse {
393 	uint32_t inner; /**< Set once VXLAN is encountered. */
394 	uint32_t create:1;
395 	/**< Whether resources should remain after a validate. */
396 	uint32_t drop:1; /**< Target is a drop queue. */
397 	uint32_t mark:1; /**< Mark is present in the flow. */
398 	uint32_t count:1; /**< Count is present in the flow. */
399 	uint32_t mark_id; /**< Mark identifier. */
400 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
401 	uint16_t queues_n; /**< Number of entries in queue[]. */
402 	struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
403 	uint8_t rss_key[40]; /**< copy of the RSS key. */
404 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
405 	struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
406 	struct {
407 		struct ibv_flow_attr *ibv_attr;
408 		/**< Pointer to Verbs attributes. */
409 		unsigned int offset;
410 		/**< Current position or total size of the attribute. */
411 	} queue[RTE_DIM(hash_rxq_init)];
412 };
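/*
 * Note: queue[] holds one Verbs attribute per hash Rx queue type. A single
 * rte_flow carrying an RSS action may therefore expand into several Verbs
 * flows (e.g. one for TCPv4, one for UDPv4 and one for IPv4), each attached
 * to the hash Rx queue matching its hash fields.
 */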
413 
414 static const struct rte_flow_ops mlx5_flow_ops = {
415 	.validate = mlx5_flow_validate,
416 	.create = mlx5_flow_create,
417 	.destroy = mlx5_flow_destroy,
418 	.flush = mlx5_flow_flush,
419 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
420 	.query = mlx5_flow_query,
421 #else
422 	.query = NULL,
423 #endif
424 	.isolate = mlx5_flow_isolate,
425 };
426 
427 /* Convert FDIR request to Generic flow. */
428 struct mlx5_fdir {
429 	struct rte_flow_attr attr;
430 	struct rte_flow_action actions[2];
431 	struct rte_flow_item items[4];
432 	struct rte_flow_item_eth l2;
433 	struct rte_flow_item_eth l2_mask;
434 	union {
435 		struct rte_flow_item_ipv4 ipv4;
436 		struct rte_flow_item_ipv6 ipv6;
437 	} l3;
438 	union {
439 		struct rte_flow_item_udp udp;
440 		struct rte_flow_item_tcp tcp;
441 	} l4;
442 	struct rte_flow_action_queue queue;
443 };
444 
445 /* Verbs specification header. */
446 struct ibv_spec_header {
447 	enum ibv_flow_spec_type type;
448 	uint16_t size;
449 };
450 
451 /**
452  * Check support for a given item.
453  *
454  * @param item[in]
455  *   Item specification.
456  * @param mask[in]
457  *   Bit-masks covering supported fields to compare with spec, last and mask in
458  *   \item.
459  * @param size
460  *   Bit-Mask size in bytes.
461  *
462  * @return
463  *   0 on success, non-zero otherwise.
464  */
465 static int
466 mlx5_flow_item_validate(const struct rte_flow_item *item,
467 			const uint8_t *mask, unsigned int size)
468 {
469 	int ret = 0;
470 
471 	if (!item->spec && (item->mask || item->last))
472 		return -1;
473 	if (item->spec && !item->mask) {
474 		unsigned int i;
475 		const uint8_t *spec = item->spec;
476 
477 		for (i = 0; i < size; ++i)
478 			if ((spec[i] | mask[i]) != mask[i])
479 				return -1;
480 	}
481 	if (item->last && !item->mask) {
482 		unsigned int i;
483 		const uint8_t *spec = item->last;
484 
485 		for (i = 0; i < size; ++i)
486 			if ((spec[i] | mask[i]) != mask[i])
487 				return -1;
488 	}
489 	if (item->mask) {
490 		unsigned int i;
491 		const uint8_t *spec = item->mask;
492 
493 		for (i = 0; i < size; ++i)
494 			if ((spec[i] | mask[i]) != mask[i])
495 				return -1;
496 	}
497 	if (item->spec && item->last) {
498 		uint8_t spec[size];
499 		uint8_t last[size];
500 		const uint8_t *apply = mask;
501 		unsigned int i;
502 
503 		if (item->mask)
504 			apply = item->mask;
505 		for (i = 0; i < size; ++i) {
506 			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
507 			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
508 		}
509 		ret = memcmp(spec, last, size);
510 	}
511 	return ret;
512 }
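/*
 * A consequence of the last check above: "last" (range) matching is only
 * accepted when spec and last are identical once the applied mask is taken
 * into account; real ranges cannot be offloaded and fail validation.
 */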
513 
514 /**
515  * Copy the RSS configuration from the user one. When rss_conf is NULL, the
516  * driver default configuration already present in the parser is kept.
517  *
518  * @param priv
519  *   Pointer to private structure.
520  * @param parser
521  *   Internal parser structure.
522  * @param rss_conf
523  *   User RSS configuration to save.
524  *
525  * @return
526  *   0 on success, errno value on failure.
527  */
528 static int
529 priv_flow_convert_rss_conf(struct priv *priv,
530 			   struct mlx5_flow_parse *parser,
531 			   const struct rte_eth_rss_conf *rss_conf)
532 {
533 	/*
534 	 * This function is also called at the beginning of
535 	 * priv_flow_convert_actions() to initialize the parser with the
536 	 * device default RSS configuration.
537 	 */
538 	(void)priv;
539 	if (rss_conf) {
540 		if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
541 			return EINVAL;
542 		if (rss_conf->rss_key_len != 40)
543 			return EINVAL;
544 		if (rss_conf->rss_key_len && rss_conf->rss_key) {
545 			parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
546 			memcpy(parser->rss_key, rss_conf->rss_key,
547 			       rss_conf->rss_key_len);
548 			parser->rss_conf.rss_key = parser->rss_key;
549 		}
550 		parser->rss_conf.rss_hf = rss_conf->rss_hf;
551 	}
552 	return 0;
553 }
554 
555 /**
556  * Extract attribute to the parser.
557  *
558  * @param priv
559  *   Pointer to private structure.
560  * @param[in] attr
561  *   Flow rule attributes.
562  * @param[out] error
563  *   Perform verbose error reporting if not NULL.
564  * @param[in, out] parser
565  *   Internal parser structure.
566  *
567  * @return
568  *   0 on success, a negative errno value otherwise and rte_errno is set.
569  */
570 static int
571 priv_flow_convert_attributes(struct priv *priv,
572 			     const struct rte_flow_attr *attr,
573 			     struct rte_flow_error *error,
574 			     struct mlx5_flow_parse *parser)
575 {
576 	(void)priv;
577 	(void)parser;
578 	if (attr->group) {
579 		rte_flow_error_set(error, ENOTSUP,
580 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
581 				   NULL,
582 				   "groups are not supported");
583 		return -rte_errno;
584 	}
585 	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
586 		rte_flow_error_set(error, ENOTSUP,
587 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
588 				   NULL,
589 				   "priorities are not supported");
590 		return -rte_errno;
591 	}
592 	if (attr->egress) {
593 		rte_flow_error_set(error, ENOTSUP,
594 				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
595 				   NULL,
596 				   "egress is not supported");
597 		return -rte_errno;
598 	}
599 	if (!attr->ingress) {
600 		rte_flow_error_set(error, ENOTSUP,
601 				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
602 				   NULL,
603 				   "only ingress is supported");
604 		return -rte_errno;
605 	}
606 	return 0;
607 }
608 
609 /**
610  * Extract actions request to the parser.
611  *
612  * @param priv
613  *   Pointer to private structure.
614  * @param[in] actions
615  *   Associated actions (list terminated by the END action).
616  * @param[out] error
617  *   Perform verbose error reporting if not NULL.
618  * @param[in, out] parser
619  *   Internal parser structure.
620  *
621  * @return
622  *   0 on success, a negative errno value otherwise and rte_errno is set.
623  */
624 static int
625 priv_flow_convert_actions(struct priv *priv,
626 			  const struct rte_flow_action actions[],
627 			  struct rte_flow_error *error,
628 			  struct mlx5_flow_parse *parser)
629 {
630 	/*
631 	 * Add the default RSS configuration: Verbs needs it to create the QP
632 	 * even when no RSS action is requested.
633 	 */
634 	priv_flow_convert_rss_conf(priv, parser,
635 				   (const struct rte_eth_rss_conf *)
636 				   &priv->rss_conf);
637 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
638 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
639 			continue;
640 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
641 			parser->drop = 1;
642 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
643 			const struct rte_flow_action_queue *queue =
644 				(const struct rte_flow_action_queue *)
645 				actions->conf;
646 			uint16_t n;
647 			uint16_t found = 0;
648 
649 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
650 				goto exit_action_not_supported;
651 			for (n = 0; n < parser->queues_n; ++n) {
652 				if (parser->queues[n] == queue->index) {
653 					found = 1;
654 					break;
655 				}
656 			}
657 			if (parser->queues_n > 1 && !found) {
658 				rte_flow_error_set(error, ENOTSUP,
659 					   RTE_FLOW_ERROR_TYPE_ACTION,
660 					   actions,
661 					   "queue action not in RSS queues");
662 				return -rte_errno;
663 			}
664 			if (!found) {
665 				parser->queues_n = 1;
666 				parser->queues[0] = queue->index;
667 			}
668 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
669 			const struct rte_flow_action_rss *rss =
670 				(const struct rte_flow_action_rss *)
671 				actions->conf;
672 			uint16_t n;
673 
674 			if (!rss || !rss->num) {
675 				rte_flow_error_set(error, EINVAL,
676 						   RTE_FLOW_ERROR_TYPE_ACTION,
677 						   actions,
678 						   "no valid queues");
679 				return -rte_errno;
680 			}
681 			if (parser->queues_n == 1) {
682 				uint16_t found = 0;
683 
684 				assert(parser->queues_n);
685 				for (n = 0; n < rss->num; ++n) {
686 					if (parser->queues[0] ==
687 					    rss->queue[n]) {
688 						found = 1;
689 						break;
690 					}
691 				}
692 				if (!found) {
693 					rte_flow_error_set(error, ENOTSUP,
694 						   RTE_FLOW_ERROR_TYPE_ACTION,
695 						   actions,
696 						   "queue action not in RSS"
697 						   " queues");
698 					return -rte_errno;
699 				}
700 			}
701 			for (n = 0; n < rss->num; ++n) {
702 				if (rss->queue[n] >= priv->rxqs_n) {
703 					rte_flow_error_set(error, EINVAL,
704 						   RTE_FLOW_ERROR_TYPE_ACTION,
705 						   actions,
706 						   "queue id > number of"
707 						   " queues");
708 					return -rte_errno;
709 				}
710 			}
711 			for (n = 0; n < rss->num; ++n)
712 				parser->queues[n] = rss->queue[n];
713 			parser->queues_n = rss->num;
714 			if (priv_flow_convert_rss_conf(priv, parser,
715 						       rss->rss_conf)) {
716 				rte_flow_error_set(error, EINVAL,
717 						   RTE_FLOW_ERROR_TYPE_ACTION,
718 						   actions,
719 						   "wrong RSS configuration");
720 				return -rte_errno;
721 			}
722 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
723 			const struct rte_flow_action_mark *mark =
724 				(const struct rte_flow_action_mark *)
725 				actions->conf;
726 
727 			if (!mark) {
728 				rte_flow_error_set(error, EINVAL,
729 						   RTE_FLOW_ERROR_TYPE_ACTION,
730 						   actions,
731 						   "mark must be defined");
732 				return -rte_errno;
733 			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
734 				rte_flow_error_set(error, ENOTSUP,
735 						   RTE_FLOW_ERROR_TYPE_ACTION,
736 						   actions,
737 						   "mark must be between 0"
738 						   " and 16777199");
739 				return -rte_errno;
740 			}
741 			parser->mark = 1;
742 			parser->mark_id = mark->id;
743 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
744 			parser->mark = 1;
745 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
746 			   priv->config.flow_counter_en) {
747 			parser->count = 1;
748 		} else {
749 			goto exit_action_not_supported;
750 		}
751 	}
752 	if (parser->drop && parser->mark)
753 		parser->mark = 0;
754 	if (!parser->queues_n && !parser->drop) {
755 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
756 				   NULL, "no valid action");
757 		return -rte_errno;
758 	}
759 	return 0;
760 exit_action_not_supported:
761 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
762 			   actions, "action not supported");
763 	return -rte_errno;
764 }
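/*
 * To summarize the checks above: at least one of DROP, QUEUE or RSS is
 * required, MARK/FLAG and COUNT may accompany it, and MARK is silently
 * discarded when the flow also drops. For illustration, a hypothetical
 * application-side action list this parser accepts could look like:
 *
 *   struct rte_flow_action_queue queue = { .index = 0 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 */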
765 
766 /**
767  * Validate items.
768  *
769  * @param priv
770  *   Pointer to private structure.
771  * @param[in] items
772  *   Pattern specification (list terminated by the END pattern item).
773  * @param[out] error
774  *   Perform verbose error reporting if not NULL.
775  * @param[in, out] parser
776  *   Internal parser structure.
777  *
778  * @return
779  *   0 on success, a negative errno value otherwise and rte_errno is set.
780  */
781 static int
782 priv_flow_convert_items_validate(struct priv *priv,
783 				 const struct rte_flow_item items[],
784 				 struct rte_flow_error *error,
785 				 struct mlx5_flow_parse *parser)
786 {
787 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
788 	unsigned int i;
789 
790 	(void)priv;
791 	/* Initialise the offsets to start after verbs attribute. */
792 	for (i = 0; i != hash_rxq_init_n; ++i)
793 		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
794 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
795 		const struct mlx5_flow_items *token = NULL;
796 		unsigned int n;
797 		int err;
798 
799 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
800 			continue;
801 		for (i = 0;
802 		     cur_item->items &&
803 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
804 		     ++i) {
805 			if (cur_item->items[i] == items->type) {
806 				token = &mlx5_flow_items[items->type];
807 				break;
808 			}
809 		}
810 		if (!token)
811 			goto exit_item_not_supported;
812 		cur_item = token;
813 		err = mlx5_flow_item_validate(items,
814 					      (const uint8_t *)cur_item->mask,
815 					      cur_item->mask_sz);
816 		if (err)
817 			goto exit_item_not_supported;
818 		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
819 			if (parser->inner) {
820 				rte_flow_error_set(error, ENOTSUP,
821 						   RTE_FLOW_ERROR_TYPE_ITEM,
822 						   items,
823 						   "cannot recognize multiple"
824 						   " VXLAN encapsulations");
825 				return -rte_errno;
826 			}
827 			parser->inner = IBV_FLOW_SPEC_INNER;
828 		}
829 		if (parser->drop) {
830 			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
831 		} else {
832 			for (n = 0; n != hash_rxq_init_n; ++n)
833 				parser->queue[n].offset += cur_item->dst_sz;
834 		}
835 	}
836 	if (parser->drop) {
837 		parser->queue[HASH_RXQ_ETH].offset +=
838 			sizeof(struct ibv_flow_spec_action_drop);
839 	}
840 	if (parser->mark) {
841 		for (i = 0; i != hash_rxq_init_n; ++i)
842 			parser->queue[i].offset +=
843 				sizeof(struct ibv_flow_spec_action_tag);
844 	}
845 	if (parser->count) {
846 		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
847 
848 		for (i = 0; i != hash_rxq_init_n; ++i)
849 			parser->queue[i].offset += size;
850 	}
851 	return 0;
852 exit_item_not_supported:
853 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
854 			   items, "item not supported");
855 	return -rte_errno;
856 }
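/*
 * On return, queue[i].offset holds, for each hash Rx queue type, the number
 * of bytes accumulated so far for its Verbs attribute (attribute header plus
 * every spec the pattern and actions will emit). priv_flow_convert_finalise()
 * may still add room for wildcard specs before the buffers are actually
 * allocated by priv_flow_convert().
 */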
857 
858 /**
859  * Allocate memory space to store verbs flow attributes.
860  *
861  * @param priv
862  *   Pointer to private structure.
863  * @param[in] priority
864  *   Flow priority.
865  * @param[in] size
866  *   Number of bytes to allocate.
867  * @param[out] error
868  *   Perform verbose error reporting if not NULL.
869  *
870  * @return
871  *   A verbs flow attribute on success, NULL otherwise.
872  */
873 static struct ibv_flow_attr*
874 priv_flow_convert_allocate(struct priv *priv,
875 			   unsigned int priority,
876 			   unsigned int size,
877 			   struct rte_flow_error *error)
878 {
879 	struct ibv_flow_attr *ibv_attr;
880 
881 	(void)priv;
882 	ibv_attr = rte_calloc(__func__, 1, size, 0);
883 	if (!ibv_attr) {
884 		rte_flow_error_set(error, ENOMEM,
885 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
886 				   NULL,
887 				   "cannot allocate verbs spec attributes.");
888 		return NULL;
889 	}
890 	ibv_attr->priority = priority;
891 	return ibv_attr;
892 }
893 
894 /**
895  * Finalise verbs flow attributes.
896  *
897  * @param priv
898  *   Pointer to private structure.
899  * @param[in, out] parser
900  *   Internal parser structure.
901  */
902 static void
903 priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
904 {
905 	const unsigned int ipv4 =
906 		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
907 	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
908 	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
909 	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
910 	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
911 	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
912 	unsigned int i;
913 
914 	(void)priv;
915 	if (parser->layer == HASH_RXQ_ETH) {
916 		goto fill;
917 	} else {
918 		/*
919 		 * The Ethernet-only attribute becomes useless since the
920 		 * pattern defines deeper (L3/L4) layers.
921 		 */
922 		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
923 		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
924 	}
925 	/* Remove the opposite kind of layer, e.g. IPv6 if the pattern is IPv4. */
926 	for (i = ohmin; i != (ohmax + 1); ++i) {
927 		if (!parser->queue[i].ibv_attr)
928 			continue;
929 		rte_free(parser->queue[i].ibv_attr);
930 		parser->queue[i].ibv_attr = NULL;
931 	}
932 	/* Remove impossible flow according to the RSS configuration. */
933 	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
934 	    parser->rss_conf.rss_hf) {
935 		/* Remove any other flow. */
936 		for (i = hmin; i != (hmax + 1); ++i) {
937 			if ((i == parser->layer) ||
938 			     (!parser->queue[i].ibv_attr))
939 				continue;
940 			rte_free(parser->queue[i].ibv_attr);
941 			parser->queue[i].ibv_attr = NULL;
942 		}
943 	} else  if (!parser->queue[ip].ibv_attr) {
944 		/* no RSS possible with the current configuration. */
945 		parser->queues_n = 1;
946 		return;
947 	}
948 fill:
949 	/*
950 	 * Fill missing layers in verbs specifications, or compute the correct
951 	 * offset to allocate the memory space for the attributes and
952 	 * specifications.
953 	 */
954 	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
955 		union {
956 			struct ibv_flow_spec_ipv4_ext ipv4;
957 			struct ibv_flow_spec_ipv6 ipv6;
958 			struct ibv_flow_spec_tcp_udp udp_tcp;
959 		} specs;
960 		void *dst;
961 		uint16_t size;
962 
963 		if (i == parser->layer)
964 			continue;
965 		if (parser->layer == HASH_RXQ_ETH) {
966 			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
967 				size = sizeof(struct ibv_flow_spec_ipv4_ext);
968 				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
969 					.type = IBV_FLOW_SPEC_IPV4_EXT,
970 					.size = size,
971 				};
972 			} else {
973 				size = sizeof(struct ibv_flow_spec_ipv6);
974 				specs.ipv6 = (struct ibv_flow_spec_ipv6){
975 					.type = IBV_FLOW_SPEC_IPV6,
976 					.size = size,
977 				};
978 			}
979 			if (parser->queue[i].ibv_attr) {
980 				dst = (void *)((uintptr_t)
981 					       parser->queue[i].ibv_attr +
982 					       parser->queue[i].offset);
983 				memcpy(dst, &specs, size);
984 				++parser->queue[i].ibv_attr->num_of_specs;
985 			}
986 			parser->queue[i].offset += size;
987 		}
988 		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
989 		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
990 			size = sizeof(struct ibv_flow_spec_tcp_udp);
991 			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
992 				.type = ((i == HASH_RXQ_UDPV4 ||
993 					  i == HASH_RXQ_UDPV6) ?
994 					 IBV_FLOW_SPEC_UDP :
995 					 IBV_FLOW_SPEC_TCP),
996 				.size = size,
997 			};
998 			if (parser->queue[i].ibv_attr) {
999 				dst = (void *)((uintptr_t)
1000 					       parser->queue[i].ibv_attr +
1001 					       parser->queue[i].offset);
1002 				memcpy(dst, &specs, size);
1003 				++parser->queue[i].ibv_attr->num_of_specs;
1004 			}
1005 			parser->queue[i].offset += size;
1006 		}
1007 	}
1008 }
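/*
 * In short, priv_flow_convert_finalise() prunes the hash Rx queue types made
 * impossible by the detected layer or the RSS configuration, then pads the
 * remaining attributes (or, on the first pass, their computed offsets) with
 * wildcard L3/L4 specs so each of them still matches the protocol stack its
 * hash type expects.
 */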
1009 
1010 /**
1011  * Validate and convert a flow supported by the NIC.
1012  *
1013  * @param priv
1014  *   Pointer to private structure.
1015  * @param[in] attr
1016  *   Flow rule attributes.
1017  * @param[in] pattern
1018  *   Pattern specification (list terminated by the END pattern item).
1019  * @param[in] actions
1020  *   Associated actions (list terminated by the END action).
1021  * @param[out] error
1022  *   Perform verbose error reporting if not NULL.
1023  * @param[in, out] parser
1024  *   Internal parser structure.
1025  *
1026  * @return
1027  *   0 on success, a negative errno value otherwise and rte_errno is set.
1028  */
1029 static int
1030 priv_flow_convert(struct priv *priv,
1031 		  const struct rte_flow_attr *attr,
1032 		  const struct rte_flow_item items[],
1033 		  const struct rte_flow_action actions[],
1034 		  struct rte_flow_error *error,
1035 		  struct mlx5_flow_parse *parser)
1036 {
1037 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1038 	unsigned int i;
1039 	int ret;
1040 
1041 	/* First step. Validate the attributes, items and actions. */
1042 	*parser = (struct mlx5_flow_parse){
1043 		.create = parser->create,
1044 		.layer = HASH_RXQ_ETH,
1045 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
1046 	};
1047 	ret = priv_flow_convert_attributes(priv, attr, error, parser);
1048 	if (ret)
1049 		return ret;
1050 	ret = priv_flow_convert_actions(priv, actions, error, parser);
1051 	if (ret)
1052 		return ret;
1053 	ret = priv_flow_convert_items_validate(priv, items, error, parser);
1054 	if (ret)
1055 		return ret;
1056 	priv_flow_convert_finalise(priv, parser);
1057 	/*
1058 	 * Second step.
1059 	 * Allocate the memory space to store verbs specifications.
1060 	 */
1061 	if (parser->drop) {
1062 		unsigned int priority =
1063 			attr->priority +
1064 			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1065 		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1066 
1067 		parser->queue[HASH_RXQ_ETH].ibv_attr =
1068 			priv_flow_convert_allocate(priv, priority,
1069 						   offset, error);
1070 		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1071 			return ENOMEM;
1072 		parser->queue[HASH_RXQ_ETH].offset =
1073 			sizeof(struct ibv_flow_attr);
1074 	} else {
1075 		for (i = 0; i != hash_rxq_init_n; ++i) {
1076 			unsigned int priority =
1077 				attr->priority +
1078 				hash_rxq_init[i].flow_priority;
1079 			unsigned int offset;
1080 
1081 			if (!(parser->rss_conf.rss_hf &
1082 			      hash_rxq_init[i].dpdk_rss_hf) &&
1083 			    (i != HASH_RXQ_ETH))
1084 				continue;
1085 			offset = parser->queue[i].offset;
1086 			parser->queue[i].ibv_attr =
1087 				priv_flow_convert_allocate(priv, priority,
1088 							   offset, error);
1089 			if (!parser->queue[i].ibv_attr)
1090 				goto exit_enomem;
1091 			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1092 		}
1093 	}
1094 	/* Third step. Conversion parse, fill the specifications. */
1095 	parser->inner = 0;
1096 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1097 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1098 			continue;
1099 		cur_item = &mlx5_flow_items[items->type];
1100 		ret = cur_item->convert(items,
1101 					(cur_item->default_mask ?
1102 					 cur_item->default_mask :
1103 					 cur_item->mask),
1104 					parser);
1105 		if (ret) {
1106 			rte_flow_error_set(error, ret,
1107 					   RTE_FLOW_ERROR_TYPE_ITEM,
1108 					   items, "item not supported");
1109 			goto exit_free;
1110 		}
1111 	}
1112 	if (parser->mark)
1113 		mlx5_flow_create_flag_mark(parser, parser->mark_id);
1114 	if (parser->count && parser->create) {
1115 		mlx5_flow_create_count(priv, parser);
1116 		if (!parser->cs)
1117 			goto exit_count_error;
1118 	}
1119 	/*
1120 	 * Last step. Complete the missing specifications needed to honour
1121 	 * the RSS configuration.
1122 	 */
1123 	if (!parser->drop) {
1124 		priv_flow_convert_finalise(priv, parser);
1125 	} else {
1126 		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
1127 			attr->priority +
1128 			hash_rxq_init[parser->layer].flow_priority;
1129 	}
1130 exit_free:
1131 	/* Only verification is expected, all resources should be released. */
1132 	if (!parser->create) {
1133 		for (i = 0; i != hash_rxq_init_n; ++i) {
1134 			if (parser->queue[i].ibv_attr) {
1135 				rte_free(parser->queue[i].ibv_attr);
1136 				parser->queue[i].ibv_attr = NULL;
1137 			}
1138 		}
1139 	}
1140 	return ret;
1141 exit_enomem:
1142 	for (i = 0; i != hash_rxq_init_n; ++i) {
1143 		if (parser->queue[i].ibv_attr) {
1144 			rte_free(parser->queue[i].ibv_attr);
1145 			parser->queue[i].ibv_attr = NULL;
1146 		}
1147 	}
1148 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1149 			   NULL, "cannot allocate verbs spec attributes.");
1150 	return ret;
1151 exit_count_error:
1152 	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1153 			   NULL, "cannot create counter.");
1154 	return rte_errno;
1155 }
1156 
1157 /**
1158  * Copy the specification created into the flow.
1159  *
1160  * @param parser
1161  *   Internal parser structure.
1162  * @param src
1163  *   Created specification to copy.
1164  * @param size
1165  *   Size in bytes of the specification to copy.
1166  */
1167 static void
1168 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1169 		      unsigned int size)
1170 {
1171 	unsigned int i;
1172 	void *dst;
1173 
1174 	for (i = 0; i != hash_rxq_init_n; ++i) {
1175 		if (!parser->queue[i].ibv_attr)
1176 			continue;
1177 		/* Specification must be the same L3 type or none. */
1178 		if (parser->layer == HASH_RXQ_ETH ||
1179 		    (hash_rxq_init[parser->layer].ip_version ==
1180 		     hash_rxq_init[i].ip_version) ||
1181 		    (hash_rxq_init[i].ip_version == 0)) {
1182 			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1183 					parser->queue[i].offset);
1184 			memcpy(dst, src, size);
1185 			++parser->queue[i].ibv_attr->num_of_specs;
1186 			parser->queue[i].offset += size;
1187 		}
1188 	}
1189 }
1190 
1191 /**
1192  * Convert Ethernet item to Verbs specification.
1193  *
1194  * @param item[in]
1195  *   Item specification.
1196  * @param default_mask[in]
1197  *   Default bit-masks to use when item->mask is not provided.
1198  * @param data[in, out]
1199  *   User structure.
1200  */
1201 static int
1202 mlx5_flow_create_eth(const struct rte_flow_item *item,
1203 		     const void *default_mask,
1204 		     void *data)
1205 {
1206 	const struct rte_flow_item_eth *spec = item->spec;
1207 	const struct rte_flow_item_eth *mask = item->mask;
1208 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1209 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1210 	struct ibv_flow_spec_eth eth = {
1211 		.type = parser->inner | IBV_FLOW_SPEC_ETH,
1212 		.size = eth_size,
1213 	};
1214 
1215 	/* Don't update layer for the inner pattern. */
1216 	if (!parser->inner)
1217 		parser->layer = HASH_RXQ_ETH;
1218 	if (spec) {
1219 		unsigned int i;
1220 
1221 		if (!mask)
1222 			mask = default_mask;
1223 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1224 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1225 		eth.val.ether_type = spec->type;
1226 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1227 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1228 		eth.mask.ether_type = mask->type;
1229 		/* Remove unwanted bits from values. */
1230 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1231 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1232 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
1233 		}
1234 		eth.val.ether_type &= eth.mask.ether_type;
1235 	}
1236 	mlx5_flow_create_copy(parser, &eth, eth_size);
1237 	return 0;
1238 }
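/*
 * The remaining mlx5_flow_create_*() converters follow the same pattern as
 * the Ethernet one above: build a Verbs spec from item->spec, apply
 * item->mask (or the default mask), clear the bits the mask does not cover
 * and append the result to every active attribute through
 * mlx5_flow_create_copy(). VLAN is the one exception, see below.
 */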
1239 
1240 /**
1241  * Convert VLAN item to Verbs specification.
1242  *
1243  * @param item[in]
1244  *   Item specification.
1245  * @param default_mask[in]
1246  *   Default bit-masks to use when item->mask is not provided.
1247  * @param data[in, out]
1248  *   User structure.
1249  */
1250 static int
1251 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1252 		      const void *default_mask,
1253 		      void *data)
1254 {
1255 	const struct rte_flow_item_vlan *spec = item->spec;
1256 	const struct rte_flow_item_vlan *mask = item->mask;
1257 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1258 	struct ibv_flow_spec_eth *eth;
1259 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1260 
1261 	if (spec) {
1262 		unsigned int i;
1263 		if (!mask)
1264 			mask = default_mask;
1265 
1266 		for (i = 0; i != hash_rxq_init_n; ++i) {
1267 			if (!parser->queue[i].ibv_attr)
1268 				continue;
1269 
1270 			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1271 				       parser->queue[i].offset - eth_size);
1272 			eth->val.vlan_tag = spec->tci;
1273 			eth->mask.vlan_tag = mask->tci;
1274 			eth->val.vlan_tag &= eth->mask.vlan_tag;
1275 		}
1276 	}
1277 	return 0;
1278 }
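/*
 * Note: this is why RTE_FLOW_ITEM_TYPE_VLAN has .dst_sz = 0 in
 * mlx5_flow_items[]: the TCI is folded into the Ethernet spec emitted by the
 * previous item instead of producing a Verbs spec of its own.
 */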
1279 
1280 /**
1281  * Convert IPv4 item to Verbs specification.
1282  *
1283  * @param item[in]
1284  *   Item specification.
1285  * @param default_mask[in]
1286  *   Default bit-masks to use when item->mask is not provided.
1287  * @param data[in, out]
1288  *   User structure.
1289  */
1290 static int
1291 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1292 		      const void *default_mask,
1293 		      void *data)
1294 {
1295 	const struct rte_flow_item_ipv4 *spec = item->spec;
1296 	const struct rte_flow_item_ipv4 *mask = item->mask;
1297 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1298 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1299 	struct ibv_flow_spec_ipv4_ext ipv4 = {
1300 		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1301 		.size = ipv4_size,
1302 	};
1303 
1304 	/* Don't update layer for the inner pattern. */
1305 	if (!parser->inner)
1306 		parser->layer = HASH_RXQ_IPV4;
1307 	if (spec) {
1308 		if (!mask)
1309 			mask = default_mask;
1310 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1311 			.src_ip = spec->hdr.src_addr,
1312 			.dst_ip = spec->hdr.dst_addr,
1313 			.proto = spec->hdr.next_proto_id,
1314 			.tos = spec->hdr.type_of_service,
1315 		};
1316 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1317 			.src_ip = mask->hdr.src_addr,
1318 			.dst_ip = mask->hdr.dst_addr,
1319 			.proto = mask->hdr.next_proto_id,
1320 			.tos = mask->hdr.type_of_service,
1321 		};
1322 		/* Remove unwanted bits from values. */
1323 		ipv4.val.src_ip &= ipv4.mask.src_ip;
1324 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1325 		ipv4.val.proto &= ipv4.mask.proto;
1326 		ipv4.val.tos &= ipv4.mask.tos;
1327 	}
1328 	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1329 	return 0;
1330 }
1331 
1332 /**
1333  * Convert IPv6 item to Verbs specification.
1334  *
1335  * @param item[in]
1336  *   Item specification.
1337  * @param default_mask[in]
1338  *   Default bit-masks to use when item->mask is not provided.
1339  * @param data[in, out]
1340  *   User structure.
1341  */
1342 static int
1343 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1344 		      const void *default_mask,
1345 		      void *data)
1346 {
1347 	const struct rte_flow_item_ipv6 *spec = item->spec;
1348 	const struct rte_flow_item_ipv6 *mask = item->mask;
1349 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1350 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1351 	struct ibv_flow_spec_ipv6 ipv6 = {
1352 		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
1353 		.size = ipv6_size,
1354 	};
1355 
1356 	/* Don't update layer for the inner pattern. */
1357 	if (!parser->inner)
1358 		parser->layer = HASH_RXQ_IPV6;
1359 	if (spec) {
1360 		unsigned int i;
1361 		uint32_t vtc_flow_val;
1362 		uint32_t vtc_flow_mask;
1363 
1364 		if (!mask)
1365 			mask = default_mask;
1366 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1367 		       RTE_DIM(ipv6.val.src_ip));
1368 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1369 		       RTE_DIM(ipv6.val.dst_ip));
1370 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1371 		       RTE_DIM(ipv6.mask.src_ip));
1372 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1373 		       RTE_DIM(ipv6.mask.dst_ip));
1374 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1375 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1376 		ipv6.val.flow_label =
1377 			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1378 					 IPV6_HDR_FL_SHIFT);
1379 		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1380 					 IPV6_HDR_TC_SHIFT;
1381 		ipv6.val.next_hdr = spec->hdr.proto;
1382 		ipv6.val.hop_limit = spec->hdr.hop_limits;
1383 		ipv6.mask.flow_label =
1384 			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1385 					 IPV6_HDR_FL_SHIFT);
1386 		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1387 					  IPV6_HDR_TC_SHIFT;
1388 		ipv6.mask.next_hdr = mask->hdr.proto;
1389 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
1390 		/* Remove unwanted bits from values. */
1391 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1392 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1393 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1394 		}
1395 		ipv6.val.flow_label &= ipv6.mask.flow_label;
1396 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1397 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1398 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1399 	}
1400 	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1401 	return 0;
1402 }
1403 
1404 /**
1405  * Convert UDP item to Verbs specification.
1406  *
1407  * @param item[in]
1408  *   Item specification.
1409  * @param default_mask[in]
1410  *   Default bit-masks to use when item->mask is not provided.
1411  * @param data[in, out]
1412  *   User structure.
1413  */
1414 static int
1415 mlx5_flow_create_udp(const struct rte_flow_item *item,
1416 		     const void *default_mask,
1417 		     void *data)
1418 {
1419 	const struct rte_flow_item_udp *spec = item->spec;
1420 	const struct rte_flow_item_udp *mask = item->mask;
1421 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1422 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1423 	struct ibv_flow_spec_tcp_udp udp = {
1424 		.type = parser->inner | IBV_FLOW_SPEC_UDP,
1425 		.size = udp_size,
1426 	};
1427 
1428 	/* Don't update layer for the inner pattern. */
1429 	if (!parser->inner) {
1430 		if (parser->layer == HASH_RXQ_IPV4)
1431 			parser->layer = HASH_RXQ_UDPV4;
1432 		else
1433 			parser->layer = HASH_RXQ_UDPV6;
1434 	}
1435 	if (spec) {
1436 		if (!mask)
1437 			mask = default_mask;
1438 		udp.val.dst_port = spec->hdr.dst_port;
1439 		udp.val.src_port = spec->hdr.src_port;
1440 		udp.mask.dst_port = mask->hdr.dst_port;
1441 		udp.mask.src_port = mask->hdr.src_port;
1442 		/* Remove unwanted bits from values. */
1443 		udp.val.src_port &= udp.mask.src_port;
1444 		udp.val.dst_port &= udp.mask.dst_port;
1445 	}
1446 	mlx5_flow_create_copy(parser, &udp, udp_size);
1447 	return 0;
1448 }
1449 
1450 /**
1451  * Convert TCP item to Verbs specification.
1452  *
1453  * @param item[in]
1454  *   Item specification.
1455  * @param default_mask[in]
1456  *   Default bit-masks to use when item->mask is not provided.
1457  * @param data[in, out]
1458  *   User structure.
1459  */
1460 static int
1461 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1462 		     const void *default_mask,
1463 		     void *data)
1464 {
1465 	const struct rte_flow_item_tcp *spec = item->spec;
1466 	const struct rte_flow_item_tcp *mask = item->mask;
1467 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1468 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1469 	struct ibv_flow_spec_tcp_udp tcp = {
1470 		.type = parser->inner | IBV_FLOW_SPEC_TCP,
1471 		.size = tcp_size,
1472 	};
1473 
1474 	/* Don't update layer for the inner pattern. */
1475 	if (!parser->inner) {
1476 		if (parser->layer == HASH_RXQ_IPV4)
1477 			parser->layer = HASH_RXQ_TCPV4;
1478 		else
1479 			parser->layer = HASH_RXQ_TCPV6;
1480 	}
1481 	if (spec) {
1482 		if (!mask)
1483 			mask = default_mask;
1484 		tcp.val.dst_port = spec->hdr.dst_port;
1485 		tcp.val.src_port = spec->hdr.src_port;
1486 		tcp.mask.dst_port = mask->hdr.dst_port;
1487 		tcp.mask.src_port = mask->hdr.src_port;
1488 		/* Remove unwanted bits from values. */
1489 		tcp.val.src_port &= tcp.mask.src_port;
1490 		tcp.val.dst_port &= tcp.mask.dst_port;
1491 	}
1492 	mlx5_flow_create_copy(parser, &tcp, tcp_size);
1493 	return 0;
1494 }
1495 
1496 /**
1497  * Convert VXLAN item to Verbs specification.
1498  *
1499  * @param item[in]
1500  *   Item specification.
1501  * @param default_mask[in]
1502  *   Default bit-masks to use when item->mask is not provided.
1503  * @param data[in, out]
1504  *   User structure.
1505  */
1506 static int
1507 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1508 		       const void *default_mask,
1509 		       void *data)
1510 {
1511 	const struct rte_flow_item_vxlan *spec = item->spec;
1512 	const struct rte_flow_item_vxlan *mask = item->mask;
1513 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1514 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1515 	struct ibv_flow_spec_tunnel vxlan = {
1516 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1517 		.size = size,
1518 	};
1519 	union vni {
1520 		uint32_t vlan_id;
1521 		uint8_t vni[4];
1522 	} id;
1523 
1524 	id.vni[0] = 0;
1525 	parser->inner = IBV_FLOW_SPEC_INNER;
1526 	if (spec) {
1527 		if (!mask)
1528 			mask = default_mask;
1529 		memcpy(&id.vni[1], spec->vni, 3);
1530 		vxlan.val.tunnel_id = id.vlan_id;
1531 		memcpy(&id.vni[1], mask->vni, 3);
1532 		vxlan.mask.tunnel_id = id.vlan_id;
1533 		/* Remove unwanted bits from values. */
1534 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1535 	}
1536 	/*
1537 	 * Tunnel id 0 is equivalent to not specifying a VXLAN layer at all: if
1538 	 * it is the only layer in the Verbs specification it acts as a wildcard
1539 	 * and every packet matches the rule, and if it follows a full stack
1540 	 * (e.g. eth / ipv4 / udp), every packet matching the preceding layers
1541 	 * matches this rule as well.
1542 	 * To avoid such a situation, VNI 0 is currently refused.
1543 	 */
1544 	if (!vxlan.val.tunnel_id)
1545 		return EINVAL;
1546 	mlx5_flow_create_copy(parser, &vxlan, size);
1547 	return 0;
1548 }
1549 
1550 /**
1551  * Convert mark/flag action to Verbs specification.
1552  *
1553  * @param parser
1554  *   Internal parser structure.
1555  * @param mark_id
1556  *   Mark identifier.
1557  */
1558 static int
1559 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1560 {
1561 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1562 	struct ibv_flow_spec_action_tag tag = {
1563 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1564 		.size = size,
1565 		.tag_id = mlx5_flow_mark_set(mark_id),
1566 	};
1567 
1568 	assert(parser->mark);
1569 	mlx5_flow_create_copy(parser, &tag, size);
1570 	return 0;
1571 }
1572 
1573 /**
1574  * Convert count action to Verbs specification.
1575  *
1576  * @param priv
1577  *   Pointer to private structure.
1578  * @param parser
1579  *   Pointer to MLX5 flow parser structure.
1580  *
1581  * @return
1582  *   0 on success, errno value on failure.
1583  */
1584 static int
1585 mlx5_flow_create_count(struct priv *priv __rte_unused,
1586 		       struct mlx5_flow_parse *parser __rte_unused)
1587 {
1588 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1589 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1590 	struct ibv_counter_set_init_attr init_attr = {0};
1591 	struct ibv_flow_spec_counter_action counter = {
1592 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
1593 		.size = size,
1594 		.counter_set_handle = 0,
1595 	};
1596 
1597 	init_attr.counter_set_id = 0;
1598 	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1599 	if (!parser->cs)
1600 		return EINVAL;
1601 	counter.counter_set_handle = parser->cs->handle;
1602 	mlx5_flow_create_copy(parser, &counter, size);
1603 #endif
1604 	return 0;
1605 }
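/*
 * Note: the counter set is only instantiated when parser->create is set,
 * i.e. during actual flow creation; validation alone does not allocate Verbs
 * counter resources. Without HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT this
 * function compiles to a no-op and the COUNT action is not advertised.
 */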
1606 
1607 /**
1608  * Complete flow rule creation with a drop queue.
1609  *
1610  * @param priv
1611  *   Pointer to private structure.
1612  * @param parser
1613  *   Internal parser structure.
1614  * @param flow
1615  *   Pointer to the rte_flow.
1616  * @param[out] error
1617  *   Perform verbose error reporting if not NULL.
1618  *
1619  * @return
1620  *   0 on success, errno value on failure.
1621  */
1622 static int
1623 priv_flow_create_action_queue_drop(struct priv *priv,
1624 				   struct mlx5_flow_parse *parser,
1625 				   struct rte_flow *flow,
1626 				   struct rte_flow_error *error)
1627 {
1628 	struct ibv_flow_spec_action_drop *drop;
1629 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1630 	int err = 0;
1631 
1632 	assert(priv->pd);
1633 	assert(priv->ctx);
1634 	flow->drop = 1;
1635 	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1636 			parser->queue[HASH_RXQ_ETH].offset);
1637 	*drop = (struct ibv_flow_spec_action_drop){
1638 			.type = IBV_FLOW_SPEC_ACTION_DROP,
1639 			.size = size,
1640 	};
1641 	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1642 	parser->queue[HASH_RXQ_ETH].offset += size;
1643 	flow->frxq[HASH_RXQ_ETH].ibv_attr =
1644 		parser->queue[HASH_RXQ_ETH].ibv_attr;
1645 	if (parser->count)
1646 		flow->cs = parser->cs;
1647 	if (!priv->dev->data->dev_started)
1648 		return 0;
1649 	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1650 	flow->frxq[HASH_RXQ_ETH].ibv_flow =
1651 		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1652 				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
1653 	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1654 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1655 				   NULL, "flow rule creation failure");
1656 		err = ENOMEM;
1657 		goto error;
1658 	}
1659 	return 0;
1660 error:
1661 	assert(flow);
1662 	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1663 		claim_zero(mlx5_glue->destroy_flow
1664 			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1665 		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1666 	}
1667 	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1668 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1669 		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1670 	}
1671 	if (flow->cs) {
1672 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1673 		flow->cs = NULL;
1674 		parser->cs = NULL;
1675 	}
1676 	return err;
1677 }
1678 
1679 /**
1680  * Create hash Rx queues when RSS is enabled.
1681  *
1682  * @param priv
1683  *   Pointer to private structure.
1684  * @param parser
1685  *   Internal parser structure.
1686  * @param flow
1687  *   Pointer to the rte_flow.
1688  * @param[out] error
1689  *   Perform verbose error reporting if not NULL.
1690  *
1691  * @return
1692  *   0 on success, an errno value otherwise and rte_errno is set.
1693  */
1694 static int
1695 priv_flow_create_action_queue_rss(struct priv *priv,
1696 				  struct mlx5_flow_parse *parser,
1697 				  struct rte_flow *flow,
1698 				  struct rte_flow_error *error)
1699 {
1700 	unsigned int i;
1701 
1702 	for (i = 0; i != hash_rxq_init_n; ++i) {
1703 		uint64_t hash_fields;
1704 
1705 		if (!parser->queue[i].ibv_attr)
1706 			continue;
1707 		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1708 		parser->queue[i].ibv_attr = NULL;
1709 		hash_fields = hash_rxq_init[i].hash_fields;
1710 		if (!priv->dev->data->dev_started)
1711 			continue;
1712 		flow->frxq[i].hrxq =
1713 			mlx5_priv_hrxq_get(priv,
1714 					   parser->rss_conf.rss_key,
1715 					   parser->rss_conf.rss_key_len,
1716 					   hash_fields,
1717 					   parser->queues,
1718 					   parser->queues_n);
1719 		if (flow->frxq[i].hrxq)
1720 			continue;
1721 		flow->frxq[i].hrxq =
1722 			mlx5_priv_hrxq_new(priv,
1723 					   parser->rss_conf.rss_key,
1724 					   parser->rss_conf.rss_key_len,
1725 					   hash_fields,
1726 					   parser->queues,
1727 					   parser->queues_n);
1728 		if (!flow->frxq[i].hrxq) {
1729 			rte_flow_error_set(error, ENOMEM,
1730 					   RTE_FLOW_ERROR_TYPE_HANDLE,
1731 					   NULL, "cannot create hash rxq");
1732 			return ENOMEM;
1733 		}
1734 	}
1735 	return 0;
1736 }
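/*
 * Hash Rx queue objects are shared: mlx5_priv_hrxq_get() reuses an existing
 * queue matching the RSS key, hash fields and queue list, and
 * mlx5_priv_hrxq_new() is only called when no such object exists. Flows with
 * the same RSS configuration therefore end up on the same Verbs QP and
 * indirection table.
 */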
1737 
1738 /**
1739  * Complete flow rule creation.
1740  *
1741  * @param priv
1742  *   Pointer to private structure.
1743  * @param parser
1744  *   Internal parser structure.
1745  * @param flow
1746  *   Pointer to the rte_flow.
1747  * @param[out] error
1748  *   Perform verbose error reporting if not NULL.
1749  *
1750  * @return
1751  *   0 on success, an errno value otherwise and rte_errno is set.
1752  */
1753 static int
1754 priv_flow_create_action_queue(struct priv *priv,
1755 			      struct mlx5_flow_parse *parser,
1756 			      struct rte_flow *flow,
1757 			      struct rte_flow_error *error)
1758 {
1759 	int err = 0;
1760 	unsigned int i;
1761 
1762 	assert(priv->pd);
1763 	assert(priv->ctx);
1764 	assert(!parser->drop);
1765 	err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
1766 	if (err)
1767 		goto error;
1768 	if (parser->count)
1769 		flow->cs = parser->cs;
1770 	if (!priv->dev->data->dev_started)
1771 		return 0;
1772 	for (i = 0; i != hash_rxq_init_n; ++i) {
1773 		if (!flow->frxq[i].hrxq)
1774 			continue;
1775 		flow->frxq[i].ibv_flow =
1776 			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1777 					       flow->frxq[i].ibv_attr);
1778 		if (!flow->frxq[i].ibv_flow) {
1779 			rte_flow_error_set(error, ENOMEM,
1780 					   RTE_FLOW_ERROR_TYPE_HANDLE,
1781 					   NULL, "flow rule creation failure");
1782 			err = ENOMEM;
1783 			goto error;
1784 		}
1785 		DEBUG("%p type %d QP %p ibv_flow %p",
1786 		      (void *)flow, i,
1787 		      (void *)flow->frxq[i].hrxq,
1788 		      (void *)flow->frxq[i].ibv_flow);
1789 	}
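	/* Propagate the MARK/FLAG request to every Rx queue of the flow. */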
1790 	for (i = 0; i != parser->queues_n; ++i) {
1791 		struct mlx5_rxq_data *q =
1792 			(*priv->rxqs)[parser->queues[i]];
1793 
1794 		q->mark |= parser->mark;
1795 	}
1796 	return 0;
1797 error:
1798 	assert(flow);
1799 	for (i = 0; i != hash_rxq_init_n; ++i) {
1800 		if (flow->frxq[i].ibv_flow) {
1801 			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1802 
1803 			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1804 		}
1805 		if (flow->frxq[i].hrxq)
1806 			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
1807 		if (flow->frxq[i].ibv_attr)
1808 			rte_free(flow->frxq[i].ibv_attr);
1809 	}
1810 	if (flow->cs) {
1811 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1812 		flow->cs = NULL;
1813 		parser->cs = NULL;
1814 	}
1815 	return err;
1816 }
1817 
1818 /**
1819  * Create a flow from its description and add it to the list.
1820  *
1821  * @param priv
1822  *   Pointer to private structure.
1823  * @param list
1824  *   Pointer to a TAILQ flow list.
1825  * @param[in] attr
1826  *   Flow rule attributes.
1827  * @param[in] pattern
1828  *   Pattern specification (list terminated by the END pattern item).
1829  * @param[in] actions
1830  *   Associated actions (list terminated by the END action).
1831  * @param[out] error
1832  *   Perform verbose error reporting if not NULL.
1833  *
1834  * @return
1835  *   A flow on success, NULL otherwise.
1836  */
1837 static struct rte_flow *
1838 priv_flow_create(struct priv *priv,
1839 		 struct mlx5_flows *list,
1840 		 const struct rte_flow_attr *attr,
1841 		 const struct rte_flow_item items[],
1842 		 const struct rte_flow_action actions[],
1843 		 struct rte_flow_error *error)
1844 {
1845 	struct mlx5_flow_parse parser = { .create = 1, };
1846 	struct rte_flow *flow = NULL;
1847 	unsigned int i;
1848 	int err;
1849 
1850 	err = priv_flow_convert(priv, attr, items, actions, error, &parser);
1851 	if (err)
1852 		goto exit;
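	/* Queue indices are stored in the same allocation, after the flow. */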
1853 	flow = rte_calloc(__func__, 1,
1854 			  sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1855 			  0);
1856 	if (!flow) {
1857 		rte_flow_error_set(error, ENOMEM,
1858 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1859 				   NULL,
1860 				   "cannot allocate flow memory");
1861 		return NULL;
1862 	}
1863 	/* Copy queues configuration. */
1864 	flow->queues = (uint16_t (*)[])(flow + 1);
1865 	memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1866 	flow->queues_n = parser.queues_n;
1867 	flow->mark = parser.mark;
1868 	/* Copy RSS configuration. */
1869 	flow->rss_conf = parser.rss_conf;
1870 	flow->rss_conf.rss_key = flow->rss_key;
1871 	memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1872 	/* Finalize the flow. */
1873 	if (parser.drop)
1874 		err = priv_flow_create_action_queue_drop(priv, &parser, flow,
1875 							 error);
1876 	else
1877 		err = priv_flow_create_action_queue(priv, &parser, flow, error);
1878 	if (err)
1879 		goto exit;
1880 	TAILQ_INSERT_TAIL(list, flow, next);
1881 	DEBUG("Flow created %p", (void *)flow);
1882 	return flow;
1883 exit:
1884 	ERROR("flow creation error: %s", error->message);
1885 	for (i = 0; i != hash_rxq_init_n; ++i) {
1886 		if (parser.queue[i].ibv_attr)
1887 			rte_free(parser.queue[i].ibv_attr);
1888 	}
1889 	rte_free(flow);
1890 	return NULL;
1891 }
1892 
1893 /**
1894  * Validate a flow supported by the NIC.
1895  *
1896  * @see rte_flow_validate()
1897  * @see rte_flow_ops
1898  */
1899 int
1900 mlx5_flow_validate(struct rte_eth_dev *dev,
1901 		   const struct rte_flow_attr *attr,
1902 		   const struct rte_flow_item items[],
1903 		   const struct rte_flow_action actions[],
1904 		   struct rte_flow_error *error)
1905 {
1906 	struct priv *priv = dev->data->dev_private;
1907 	int ret;
1908 	struct mlx5_flow_parse parser = { .create = 0, };
1909 
1910 	priv_lock(priv);
1911 	ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
1912 	priv_unlock(priv);
1913 	return ret;
1914 }
1915 
1916 /**
1917  * Create a flow.
1918  *
1919  * @see rte_flow_create()
1920  * @see rte_flow_ops
1921  */
1922 struct rte_flow *
1923 mlx5_flow_create(struct rte_eth_dev *dev,
1924 		 const struct rte_flow_attr *attr,
1925 		 const struct rte_flow_item items[],
1926 		 const struct rte_flow_action actions[],
1927 		 struct rte_flow_error *error)
1928 {
1929 	struct priv *priv = dev->data->dev_private;
1930 	struct rte_flow *flow;
1931 
1932 	priv_lock(priv);
1933 	flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
1934 				error);
1935 	priv_unlock(priv);
1936 	return flow;
1937 }
1938 
1939 /**
1940  * Destroy a flow.
1941  *
1942  * @param priv
1943  *   Pointer to private structure.
1944  * @param list
1945  *   Pointer to a TAILQ flow list.
1946  * @param[in] flow
1947  *   Flow to destroy.
1948  */
1949 static void
1950 priv_flow_destroy(struct priv *priv,
1951 		  struct mlx5_flows *list,
1952 		  struct rte_flow *flow)
1953 {
1954 	unsigned int i;
1955 
1956 	if (flow->drop || !flow->mark)
1957 		goto free;
1958 	for (i = 0; i != flow->queues_n; ++i) {
1959 		struct rte_flow *tmp;
1960 		int mark = 0;
1961 
1962 		/*
1963 		 * To remove the mark from the queue, the queue must not be
1964 		 * present in any other marked flow (RSS or not).
1965 		 */
1966 		TAILQ_FOREACH(tmp, list, next) {
1967 			unsigned int j;
1968 			uint16_t *tqs = NULL;
1969 			uint16_t tq_n = 0;
1970 
1971 			if (!tmp->mark)
1972 				continue;
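			/*
			 * All hash Rx queues of a flow are built from the
			 * same queue list, so any indirection table found
			 * below is representative of the whole flow.
			 */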
1973 			for (j = 0; j != hash_rxq_init_n; ++j) {
1974 				if (!tmp->frxq[j].hrxq)
1975 					continue;
1976 				tqs = tmp->frxq[j].hrxq->ind_table->queues;
1977 				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
1978 			}
1979 			if (!tq_n)
1980 				continue;
1981 			for (j = 0; (j != tq_n) && !mark; j++)
1982 				if (tqs[j] == (*flow->queues)[i])
1983 					mark = 1;
1984 		}
1985 		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
1986 	}
1987 free:
1988 	if (flow->drop) {
1989 		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
1990 			claim_zero(mlx5_glue->destroy_flow
1991 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1992 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1993 	} else {
1994 		for (i = 0; i != hash_rxq_init_n; ++i) {
1995 			struct mlx5_flow *frxq = &flow->frxq[i];
1996 
1997 			if (frxq->ibv_flow)
1998 				claim_zero(mlx5_glue->destroy_flow
1999 					   (frxq->ibv_flow));
2000 			if (frxq->hrxq)
2001 				mlx5_priv_hrxq_release(priv, frxq->hrxq);
2002 			if (frxq->ibv_attr)
2003 				rte_free(frxq->ibv_attr);
2004 		}
2005 	}
2006 	if (flow->cs) {
2007 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2008 		flow->cs = NULL;
2009 	}
2010 	TAILQ_REMOVE(list, flow, next);
2011 	DEBUG("Flow destroyed %p", (void *)flow);
2012 	rte_free(flow);
2013 }
2014 
2015 /**
2016  * Destroy all flows.
2017  *
2018  * @param priv
2019  *   Pointer to private structure.
2020  * @param list
2021  *   Pointer to a TAILQ flow list.
2022  */
2023 void
2024 priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
2025 {
2026 	while (!TAILQ_EMPTY(list)) {
2027 		struct rte_flow *flow;
2028 
2029 		flow = TAILQ_FIRST(list);
2030 		priv_flow_destroy(priv, list, flow);
2031 	}
2032 }
2033 
2034 /**
2035  * Create drop queue.
2036  *
2037  * @param priv
2038  *   Pointer to private structure.
2039  *
2040  * @return
2041  *   0 on success, -1 on failure.
2042  */
2043 int
2044 priv_flow_create_drop_queue(struct priv *priv)
2045 {
2046 	struct mlx5_hrxq_drop *fdq = NULL;
2047 
2048 	assert(priv->pd);
2049 	assert(priv->ctx);
2050 	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2051 	if (!fdq) {
2052 		WARN("cannot allocate memory for drop queue");
2053 		goto error;
2054 	}
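	/* The drop queue needs its own CQ, WQ, indirection table and QP. */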
2055 	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2056 	if (!fdq->cq) {
2057 		WARN("cannot allocate CQ for drop queue");
2058 		goto error;
2059 	}
2060 	fdq->wq = mlx5_glue->create_wq
2061 		(priv->ctx,
2062 		 &(struct ibv_wq_init_attr){
2063 			.wq_type = IBV_WQT_RQ,
2064 			.max_wr = 1,
2065 			.max_sge = 1,
2066 			.pd = priv->pd,
2067 			.cq = fdq->cq,
2068 		 });
2069 	if (!fdq->wq) {
2070 		WARN("cannot allocate WQ for drop queue");
2071 		goto error;
2072 	}
2073 	fdq->ind_table = mlx5_glue->create_rwq_ind_table
2074 		(priv->ctx,
2075 		 &(struct ibv_rwq_ind_table_init_attr){
2076 			.log_ind_tbl_size = 0,
2077 			.ind_tbl = &fdq->wq,
2078 			.comp_mask = 0,
2079 		 });
2080 	if (!fdq->ind_table) {
2081 		WARN("cannot allocate indirection table for drop queue");
2082 		goto error;
2083 	}
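	/*
	 * The QP hashes on nothing (rx_hash_fields_mask is 0), every packet
	 * steered to it ends up on the single drop WQ.
	 */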
2084 	fdq->qp = mlx5_glue->create_qp_ex
2085 		(priv->ctx,
2086 		 &(struct ibv_qp_init_attr_ex){
2087 			.qp_type = IBV_QPT_RAW_PACKET,
2088 			.comp_mask =
2089 				IBV_QP_INIT_ATTR_PD |
2090 				IBV_QP_INIT_ATTR_IND_TABLE |
2091 				IBV_QP_INIT_ATTR_RX_HASH,
2092 			.rx_hash_conf = (struct ibv_rx_hash_conf){
2093 				.rx_hash_function =
2094 					IBV_RX_HASH_FUNC_TOEPLITZ,
2095 				.rx_hash_key_len = rss_hash_default_key_len,
2096 				.rx_hash_key = rss_hash_default_key,
2097 				.rx_hash_fields_mask = 0,
2098 				},
2099 			.rwq_ind_tbl = fdq->ind_table,
2100 			.pd = priv->pd
2101 		 });
2102 	if (!fdq->qp) {
2103 		WARN("cannot allocate QP for drop queue");
2104 		goto error;
2105 	}
2106 	priv->flow_drop_queue = fdq;
2107 	return 0;
2108 error:
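	/* fdq may still be NULL if its allocation failed above. */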
2109 	if (fdq && fdq->qp)
2110 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2111 	if (fdq && fdq->ind_table)
2112 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2113 	if (fdq && fdq->wq)
2114 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2115 	if (fdq && fdq->cq)
2116 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2117 	if (fdq)
2118 		rte_free(fdq);
2119 	priv->flow_drop_queue = NULL;
2120 	return -1;
2121 }
2122 
2123 /**
2124  * Delete drop queue.
2125  *
2126  * @param priv
2127  *   Pointer to private structure.
2128  */
2129 void
2130 priv_flow_delete_drop_queue(struct priv *priv)
2131 {
2132 	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2133 
2134 	if (!fdq)
2135 		return;
2136 	if (fdq->qp)
2137 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2138 	if (fdq->ind_table)
2139 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2140 	if (fdq->wq)
2141 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2142 	if (fdq->cq)
2143 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2144 	rte_free(fdq);
2145 	priv->flow_drop_queue = NULL;
2146 }
2147 
2148 /**
2149  * Remove all flows.
2150  *
2151  * @param priv
2152  *   Pointer to private structure.
2153  * @param list
2154  *   Pointer to a TAILQ flow list.
2155  */
2156 void
2157 priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
2158 {
2159 	struct rte_flow *flow;
2160 
2161 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2162 		unsigned int i;
2163 		struct mlx5_ind_table_ibv *ind_tbl = NULL;
2164 
2165 		if (flow->drop) {
2166 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2167 				continue;
2168 			claim_zero(mlx5_glue->destroy_flow
2169 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2170 			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2171 			DEBUG("Flow %p removed", (void *)flow);
2172 			/* Next flow. */
2173 			continue;
2174 		}
2175 		/* Verify the flow has not already been cleaned. */
2176 		for (i = 0; i != hash_rxq_init_n; ++i) {
2177 			if (!flow->frxq[i].ibv_flow)
2178 				continue;
2179 			/*
2180 			 * The indirection table lists the Rx queues whose
2181 			 * mark flag may have to be cleared below.
2182 			 * Saving it here avoids another lookup loop over
2183 			 * the hash Rx queues.
2184 			 */
2185 			ind_tbl = flow->frxq[i].hrxq->ind_table;
2186 			break;
2187 		}
2188 		if (i == hash_rxq_init_n)
2189 			return;
2190 		if (flow->mark) {
2191 			assert(ind_tbl);
2192 			for (i = 0; i != ind_tbl->queues_n; ++i)
2193 				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2194 		}
2195 		for (i = 0; i != hash_rxq_init_n; ++i) {
2196 			if (!flow->frxq[i].ibv_flow)
2197 				continue;
2198 			claim_zero(mlx5_glue->destroy_flow
2199 				   (flow->frxq[i].ibv_flow));
2200 			flow->frxq[i].ibv_flow = NULL;
2201 			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
2202 			flow->frxq[i].hrxq = NULL;
2203 		}
2204 		DEBUG("Flow %p removed", (void *)flow);
2205 	}
2206 }
2207 
2208 /**
2209  * Add all flows.
2210  *
2211  * @param priv
2212  *   Pointer to private structure.
2213  * @param list
2214  *   Pointer to a TAILQ flow list.
2215  *
2216  * @return
2217  *   0 on success, an errno value otherwise and rte_errno is set.
2218  */
2219 int
2220 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
2221 {
2222 	struct rte_flow *flow;
2223 
2224 	TAILQ_FOREACH(flow, list, next) {
2225 		unsigned int i;
2226 
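		/* Drop flows only need to be re-attached to the drop QP. */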
2227 		if (flow->drop) {
2228 			flow->frxq[HASH_RXQ_ETH].ibv_flow =
2229 				mlx5_glue->create_flow
2230 				(priv->flow_drop_queue->qp,
2231 				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2232 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2233 				DEBUG("Flow %p cannot be applied",
2234 				      (void *)flow);
2235 				rte_errno = EINVAL;
2236 				return rte_errno;
2237 			}
2238 			DEBUG("Flow %p applied", (void *)flow);
2239 			/* Next flow. */
2240 			continue;
2241 		}
2242 		for (i = 0; i != hash_rxq_init_n; ++i) {
2243 			if (!flow->frxq[i].ibv_attr)
2244 				continue;
2245 			flow->frxq[i].hrxq =
2246 				mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
2247 						   flow->rss_conf.rss_key_len,
2248 						   hash_rxq_init[i].hash_fields,
2249 						   (*flow->queues),
2250 						   flow->queues_n);
2251 			if (flow->frxq[i].hrxq)
2252 				goto flow_create;
2253 			flow->frxq[i].hrxq =
2254 				mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
2255 						   flow->rss_conf.rss_key_len,
2256 						   hash_rxq_init[i].hash_fields,
2257 						   (*flow->queues),
2258 						   flow->queues_n);
2259 			if (!flow->frxq[i].hrxq) {
2260 				DEBUG("Flow %p cannot be applied",
2261 				      (void *)flow);
2262 				rte_errno = EINVAL;
2263 				return rte_errno;
2264 			}
2265 flow_create:
2266 			flow->frxq[i].ibv_flow =
2267 				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2268 						       flow->frxq[i].ibv_attr);
2269 			if (!flow->frxq[i].ibv_flow) {
2270 				DEBUG("Flow %p cannot be applied",
2271 				      (void *)flow);
2272 				rte_errno = EINVAL;
2273 				return rte_errno;
2274 			}
2275 			DEBUG("Flow %p applied", (void *)flow);
2276 		}
2277 		if (!flow->mark)
2278 			continue;
2279 		for (i = 0; i != flow->queues_n; ++i)
2280 			(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2281 	}
2282 	return 0;
2283 }
2284 
2285 /**
2286  * Verify the flow list is empty.
2287  *
2288  * @param priv
2289  *   Pointer to private structure.
2290  *
2291  * @return the number of flows not released.
2292  */
2293 int
2294 priv_flow_verify(struct priv *priv)
2295 {
2296 	struct rte_flow *flow;
2297 	int ret = 0;
2298 
2299 	TAILQ_FOREACH(flow, &priv->flows, next) {
2300 		DEBUG("%p: flow %p still referenced", (void *)priv,
2301 		      (void *)flow);
2302 		++ret;
2303 	}
2304 	return ret;
2305 }
2306 
2307 /**
2308  * Enable a control flow configured from the control plane.
2309  *
2310  * @param dev
2311  *   Pointer to Ethernet device.
2312  * @param eth_spec
2313  *   An Ethernet flow spec to apply.
2314  * @param eth_mask
2315  *   An Ethernet flow mask to apply.
2316  * @param vlan_spec
2317  *   A VLAN flow spec to apply.
2318  * @param vlan_mask
2319  *   A VLAN flow mask to apply.
2320  *
2321  * @return
2322  *   0 on success, an errno value otherwise.
2323  */
2324 int
2325 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2326 		    struct rte_flow_item_eth *eth_spec,
2327 		    struct rte_flow_item_eth *eth_mask,
2328 		    struct rte_flow_item_vlan *vlan_spec,
2329 		    struct rte_flow_item_vlan *vlan_mask)
2330 {
2331 	struct priv *priv = dev->data->dev_private;
2332 	const struct rte_flow_attr attr = {
2333 		.ingress = 1,
2334 		.priority = MLX5_CTRL_FLOW_PRIORITY,
2335 	};
2336 	struct rte_flow_item items[] = {
2337 		{
2338 			.type = RTE_FLOW_ITEM_TYPE_ETH,
2339 			.spec = eth_spec,
2340 			.last = NULL,
2341 			.mask = eth_mask,
2342 		},
2343 		{
2344 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2345 				RTE_FLOW_ITEM_TYPE_END,
2346 			.spec = vlan_spec,
2347 			.last = NULL,
2348 			.mask = vlan_mask,
2349 		},
2350 		{
2351 			.type = RTE_FLOW_ITEM_TYPE_END,
2352 		},
2353 	};
2354 	struct rte_flow_action actions[] = {
2355 		{
2356 			.type = RTE_FLOW_ACTION_TYPE_RSS,
2357 		},
2358 		{
2359 			.type = RTE_FLOW_ACTION_TYPE_END,
2360 		},
2361 	};
2362 	struct rte_flow *flow;
2363 	struct rte_flow_error error;
2364 	unsigned int i;
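	/*
	 * Local storage for the RSS action: the "local" member mirrors
	 * struct rte_flow_action_rss while reserving room for the maximum
	 * number of queue indices.
	 */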
2365 	union {
2366 		struct rte_flow_action_rss rss;
2367 		struct {
2368 			const struct rte_eth_rss_conf *rss_conf;
2369 			uint16_t num;
2370 			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2371 		} local;
2372 	} action_rss;
2373 
2374 	if (!priv->reta_idx_n)
2375 		return EINVAL;
2376 	for (i = 0; i != priv->reta_idx_n; ++i)
2377 		action_rss.local.queue[i] = (*priv->reta_idx)[i];
2378 	action_rss.local.rss_conf = &priv->rss_conf;
2379 	action_rss.local.num = priv->reta_idx_n;
2380 	actions[0].conf = (const void *)&action_rss.rss;
2381 	flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
2382 				&error);
2383 	if (!flow)
2384 		return rte_errno;
2385 	return 0;
2386 }
2387 
2388 /**
2389  * Enable a control flow configured from the control plane.
2390  *
2391  * @param dev
2392  *   Pointer to Ethernet device.
2393  * @param eth_spec
2394  *   An Ethernet flow spec to apply.
2395  * @param eth_mask
2396  *   An Ethernet flow mask to apply.
2397  *
2398  * @return
2399  *   0 on success, an errno value otherwise.
2400  */
2401 int
2402 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2403 	       struct rte_flow_item_eth *eth_spec,
2404 	       struct rte_flow_item_eth *eth_mask)
2405 {
2406 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2407 }
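
/*
 * Usage sketch (hypothetical caller): the control plane can install the
 * broadcast flow by matching on the broadcast destination MAC address.
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	claim_zero(mlx5_ctrl_flow(dev, &bcast, &bcast));
 */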
2408 
2409 /**
2410  * Destroy a flow.
2411  *
2412  * @see rte_flow_destroy()
2413  * @see rte_flow_ops
2414  */
2415 int
2416 mlx5_flow_destroy(struct rte_eth_dev *dev,
2417 		  struct rte_flow *flow,
2418 		  struct rte_flow_error *error)
2419 {
2420 	struct priv *priv = dev->data->dev_private;
2421 
2422 	(void)error;
2423 	priv_lock(priv);
2424 	priv_flow_destroy(priv, &priv->flows, flow);
2425 	priv_unlock(priv);
2426 	return 0;
2427 }
2428 
2429 /**
2430  * Destroy all flows.
2431  *
2432  * @see rte_flow_flush()
2433  * @see rte_flow_ops
2434  */
2435 int
2436 mlx5_flow_flush(struct rte_eth_dev *dev,
2437 		struct rte_flow_error *error)
2438 {
2439 	struct priv *priv = dev->data->dev_private;
2440 
2441 	(void)error;
2442 	priv_lock(priv);
2443 	priv_flow_flush(priv, &priv->flows);
2444 	priv_unlock(priv);
2445 	return 0;
2446 }
2447 
2448 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2449 /**
2450  * Query flow counter.
2451  *
2452  * @param cs
2453  *   The counter set to query.
2454  * @param query_count
2455  *   Returned data from the counter.
2456  *
2457  * @return
2458  *   0 on success, an errno value otherwise and rte_errno is set.
2459  */
2460 static int
2461 priv_flow_query_count(struct ibv_counter_set *cs,
2462 		      struct mlx5_flow_counter_stats *counter_stats,
2463 		      struct rte_flow_query_count *query_count,
2464 		      struct rte_flow_error *error)
2465 {
2466 	uint64_t counters[2];
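	/* counters[0] holds the hit count, counters[1] the byte count. */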
2467 	struct ibv_query_counter_set_attr query_cs_attr = {
2468 		.cs = cs,
2469 		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2470 	};
2471 	struct ibv_counter_set_data query_out = {
2472 		.out = counters,
2473 		.outlen = 2 * sizeof(uint64_t),
2474 	};
2475 	int res = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2476 
2477 	if (res) {
2478 		rte_flow_error_set(error, -res,
2479 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2480 				   NULL,
2481 				   "cannot read counter");
2482 		return -res;
2483 	}
2484 	query_count->hits_set = 1;
2485 	query_count->bytes_set = 1;
2486 	query_count->hits = counters[0] - counter_stats->hits;
2487 	query_count->bytes = counters[1] - counter_stats->bytes;
2488 	if (query_count->reset) {
2489 		counter_stats->hits = counters[0];
2490 		counter_stats->bytes = counters[1];
2491 	}
2492 	return 0;
2493 }
2494 
2495 /**
2496  * Query a flow.
2497  *
2498  * @see rte_flow_query()
2499  * @see rte_flow_ops
2500  */
2501 int
2502 mlx5_flow_query(struct rte_eth_dev *dev,
2503 		struct rte_flow *flow,
2504 		enum rte_flow_action_type action __rte_unused,
2505 		void *data,
2506 		struct rte_flow_error *error)
2507 {
2508 	struct priv *priv = dev->data->dev_private;
2509 	int res = EINVAL;
2510 
2511 	priv_lock(priv);
2512 	if (flow->cs) {
2513 		res = priv_flow_query_count(flow->cs,
2514 					&flow->counter_stats,
2515 					(struct rte_flow_query_count *)data,
2516 					error);
2517 	} else {
2518 		rte_flow_error_set(error, res,
2519 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2520 				   NULL,
2521 				   "no counter found for flow");
2522 	}
2523 	priv_unlock(priv);
2524 	return -res;
2525 }
2526 #endif
2527 
2528 /**
2529  * Isolated mode.
2530  *
2531  * @see rte_flow_isolate()
2532  * @see rte_flow_ops
2533  */
2534 int
2535 mlx5_flow_isolate(struct rte_eth_dev *dev,
2536 		  int enable,
2537 		  struct rte_flow_error *error)
2538 {
2539 	struct priv *priv = dev->data->dev_private;
2540 
2541 	priv_lock(priv);
2542 	if (dev->data->dev_started) {
2543 		rte_flow_error_set(error, EBUSY,
2544 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2545 				   NULL,
2546 				   "port must be stopped first");
2547 		priv_unlock(priv);
2548 		return -rte_errno;
2549 	}
2550 	priv->isolated = !!enable;
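	/* Isolated mode switches to a dedicated set of device operations. */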
2551 	if (enable)
2552 		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2553 	else
2554 		priv->dev->dev_ops = &mlx5_dev_ops;
2555 	priv_unlock(priv);
2556 	return 0;
2557 }
2558 
2559 /**
2560  * Convert a flow director filter to a generic flow.
2561  *
2562  * @param priv
2563  *   Private structure.
2564  * @param fdir_filter
2565  *   Flow director filter to add.
2566  * @param attributes
2567  *   Generic flow parameters structure.
2568  *
2569  * @return
2570  *   0 on success, an errno value on error.
2571  */
2572 static int
2573 priv_fdir_filter_convert(struct priv *priv,
2574 			 const struct rte_eth_fdir_filter *fdir_filter,
2575 			 struct mlx5_fdir *attributes)
2576 {
2577 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
2578 
2579 	/* Validate queue number. */
2580 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2581 		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
2582 		return EINVAL;
2583 	}
2584 	attributes->attr.ingress = 1;
2585 	attributes->items[0] = (struct rte_flow_item) {
2586 		.type = RTE_FLOW_ITEM_TYPE_ETH,
2587 		.spec = &attributes->l2,
2588 		.mask = &attributes->l2_mask,
2589 	};
2590 	switch (fdir_filter->action.behavior) {
2591 	case RTE_ETH_FDIR_ACCEPT:
2592 		attributes->actions[0] = (struct rte_flow_action){
2593 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
2594 			.conf = &attributes->queue,
2595 		};
2596 		break;
2597 	case RTE_ETH_FDIR_REJECT:
2598 		attributes->actions[0] = (struct rte_flow_action){
2599 			.type = RTE_FLOW_ACTION_TYPE_DROP,
2600 		};
2601 		break;
2602 	default:
2603 		ERROR("invalid behavior %d", fdir_filter->action.behavior);
2604 		return ENOTSUP;
2605 	}
2606 	attributes->queue.index = fdir_filter->action.rx_queue;
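	/* Translate flow director input fields into generic flow items. */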
2607 	switch (fdir_filter->input.flow_type) {
2608 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2609 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2610 			.src_addr = input->flow.udp4_flow.ip.src_ip,
2611 			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
2612 			.time_to_live = input->flow.udp4_flow.ip.ttl,
2613 			.type_of_service = input->flow.udp4_flow.ip.tos,
2614 			.next_proto_id = input->flow.udp4_flow.ip.proto,
2615 		};
2616 		attributes->l4.udp.hdr = (struct udp_hdr){
2617 			.src_port = input->flow.udp4_flow.src_port,
2618 			.dst_port = input->flow.udp4_flow.dst_port,
2619 		};
2620 		attributes->items[1] = (struct rte_flow_item){
2621 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2622 			.spec = &attributes->l3,
2623 			.mask = &attributes->l3,
2624 		};
2625 		attributes->items[2] = (struct rte_flow_item){
2626 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2627 			.spec = &attributes->l4,
2628 			.mask = &attributes->l4,
2629 		};
2630 		break;
2631 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2632 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2633 			.src_addr = input->flow.tcp4_flow.ip.src_ip,
2634 			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2635 			.time_to_live = input->flow.tcp4_flow.ip.ttl,
2636 			.type_of_service = input->flow.tcp4_flow.ip.tos,
2637 			.next_proto_id = input->flow.tcp4_flow.ip.proto,
2638 		};
2639 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2640 			.src_port = input->flow.tcp4_flow.src_port,
2641 			.dst_port = input->flow.tcp4_flow.dst_port,
2642 		};
2643 		attributes->items[1] = (struct rte_flow_item){
2644 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2645 			.spec = &attributes->l3,
2646 			.mask = &attributes->l3,
2647 		};
2648 		attributes->items[2] = (struct rte_flow_item){
2649 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2650 			.spec = &attributes->l4,
2651 			.mask = &attributes->l4,
2652 		};
2653 		break;
2654 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2655 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2656 			.src_addr = input->flow.ip4_flow.src_ip,
2657 			.dst_addr = input->flow.ip4_flow.dst_ip,
2658 			.time_to_live = input->flow.ip4_flow.ttl,
2659 			.type_of_service = input->flow.ip4_flow.tos,
2660 			.next_proto_id = input->flow.ip4_flow.proto,
2661 		};
2662 		attributes->items[1] = (struct rte_flow_item){
2663 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2664 			.spec = &attributes->l3,
2665 			.mask = &attributes->l3,
2666 		};
2667 		break;
2668 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2669 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2670 			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
2671 			.proto = input->flow.udp6_flow.ip.proto,
2672 		};
2673 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2674 		       input->flow.udp6_flow.ip.src_ip,
2675 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2676 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2677 		       input->flow.udp6_flow.ip.dst_ip,
2678 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2679 		attributes->l4.udp.hdr = (struct udp_hdr){
2680 			.src_port = input->flow.udp6_flow.src_port,
2681 			.dst_port = input->flow.udp6_flow.dst_port,
2682 		};
2683 		attributes->items[1] = (struct rte_flow_item){
2684 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2685 			.spec = &attributes->l3,
2686 			.mask = &attributes->l3,
2687 		};
2688 		attributes->items[2] = (struct rte_flow_item){
2689 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2690 			.spec = &attributes->l4,
2691 			.mask = &attributes->l4,
2692 		};
2693 		break;
2694 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2695 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2696 			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2697 			.proto = input->flow.tcp6_flow.ip.proto,
2698 		};
2699 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2700 		       input->flow.tcp6_flow.ip.src_ip,
2701 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2702 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2703 		       input->flow.tcp6_flow.ip.dst_ip,
2704 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2705 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2706 			.src_port = input->flow.tcp6_flow.src_port,
2707 			.dst_port = input->flow.tcp6_flow.dst_port,
2708 		};
2709 		attributes->items[1] = (struct rte_flow_item){
2710 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2711 			.spec = &attributes->l3,
2712 			.mask = &attributes->l3,
2713 		};
2714 		attributes->items[2] = (struct rte_flow_item){
2715 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2716 			.spec = &attributes->l4,
2717 			.mask = &attributes->l4,
2718 		};
2719 		break;
2720 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2721 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2722 			.hop_limits = input->flow.ipv6_flow.hop_limits,
2723 			.proto = input->flow.ipv6_flow.proto,
2724 		};
2725 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2726 		       input->flow.ipv6_flow.src_ip,
2727 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2728 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2729 		       input->flow.ipv6_flow.dst_ip,
2730 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2731 		attributes->items[1] = (struct rte_flow_item){
2732 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2733 			.spec = &attributes->l3,
2734 			.mask = &attributes->l3,
2735 		};
2736 		break;
2737 	default:
2738 		ERROR("invalid flow type %d",
2739 		      fdir_filter->input.flow_type);
2740 		return ENOTSUP;
2741 	}
2742 	return 0;
2743 }
2744 
2745 /**
2746  * Add a new flow director filter and store it in the list.
2747  *
2748  * @param priv
2749  *   Private structure.
2750  * @param fdir_filter
2751  *   Flow director filter to add.
2752  *
2753  * @return
2754  *   0 on success, errno value on failure.
2755  */
2756 static int
2757 priv_fdir_filter_add(struct priv *priv,
2758 		     const struct rte_eth_fdir_filter *fdir_filter)
2759 {
2760 	struct mlx5_fdir attributes = {
2761 		.attr.group = 0,
2762 		.l2_mask = {
2763 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2764 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2765 			.type = 0,
2766 		},
2767 	};
2768 	struct mlx5_flow_parse parser = {
2769 		.layer = HASH_RXQ_ETH,
2770 	};
2771 	struct rte_flow_error error;
2772 	struct rte_flow *flow;
2773 	int ret;
2774 
2775 	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2776 	if (ret)
2777 		return -ret;
2778 	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2779 				attributes.actions, &error, &parser);
2780 	if (ret)
2781 		return -ret;
2782 	flow = priv_flow_create(priv,
2783 				&priv->flows,
2784 				&attributes.attr,
2785 				attributes.items,
2786 				attributes.actions,
2787 				&error);
2788 	if (flow) {
2789 		DEBUG("FDIR created %p", (void *)flow);
2790 		return 0;
2791 	}
2792 	return ENOTSUP;
2793 }
2794 
2795 /**
2796  * Delete a specific filter.
2797  *
2798  * @param priv
2799  *   Private structure.
2800  * @param fdir_filter
2801  *   Filter to be deleted.
2802  *
2803  * @return
2804  *   0 on success, errno value on failure.
2805  */
2806 static int
2807 priv_fdir_filter_delete(struct priv *priv,
2808 			const struct rte_eth_fdir_filter *fdir_filter)
2809 {
2810 	struct mlx5_fdir attributes = {
2811 		.attr.group = 0,
2812 	};
2813 	struct mlx5_flow_parse parser = {
2814 		.create = 1,
2815 		.layer = HASH_RXQ_ETH,
2816 	};
2817 	struct rte_flow_error error;
2818 	struct rte_flow *flow;
2819 	unsigned int i;
2820 	int ret;
2821 
2822 	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2823 	if (ret)
2824 		return -ret;
2825 	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2826 				attributes.actions, &error, &parser);
2827 	if (ret)
2828 		goto exit;
2829 	/*
2830 	 * Special case for the drop action, which is only written into the
2831 	 * specifications when a flow is actually created.  The drop
2832 	 * specification is therefore missing here and must be added manually.
2833 	 */
2834 	if (parser.drop) {
2835 		struct ibv_flow_spec_action_drop *drop;
2836 
2837 		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2838 				parser.queue[HASH_RXQ_ETH].offset);
2839 		*drop = (struct ibv_flow_spec_action_drop){
2840 			.type = IBV_FLOW_SPEC_ACTION_DROP,
2841 			.size = sizeof(struct ibv_flow_spec_action_drop),
2842 		};
2843 		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2844 	}
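	/* Look for an installed flow whose attributes and specs match. */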
2845 	TAILQ_FOREACH(flow, &priv->flows, next) {
2846 		struct ibv_flow_attr *attr;
2847 		struct ibv_spec_header *attr_h;
2848 		void *spec;
2849 		struct ibv_flow_attr *flow_attr;
2850 		struct ibv_spec_header *flow_h;
2851 		void *flow_spec;
2852 		unsigned int specs_n;
2853 
2854 		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2855 		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2856 		/* Compare first the attributes. */
2857 		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2858 			continue;
2859 		if (attr->num_of_specs == 0)
2860 			continue;
2861 		spec = (void *)((uintptr_t)attr +
2862 				sizeof(struct ibv_flow_attr));
2863 		flow_spec = (void *)((uintptr_t)flow_attr +
2864 				     sizeof(struct ibv_flow_attr));
2865 		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2866 		for (i = 0; i != specs_n; ++i) {
2867 			attr_h = spec;
2868 			flow_h = flow_spec;
2869 			if (memcmp(spec, flow_spec,
2870 				   RTE_MIN(attr_h->size, flow_h->size)))
2871 				goto wrong_flow;
2872 			spec = (void *)((uintptr_t)spec + attr_h->size);
2873 			flow_spec = (void *)((uintptr_t)flow_spec +
2874 					     flow_h->size);
2875 		}
2876 		/* At this point, the flow matches. */
2877 		break;
2878 wrong_flow:
2879 		/* The flow does not match. */
2880 		continue;
2881 	}
2882 	if (flow)
2883 		priv_flow_destroy(priv, &priv->flows, flow);
2884 exit:
2885 	for (i = 0; i != hash_rxq_init_n; ++i) {
2886 		if (parser.queue[i].ibv_attr)
2887 			rte_free(parser.queue[i].ibv_attr);
2888 	}
2889 	return -ret;
2890 }
2891 
2892 /**
2893  * Update the queue for a specific filter.
2894  *
2895  * @param priv
2896  *   Private structure.
2897  * @param fdir_filter
2898  *   Filter to be updated.
2899  *
2900  * @return
2901  *   0 on success, errno value on failure.
2902  */
2903 static int
2904 priv_fdir_filter_update(struct priv *priv,
2905 			const struct rte_eth_fdir_filter *fdir_filter)
2906 {
2907 	int ret;
2908 
2909 	ret = priv_fdir_filter_delete(priv, fdir_filter);
2910 	if (ret)
2911 		return ret;
2912 	ret = priv_fdir_filter_add(priv, fdir_filter);
2913 	return ret;
2914 }
2915 
2916 /**
2917  * Flush all filters.
2918  *
2919  * @param priv
2920  *   Private structure.
2921  */
2922 static void
2923 priv_fdir_filter_flush(struct priv *priv)
2924 {
2925 	priv_flow_flush(priv, &priv->flows);
2926 }
2927 
2928 /**
2929  * Get flow director information.
2930  *
2931  * @param priv
2932  *   Private structure.
2933  * @param[out] fdir_info
2934  *   Resulting flow director information.
2935  */
2936 static void
2937 priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
2938 {
2939 	struct rte_eth_fdir_masks *mask =
2940 		&priv->dev->data->dev_conf.fdir_conf.mask;
2941 
2942 	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
2943 	fdir_info->guarant_spc = 0;
2944 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
2945 	fdir_info->max_flexpayload = 0;
2946 	fdir_info->flow_types_mask[0] = 0;
2947 	fdir_info->flex_payload_unit = 0;
2948 	fdir_info->max_flex_payload_segment_num = 0;
2949 	fdir_info->flex_payload_limit = 0;
2950 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2951 }
2952 
2953 /**
2954  * Deal with flow director operations.
2955  *
2956  * @param priv
2957  *   Pointer to private structure.
2958  * @param filter_op
2959  *   Operation to perform.
2960  * @param arg
2961  *   Pointer to operation-specific structure.
2962  *
2963  * @return
2964  *   0 on success, errno value on failure.
2965  */
2966 static int
2967 priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
2968 {
2969 	enum rte_fdir_mode fdir_mode =
2970 		priv->dev->data->dev_conf.fdir_conf.mode;
2971 	int ret = 0;
2972 
2973 	if (filter_op == RTE_ETH_FILTER_NOP)
2974 		return 0;
2975 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
2976 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
2977 		ERROR("%p: flow director mode %d not supported",
2978 		      (void *)priv, fdir_mode);
2979 		return EINVAL;
2980 	}
2981 	switch (filter_op) {
2982 	case RTE_ETH_FILTER_ADD:
2983 		ret = priv_fdir_filter_add(priv, arg);
2984 		break;
2985 	case RTE_ETH_FILTER_UPDATE:
2986 		ret = priv_fdir_filter_update(priv, arg);
2987 		break;
2988 	case RTE_ETH_FILTER_DELETE:
2989 		ret = priv_fdir_filter_delete(priv, arg);
2990 		break;
2991 	case RTE_ETH_FILTER_FLUSH:
2992 		priv_fdir_filter_flush(priv);
2993 		break;
2994 	case RTE_ETH_FILTER_INFO:
2995 		priv_fdir_info_get(priv, arg);
2996 		break;
2997 	default:
2998 		DEBUG("%p: unknown operation %u", (void *)priv,
2999 		      filter_op);
3000 		ret = EINVAL;
3001 		break;
3002 	}
3003 	return ret;
3004 }
3005 
3006 /**
3007  * Manage filter operations.
3008  *
3009  * @param dev
3010  *   Pointer to Ethernet device structure.
3011  * @param filter_type
3012  *   Filter type.
3013  * @param filter_op
3014  *   Operation to perform.
3015  * @param arg
3016  *   Pointer to operation-specific structure.
3017  *
3018  * @return
3019  *   0 on success, negative errno value on failure.
3020  */
3021 int
3022 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3023 		     enum rte_filter_type filter_type,
3024 		     enum rte_filter_op filter_op,
3025 		     void *arg)
3026 {
3027 	int ret = EINVAL;
3028 	struct priv *priv = dev->data->dev_private;
3029 
3030 	switch (filter_type) {
3031 	case RTE_ETH_FILTER_GENERIC:
3032 		if (filter_op != RTE_ETH_FILTER_GET)
3033 			return -EINVAL;
3034 		*(const void **)arg = &mlx5_flow_ops;
3035 		return 0;
3036 	case RTE_ETH_FILTER_FDIR:
3037 		priv_lock(priv);
3038 		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
3039 		priv_unlock(priv);
3040 		break;
3041 	default:
3042 		ERROR("%p: filter type (%d) not supported",
3043 		      (void *)dev, filter_type);
3044 		break;
3045 	}
3046 	return -ret;
3047 }
3048