xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision d9aa619c60b67131d0f8d0ad308576ca72ce7c90)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <sys/queue.h>
7 #include <string.h>
8 
9 /* Verbs header. */
10 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
11 #ifdef PEDANTIC
12 #pragma GCC diagnostic ignored "-Wpedantic"
13 #endif
14 #include <infiniband/verbs.h>
15 #ifdef PEDANTIC
16 #pragma GCC diagnostic error "-Wpedantic"
17 #endif
18 
19 #include <rte_common.h>
20 #include <rte_ethdev_driver.h>
21 #include <rte_flow.h>
22 #include <rte_flow_driver.h>
23 #include <rte_malloc.h>
24 #include <rte_ip.h>
25 
26 #include "mlx5.h"
27 #include "mlx5_defs.h"
28 #include "mlx5_prm.h"
29 #include "mlx5_glue.h"
30 
31 /* Define minimal priority for control plane flows. */
32 #define MLX5_CTRL_FLOW_PRIORITY 4
33 
34 /* Internet Protocol versions. */
35 #define MLX5_IPV4 4
36 #define MLX5_IPV6 6
37 
38 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
39 struct ibv_flow_spec_counter_action {
40 	int dummy;
41 };
42 #endif
43 
44 /* Dev ops structure defined in mlx5.c */
45 extern const struct eth_dev_ops mlx5_dev_ops;
46 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
47 
48 /** Structure given to the conversion functions. */
49 struct mlx5_flow_data {
50 	struct mlx5_flow_parse *parser; /**< Parser context. */
51 	struct rte_flow_error *error; /**< Error context. */
52 };
53 
54 static int
55 mlx5_flow_create_eth(const struct rte_flow_item *item,
56 		     const void *default_mask,
57 		     struct mlx5_flow_data *data);
58 
59 static int
60 mlx5_flow_create_vlan(const struct rte_flow_item *item,
61 		      const void *default_mask,
62 		      struct mlx5_flow_data *data);
63 
64 static int
65 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
66 		      const void *default_mask,
67 		      struct mlx5_flow_data *data);
68 
69 static int
70 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
71 		      const void *default_mask,
72 		      struct mlx5_flow_data *data);
73 
74 static int
75 mlx5_flow_create_udp(const struct rte_flow_item *item,
76 		     const void *default_mask,
77 		     struct mlx5_flow_data *data);
78 
79 static int
80 mlx5_flow_create_tcp(const struct rte_flow_item *item,
81 		     const void *default_mask,
82 		     struct mlx5_flow_data *data);
83 
84 static int
85 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
86 		       const void *default_mask,
87 		       struct mlx5_flow_data *data);
88 
89 struct mlx5_flow_parse;
90 
91 static void
92 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
93 		      unsigned int size);
94 
95 static int
96 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
97 
98 static int
99 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
100 
101 /* Hash RX queue types. */
102 enum hash_rxq_type {
103 	HASH_RXQ_TCPV4,
104 	HASH_RXQ_UDPV4,
105 	HASH_RXQ_IPV4,
106 	HASH_RXQ_TCPV6,
107 	HASH_RXQ_UDPV6,
108 	HASH_RXQ_IPV6,
109 	HASH_RXQ_ETH,
110 };
111 
112 /* Initialization data for hash RX queue. */
113 struct hash_rxq_init {
114 	uint64_t hash_fields; /* Fields that participate in the hash. */
115 	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
116 	unsigned int flow_priority; /* Flow priority to use. */
117 	unsigned int ip_version; /* Internet protocol. */
118 };
119 
120 /* Initialization data for hash RX queues. */
121 const struct hash_rxq_init hash_rxq_init[] = {
122 	[HASH_RXQ_TCPV4] = {
123 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
124 				IBV_RX_HASH_DST_IPV4 |
125 				IBV_RX_HASH_SRC_PORT_TCP |
126 				IBV_RX_HASH_DST_PORT_TCP),
127 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
128 		.flow_priority = 1,
129 		.ip_version = MLX5_IPV4,
130 	},
131 	[HASH_RXQ_UDPV4] = {
132 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
133 				IBV_RX_HASH_DST_IPV4 |
134 				IBV_RX_HASH_SRC_PORT_UDP |
135 				IBV_RX_HASH_DST_PORT_UDP),
136 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
137 		.flow_priority = 1,
138 		.ip_version = MLX5_IPV4,
139 	},
140 	[HASH_RXQ_IPV4] = {
141 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
142 				IBV_RX_HASH_DST_IPV4),
143 		.dpdk_rss_hf = (ETH_RSS_IPV4 |
144 				ETH_RSS_FRAG_IPV4),
145 		.flow_priority = 2,
146 		.ip_version = MLX5_IPV4,
147 	},
148 	[HASH_RXQ_TCPV6] = {
149 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
150 				IBV_RX_HASH_DST_IPV6 |
151 				IBV_RX_HASH_SRC_PORT_TCP |
152 				IBV_RX_HASH_DST_PORT_TCP),
153 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
154 		.flow_priority = 1,
155 		.ip_version = MLX5_IPV6,
156 	},
157 	[HASH_RXQ_UDPV6] = {
158 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
159 				IBV_RX_HASH_DST_IPV6 |
160 				IBV_RX_HASH_SRC_PORT_UDP |
161 				IBV_RX_HASH_DST_PORT_UDP),
162 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
163 		.flow_priority = 1,
164 		.ip_version = MLX5_IPV6,
165 	},
166 	[HASH_RXQ_IPV6] = {
167 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
168 				IBV_RX_HASH_DST_IPV6),
169 		.dpdk_rss_hf = (ETH_RSS_IPV6 |
170 				ETH_RSS_FRAG_IPV6),
171 		.flow_priority = 2,
172 		.ip_version = MLX5_IPV6,
173 	},
174 	[HASH_RXQ_ETH] = {
175 		.hash_fields = 0,
176 		.dpdk_rss_hf = 0,
177 		.flow_priority = 3,
178 	},
179 };
180 
181 /* Number of entries in hash_rxq_init[]. */
182 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
183 
184 /** Structure for holding counter stats. */
185 struct mlx5_flow_counter_stats {
186 	uint64_t hits; /**< Number of packets matched by the rule. */
187 	uint64_t bytes; /**< Number of bytes matched by the rule. */
188 };
189 
190 /** Structure for Drop queue. */
191 struct mlx5_hrxq_drop {
192 	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
193 	struct ibv_qp *qp; /**< Verbs queue pair. */
194 	struct ibv_wq *wq; /**< Verbs work queue. */
195 	struct ibv_cq *cq; /**< Verbs completion queue. */
196 };
197 
198 /* Flows structures. */
199 struct mlx5_flow {
200 	uint64_t hash_fields; /**< Fields that participate in the hash. */
201 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
202 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
203 	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
204 };
205 
206 /* Drop flows structures. */
207 struct mlx5_flow_drop {
208 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
209 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
210 };
211 
212 struct rte_flow {
213 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
214 	uint32_t mark:1; /**< Set if the flow is marked. */
215 	uint32_t drop:1; /**< Drop queue. */
216 	uint16_t queues_n; /**< Number of entries in queue[]. */
217 	uint16_t (*queues)[]; /**< Queue indexes to use. */
218 	struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
219 	uint8_t rss_key[40]; /**< copy of the RSS key. */
220 	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
221 	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
222 	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
223 	/**< Flow with Rx queue. */
224 };
225 
226 /** Static initializer for items. */
227 #define ITEMS(...) \
228 	(const enum rte_flow_item_type []){ \
229 		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
230 	}
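
/*
 * For example, ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6)
 * expands to the compound literal
 *
 *   (const enum rte_flow_item_type []){
 *           RTE_FLOW_ITEM_TYPE_IPV4,
 *           RTE_FLOW_ITEM_TYPE_IPV6,
 *           RTE_FLOW_ITEM_TYPE_END,
 *   }
 *
 * i.e. an anonymous array always terminated by RTE_FLOW_ITEM_TYPE_END,
 * which is how the graph below lists the items allowed to follow a layer.
 */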
231 
232 /** Structure to generate a simple graph of layers supported by the NIC. */
233 struct mlx5_flow_items {
234 	/** List of possible actions for these items. */
235 	const enum rte_flow_action_type *const actions;
236 	/** Bit-masks corresponding to the possibilities for the item. */
237 	const void *mask;
238 	/**
239 	 * Default bit-masks to use when item->mask is not provided. When
240 	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
241 	 * used instead.
242 	 */
243 	const void *default_mask;
244 	/** Bit-masks size in bytes. */
245 	const unsigned int mask_sz;
246 	/**
247 	 * Conversion function from rte_flow to NIC specific flow.
248 	 *
249 	 * @param item
250 	 *   rte_flow item to convert.
251 	 * @param default_mask
252 	 *   Default bit-masks to use when item->mask is not provided.
253 	 * @param data
254 	 *   Internal structure to store the conversion.
255 	 *
256 	 * @return
257 	 *   0 on success, a negative errno value otherwise and rte_errno is
258 	 *   set.
259 	 */
260 	int (*convert)(const struct rte_flow_item *item,
261 		       const void *default_mask,
262 		       struct mlx5_flow_data *data);
263 	/** Size in bytes of the destination structure. */
264 	const unsigned int dst_sz;
265 	/** List of possible following items.  */
266 	const enum rte_flow_item_type *const items;
267 };
268 
269 /** Valid actions for this PMD. */
270 static const enum rte_flow_action_type valid_actions[] = {
271 	RTE_FLOW_ACTION_TYPE_DROP,
272 	RTE_FLOW_ACTION_TYPE_QUEUE,
273 	RTE_FLOW_ACTION_TYPE_MARK,
274 	RTE_FLOW_ACTION_TYPE_FLAG,
275 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
276 	RTE_FLOW_ACTION_TYPE_COUNT,
277 #endif
278 	RTE_FLOW_ACTION_TYPE_END,
279 };
280 
281 /** Graph of supported items and associated actions. */
282 static const struct mlx5_flow_items mlx5_flow_items[] = {
283 	[RTE_FLOW_ITEM_TYPE_END] = {
284 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
285 			       RTE_FLOW_ITEM_TYPE_VXLAN),
286 	},
287 	[RTE_FLOW_ITEM_TYPE_ETH] = {
288 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
289 			       RTE_FLOW_ITEM_TYPE_IPV4,
290 			       RTE_FLOW_ITEM_TYPE_IPV6),
291 		.actions = valid_actions,
292 		.mask = &(const struct rte_flow_item_eth){
293 			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
294 			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
295 			.type = -1,
296 		},
297 		.default_mask = &rte_flow_item_eth_mask,
298 		.mask_sz = sizeof(struct rte_flow_item_eth),
299 		.convert = mlx5_flow_create_eth,
300 		.dst_sz = sizeof(struct ibv_flow_spec_eth),
301 	},
302 	[RTE_FLOW_ITEM_TYPE_VLAN] = {
303 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
304 			       RTE_FLOW_ITEM_TYPE_IPV6),
305 		.actions = valid_actions,
306 		.mask = &(const struct rte_flow_item_vlan){
307 			.tci = -1,
308 		},
309 		.default_mask = &rte_flow_item_vlan_mask,
310 		.mask_sz = sizeof(struct rte_flow_item_vlan),
311 		.convert = mlx5_flow_create_vlan,
312 		.dst_sz = 0,
313 	},
314 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
315 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
316 			       RTE_FLOW_ITEM_TYPE_TCP),
317 		.actions = valid_actions,
318 		.mask = &(const struct rte_flow_item_ipv4){
319 			.hdr = {
320 				.src_addr = -1,
321 				.dst_addr = -1,
322 				.type_of_service = -1,
323 				.next_proto_id = -1,
324 			},
325 		},
326 		.default_mask = &rte_flow_item_ipv4_mask,
327 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
328 		.convert = mlx5_flow_create_ipv4,
329 		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
330 	},
331 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
332 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
333 			       RTE_FLOW_ITEM_TYPE_TCP),
334 		.actions = valid_actions,
335 		.mask = &(const struct rte_flow_item_ipv6){
336 			.hdr = {
337 				.src_addr = {
338 					0xff, 0xff, 0xff, 0xff,
339 					0xff, 0xff, 0xff, 0xff,
340 					0xff, 0xff, 0xff, 0xff,
341 					0xff, 0xff, 0xff, 0xff,
342 				},
343 				.dst_addr = {
344 					0xff, 0xff, 0xff, 0xff,
345 					0xff, 0xff, 0xff, 0xff,
346 					0xff, 0xff, 0xff, 0xff,
347 					0xff, 0xff, 0xff, 0xff,
348 				},
349 				.vtc_flow = -1,
350 				.proto = -1,
351 				.hop_limits = -1,
352 			},
353 		},
354 		.default_mask = &rte_flow_item_ipv6_mask,
355 		.mask_sz = sizeof(struct rte_flow_item_ipv6),
356 		.convert = mlx5_flow_create_ipv6,
357 		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
358 	},
359 	[RTE_FLOW_ITEM_TYPE_UDP] = {
360 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
361 		.actions = valid_actions,
362 		.mask = &(const struct rte_flow_item_udp){
363 			.hdr = {
364 				.src_port = -1,
365 				.dst_port = -1,
366 			},
367 		},
368 		.default_mask = &rte_flow_item_udp_mask,
369 		.mask_sz = sizeof(struct rte_flow_item_udp),
370 		.convert = mlx5_flow_create_udp,
371 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
372 	},
373 	[RTE_FLOW_ITEM_TYPE_TCP] = {
374 		.actions = valid_actions,
375 		.mask = &(const struct rte_flow_item_tcp){
376 			.hdr = {
377 				.src_port = -1,
378 				.dst_port = -1,
379 			},
380 		},
381 		.default_mask = &rte_flow_item_tcp_mask,
382 		.mask_sz = sizeof(struct rte_flow_item_tcp),
383 		.convert = mlx5_flow_create_tcp,
384 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
385 	},
386 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
387 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
388 		.actions = valid_actions,
389 		.mask = &(const struct rte_flow_item_vxlan){
390 			.vni = "\xff\xff\xff",
391 		},
392 		.default_mask = &rte_flow_item_vxlan_mask,
393 		.mask_sz = sizeof(struct rte_flow_item_vxlan),
394 		.convert = mlx5_flow_create_vxlan,
395 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
396 	},
397 };
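
/*
 * The table above behaves as a small graph: validation starts from
 * RTE_FLOW_ITEM_TYPE_END (the implicit root) and each item must appear in
 * the .items list of the previous one. A pattern such as
 *
 *   eth / ipv4 / udp / vxlan / eth / ipv4 / tcp
 *
 * is therefore accepted (END -> ETH -> IPV4 -> UDP -> VXLAN -> ETH -> ...),
 * while a pattern starting directly with ipv4 is rejected because END only
 * lists ETH and VXLAN as possible first items.
 */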
398 
399 /** Structure to pass to the conversion function. */
400 struct mlx5_flow_parse {
401 	uint32_t inner; /**< Set once VXLAN is encountered. */
402 	uint32_t create:1;
403 	/**< Whether resources should remain after a validate. */
404 	uint32_t drop:1; /**< Target is a drop queue. */
405 	uint32_t mark:1; /**< Mark is present in the flow. */
406 	uint32_t count:1; /**< Count is present in the flow. */
407 	uint32_t mark_id; /**< Mark identifier. */
408 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
409 	uint16_t queues_n; /**< Number of entries in queue[]. */
410 	struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
411 	uint8_t rss_key[40]; /**< copy of the RSS key. */
412 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
413 	struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
414 	struct {
415 		struct ibv_flow_attr *ibv_attr;
416 		/**< Pointer to Verbs attributes. */
417 		unsigned int offset;
418 		/**< Current position or total size of the attribute. */
419 	} queue[RTE_DIM(hash_rxq_init)];
420 };
421 
422 static const struct rte_flow_ops mlx5_flow_ops = {
423 	.validate = mlx5_flow_validate,
424 	.create = mlx5_flow_create,
425 	.destroy = mlx5_flow_destroy,
426 	.flush = mlx5_flow_flush,
427 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
428 	.query = mlx5_flow_query,
429 #else
430 	.query = NULL,
431 #endif
432 	.isolate = mlx5_flow_isolate,
433 };
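
/*
 * Applications do not call these handlers directly; they go through the
 * generic rte_flow API, which resolves &mlx5_flow_ops via the device's
 * filter_ctrl callback. A minimal sketch, assuming the caller already built
 * its attr/pattern/actions arrays (hypothetical names):
 *
 *   struct rte_flow_error err;
 *   struct rte_flow *flow;
 *
 *   if (rte_flow_validate(port_id, &attr, pattern, actions, &err) == 0)
 *           flow = rte_flow_create(port_id, &attr, pattern, actions, &err);
 *   ...
 *   rte_flow_destroy(port_id, flow, &err);
 */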
434 
435 /* Convert FDIR request to Generic flow. */
436 struct mlx5_fdir {
437 	struct rte_flow_attr attr;
438 	struct rte_flow_action actions[2];
439 	struct rte_flow_item items[4];
440 	struct rte_flow_item_eth l2;
441 	struct rte_flow_item_eth l2_mask;
442 	union {
443 		struct rte_flow_item_ipv4 ipv4;
444 		struct rte_flow_item_ipv6 ipv6;
445 	} l3;
446 	union {
447 		struct rte_flow_item_ipv4 ipv4;
448 		struct rte_flow_item_ipv6 ipv6;
449 	} l3_mask;
450 	union {
451 		struct rte_flow_item_udp udp;
452 		struct rte_flow_item_tcp tcp;
453 	} l4;
454 	union {
455 		struct rte_flow_item_udp udp;
456 		struct rte_flow_item_tcp tcp;
457 	} l4_mask;
458 	struct rte_flow_action_queue queue;
459 };
460 
461 /* Verbs specification header. */
462 struct ibv_spec_header {
463 	enum ibv_flow_spec_type type;
464 	uint16_t size;
465 };
466 
467 /**
468  * Check support for a given item.
469  *
470  * @param item[in]
471  *   Item specification.
472  * @param mask[in]
473  *   Bit-masks covering supported fields to compare with spec, last and mask in
474  *   \item.
475  * @param size
476  *   Bit-Mask size in bytes.
477  *
478  * @return
479  *   0 on success, a negative errno value otherwise and rte_errno is set.
480  */
481 static int
482 mlx5_flow_item_validate(const struct rte_flow_item *item,
483 			const uint8_t *mask, unsigned int size)
484 {
485 	if (!item->spec && (item->mask || item->last)) {
486 		rte_errno = EINVAL;
487 		return -rte_errno;
488 	}
489 	if (item->spec && !item->mask) {
490 		unsigned int i;
491 		const uint8_t *spec = item->spec;
492 
493 		for (i = 0; i < size; ++i)
494 			if ((spec[i] | mask[i]) != mask[i]) {
495 				rte_errno = EINVAL;
496 				return -rte_errno;
497 			}
498 	}
499 	if (item->last && !item->mask) {
500 		unsigned int i;
501 		const uint8_t *spec = item->last;
502 
503 		for (i = 0; i < size; ++i)
504 			if ((spec[i] | mask[i]) != mask[i]) {
505 				rte_errno = EINVAL;
506 				return -rte_errno;
507 			}
508 	}
509 	if (item->mask) {
510 		unsigned int i;
511 		const uint8_t *spec = item->mask;
512 
513 		for (i = 0; i < size; ++i)
514 			if ((spec[i] | mask[i]) != mask[i]) {
515 				rte_errno = EINVAL;
516 				return -rte_errno;
517 			}
518 	}
519 	if (item->spec && item->last) {
520 		uint8_t spec[size];
521 		uint8_t last[size];
522 		const uint8_t *apply = mask;
523 		unsigned int i;
524 		int ret;
525 
526 		if (item->mask)
527 			apply = item->mask;
528 		for (i = 0; i < size; ++i) {
529 			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
530 			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
531 		}
532 		ret = memcmp(spec, last, size);
533 		if (ret != 0) {
534 			rte_errno = EINVAL;
535 			return -rte_errno;
536 		}
537 	}
538 	return 0;
539 }
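
/*
 * For example, with the IPv4 item the supported mask above only covers the
 * source/destination addresses, TOS and protocol fields: a spec matching on
 * hdr.src_addr satisfies (spec[i] | mask[i]) == mask[i] for every byte,
 * whereas a spec that also matches on hdr.time_to_live fails, since those
 * bytes are zero in the supported mask, and rte_errno is set to EINVAL.
 */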
540 
541 /**
542  * Copy the RSS configuration from the user's; if rss_conf is NULL, the
543  * driver's default configuration is used.
544  *
545  * @param parser
546  *   Internal parser structure.
547  * @param rss_conf
548  *   User RSS configuration to save.
549  *
550  * @return
551  *   0 on success, a negative errno value otherwise and rte_errno is set.
552  */
553 static int
554 mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
555 			   const struct rte_eth_rss_conf *rss_conf)
556 {
557 	/*
558 	 * This function is also called at the beginning of
559 	 * mlx5_flow_convert_actions() to initialize the parser with the
560 	 * device default RSS configuration.
561 	 */
562 	if (rss_conf) {
563 		if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
564 			rte_errno = EINVAL;
565 			return -rte_errno;
566 		}
567 		if (rss_conf->rss_key_len != 40) {
568 			rte_errno = EINVAL;
569 			return -rte_errno;
570 		}
571 		if (rss_conf->rss_key_len && rss_conf->rss_key) {
572 			parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
573 			memcpy(parser->rss_key, rss_conf->rss_key,
574 			       rss_conf->rss_key_len);
575 			parser->rss_conf.rss_key = parser->rss_key;
576 		}
577 		parser->rss_conf.rss_hf = rss_conf->rss_hf;
578 	}
579 	return 0;
580 }
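
/*
 * The only RSS key length accepted is 40 bytes (the usual Toeplitz key
 * size); e.g. (hypothetical values):
 *
 *   static uint8_t key[40] = { 0x2c, ... };
 *   struct rte_eth_rss_conf conf = {
 *           .rss_key = key,
 *           .rss_key_len = 40,
 *           .rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP,
 *   };
 *
 * Any rss_hf bit falling in MLX5_RSS_HF_MASK (i.e. a hash type this PMD
 * does not support) or any other key length is rejected with EINVAL.
 */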
581 
582 /**
583  * Extract attributes into the parser.
584  *
585  * @param[in] attr
586  *   Flow rule attributes.
587  * @param[out] error
588  *   Perform verbose error reporting if not NULL.
589  *
590  * @return
591  *   0 on success, a negative errno value otherwise and rte_errno is set.
592  */
593 static int
594 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
595 			     struct rte_flow_error *error)
596 {
597 	if (attr->group) {
598 		rte_flow_error_set(error, ENOTSUP,
599 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
600 				   NULL,
601 				   "groups are not supported");
602 		return -rte_errno;
603 	}
604 	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
605 		rte_flow_error_set(error, ENOTSUP,
606 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
607 				   NULL,
608 				   "priorities are not supported");
609 		return -rte_errno;
610 	}
611 	if (attr->egress) {
612 		rte_flow_error_set(error, ENOTSUP,
613 				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
614 				   NULL,
615 				   "egress is not supported");
616 		return -rte_errno;
617 	}
618 	if (!attr->ingress) {
619 		rte_flow_error_set(error, ENOTSUP,
620 				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
621 				   NULL,
622 				   "only ingress is supported");
623 		return -rte_errno;
624 	}
625 	return 0;
626 }
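
/*
 * Only the default group, the default (or control flow) priority and
 * ingress-only rules pass this check; for instance:
 *
 *   struct rte_flow_attr attr = { .ingress = 1 };              accepted
 *   struct rte_flow_attr attr = { .ingress = 1, .group = 1 };  ENOTSUP
 */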
627 
628 /**
629  * Extract the requested actions into the parser.
630  *
631  * @param dev
632  *   Pointer to Ethernet device.
633  * @param[in] actions
634  *   Associated actions (list terminated by the END action).
635  * @param[out] error
636  *   Perform verbose error reporting if not NULL.
637  * @param[in, out] parser
638  *   Internal parser structure.
639  *
640  * @return
641  *   0 on success, a negative errno value otherwise and rte_errno is set.
642  */
643 static int
644 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
645 			  const struct rte_flow_action actions[],
646 			  struct rte_flow_error *error,
647 			  struct mlx5_flow_parse *parser)
648 {
649 	struct priv *priv = dev->data->dev_private;
650 	int ret;
651 
652 	/*
653 	 * Add the default RSS configuration necessary for Verbs to create a
654 	 * QP even if no RSS is requested.
655 	 */
656 	ret = mlx5_flow_convert_rss_conf(parser,
657 					 (const struct rte_eth_rss_conf *)
658 					 &priv->rss_conf);
659 	if (ret)
660 		return ret;
661 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
662 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
663 			continue;
664 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
665 			parser->drop = 1;
666 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
667 			const struct rte_flow_action_queue *queue =
668 				(const struct rte_flow_action_queue *)
669 				actions->conf;
670 			uint16_t n;
671 			uint16_t found = 0;
672 
673 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
674 				goto exit_action_not_supported;
675 			for (n = 0; n < parser->queues_n; ++n) {
676 				if (parser->queues[n] == queue->index) {
677 					found = 1;
678 					break;
679 				}
680 			}
681 			if (parser->queues_n > 1 && !found) {
682 				rte_flow_error_set(error, ENOTSUP,
683 					   RTE_FLOW_ERROR_TYPE_ACTION,
684 					   actions,
685 					   "queue action not in RSS queues");
686 				return -rte_errno;
687 			}
688 			if (!found) {
689 				parser->queues_n = 1;
690 				parser->queues[0] = queue->index;
691 			}
692 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
693 			const struct rte_flow_action_rss *rss =
694 				(const struct rte_flow_action_rss *)
695 				actions->conf;
696 			uint16_t n;
697 
698 			if (!rss || !rss->num) {
699 				rte_flow_error_set(error, EINVAL,
700 						   RTE_FLOW_ERROR_TYPE_ACTION,
701 						   actions,
702 						   "no valid queues");
703 				return -rte_errno;
704 			}
705 			if (parser->queues_n == 1) {
706 				uint16_t found = 0;
707 
708 				assert(parser->queues_n);
709 				for (n = 0; n < rss->num; ++n) {
710 					if (parser->queues[0] ==
711 					    rss->queue[n]) {
712 						found = 1;
713 						break;
714 					}
715 				}
716 				if (!found) {
717 					rte_flow_error_set(error, ENOTSUP,
718 						   RTE_FLOW_ERROR_TYPE_ACTION,
719 						   actions,
720 						   "queue action not in RSS"
721 						   " queues");
722 					return -rte_errno;
723 				}
724 			}
725 			if (rss->num > RTE_DIM(parser->queues)) {
726 				rte_flow_error_set(error, EINVAL,
727 						   RTE_FLOW_ERROR_TYPE_ACTION,
728 						   actions,
729 						   "too many queues for RSS"
730 						   " context");
731 				return -rte_errno;
732 			}
733 			for (n = 0; n < rss->num; ++n) {
734 				if (rss->queue[n] >= priv->rxqs_n) {
735 					rte_flow_error_set(error, EINVAL,
736 						   RTE_FLOW_ERROR_TYPE_ACTION,
737 						   actions,
738 						   "queue id > number of"
739 						   " queues");
740 					return -rte_errno;
741 				}
742 			}
743 			for (n = 0; n < rss->num; ++n)
744 				parser->queues[n] = rss->queue[n];
745 			parser->queues_n = rss->num;
746 			if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) {
747 				rte_flow_error_set(error, EINVAL,
748 						   RTE_FLOW_ERROR_TYPE_ACTION,
749 						   actions,
750 						   "wrong RSS configuration");
751 				return -rte_errno;
752 			}
753 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
754 			const struct rte_flow_action_mark *mark =
755 				(const struct rte_flow_action_mark *)
756 				actions->conf;
757 
758 			if (!mark) {
759 				rte_flow_error_set(error, EINVAL,
760 						   RTE_FLOW_ERROR_TYPE_ACTION,
761 						   actions,
762 						   "mark must be defined");
763 				return -rte_errno;
764 			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
765 				rte_flow_error_set(error, ENOTSUP,
766 						   RTE_FLOW_ERROR_TYPE_ACTION,
767 						   actions,
768 						   "mark must be between 0"
769 						   " and 16777199");
770 				return -rte_errno;
771 			}
772 			parser->mark = 1;
773 			parser->mark_id = mark->id;
774 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
775 			parser->mark = 1;
776 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
777 			   priv->config.flow_counter_en) {
778 			parser->count = 1;
779 		} else {
780 			goto exit_action_not_supported;
781 		}
782 	}
783 	if (parser->drop && parser->mark)
784 		parser->mark = 0;
785 	if (!parser->queues_n && !parser->drop) {
786 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
787 				   NULL, "no valid action");
788 		return -rte_errno;
789 	}
790 	return 0;
791 exit_action_not_supported:
792 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
793 			   actions, "action not supported");
794 	return -rte_errno;
795 }
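
/*
 * A hypothetical action list accepted by the loop above:
 *
 *   struct rte_flow_action actions[] = {
 *           {
 *                   .type = RTE_FLOW_ACTION_TYPE_MARK,
 *                   .conf = &(struct rte_flow_action_mark){ .id = 42 },
 *           },
 *           {
 *                   .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *                   .conf = &(struct rte_flow_action_queue){ .index = 0 },
 *           },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *
 * Assuming Rx queue 0 exists, this sets parser->mark = 1,
 * parser->mark_id = 42, parser->queues[0] = 0 and parser->queues_n = 1.
 * A DROP combined with MARK keeps the drop and silently discards the mark.
 */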
796 
797 /**
798  * Validate items.
799  *
800  * @param[in] items
801  *   Pattern specification (list terminated by the END pattern item).
802  * @param[out] error
803  *   Perform verbose error reporting if not NULL.
804  * @param[in, out] parser
805  *   Internal parser structure.
806  *
807  * @return
808  *   0 on success, a negative errno value otherwise and rte_errno is set.
809  */
810 static int
811 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
812 				 struct rte_flow_error *error,
813 				 struct mlx5_flow_parse *parser)
814 {
815 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
816 	unsigned int i;
817 	int ret = 0;
818 
819 	/* Initialise the offsets to start after verbs attribute. */
820 	for (i = 0; i != hash_rxq_init_n; ++i)
821 		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
822 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
823 		const struct mlx5_flow_items *token = NULL;
824 		unsigned int n;
825 
826 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
827 			continue;
828 		for (i = 0;
829 		     cur_item->items &&
830 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
831 		     ++i) {
832 			if (cur_item->items[i] == items->type) {
833 				token = &mlx5_flow_items[items->type];
834 				break;
835 			}
836 		}
837 		if (!token) {
838 			ret = -ENOTSUP;
839 			goto exit_item_not_supported;
840 		}
841 		cur_item = token;
842 		ret = mlx5_flow_item_validate(items,
843 					      (const uint8_t *)cur_item->mask,
844 					      cur_item->mask_sz);
845 		if (ret)
846 			goto exit_item_not_supported;
847 		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
848 			if (parser->inner) {
849 				rte_flow_error_set(error, ENOTSUP,
850 						   RTE_FLOW_ERROR_TYPE_ITEM,
851 						   items,
852 						   "cannot recognize multiple"
853 						   " VXLAN encapsulations");
854 				return -rte_errno;
855 			}
856 			parser->inner = IBV_FLOW_SPEC_INNER;
857 		}
858 		if (parser->drop) {
859 			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
860 		} else {
861 			for (n = 0; n != hash_rxq_init_n; ++n)
862 				parser->queue[n].offset += cur_item->dst_sz;
863 		}
864 	}
865 	if (parser->drop) {
866 		parser->queue[HASH_RXQ_ETH].offset +=
867 			sizeof(struct ibv_flow_spec_action_drop);
868 	}
869 	if (parser->mark) {
870 		for (i = 0; i != hash_rxq_init_n; ++i)
871 			parser->queue[i].offset +=
872 				sizeof(struct ibv_flow_spec_action_tag);
873 	}
874 	if (parser->count) {
875 		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
876 
877 		for (i = 0; i != hash_rxq_init_n; ++i)
878 			parser->queue[i].offset += size;
879 	}
880 	return 0;
881 exit_item_not_supported:
882 	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
883 				  items, "item not supported");
884 }
885 
886 /**
887  * Allocate memory space to store verbs flow attributes.
888  *
889  * @param[in] size
890  *   Number of bytes to allocate.
891  * @param[out] error
892  *   Perform verbose error reporting if not NULL.
893  *
894  * @return
895  *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
896  */
897 static struct ibv_flow_attr *
898 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
899 {
900 	struct ibv_flow_attr *ibv_attr;
901 
902 	ibv_attr = rte_calloc(__func__, 1, size, 0);
903 	if (!ibv_attr) {
904 		rte_flow_error_set(error, ENOMEM,
905 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
906 				   NULL,
907 				   "cannot allocate verbs spec attributes");
908 		return NULL;
909 	}
910 	return ibv_attr;
911 }
912 
913 /**
914  * Give inner packet matching a higher priority than non-inner packet
915  * matching.
916  *
917  * @param[in, out] parser
918  *   Internal parser structure.
919  * @param attr
920  *   User flow attribute.
921  */
922 static void
923 mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
924 			  const struct rte_flow_attr *attr)
925 {
926 	unsigned int i;
927 
928 	if (parser->drop) {
929 		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
930 			attr->priority +
931 			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
932 		return;
933 	}
934 	for (i = 0; i != hash_rxq_init_n; ++i) {
935 		if (parser->queue[i].ibv_attr) {
936 			parser->queue[i].ibv_attr->priority =
937 				attr->priority +
938 				hash_rxq_init[i].flow_priority -
939 				(parser->inner ? 1 : 0);
940 		}
941 	}
942 }
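
/*
 * With attr->priority == 0, an outer TCPv4 flow for instance gets Verbs
 * priority 0 + hash_rxq_init[HASH_RXQ_TCPV4].flow_priority = 1, while the
 * same match behind a VXLAN tunnel (parser->inner set) gets 1 - 1 = 0;
 * since lower Verbs priority values are matched first, inner matches win
 * over their non-inner counterparts.
 */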
943 
944 /**
945  * Finalise verbs flow attributes.
946  *
947  * @param[in, out] parser
948  *   Internal parser structure.
949  */
950 static void
951 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
952 {
953 	const unsigned int ipv4 =
954 		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
955 	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
956 	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
957 	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
958 	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
959 	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
960 	unsigned int i;
961 
962 	/* Remove any other flow not matching the pattern. */
963 	if (parser->queues_n == 1 && !parser->rss_conf.rss_hf) {
964 		for (i = 0; i != hash_rxq_init_n; ++i) {
965 			if (i == HASH_RXQ_ETH)
966 				continue;
967 			rte_free(parser->queue[i].ibv_attr);
968 			parser->queue[i].ibv_attr = NULL;
969 		}
970 		return;
971 	}
972 	if (parser->layer == HASH_RXQ_ETH) {
973 		goto fill;
974 	} else {
975 		/*
976 		 * This layer becomes useless as the pattern defines layers
977 		 * under it.
978 		 */
979 		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
980 		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
981 	}
982 	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
983 	for (i = ohmin; i != (ohmax + 1); ++i) {
984 		if (!parser->queue[i].ibv_attr)
985 			continue;
986 		rte_free(parser->queue[i].ibv_attr);
987 		parser->queue[i].ibv_attr = NULL;
988 	}
989 	/* Remove impossible flow according to the RSS configuration. */
990 	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
991 	    parser->rss_conf.rss_hf) {
992 		/* Remove any other flow. */
993 		for (i = hmin; i != (hmax + 1); ++i) {
994 			if ((i == parser->layer) ||
995 			     (!parser->queue[i].ibv_attr))
996 				continue;
997 			rte_free(parser->queue[i].ibv_attr);
998 			parser->queue[i].ibv_attr = NULL;
999 		}
1000 	} else if (!parser->queue[ip].ibv_attr) {
1001 		/* No RSS possible with the current configuration. */
1002 		parser->queues_n = 1;
1003 		return;
1004 	}
1005 fill:
1006 	/*
1007 	 * Fill missing layers in verbs specifications, or compute the correct
1008 	 * offset to allocate the memory space for the attributes and
1009 	 * specifications.
1010 	 */
1011 	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1012 		union {
1013 			struct ibv_flow_spec_ipv4_ext ipv4;
1014 			struct ibv_flow_spec_ipv6 ipv6;
1015 			struct ibv_flow_spec_tcp_udp udp_tcp;
1016 		} specs;
1017 		void *dst;
1018 		uint16_t size;
1019 
1020 		if (i == parser->layer)
1021 			continue;
1022 		if (parser->layer == HASH_RXQ_ETH) {
1023 			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1024 				size = sizeof(struct ibv_flow_spec_ipv4_ext);
1025 				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1026 					.type = IBV_FLOW_SPEC_IPV4_EXT,
1027 					.size = size,
1028 				};
1029 			} else {
1030 				size = sizeof(struct ibv_flow_spec_ipv6);
1031 				specs.ipv6 = (struct ibv_flow_spec_ipv6){
1032 					.type = IBV_FLOW_SPEC_IPV6,
1033 					.size = size,
1034 				};
1035 			}
1036 			if (parser->queue[i].ibv_attr) {
1037 				dst = (void *)((uintptr_t)
1038 					       parser->queue[i].ibv_attr +
1039 					       parser->queue[i].offset);
1040 				memcpy(dst, &specs, size);
1041 				++parser->queue[i].ibv_attr->num_of_specs;
1042 			}
1043 			parser->queue[i].offset += size;
1044 		}
1045 		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1046 		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1047 			size = sizeof(struct ibv_flow_spec_tcp_udp);
1048 			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1049 				.type = ((i == HASH_RXQ_UDPV4 ||
1050 					  i == HASH_RXQ_UDPV6) ?
1051 					 IBV_FLOW_SPEC_UDP :
1052 					 IBV_FLOW_SPEC_TCP),
1053 				.size = size,
1054 			};
1055 			if (parser->queue[i].ibv_attr) {
1056 				dst = (void *)((uintptr_t)
1057 					       parser->queue[i].ibv_attr +
1058 					       parser->queue[i].offset);
1059 				memcpy(dst, &specs, size);
1060 				++parser->queue[i].ibv_attr->num_of_specs;
1061 			}
1062 			parser->queue[i].offset += size;
1063 		}
1064 	}
1065 }
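
/*
 * This function runs twice per flow: a first time right after
 * mlx5_flow_convert_items_validate() so the offsets also account for the
 * wildcard specs that will be appended, and a second time at the end of
 * mlx5_flow_convert() (non-drop flows only) to actually write those
 * wildcard IPv4/IPv6/TCP/UDP specs into the allocated attributes.
 */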
1066 
1067 /**
1068  * Validate and convert a flow supported by the NIC.
1069  *
1070  * @param dev
1071  *   Pointer to Ethernet device.
1072  * @param[in] attr
1073  *   Flow rule attributes.
1074  * @param[in] pattern
1075  *   Pattern specification (list terminated by the END pattern item).
1076  * @param[in] actions
1077  *   Associated actions (list terminated by the END action).
1078  * @param[out] error
1079  *   Perform verbose error reporting if not NULL.
1080  * @param[in, out] parser
1081  *   Internal parser structure.
1082  *
1083  * @return
1084  *   0 on success, a negative errno value otherwise and rte_errno is set.
1085  */
1086 static int
1087 mlx5_flow_convert(struct rte_eth_dev *dev,
1088 		  const struct rte_flow_attr *attr,
1089 		  const struct rte_flow_item items[],
1090 		  const struct rte_flow_action actions[],
1091 		  struct rte_flow_error *error,
1092 		  struct mlx5_flow_parse *parser)
1093 {
1094 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1095 	unsigned int i;
1096 	int ret;
1097 
1098 	/* First step. Validate the attributes, items and actions. */
1099 	*parser = (struct mlx5_flow_parse){
1100 		.create = parser->create,
1101 		.layer = HASH_RXQ_ETH,
1102 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
1103 	};
1104 	ret = mlx5_flow_convert_attributes(attr, error);
1105 	if (ret)
1106 		return ret;
1107 	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1108 	if (ret)
1109 		return ret;
1110 	ret = mlx5_flow_convert_items_validate(items, error, parser);
1111 	if (ret)
1112 		return ret;
1113 	mlx5_flow_convert_finalise(parser);
1114 	/*
1115 	 * Second step.
1116 	 * Allocate the memory space to store verbs specifications.
1117 	 */
1118 	if (parser->drop) {
1119 		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1120 
1121 		parser->queue[HASH_RXQ_ETH].ibv_attr =
1122 			mlx5_flow_convert_allocate(offset, error);
1123 		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1124 			goto exit_enomem;
1125 		parser->queue[HASH_RXQ_ETH].offset =
1126 			sizeof(struct ibv_flow_attr);
1127 	} else {
1128 		for (i = 0; i != hash_rxq_init_n; ++i) {
1129 			unsigned int offset;
1130 
1131 			if (!(parser->rss_conf.rss_hf &
1132 			      hash_rxq_init[i].dpdk_rss_hf) &&
1133 			    (i != HASH_RXQ_ETH))
1134 				continue;
1135 			offset = parser->queue[i].offset;
1136 			parser->queue[i].ibv_attr =
1137 				mlx5_flow_convert_allocate(offset, error);
1138 			if (!parser->queue[i].ibv_attr)
1139 				goto exit_enomem;
1140 			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1141 		}
1142 	}
1143 	/* Third step. Conversion parse, fill the specifications. */
1144 	parser->inner = 0;
1145 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1146 		struct mlx5_flow_data data = {
1147 			.parser = parser,
1148 			.error = error,
1149 		};
1150 
1151 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1152 			continue;
1153 		cur_item = &mlx5_flow_items[items->type];
1154 		ret = cur_item->convert(items,
1155 					(cur_item->default_mask ?
1156 					 cur_item->default_mask :
1157 					 cur_item->mask),
1158 					 &data);
1159 		if (ret)
1160 			goto exit_free;
1161 	}
1162 	if (parser->mark)
1163 		mlx5_flow_create_flag_mark(parser, parser->mark_id);
1164 	if (parser->count && parser->create) {
1165 		mlx5_flow_create_count(dev, parser);
1166 		if (!parser->cs)
1167 			goto exit_count_error;
1168 	}
1169 	/*
1170 	 * Last step. Complete missing specification to reach the RSS
1171 	 * configuration.
1172 	 */
1173 	if (!parser->drop)
1174 		mlx5_flow_convert_finalise(parser);
1175 	mlx5_flow_update_priority(parser, attr);
1176 exit_free:
1177 	/* Only verification is expected, all resources should be released. */
1178 	if (!parser->create) {
1179 		for (i = 0; i != hash_rxq_init_n; ++i) {
1180 			if (parser->queue[i].ibv_attr) {
1181 				rte_free(parser->queue[i].ibv_attr);
1182 				parser->queue[i].ibv_attr = NULL;
1183 			}
1184 		}
1185 	}
1186 	return ret;
1187 exit_enomem:
1188 	for (i = 0; i != hash_rxq_init_n; ++i) {
1189 		if (parser->queue[i].ibv_attr) {
1190 			rte_free(parser->queue[i].ibv_attr);
1191 			parser->queue[i].ibv_attr = NULL;
1192 		}
1193 	}
1194 	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1195 			   NULL, "cannot allocate verbs spec attributes");
1196 	return -rte_errno;
1197 exit_count_error:
1198 	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1199 			   NULL, "cannot create counter");
1200 	return -rte_errno;
1201 }
1202 
1203 /**
1204  * Copy the specification created into the flow.
1205  *
1206  * @param parser
1207  *   Internal parser structure.
1208  * @param src
1209  *   Created specification to copy.
1210  * @param size
1211  *   Size in bytes of the specification to copy.
1212  */
1213 static void
1214 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1215 		      unsigned int size)
1216 {
1217 	unsigned int i;
1218 	void *dst;
1219 
1220 	for (i = 0; i != hash_rxq_init_n; ++i) {
1221 		if (!parser->queue[i].ibv_attr)
1222 			continue;
1223 		/* Specification must be the same l3 type or none. */
1224 		if (parser->layer == HASH_RXQ_ETH ||
1225 		    (hash_rxq_init[parser->layer].ip_version ==
1226 		     hash_rxq_init[i].ip_version) ||
1227 		    (hash_rxq_init[i].ip_version == 0)) {
1228 			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1229 					parser->queue[i].offset);
1230 			memcpy(dst, src, size);
1231 			++parser->queue[i].ibv_attr->num_of_specs;
1232 			parser->queue[i].offset += size;
1233 		}
1234 	}
1235 }
1236 
1237 /**
1238  * Convert Ethernet item to Verbs specification.
1239  *
1240  * @param item[in]
1241  *   Item specification.
1242  * @param default_mask[in]
1243  *   Default bit-masks to use when item->mask is not provided.
1244  * @param data[in, out]
1245  *   User structure.
1246  *
1247  * @return
1248  *   0 on success, a negative errno value otherwise and rte_errno is set.
1249  */
1250 static int
1251 mlx5_flow_create_eth(const struct rte_flow_item *item,
1252 		     const void *default_mask,
1253 		     struct mlx5_flow_data *data)
1254 {
1255 	const struct rte_flow_item_eth *spec = item->spec;
1256 	const struct rte_flow_item_eth *mask = item->mask;
1257 	struct mlx5_flow_parse *parser = data->parser;
1258 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1259 	struct ibv_flow_spec_eth eth = {
1260 		.type = parser->inner | IBV_FLOW_SPEC_ETH,
1261 		.size = eth_size,
1262 	};
1263 
1264 	/* Don't update layer for the inner pattern. */
1265 	if (!parser->inner)
1266 		parser->layer = HASH_RXQ_ETH;
1267 	if (spec) {
1268 		unsigned int i;
1269 
1270 		if (!mask)
1271 			mask = default_mask;
1272 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1273 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1274 		eth.val.ether_type = spec->type;
1275 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1276 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1277 		eth.mask.ether_type = mask->type;
1278 		/* Remove unwanted bits from values. */
1279 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1280 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1281 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
1282 		}
1283 		eth.val.ether_type &= eth.mask.ether_type;
1284 	}
1285 	mlx5_flow_create_copy(parser, &eth, eth_size);
1286 	return 0;
1287 }
1288 
1289 /**
1290  * Convert VLAN item to Verbs specification.
1291  *
1292  * @param item[in]
1293  *   Item specification.
1294  * @param default_mask[in]
1295  *   Default bit-masks to use when item->mask is not provided.
1296  * @param data[in, out]
1297  *   User structure.
1298  *
1299  * @return
1300  *   0 on success, a negative errno value otherwise and rte_errno is set.
1301  */
1302 static int
1303 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1304 		      const void *default_mask,
1305 		      struct mlx5_flow_data *data)
1306 {
1307 	const struct rte_flow_item_vlan *spec = item->spec;
1308 	const struct rte_flow_item_vlan *mask = item->mask;
1309 	struct mlx5_flow_parse *parser = data->parser;
1310 	struct ibv_flow_spec_eth *eth;
1311 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1312 
1313 	if (spec) {
1314 		unsigned int i;
1315 		if (!mask)
1316 			mask = default_mask;
1317 
1318 		for (i = 0; i != hash_rxq_init_n; ++i) {
1319 			if (!parser->queue[i].ibv_attr)
1320 				continue;
1321 
1322 			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1323 				       parser->queue[i].offset - eth_size);
1324 			eth->val.vlan_tag = spec->tci;
1325 			eth->mask.vlan_tag = mask->tci;
1326 			eth->val.vlan_tag &= eth->mask.vlan_tag;
1327 			/*
1328 			 * From a Verbs perspective, an empty VLAN is
1329 			 * equivalent to a packet without a VLAN layer.
1330 			 */
1331 			if (!eth->mask.vlan_tag)
1332 				goto error;
1333 		}
1334 		return 0;
1335 	}
1336 error:
1337 	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1338 				  item, "VLAN cannot be empty");
1339 }
1340 
1341 /**
1342  * Convert IPv4 item to Verbs specification.
1343  *
1344  * @param item[in]
1345  *   Item specification.
1346  * @param default_mask[in]
1347  *   Default bit-masks to use when item->mask is not provided.
1348  * @param data[in, out]
1349  *   User structure.
1350  *
1351  * @return
1352  *   0 on success, a negative errno value otherwise and rte_errno is set.
1353  */
1354 static int
1355 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1356 		      const void *default_mask,
1357 		      struct mlx5_flow_data *data)
1358 {
1359 	const struct rte_flow_item_ipv4 *spec = item->spec;
1360 	const struct rte_flow_item_ipv4 *mask = item->mask;
1361 	struct mlx5_flow_parse *parser = data->parser;
1362 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1363 	struct ibv_flow_spec_ipv4_ext ipv4 = {
1364 		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1365 		.size = ipv4_size,
1366 	};
1367 
1368 	/* Don't update layer for the inner pattern. */
1369 	if (!parser->inner)
1370 		parser->layer = HASH_RXQ_IPV4;
1371 	if (spec) {
1372 		if (!mask)
1373 			mask = default_mask;
1374 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1375 			.src_ip = spec->hdr.src_addr,
1376 			.dst_ip = spec->hdr.dst_addr,
1377 			.proto = spec->hdr.next_proto_id,
1378 			.tos = spec->hdr.type_of_service,
1379 		};
1380 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1381 			.src_ip = mask->hdr.src_addr,
1382 			.dst_ip = mask->hdr.dst_addr,
1383 			.proto = mask->hdr.next_proto_id,
1384 			.tos = mask->hdr.type_of_service,
1385 		};
1386 		/* Remove unwanted bits from values. */
1387 		ipv4.val.src_ip &= ipv4.mask.src_ip;
1388 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1389 		ipv4.val.proto &= ipv4.mask.proto;
1390 		ipv4.val.tos &= ipv4.mask.tos;
1391 	}
1392 	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1393 	return 0;
1394 }
1395 
1396 /**
1397  * Convert IPv6 item to Verbs specification.
1398  *
1399  * @param item[in]
1400  *   Item specification.
1401  * @param default_mask[in]
1402  *   Default bit-masks to use when item->mask is not provided.
1403  * @param data[in, out]
1404  *   User structure.
1405  *
1406  * @return
1407  *   0 on success, a negative errno value otherwise and rte_errno is set.
1408  */
1409 static int
1410 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1411 		      const void *default_mask,
1412 		      struct mlx5_flow_data *data)
1413 {
1414 	const struct rte_flow_item_ipv6 *spec = item->spec;
1415 	const struct rte_flow_item_ipv6 *mask = item->mask;
1416 	struct mlx5_flow_parse *parser = data->parser;
1417 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1418 	struct ibv_flow_spec_ipv6 ipv6 = {
1419 		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
1420 		.size = ipv6_size,
1421 	};
1422 
1423 	/* Don't update layer for the inner pattern. */
1424 	if (!parser->inner)
1425 		parser->layer = HASH_RXQ_IPV6;
1426 	if (spec) {
1427 		unsigned int i;
1428 		uint32_t vtc_flow_val;
1429 		uint32_t vtc_flow_mask;
1430 
1431 		if (!mask)
1432 			mask = default_mask;
1433 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1434 		       RTE_DIM(ipv6.val.src_ip));
1435 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1436 		       RTE_DIM(ipv6.val.dst_ip));
1437 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1438 		       RTE_DIM(ipv6.mask.src_ip));
1439 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1440 		       RTE_DIM(ipv6.mask.dst_ip));
1441 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1442 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1443 		ipv6.val.flow_label =
1444 			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1445 					 IPV6_HDR_FL_SHIFT);
1446 		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1447 					 IPV6_HDR_TC_SHIFT;
1448 		ipv6.val.next_hdr = spec->hdr.proto;
1449 		ipv6.val.hop_limit = spec->hdr.hop_limits;
1450 		ipv6.mask.flow_label =
1451 			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1452 					 IPV6_HDR_FL_SHIFT);
1453 		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1454 					  IPV6_HDR_TC_SHIFT;
1455 		ipv6.mask.next_hdr = mask->hdr.proto;
1456 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
1457 		/* Remove unwanted bits from values. */
1458 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1459 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1460 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1461 		}
1462 		ipv6.val.flow_label &= ipv6.mask.flow_label;
1463 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1464 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1465 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1466 	}
1467 	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1468 	return 0;
1469 }
1470 
1471 /**
1472  * Convert UDP item to Verbs specification.
1473  *
1474  * @param item[in]
1475  *   Item specification.
1476  * @param default_mask[in]
1477  *   Default bit-masks to use when item->mask is not provided.
1478  * @param data[in, out]
1479  *   User structure.
1480  *
1481  * @return
1482  *   0 on success, a negative errno value otherwise and rte_errno is set.
1483  */
1484 static int
1485 mlx5_flow_create_udp(const struct rte_flow_item *item,
1486 		     const void *default_mask,
1487 		     struct mlx5_flow_data *data)
1488 {
1489 	const struct rte_flow_item_udp *spec = item->spec;
1490 	const struct rte_flow_item_udp *mask = item->mask;
1491 	struct mlx5_flow_parse *parser = data->parser;
1492 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1493 	struct ibv_flow_spec_tcp_udp udp = {
1494 		.type = parser->inner | IBV_FLOW_SPEC_UDP,
1495 		.size = udp_size,
1496 	};
1497 
1498 	/* Don't update layer for the inner pattern. */
1499 	if (!parser->inner) {
1500 		if (parser->layer == HASH_RXQ_IPV4)
1501 			parser->layer = HASH_RXQ_UDPV4;
1502 		else
1503 			parser->layer = HASH_RXQ_UDPV6;
1504 	}
1505 	if (spec) {
1506 		if (!mask)
1507 			mask = default_mask;
1508 		udp.val.dst_port = spec->hdr.dst_port;
1509 		udp.val.src_port = spec->hdr.src_port;
1510 		udp.mask.dst_port = mask->hdr.dst_port;
1511 		udp.mask.src_port = mask->hdr.src_port;
1512 		/* Remove unwanted bits from values. */
1513 		udp.val.src_port &= udp.mask.src_port;
1514 		udp.val.dst_port &= udp.mask.dst_port;
1515 	}
1516 	mlx5_flow_create_copy(parser, &udp, udp_size);
1517 	return 0;
1518 }
1519 
1520 /**
1521  * Convert TCP item to Verbs specification.
1522  *
1523  * @param item[in]
1524  *   Item specification.
1525  * @param default_mask[in]
1526  *   Default bit-masks to use when item->mask is not provided.
1527  * @param data[in, out]
1528  *   User structure.
1529  *
1530  * @return
1531  *   0 on success, a negative errno value otherwise and rte_errno is set.
1532  */
1533 static int
1534 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1535 		     const void *default_mask,
1536 		     struct mlx5_flow_data *data)
1537 {
1538 	const struct rte_flow_item_tcp *spec = item->spec;
1539 	const struct rte_flow_item_tcp *mask = item->mask;
1540 	struct mlx5_flow_parse *parser = data->parser;
1541 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1542 	struct ibv_flow_spec_tcp_udp tcp = {
1543 		.type = parser->inner | IBV_FLOW_SPEC_TCP,
1544 		.size = tcp_size,
1545 	};
1546 
1547 	/* Don't update layer for the inner pattern. */
1548 	if (!parser->inner) {
1549 		if (parser->layer == HASH_RXQ_IPV4)
1550 			parser->layer = HASH_RXQ_TCPV4;
1551 		else
1552 			parser->layer = HASH_RXQ_TCPV6;
1553 	}
1554 	if (spec) {
1555 		if (!mask)
1556 			mask = default_mask;
1557 		tcp.val.dst_port = spec->hdr.dst_port;
1558 		tcp.val.src_port = spec->hdr.src_port;
1559 		tcp.mask.dst_port = mask->hdr.dst_port;
1560 		tcp.mask.src_port = mask->hdr.src_port;
1561 		/* Remove unwanted bits from values. */
1562 		tcp.val.src_port &= tcp.mask.src_port;
1563 		tcp.val.dst_port &= tcp.mask.dst_port;
1564 	}
1565 	mlx5_flow_create_copy(parser, &tcp, tcp_size);
1566 	return 0;
1567 }
1568 
1569 /**
1570  * Convert VXLAN item to Verbs specification.
1571  *
1572  * @param item[in]
1573  *   Item specification.
1574  * @param default_mask[in]
1575  *   Default bit-masks to use when item->mask is not provided.
1576  * @param data[in, out]
1577  *   User structure.
1578  *
1579  * @return
1580  *   0 on success, a negative errno value otherwise and rte_errno is set.
1581  */
1582 static int
1583 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1584 		       const void *default_mask,
1585 		       struct mlx5_flow_data *data)
1586 {
1587 	const struct rte_flow_item_vxlan *spec = item->spec;
1588 	const struct rte_flow_item_vxlan *mask = item->mask;
1589 	struct mlx5_flow_parse *parser = data->parser;
1590 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1591 	struct ibv_flow_spec_tunnel vxlan = {
1592 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1593 		.size = size,
1594 	};
1595 	union vni {
1596 		uint32_t vlan_id;
1597 		uint8_t vni[4];
1598 	} id;
1599 
1600 	id.vni[0] = 0;
1601 	parser->inner = IBV_FLOW_SPEC_INNER;
1602 	if (spec) {
1603 		if (!mask)
1604 			mask = default_mask;
1605 		memcpy(&id.vni[1], spec->vni, 3);
1606 		vxlan.val.tunnel_id = id.vlan_id;
1607 		memcpy(&id.vni[1], mask->vni, 3);
1608 		vxlan.mask.tunnel_id = id.vlan_id;
1609 		/* Remove unwanted bits from values. */
1610 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1611 	}
1612 	/*
1613 	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1614 	 * layer is defined in the Verbs specification, it is interpreted as a
1615 	 * wildcard and all packets will match this rule; if it follows a full
1616 	 * stack layer (e.g. eth / ipv4 / udp), all packets matching the
1617 	 * preceding layers will also match this rule.
1618 	 * To avoid such a situation, VNI 0 is currently refused.
1619 	 */
1620 	if (!vxlan.val.tunnel_id)
1621 		return rte_flow_error_set(data->error, EINVAL,
1622 					  RTE_FLOW_ERROR_TYPE_ITEM,
1623 					  item,
1624 					  "VxLAN vni cannot be 0");
1625 	mlx5_flow_create_copy(parser, &vxlan, size);
1626 	return 0;
1627 }
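
/*
 * The 24-bit VNI from the item is copied into bytes 1..3 of the id union
 * while byte 0 stays zero, so vxlan.val.tunnel_id carries the VNI in the
 * same (network) byte order as the pattern item; e.g. an item with
 * vni = "\x00\x00\x01" programs tunnel 1. A VNI of 0 is refused, as the
 * comment above explains, because it would degenerate into a wildcard.
 */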
1628 
1629 /**
1630  * Convert mark/flag action to Verbs specification.
1631  *
1632  * @param parser
1633  *   Internal parser structure.
1634  * @param mark_id
1635  *   Mark identifier.
1636  *
1637  * @return
1638  *   0 on success, a negative errno value otherwise and rte_errno is set.
1639  */
1640 static int
1641 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1642 {
1643 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1644 	struct ibv_flow_spec_action_tag tag = {
1645 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1646 		.size = size,
1647 		.tag_id = mlx5_flow_mark_set(mark_id),
1648 	};
1649 
1650 	assert(parser->mark);
1651 	mlx5_flow_create_copy(parser, &tag, size);
1652 	return 0;
1653 }
1654 
1655 /**
1656  * Convert count action to Verbs specification.
1657  *
1658  * @param dev
1659  *   Pointer to Ethernet device.
1660  * @param parser
1661  *   Pointer to MLX5 flow parser structure.
1662  *
1663  * @return
1664  *   0 on success, a negative errno value otherwise and rte_errno is set.
1665  */
1666 static int
1667 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1668 		       struct mlx5_flow_parse *parser __rte_unused)
1669 {
1670 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1671 	struct priv *priv = dev->data->dev_private;
1672 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1673 	struct ibv_counter_set_init_attr init_attr = {0};
1674 	struct ibv_flow_spec_counter_action counter = {
1675 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
1676 		.size = size,
1677 		.counter_set_handle = 0,
1678 	};
1679 
1680 	init_attr.counter_set_id = 0;
1681 	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1682 	if (!parser->cs) {
1683 		rte_errno = EINVAL;
1684 		return -rte_errno;
1685 	}
1686 	counter.counter_set_handle = parser->cs->handle;
1687 	mlx5_flow_create_copy(parser, &counter, size);
1688 #endif
1689 	return 0;
1690 }
1691 
1692 /**
1693  * Complete flow rule creation with a drop queue.
1694  *
1695  * @param dev
1696  *   Pointer to Ethernet device.
1697  * @param parser
1698  *   Internal parser structure.
1699  * @param flow
1700  *   Pointer to the rte_flow.
1701  * @param[out] error
1702  *   Perform verbose error reporting if not NULL.
1703  *
1704  * @return
1705  *   0 on success, a negative errno value otherwise and rte_errno is set.
1706  */
1707 static int
1708 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1709 				   struct mlx5_flow_parse *parser,
1710 				   struct rte_flow *flow,
1711 				   struct rte_flow_error *error)
1712 {
1713 	struct priv *priv = dev->data->dev_private;
1714 	struct ibv_flow_spec_action_drop *drop;
1715 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1716 
1717 	assert(priv->pd);
1718 	assert(priv->ctx);
1719 	flow->drop = 1;
1720 	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1721 			parser->queue[HASH_RXQ_ETH].offset);
1722 	*drop = (struct ibv_flow_spec_action_drop){
1723 			.type = IBV_FLOW_SPEC_ACTION_DROP,
1724 			.size = size,
1725 	};
1726 	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1727 	parser->queue[HASH_RXQ_ETH].offset += size;
1728 	flow->frxq[HASH_RXQ_ETH].ibv_attr =
1729 		parser->queue[HASH_RXQ_ETH].ibv_attr;
1730 	if (parser->count)
1731 		flow->cs = parser->cs;
1732 	if (!priv->dev->data->dev_started)
1733 		return 0;
1734 	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1735 	flow->frxq[HASH_RXQ_ETH].ibv_flow =
1736 		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1737 				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
1738 	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1739 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1740 				   NULL, "flow rule creation failure");
1741 		goto error;
1742 	}
1743 	return 0;
1744 error:
1745 	assert(flow);
1746 	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1747 		claim_zero(mlx5_glue->destroy_flow
1748 			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1749 		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1750 	}
1751 	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1752 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1753 		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1754 	}
1755 	if (flow->cs) {
1756 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1757 		flow->cs = NULL;
1758 		parser->cs = NULL;
1759 	}
1760 	return -rte_errno;
1761 }
1762 
1763 /**
1764  * Create hash Rx queues when RSS is enabled.
1765  *
1766  * @param dev
1767  *   Pointer to Ethernet device.
1768  * @param parser
1769  *   Internal parser structure.
1770  * @param flow
1771  *   Pointer to the rte_flow.
1772  * @param[out] error
1773  *   Perform verbose error reporting if not NULL.
1774  *
1775  * @return
1776  *   0 on success, a negative errno value otherwise and rte_errno is set.
1777  */
1778 static int
1779 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1780 				  struct mlx5_flow_parse *parser,
1781 				  struct rte_flow *flow,
1782 				  struct rte_flow_error *error)
1783 {
1784 	struct priv *priv = dev->data->dev_private;
1785 	unsigned int i;
1786 
1787 	for (i = 0; i != hash_rxq_init_n; ++i) {
1788 		uint64_t hash_fields;
1789 
1790 		if (!parser->queue[i].ibv_attr)
1791 			continue;
1792 		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1793 		parser->queue[i].ibv_attr = NULL;
1794 		hash_fields = hash_rxq_init[i].hash_fields;
1795 		if (!priv->dev->data->dev_started)
1796 			continue;
1797 		flow->frxq[i].hrxq =
1798 			mlx5_hrxq_get(dev,
1799 				      parser->rss_conf.rss_key,
1800 				      parser->rss_conf.rss_key_len,
1801 				      hash_fields,
1802 				      parser->queues,
1803 				      parser->queues_n);
1804 		if (flow->frxq[i].hrxq)
1805 			continue;
1806 		flow->frxq[i].hrxq =
1807 			mlx5_hrxq_new(dev,
1808 				      parser->rss_conf.rss_key,
1809 				      parser->rss_conf.rss_key_len,
1810 				      hash_fields,
1811 				      parser->queues,
1812 				      parser->queues_n);
1813 		if (!flow->frxq[i].hrxq) {
1814 			return rte_flow_error_set(error, ENOMEM,
1815 						  RTE_FLOW_ERROR_TYPE_HANDLE,
1816 						  NULL,
1817 						  "cannot create hash rxq");
1818 		}
1819 	}
1820 	return 0;
1821 }
1822 
1823 /**
1824  * Complete flow rule creation.
1825  *
1826  * @param dev
1827  *   Pointer to Ethernet device.
1828  * @param parser
1829  *   Internal parser structure.
1830  * @param flow
1831  *   Pointer to the rte_flow.
1832  * @param[out] error
1833  *   Perform verbose error reporting if not NULL.
1834  *
1835  * @return
1836  *   0 on success, a negative errno value otherwise and rte_errno is set.
1837  */
1838 static int
1839 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1840 			      struct mlx5_flow_parse *parser,
1841 			      struct rte_flow *flow,
1842 			      struct rte_flow_error *error)
1843 {
1844 	struct priv *priv = dev->data->dev_private;
1845 	int ret;
1846 	unsigned int i;
1847 	unsigned int flows_n = 0;
1848 
1849 	assert(priv->pd);
1850 	assert(priv->ctx);
1851 	assert(!parser->drop);
1852 	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1853 	if (ret)
1854 		goto error;
1855 	if (parser->count)
1856 		flow->cs = parser->cs;
1857 	if (!priv->dev->data->dev_started)
1858 		return 0;
1859 	for (i = 0; i != hash_rxq_init_n; ++i) {
1860 		if (!flow->frxq[i].hrxq)
1861 			continue;
1862 		flow->frxq[i].ibv_flow =
1863 			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1864 					       flow->frxq[i].ibv_attr);
1865 		if (!flow->frxq[i].ibv_flow) {
1866 			rte_flow_error_set(error, ENOMEM,
1867 					   RTE_FLOW_ERROR_TYPE_HANDLE,
1868 					   NULL, "flow rule creation failure");
1869 			goto error;
1870 		}
1871 		++flows_n;
1872 		DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
1873 			dev->data->port_id,
1874 			(void *)flow, i,
1875 			(void *)flow->frxq[i].hrxq,
1876 			(void *)flow->frxq[i].ibv_flow);
1877 	}
1878 	if (!flows_n) {
1879 		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1880 				   NULL, "internal error in flow creation");
1881 		goto error;
1882 	}
1883 	for (i = 0; i != parser->queues_n; ++i) {
1884 		struct mlx5_rxq_data *q =
1885 			(*priv->rxqs)[parser->queues[i]];
1886 
1887 		q->mark |= parser->mark;
1888 	}
1889 	return 0;
1890 error:
1891 	ret = rte_errno; /* Save rte_errno before cleanup. */
1892 	assert(flow);
1893 	for (i = 0; i != hash_rxq_init_n; ++i) {
1894 		if (flow->frxq[i].ibv_flow) {
1895 			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1896 
1897 			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1898 		}
1899 		if (flow->frxq[i].hrxq)
1900 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1901 		if (flow->frxq[i].ibv_attr)
1902 			rte_free(flow->frxq[i].ibv_attr);
1903 	}
1904 	if (flow->cs) {
1905 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1906 		flow->cs = NULL;
1907 		parser->cs = NULL;
1908 	}
1909 	rte_errno = ret; /* Restore rte_errno. */
1910 	return -rte_errno;
1911 }
1912 
1913 /**
1914  * Convert a flow.
1915  *
1916  * @param dev
1917  *   Pointer to Ethernet device.
1918  * @param list
1919  *   Pointer to a TAILQ flow list.
1920  * @param[in] attr
1921  *   Flow rule attributes.
1922  * @param[in] pattern
1923  *   Pattern specification (list terminated by the END pattern item).
1924  * @param[in] actions
1925  *   Associated actions (list terminated by the END action).
1926  * @param[out] error
1927  *   Perform verbose error reporting if not NULL.
1928  *
1929  * @return
1930  *   A flow on success, NULL otherwise and rte_errno is set.
1931  */
1932 static struct rte_flow *
1933 mlx5_flow_list_create(struct rte_eth_dev *dev,
1934 		      struct mlx5_flows *list,
1935 		      const struct rte_flow_attr *attr,
1936 		      const struct rte_flow_item items[],
1937 		      const struct rte_flow_action actions[],
1938 		      struct rte_flow_error *error)
1939 {
1940 	struct mlx5_flow_parse parser = { .create = 1, };
1941 	struct rte_flow *flow = NULL;
1942 	unsigned int i;
1943 	int ret;
1944 
1945 	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1946 	if (ret)
1947 		goto exit;
1948 	flow = rte_calloc(__func__, 1,
1949 			  sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1950 			  0);
1951 	if (!flow) {
1952 		rte_flow_error_set(error, ENOMEM,
1953 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1954 				   NULL,
1955 				   "cannot allocate flow memory");
1956 		return NULL;
1957 	}
1958 	/* Copy queues configuration. */
1959 	flow->queues = (uint16_t (*)[])(flow + 1);
1960 	memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1961 	flow->queues_n = parser.queues_n;
1962 	flow->mark = parser.mark;
1963 	/* Copy RSS configuration. */
1964 	flow->rss_conf = parser.rss_conf;
1965 	flow->rss_conf.rss_key = flow->rss_key;
1966 	memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1967 	/* Finalise the flow. */
1968 	if (parser.drop)
1969 		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1970 							 error);
1971 	else
1972 		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1973 	if (ret)
1974 		goto exit;
1975 	TAILQ_INSERT_TAIL(list, flow, next);
1976 	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
1977 		(void *)flow);
1978 	return flow;
1979 exit:
1980 	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
1981 		error->message);
1982 	for (i = 0; i != hash_rxq_init_n; ++i) {
1983 		if (parser.queue[i].ibv_attr)
1984 			rte_free(parser.queue[i].ibv_attr);
1985 	}
1986 	rte_free(flow);
1987 	return NULL;
1988 }
1989 
1990 /**
1991  * Validate a flow supported by the NIC.
1992  *
1993  * @see rte_flow_validate()
1994  * @see rte_flow_ops
1995  */
1996 int
1997 mlx5_flow_validate(struct rte_eth_dev *dev,
1998 		   const struct rte_flow_attr *attr,
1999 		   const struct rte_flow_item items[],
2000 		   const struct rte_flow_action actions[],
2001 		   struct rte_flow_error *error)
2002 {
2003 	struct mlx5_flow_parse parser = { .create = 0, };
2004 
2005 	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2006 }
2007 
2008 /**
2009  * Create a flow.
2010  *
2011  * @see rte_flow_create()
2012  * @see rte_flow_ops
2013  */
2014 struct rte_flow *
2015 mlx5_flow_create(struct rte_eth_dev *dev,
2016 		 const struct rte_flow_attr *attr,
2017 		 const struct rte_flow_item items[],
2018 		 const struct rte_flow_action actions[],
2019 		 struct rte_flow_error *error)
2020 {
2021 	struct priv *priv = dev->data->dev_private;
2022 
2023 	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2024 				     error);
2025 }
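
/*
 * Illustrative sketch, not part of the driver: the two entry points above are
 * reached through the generic rte_flow API.  Values below are hypothetical;
 * port_id is assumed to identify an mlx5 port and queue 0 to exist.
 *
 *     struct rte_flow_attr attr = { .ingress = 1 };
 *     struct rte_flow_item pattern[] = {
 *         { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *         { .type = RTE_FLOW_ITEM_TYPE_END },
 *     };
 *     struct rte_flow_action_queue queue = { .index = 0 };
 *     struct rte_flow_action actions[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 *     };
 *     struct rte_flow_error err;
 *     struct rte_flow *f = NULL;
 *
 *     if (!rte_flow_validate(port_id, &attr, pattern, actions, &err))
 *         f = rte_flow_create(port_id, &attr, pattern, actions, &err);
 */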
2026 
2027 /**
2028  * Destroy a flow in a list.
2029  *
2030  * @param dev
2031  *   Pointer to Ethernet device.
2032  * @param list
2033  *   Pointer to a TAILQ flow list.
2034  * @param[in] flow
2035  *   Flow to destroy.
2036  */
2037 static void
2038 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2039 		       struct rte_flow *flow)
2040 {
2041 	struct priv *priv = dev->data->dev_private;
2042 	unsigned int i;
2043 
2044 	if (flow->drop || !flow->mark)
2045 		goto free;
2046 	for (i = 0; i != flow->queues_n; ++i) {
2047 		struct rte_flow *tmp;
2048 		int mark = 0;
2049 
2050 		/*
2051 		 * To remove the mark from the queue, the queue must not be
2052 		 * present in any other marked flow (RSS or not).
2053 		 */
2054 		TAILQ_FOREACH(tmp, list, next) {
2055 			unsigned int j;
2056 			uint16_t *tqs = NULL;
2057 			uint16_t tq_n = 0;
2058 
2059 			if (!tmp->mark)
2060 				continue;
2061 			for (j = 0; j != hash_rxq_init_n; ++j) {
2062 				if (!tmp->frxq[j].hrxq)
2063 					continue;
2064 				tqs = tmp->frxq[j].hrxq->ind_table->queues;
2065 				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2066 			}
2067 			if (!tq_n)
2068 				continue;
2069 			for (j = 0; (j != tq_n) && !mark; j++)
2070 				if (tqs[j] == (*flow->queues)[i])
2071 					mark = 1;
2072 		}
2073 		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2074 	}
2075 free:
2076 	if (flow->drop) {
2077 		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2078 			claim_zero(mlx5_glue->destroy_flow
2079 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2080 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2081 	} else {
2082 		for (i = 0; i != hash_rxq_init_n; ++i) {
2083 			struct mlx5_flow *frxq = &flow->frxq[i];
2084 
2085 			if (frxq->ibv_flow)
2086 				claim_zero(mlx5_glue->destroy_flow
2087 					   (frxq->ibv_flow));
2088 			if (frxq->hrxq)
2089 				mlx5_hrxq_release(dev, frxq->hrxq);
2090 			if (frxq->ibv_attr)
2091 				rte_free(frxq->ibv_attr);
2092 		}
2093 	}
2094 	if (flow->cs) {
2095 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2096 		flow->cs = NULL;
2097 	}
2098 	TAILQ_REMOVE(list, flow, next);
2099 	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2100 		(void *)flow);
2101 	rte_free(flow);
2102 }
2103 
2104 /**
2105  * Destroy all flows.
2106  *
2107  * @param dev
2108  *   Pointer to Ethernet device.
2109  * @param list
2110  *   Pointer to a TAILQ flow list.
2111  */
2112 void
2113 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2114 {
2115 	while (!TAILQ_EMPTY(list)) {
2116 		struct rte_flow *flow;
2117 
2118 		flow = TAILQ_FIRST(list);
2119 		mlx5_flow_list_destroy(dev, list, flow);
2120 	}
2121 }
2122 
2123 /**
2124  * Create drop queue.
2125  *
2126  * @param dev
2127  *   Pointer to Ethernet device.
2128  *
2129  * @return
2130  *   0 on success, a negative errno value otherwise and rte_errno is set.
2131  */
2132 int
2133 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2134 {
2135 	struct priv *priv = dev->data->dev_private;
2136 	struct mlx5_hrxq_drop *fdq = NULL;
2137 
2138 	assert(priv->pd);
2139 	assert(priv->ctx);
2140 	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2141 	if (!fdq) {
2142 		DRV_LOG(WARNING,
2143 			"port %u cannot allocate memory for drop queue",
2144 			dev->data->port_id);
2145 		rte_errno = ENOMEM;
2146 		return -rte_errno;
2147 	}
2148 	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2149 	if (!fdq->cq) {
2150 		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2151 			dev->data->port_id);
2152 		rte_errno = errno;
2153 		goto error;
2154 	}
2155 	fdq->wq = mlx5_glue->create_wq
2156 		(priv->ctx,
2157 		 &(struct ibv_wq_init_attr){
2158 			.wq_type = IBV_WQT_RQ,
2159 			.max_wr = 1,
2160 			.max_sge = 1,
2161 			.pd = priv->pd,
2162 			.cq = fdq->cq,
2163 		 });
2164 	if (!fdq->wq) {
2165 		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2166 			dev->data->port_id);
2167 		rte_errno = errno;
2168 		goto error;
2169 	}
2170 	fdq->ind_table = mlx5_glue->create_rwq_ind_table
2171 		(priv->ctx,
2172 		 &(struct ibv_rwq_ind_table_init_attr){
2173 			.log_ind_tbl_size = 0,
2174 			.ind_tbl = &fdq->wq,
2175 			.comp_mask = 0,
2176 		 });
2177 	if (!fdq->ind_table) {
2178 		DRV_LOG(WARNING,
2179 			"port %u cannot allocate indirection table for drop"
2180 			" queue",
2181 			dev->data->port_id);
2182 		rte_errno = errno;
2183 		goto error;
2184 	}
2185 	fdq->qp = mlx5_glue->create_qp_ex
2186 		(priv->ctx,
2187 		 &(struct ibv_qp_init_attr_ex){
2188 			.qp_type = IBV_QPT_RAW_PACKET,
2189 			.comp_mask =
2190 				IBV_QP_INIT_ATTR_PD |
2191 				IBV_QP_INIT_ATTR_IND_TABLE |
2192 				IBV_QP_INIT_ATTR_RX_HASH,
2193 			.rx_hash_conf = (struct ibv_rx_hash_conf){
2194 				.rx_hash_function =
2195 					IBV_RX_HASH_FUNC_TOEPLITZ,
2196 				.rx_hash_key_len = rss_hash_default_key_len,
2197 				.rx_hash_key = rss_hash_default_key,
2198 				.rx_hash_fields_mask = 0,
2199 				},
2200 			.rwq_ind_tbl = fdq->ind_table,
2201 			.pd = priv->pd
2202 		 });
2203 	if (!fdq->qp) {
2204 		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2205 			dev->data->port_id);
2206 		rte_errno = errno;
2207 		goto error;
2208 	}
2209 	priv->flow_drop_queue = fdq;
2210 	return 0;
2211 error:
2212 	if (fdq->qp)
2213 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2214 	if (fdq->ind_table)
2215 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2216 	if (fdq->wq)
2217 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2218 	if (fdq->cq)
2219 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2220 	if (fdq)
2221 		rte_free(fdq);
2222 	priv->flow_drop_queue = NULL;
2223 	return -rte_errno;
2224 }
2225 
2226 /**
2227  * Delete drop queue.
2228  *
2229  * @param dev
2230  *   Pointer to Ethernet device.
2231  */
2232 void
2233 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2234 {
2235 	struct priv *priv = dev->data->dev_private;
2236 	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2237 
2238 	if (!fdq)
2239 		return;
2240 	if (fdq->qp)
2241 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2242 	if (fdq->ind_table)
2243 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2244 	if (fdq->wq)
2245 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2246 	if (fdq->cq)
2247 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2248 	rte_free(fdq);
2249 	priv->flow_drop_queue = NULL;
2250 }
2251 
2252 /**
2253  * Remove all flows from the NIC while keeping them in the list.
2254  *
2255  * @param dev
2256  *   Pointer to Ethernet device.
2257  * @param list
2258  *   Pointer to a TAILQ flow list.
2259  */
2260 void
2261 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2262 {
2263 	struct priv *priv = dev->data->dev_private;
2264 	struct rte_flow *flow;
2265 
2266 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2267 		unsigned int i;
2268 		struct mlx5_ind_table_ibv *ind_tbl = NULL;
2269 
2270 		if (flow->drop) {
2271 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2272 				continue;
2273 			claim_zero(mlx5_glue->destroy_flow
2274 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2275 			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2276 			DRV_LOG(DEBUG, "port %u flow %p removed",
2277 				dev->data->port_id, (void *)flow);
2278 			/* Next flow. */
2279 			continue;
2280 		}
2281 		/* Verify the flow has not already been cleaned. */
2282 		for (i = 0; i != hash_rxq_init_n; ++i) {
2283 			if (!flow->frxq[i].ibv_flow)
2284 				continue;
2285 			/*
2286 			 * Save the indirection table: it is needed below to
2287 			 * clear the mark flag on the Rx queues, and grabbing
2288 			 * it here avoids looping over the hash Rx queues a
2289 			 * second time.
2290 			 */
2291 			ind_tbl = flow->frxq[i].hrxq->ind_table;
2292 			break;
2293 		}
2294 		if (i == hash_rxq_init_n)
2295 			return;
2296 		if (flow->mark) {
2297 			assert(ind_tbl);
2298 			for (i = 0; i != ind_tbl->queues_n; ++i)
2299 				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2300 		}
2301 		for (i = 0; i != hash_rxq_init_n; ++i) {
2302 			if (!flow->frxq[i].ibv_flow)
2303 				continue;
2304 			claim_zero(mlx5_glue->destroy_flow
2305 				   (flow->frxq[i].ibv_flow));
2306 			flow->frxq[i].ibv_flow = NULL;
2307 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2308 			flow->frxq[i].hrxq = NULL;
2309 		}
2310 		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2311 			(void *)flow);
2312 	}
2313 }
2314 
2315 /**
2316  * Re-apply all flows in the list to the NIC.
2317  *
2318  * @param dev
2319  *   Pointer to Ethernet device.
2320  * @param list
2321  *   Pointer to a TAILQ flow list.
2322  *
2323  * @return
2324  *   0 on success, a negative errno value otherwise and rte_errno is set.
2325  */
2326 int
2327 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2328 {
2329 	struct priv *priv = dev->data->dev_private;
2330 	struct rte_flow *flow;
2331 
2332 	TAILQ_FOREACH(flow, list, next) {
2333 		unsigned int i;
2334 
2335 		if (flow->drop) {
2336 			flow->frxq[HASH_RXQ_ETH].ibv_flow =
2337 				mlx5_glue->create_flow
2338 				(priv->flow_drop_queue->qp,
2339 				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2340 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2341 				DRV_LOG(DEBUG,
2342 					"port %u flow %p cannot be applied",
2343 					dev->data->port_id, (void *)flow);
2344 				rte_errno = EINVAL;
2345 				return -rte_errno;
2346 			}
2347 			DRV_LOG(DEBUG, "port %u flow %p applied",
2348 				dev->data->port_id, (void *)flow);
2349 			/* Next flow. */
2350 			continue;
2351 		}
2352 		for (i = 0; i != hash_rxq_init_n; ++i) {
2353 			if (!flow->frxq[i].ibv_attr)
2354 				continue;
2355 			flow->frxq[i].hrxq =
2356 				mlx5_hrxq_get(dev, flow->rss_conf.rss_key,
2357 					      flow->rss_conf.rss_key_len,
2358 					      hash_rxq_init[i].hash_fields,
2359 					      (*flow->queues),
2360 					      flow->queues_n);
2361 			if (flow->frxq[i].hrxq)
2362 				goto flow_create;
2363 			flow->frxq[i].hrxq =
2364 				mlx5_hrxq_new(dev, flow->rss_conf.rss_key,
2365 					      flow->rss_conf.rss_key_len,
2366 					      hash_rxq_init[i].hash_fields,
2367 					      (*flow->queues),
2368 					      flow->queues_n);
2369 			if (!flow->frxq[i].hrxq) {
2370 				DRV_LOG(DEBUG,
2371 					"port %u flow %p cannot be applied",
2372 					dev->data->port_id, (void *)flow);
2373 				rte_errno = EINVAL;
2374 				return -rte_errno;
2375 			}
2376 flow_create:
2377 			flow->frxq[i].ibv_flow =
2378 				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2379 						       flow->frxq[i].ibv_attr);
2380 			if (!flow->frxq[i].ibv_flow) {
2381 				DRV_LOG(DEBUG,
2382 					"port %u flow %p cannot be applied",
2383 					dev->data->port_id, (void *)flow);
2384 				rte_errno = EINVAL;
2385 				return -rte_errno;
2386 			}
2387 			DRV_LOG(DEBUG, "port %u flow %p applied",
2388 				dev->data->port_id, (void *)flow);
2389 		}
2390 		if (!flow->mark)
2391 			continue;
2392 		for (i = 0; i != flow->queues_n; ++i)
2393 			(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2394 	}
2395 	return 0;
2396 }
2397 
2398 /**
2399  * Verify the flow list is empty.
2400  *
2401  * @param dev
2402  *   Pointer to Ethernet device.
2403  *
2404  * @return The number of flows not released.
2405  */
2406 int
2407 mlx5_flow_verify(struct rte_eth_dev *dev)
2408 {
2409 	struct priv *priv = dev->data->dev_private;
2410 	struct rte_flow *flow;
2411 	int ret = 0;
2412 
2413 	TAILQ_FOREACH(flow, &priv->flows, next) {
2414 		DRV_LOG(DEBUG, "port %u flow %p still referenced",
2415 			dev->data->port_id, (void *)flow);
2416 		++ret;
2417 	}
2418 	return ret;
2419 }
2420 
2421 /**
2422  * Enable a control flow configured from the control plane.
2423  *
2424  * @param dev
2425  *   Pointer to Ethernet device.
2426  * @param eth_spec
2427  *   An Ethernet flow spec to apply.
2428  * @param eth_mask
2429  *   An Ethernet flow mask to apply.
2430  * @param vlan_spec
2431  *   A VLAN flow spec to apply.
2432  * @param vlan_mask
2433  *   A VLAN flow mask to apply.
2434  *
2435  * @return
2436  *   0 on success, a negative errno value otherwise and rte_errno is set.
2437  */
2438 int
2439 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2440 		    struct rte_flow_item_eth *eth_spec,
2441 		    struct rte_flow_item_eth *eth_mask,
2442 		    struct rte_flow_item_vlan *vlan_spec,
2443 		    struct rte_flow_item_vlan *vlan_mask)
2444 {
2445 	struct priv *priv = dev->data->dev_private;
2446 	const struct rte_flow_attr attr = {
2447 		.ingress = 1,
2448 		.priority = MLX5_CTRL_FLOW_PRIORITY,
2449 	};
2450 	struct rte_flow_item items[] = {
2451 		{
2452 			.type = RTE_FLOW_ITEM_TYPE_ETH,
2453 			.spec = eth_spec,
2454 			.last = NULL,
2455 			.mask = eth_mask,
2456 		},
2457 		{
2458 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2459 				RTE_FLOW_ITEM_TYPE_END,
2460 			.spec = vlan_spec,
2461 			.last = NULL,
2462 			.mask = vlan_mask,
2463 		},
2464 		{
2465 			.type = RTE_FLOW_ITEM_TYPE_END,
2466 		},
2467 	};
2468 	struct rte_flow_action actions[] = {
2469 		{
2470 			.type = RTE_FLOW_ACTION_TYPE_RSS,
2471 		},
2472 		{
2473 			.type = RTE_FLOW_ACTION_TYPE_END,
2474 		},
2475 	};
2476 	struct rte_flow *flow;
2477 	struct rte_flow_error error;
2478 	unsigned int i;
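	/*
	 * Reserve storage for the RSS action: struct rte_flow_action_rss ends
	 * with a flexible queue[] array, so it is overlaid on a local layout
	 * large enough for RTE_MAX_QUEUES_PER_PORT entries.
	 */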
2479 	union {
2480 		struct rte_flow_action_rss rss;
2481 		struct {
2482 			const struct rte_eth_rss_conf *rss_conf;
2483 			uint16_t num;
2484 			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2485 		} local;
2486 	} action_rss;
2487 
2488 	if (!priv->reta_idx_n) {
2489 		rte_errno = EINVAL;
2490 		return -rte_errno;
2491 	}
2492 	for (i = 0; i != priv->reta_idx_n; ++i)
2493 		action_rss.local.queue[i] = (*priv->reta_idx)[i];
2494 	action_rss.local.rss_conf = &priv->rss_conf;
2495 	action_rss.local.num = priv->reta_idx_n;
2496 	actions[0].conf = (const void *)&action_rss.rss;
2497 	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2498 				     actions, &error);
2499 	if (!flow)
2500 		return -rte_errno;
2501 	return 0;
2502 }
2503 
2504 /**
2505  * Enable a control flow configured from the control plane.
2506  *
2507  * @param dev
2508  *   Pointer to Ethernet device.
2509  * @param eth_spec
2510  *   An Ethernet flow spec to apply.
2511  * @param eth_mask
2512  *   An Ethernet flow mask to apply.
2513  *
2514  * @return
2515  *   0 on success, a negative errno value otherwise and rte_errno is set.
2516  */
2517 int
2518 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2519 	       struct rte_flow_item_eth *eth_spec,
2520 	       struct rte_flow_item_eth *eth_mask)
2521 {
2522 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2523 }
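
/*
 * Illustrative sketch, not part of the driver: how the traffic-enabling code
 * typically uses the helper above to let broadcast frames in.  The all-ones
 * destination address doubles as its own mask.
 *
 *     struct rte_flow_item_eth bcast = {
 *         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *     };
 *
 *     if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *         DRV_LOG(ERR, "port %u cannot enable broadcast traffic",
 *                 dev->data->port_id);
 */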
2524 
2525 /**
2526  * Destroy a flow.
2527  *
2528  * @see rte_flow_destroy()
2529  * @see rte_flow_ops
2530  */
2531 int
2532 mlx5_flow_destroy(struct rte_eth_dev *dev,
2533 		  struct rte_flow *flow,
2534 		  struct rte_flow_error *error __rte_unused)
2535 {
2536 	struct priv *priv = dev->data->dev_private;
2537 
2538 	mlx5_flow_list_destroy(dev, &priv->flows, flow);
2539 	return 0;
2540 }
2541 
2542 /**
2543  * Destroy all flows.
2544  *
2545  * @see rte_flow_flush()
2546  * @see rte_flow_ops
2547  */
2548 int
2549 mlx5_flow_flush(struct rte_eth_dev *dev,
2550 		struct rte_flow_error *error __rte_unused)
2551 {
2552 	struct priv *priv = dev->data->dev_private;
2553 
2554 	mlx5_flow_list_flush(dev, &priv->flows);
2555 	return 0;
2556 }
2557 
2558 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2559 /**
2560  * Query flow counter.
2561  *
2562  * @param cs
2563  *   The counter set to query.
2564  * @param counter_stats
2565  *   Previously read counter values, used to compute the returned delta.
2566  *
2567  * @return
2568  *   0 on success, a negative errno value otherwise and rte_errno is set.
2569  */
2570 static int
2571 mlx5_flow_query_count(struct ibv_counter_set *cs,
2572 		      struct mlx5_flow_counter_stats *counter_stats,
2573 		      struct rte_flow_query_count *query_count,
2574 		      struct rte_flow_error *error)
2575 {
2576 	uint64_t counters[2];
2577 	struct ibv_query_counter_set_attr query_cs_attr = {
2578 		.cs = cs,
2579 		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2580 	};
2581 	struct ibv_counter_set_data query_out = {
2582 		.out = counters,
2583 		.outlen = 2 * sizeof(uint64_t),
2584 	};
2585 	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2586 
2587 	if (err)
2588 		return rte_flow_error_set(error, err,
2589 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2590 					  NULL,
2591 					  "cannot read counter");
2592 	query_count->hits_set = 1;
2593 	query_count->bytes_set = 1;
2594 	query_count->hits = counters[0] - counter_stats->hits;
2595 	query_count->bytes = counters[1] - counter_stats->bytes;
2596 	if (query_count->reset) {
2597 		counter_stats->hits = counters[0];
2598 		counter_stats->bytes = counters[1];
2599 	}
2600 	return 0;
2601 }
2602 
2603 /**
2604  * Query a flow.
2605  *
2606  * @see rte_flow_query()
2607  * @see rte_flow_ops
2608  */
2609 int
2610 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
2611 		struct rte_flow *flow,
2612 		enum rte_flow_action_type action __rte_unused,
2613 		void *data,
2614 		struct rte_flow_error *error)
2615 {
2616 	if (flow->cs) {
2617 		int ret;
2618 
2619 		ret = mlx5_flow_query_count(flow->cs,
2620 					    &flow->counter_stats,
2621 					    (struct rte_flow_query_count *)data,
2622 					    error);
2623 		if (ret)
2624 			return ret;
2625 	} else {
2626 		return rte_flow_error_set(error, EINVAL,
2627 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2628 					  NULL,
2629 					  "no counter found for flow");
2630 	}
2631 	return 0;
2632 }
2633 #endif
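
/*
 * Illustrative sketch, not part of the driver: reading the counters of a rule
 * created with a COUNT action through the generic API, which lands in
 * mlx5_flow_query() above.  The flow pointer is assumed to come from an
 * earlier rte_flow_create() call.
 *
 *     struct rte_flow_query_count count = { .reset = 1 };
 *     struct rte_flow_error err;
 *
 *     if (!rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *                         &count, &err) && count.hits_set)
 *         printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *                count.hits, count.bytes);
 */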
2634 
2635 /**
2636  * Isolated mode.
2637  *
2638  * @see rte_flow_isolate()
2639  * @see rte_flow_ops
2640  */
2641 int
2642 mlx5_flow_isolate(struct rte_eth_dev *dev,
2643 		  int enable,
2644 		  struct rte_flow_error *error)
2645 {
2646 	struct priv *priv = dev->data->dev_private;
2647 
2648 	if (dev->data->dev_started) {
2649 		rte_flow_error_set(error, EBUSY,
2650 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2651 				   NULL,
2652 				   "port must be stopped first");
2653 		return -rte_errno;
2654 	}
2655 	priv->isolated = !!enable;
2656 	if (enable)
2657 		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2658 	else
2659 		priv->dev->dev_ops = &mlx5_dev_ops;
2660 	return 0;
2661 }
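
/*
 * Illustrative sketch, not part of the driver: isolated mode must be selected
 * through the generic API while the port is stopped, otherwise the EBUSY
 * error above is returned.
 *
 *     struct rte_flow_error err;
 *
 *     if (rte_flow_isolate(port_id, 1, &err))
 *         printf("cannot enter isolated mode: %s\n",
 *                err.message ? err.message : "(no message)");
 *     rte_eth_dev_start(port_id);
 */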
2662 
2663 /**
2664  * Convert a flow director filter to a generic flow.
2665  *
2666  * @param dev
2667  *   Pointer to Ethernet device.
2668  * @param fdir_filter
2669  *   Flow director filter to add.
2670  * @param attributes
2671  *   Generic flow parameters structure.
2672  *
2673  * @return
2674  *   0 on success, a negative errno value otherwise and rte_errno is set.
2675  */
2676 static int
2677 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2678 			 const struct rte_eth_fdir_filter *fdir_filter,
2679 			 struct mlx5_fdir *attributes)
2680 {
2681 	struct priv *priv = dev->data->dev_private;
2682 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
2683 	const struct rte_eth_fdir_masks *mask =
2684 		&dev->data->dev_conf.fdir_conf.mask;
2685 
2686 	/* Validate queue number. */
2687 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2688 		DRV_LOG(ERR, "port %u invalid queue number %d",
2689 			dev->data->port_id, fdir_filter->action.rx_queue);
2690 		rte_errno = EINVAL;
2691 		return -rte_errno;
2692 	}
2693 	attributes->attr.ingress = 1;
2694 	attributes->items[0] = (struct rte_flow_item) {
2695 		.type = RTE_FLOW_ITEM_TYPE_ETH,
2696 		.spec = &attributes->l2,
2697 		.mask = &attributes->l2_mask,
2698 	};
2699 	switch (fdir_filter->action.behavior) {
2700 	case RTE_ETH_FDIR_ACCEPT:
2701 		attributes->actions[0] = (struct rte_flow_action){
2702 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
2703 			.conf = &attributes->queue,
2704 		};
2705 		break;
2706 	case RTE_ETH_FDIR_REJECT:
2707 		attributes->actions[0] = (struct rte_flow_action){
2708 			.type = RTE_FLOW_ACTION_TYPE_DROP,
2709 		};
2710 		break;
2711 	default:
2712 		DRV_LOG(ERR, "port %u invalid behavior %d",
2713 			dev->data->port_id,
2714 			fdir_filter->action.behavior);
2715 		rte_errno = ENOTSUP;
2716 		return -rte_errno;
2717 	}
2718 	attributes->queue.index = fdir_filter->action.rx_queue;
2719 	/* Handle L3. */
2720 	switch (fdir_filter->input.flow_type) {
2721 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2722 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2723 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2724 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2725 			.src_addr = input->flow.ip4_flow.src_ip,
2726 			.dst_addr = input->flow.ip4_flow.dst_ip,
2727 			.time_to_live = input->flow.ip4_flow.ttl,
2728 			.type_of_service = input->flow.ip4_flow.tos,
2729 			.next_proto_id = input->flow.ip4_flow.proto,
2730 		};
2731 		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
2732 			.src_addr = mask->ipv4_mask.src_ip,
2733 			.dst_addr = mask->ipv4_mask.dst_ip,
2734 			.time_to_live = mask->ipv4_mask.ttl,
2735 			.type_of_service = mask->ipv4_mask.tos,
2736 			.next_proto_id = mask->ipv4_mask.proto,
2737 		};
2738 		attributes->items[1] = (struct rte_flow_item){
2739 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2740 			.spec = &attributes->l3,
2741 			.mask = &attributes->l3_mask,
2742 		};
2743 		break;
2744 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2745 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2746 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2747 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2748 			.hop_limits = input->flow.ipv6_flow.hop_limits,
2749 			.proto = input->flow.ipv6_flow.proto,
2750 		};
2751 
2752 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2753 		       input->flow.ipv6_flow.src_ip,
2754 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2755 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2756 		       input->flow.ipv6_flow.dst_ip,
2757 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2758 		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
2759 		       mask->ipv6_mask.src_ip,
2760 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
2761 		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
2762 		       mask->ipv6_mask.dst_ip,
2763 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
2764 		attributes->items[1] = (struct rte_flow_item){
2765 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2766 			.spec = &attributes->l3,
2767 			.mask = &attributes->l3_mask,
2768 		};
2769 		break;
2770 	default:
2771 		DRV_LOG(ERR, "port %u invalid flow type %d",
2772 			dev->data->port_id, fdir_filter->input.flow_type);
2773 		rte_errno = ENOTSUP;
2774 		return -rte_errno;
2775 	}
2776 	/* Handle L4. */
2777 	switch (fdir_filter->input.flow_type) {
2778 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2779 		attributes->l4.udp.hdr = (struct udp_hdr){
2780 			.src_port = input->flow.udp4_flow.src_port,
2781 			.dst_port = input->flow.udp4_flow.dst_port,
2782 		};
2783 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
2784 			.src_port = mask->src_port_mask,
2785 			.dst_port = mask->dst_port_mask,
2786 		};
2787 		attributes->items[2] = (struct rte_flow_item){
2788 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2789 			.spec = &attributes->l4,
2790 			.mask = &attributes->l4_mask,
2791 		};
2792 		break;
2793 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2794 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2795 			.src_port = input->flow.tcp4_flow.src_port,
2796 			.dst_port = input->flow.tcp4_flow.dst_port,
2797 		};
2798 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2799 			.src_port = mask->src_port_mask,
2800 			.dst_port = mask->dst_port_mask,
2801 		};
2802 		attributes->items[2] = (struct rte_flow_item){
2803 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2804 			.spec = &attributes->l4,
2805 			.mask = &attributes->l4_mask,
2806 		};
2807 		break;
2808 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2809 		attributes->l4.udp.hdr = (struct udp_hdr){
2810 			.src_port = input->flow.udp6_flow.src_port,
2811 			.dst_port = input->flow.udp6_flow.dst_port,
2812 		};
2813 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
2814 			.src_port = mask->src_port_mask,
2815 			.dst_port = mask->dst_port_mask,
2816 		};
2817 		attributes->items[2] = (struct rte_flow_item){
2818 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2819 			.spec = &attributes->l4,
2820 			.mask = &attributes->l4_mask,
2821 		};
2822 		break;
2823 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2824 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2825 			.src_port = input->flow.tcp6_flow.src_port,
2826 			.dst_port = input->flow.tcp6_flow.dst_port,
2827 		};
2828 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2829 			.src_port = mask->src_port_mask,
2830 			.dst_port = mask->dst_port_mask,
2831 		};
2832 		attributes->items[2] = (struct rte_flow_item){
2833 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2834 			.spec = &attributes->l4,
2835 			.mask = &attributes->l4_mask,
2836 		};
2837 		break;
2838 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2839 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2840 		break;
2841 	default:
2842 		DRV_LOG(ERR, "port %u invalid flow type %d",
2843 			dev->data->port_id, fdir_filter->input.flow_type);
2844 		rte_errno = ENOTSUP;
2845 		return -rte_errno;
2846 	}
2847 	return 0;
2848 }
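
/*
 * Illustrative sketch, not part of the driver: a flow director filter that
 * the conversion above turns into an ETH / IPV4 / UDP pattern with a QUEUE
 * action, assuming the legacy rte_eth_fdir_filter layout.  Port numbers and
 * queue index are hypothetical; port values are given in network byte order
 * as they are copied verbatim into the rte_flow UDP header.
 *
 *     struct rte_eth_fdir_filter fdir = {
 *         .input = {
 *             .flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *             .flow.udp4_flow = {
 *                 .src_port = rte_cpu_to_be_16(4789),
 *                 .dst_port = rte_cpu_to_be_16(4789),
 *             },
 *         },
 *         .action = {
 *             .behavior = RTE_ETH_FDIR_ACCEPT,
 *             .rx_queue = 1,
 *         },
 *     };
 */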
2849 
2850 /**
2851  * Add new flow director filter and store it in list.
2852  *
2853  * @param dev
2854  *   Pointer to Ethernet device.
2855  * @param fdir_filter
2856  *   Flow director filter to add.
2857  *
2858  * @return
2859  *   0 on success, a negative errno value otherwise and rte_errno is set.
2860  */
2861 static int
2862 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
2863 		     const struct rte_eth_fdir_filter *fdir_filter)
2864 {
2865 	struct priv *priv = dev->data->dev_private;
2866 	struct mlx5_fdir attributes = {
2867 		.attr.group = 0,
2868 		.l2_mask = {
2869 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2870 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2871 			.type = 0,
2872 		},
2873 	};
2874 	struct mlx5_flow_parse parser = {
2875 		.layer = HASH_RXQ_ETH,
2876 	};
2877 	struct rte_flow_error error;
2878 	struct rte_flow *flow;
2879 	int ret;
2880 
2881 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2882 	if (ret)
2883 		return ret;
2884 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2885 				attributes.actions, &error, &parser);
2886 	if (ret)
2887 		return ret;
2888 	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
2889 				     attributes.items, attributes.actions,
2890 				     &error);
2891 	if (flow) {
2892 		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
2893 			(void *)flow);
2894 		return 0;
2895 	}
2896 	return -rte_errno;
2897 }
2898 
2899 /**
2900  * Delete specific filter.
2901  *
2902  * @param dev
2903  *   Pointer to Ethernet device.
2904  * @param fdir_filter
2905  *   Filter to be deleted.
2906  *
2907  * @return
2908  *   0 on success, a negative errno value otherwise and rte_errno is set.
2909  */
2910 static int
2911 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
2912 			const struct rte_eth_fdir_filter *fdir_filter)
2913 {
2914 	struct priv *priv = dev->data->dev_private;
2915 	struct mlx5_fdir attributes = {
2916 		.attr.group = 0,
2917 	};
2918 	struct mlx5_flow_parse parser = {
2919 		.create = 1,
2920 		.layer = HASH_RXQ_ETH,
2921 	};
2922 	struct rte_flow_error error;
2923 	struct rte_flow *flow;
2924 	unsigned int i;
2925 	int ret;
2926 
2927 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2928 	if (ret)
2929 		return ret;
2930 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2931 				attributes.actions, &error, &parser);
2932 	if (ret)
2933 		goto exit;
2934 	/*
2935 	 * Special case for the drop action: its specification is only
2936 	 * appended when a flow is actually created, so it is missing here
2937 	 * and must be added before comparing against the stored flows.
2938 	 */
2939 	if (parser.drop) {
2940 		struct ibv_flow_spec_action_drop *drop;
2941 
2942 		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2943 				parser.queue[HASH_RXQ_ETH].offset);
2944 		*drop = (struct ibv_flow_spec_action_drop){
2945 			.type = IBV_FLOW_SPEC_ACTION_DROP,
2946 			.size = sizeof(struct ibv_flow_spec_action_drop),
2947 		};
2948 		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2949 	}
2950 	TAILQ_FOREACH(flow, &priv->flows, next) {
2951 		struct ibv_flow_attr *attr;
2952 		struct ibv_spec_header *attr_h;
2953 		void *spec;
2954 		struct ibv_flow_attr *flow_attr;
2955 		struct ibv_spec_header *flow_h;
2956 		void *flow_spec;
2957 		unsigned int specs_n;
2958 
2959 		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2960 		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2961 		/* Compare first the attributes. */
2962 		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2963 			continue;
2964 		if (attr->num_of_specs == 0)
2965 			continue;
2966 		spec = (void *)((uintptr_t)attr +
2967 				sizeof(struct ibv_flow_attr));
2968 		flow_spec = (void *)((uintptr_t)flow_attr +
2969 				     sizeof(struct ibv_flow_attr));
2970 		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2971 		for (i = 0; i != specs_n; ++i) {
2972 			attr_h = spec;
2973 			flow_h = flow_spec;
2974 			if (memcmp(spec, flow_spec,
2975 				   RTE_MIN(attr_h->size, flow_h->size)))
2976 				goto wrong_flow;
2977 			spec = (void *)((uintptr_t)spec + attr_h->size);
2978 			flow_spec = (void *)((uintptr_t)flow_spec +
2979 					     flow_h->size);
2980 		}
2981 		/* At this point, the flows match. */
2982 		break;
2983 wrong_flow:
2984 		/* The flow does not match. */
2985 		continue;
2986 	}
2987 	ret = rte_errno; /* Save rte_errno before cleanup. */
2988 	if (flow)
2989 		mlx5_flow_list_destroy(dev, &priv->flows, flow);
2990 exit:
2991 	for (i = 0; i != hash_rxq_init_n; ++i) {
2992 		if (parser.queue[i].ibv_attr)
2993 			rte_free(parser.queue[i].ibv_attr);
2994 	}
2995 	rte_errno = ret; /* Restore rte_errno. */
2996 	return -rte_errno;
2997 }
2998 
2999 /**
3000  * Update queue for specific filter.
3001  *
3002  * @param dev
3003  *   Pointer to Ethernet device.
3004  * @param fdir_filter
3005  *   Filter to be updated.
3006  *
3007  * @return
3008  *   0 on success, a negative errno value otherwise and rte_errno is set.
3009  */
3010 static int
3011 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3012 			const struct rte_eth_fdir_filter *fdir_filter)
3013 {
3014 	int ret;
3015 
3016 	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3017 	if (ret)
3018 		return ret;
3019 	return mlx5_fdir_filter_add(dev, fdir_filter);
3020 }
3021 
3022 /**
3023  * Flush all filters.
3024  *
3025  * @param dev
3026  *   Pointer to Ethernet device.
3027  */
3028 static void
3029 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3030 {
3031 	struct priv *priv = dev->data->dev_private;
3032 
3033 	mlx5_flow_list_flush(dev, &priv->flows);
3034 }
3035 
3036 /**
3037  * Get flow director information.
3038  *
3039  * @param dev
3040  *   Pointer to Ethernet device.
3041  * @param[out] fdir_info
3042  *   Resulting flow director information.
3043  */
3044 static void
3045 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3046 {
3047 	struct priv *priv = dev->data->dev_private;
3048 	struct rte_eth_fdir_masks *mask =
3049 		&priv->dev->data->dev_conf.fdir_conf.mask;
3050 
3051 	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3052 	fdir_info->guarant_spc = 0;
3053 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3054 	fdir_info->max_flexpayload = 0;
3055 	fdir_info->flow_types_mask[0] = 0;
3056 	fdir_info->flex_payload_unit = 0;
3057 	fdir_info->max_flex_payload_segment_num = 0;
3058 	fdir_info->flex_payload_limit = 0;
3059 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3060 }
3061 
3062 /**
3063  * Deal with flow director operations.
3064  *
3065  * @param dev
3066  *   Pointer to Ethernet device.
3067  * @param filter_op
3068  *   Operation to perform.
3069  * @param arg
3070  *   Pointer to operation-specific structure.
3071  *
3072  * @return
3073  *   0 on success, a negative errno value otherwise and rte_errno is set.
3074  */
3075 static int
3076 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3077 		    void *arg)
3078 {
3079 	struct priv *priv = dev->data->dev_private;
3080 	enum rte_fdir_mode fdir_mode =
3081 		priv->dev->data->dev_conf.fdir_conf.mode;
3082 
3083 	if (filter_op == RTE_ETH_FILTER_NOP)
3084 		return 0;
3085 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3086 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3087 		DRV_LOG(ERR, "port %u flow director mode %d not supported",
3088 			dev->data->port_id, fdir_mode);
3089 		rte_errno = EINVAL;
3090 		return -rte_errno;
3091 	}
3092 	switch (filter_op) {
3093 	case RTE_ETH_FILTER_ADD:
3094 		return mlx5_fdir_filter_add(dev, arg);
3095 	case RTE_ETH_FILTER_UPDATE:
3096 		return mlx5_fdir_filter_update(dev, arg);
3097 	case RTE_ETH_FILTER_DELETE:
3098 		return mlx5_fdir_filter_delete(dev, arg);
3099 	case RTE_ETH_FILTER_FLUSH:
3100 		mlx5_fdir_filter_flush(dev);
3101 		break;
3102 	case RTE_ETH_FILTER_INFO:
3103 		mlx5_fdir_info_get(dev, arg);
3104 		break;
3105 	default:
3106 		DRV_LOG(DEBUG, "port %u unknown operation %u",
3107 			dev->data->port_id, filter_op);
3108 		rte_errno = EINVAL;
3109 		return -rte_errno;
3110 	}
3111 	return 0;
3112 }
3113 
3114 /**
3115  * Manage filter operations.
3116  *
3117  * @param dev
3118  *   Pointer to Ethernet device structure.
3119  * @param filter_type
3120  *   Filter type.
3121  * @param filter_op
3122  *   Operation to perform.
3123  * @param arg
3124  *   Pointer to operation-specific structure.
3125  *
3126  * @return
3127  *   0 on success, a negative errno value otherwise and rte_errno is set.
3128  */
3129 int
3130 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3131 		     enum rte_filter_type filter_type,
3132 		     enum rte_filter_op filter_op,
3133 		     void *arg)
3134 {
3135 	switch (filter_type) {
3136 	case RTE_ETH_FILTER_GENERIC:
3137 		if (filter_op != RTE_ETH_FILTER_GET) {
3138 			rte_errno = EINVAL;
3139 			return -rte_errno;
3140 		}
3141 		*(const void **)arg = &mlx5_flow_ops;
3142 		return 0;
3143 	case RTE_ETH_FILTER_FDIR:
3144 		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3145 	default:
3146 		DRV_LOG(ERR, "port %u filter type (%d) not supported",
3147 			dev->data->port_id, filter_type);
3148 		rte_errno = ENOTSUP;
3149 		return -rte_errno;
3150 	}
3151 	return 0;
3152 }
3153
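/*
 * Illustrative sketch, not part of the driver: both request paths served by
 * mlx5_dev_filter_ctrl() above.  The generic branch is how the rte_flow API
 * obtains the mlx5_flow_ops table; the flow director branch dispatches legacy
 * requests such as the filter sketched earlier (assumed here in "fdir").
 *
 *     const struct rte_flow_ops *ops = NULL;
 *
 *     rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *                             RTE_ETH_FILTER_GET, &ops);
 *     rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                             RTE_ETH_FILTER_ADD, &fdir);
 */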