xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision 929e3319342b7ae2ec0c0fc5c5a4e954037d7d29)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9 
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19 
20 #include <rte_common.h>
21 #include <rte_eth_ctrl.h>
22 #include <rte_ethdev_driver.h>
23 #include <rte_flow.h>
24 #include <rte_flow_driver.h>
25 #include <rte_malloc.h>
26 #include <rte_ip.h>
27 
28 #include "mlx5.h"
29 #include "mlx5_defs.h"
30 #include "mlx5_prm.h"
31 #include "mlx5_glue.h"
32 
33 /* Define minimal priority for control plane flows. */
34 #define MLX5_CTRL_FLOW_PRIORITY 4
35 
36 /* Internet Protocol versions. */
37 #define MLX5_IPV4 4
38 #define MLX5_IPV6 6
39 
40 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
41 struct ibv_flow_spec_counter_action {
42 	int dummy;
43 };
44 #endif
45 
46 /* Dev ops structure defined in mlx5.c */
47 extern const struct eth_dev_ops mlx5_dev_ops;
48 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
49 
50 /** Structure passed to the conversion functions. */
51 struct mlx5_flow_data {
52 	struct mlx5_flow_parse *parser; /**< Parser context. */
53 	struct rte_flow_error *error; /**< Error context. */
54 };
55 
56 static int
57 mlx5_flow_create_eth(const struct rte_flow_item *item,
58 		     const void *default_mask,
59 		     struct mlx5_flow_data *data);
60 
61 static int
62 mlx5_flow_create_vlan(const struct rte_flow_item *item,
63 		      const void *default_mask,
64 		      struct mlx5_flow_data *data);
65 
66 static int
67 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
68 		      const void *default_mask,
69 		      struct mlx5_flow_data *data);
70 
71 static int
72 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
73 		      const void *default_mask,
74 		      struct mlx5_flow_data *data);
75 
76 static int
77 mlx5_flow_create_udp(const struct rte_flow_item *item,
78 		     const void *default_mask,
79 		     struct mlx5_flow_data *data);
80 
81 static int
82 mlx5_flow_create_tcp(const struct rte_flow_item *item,
83 		     const void *default_mask,
84 		     struct mlx5_flow_data *data);
85 
86 static int
87 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
88 		       const void *default_mask,
89 		       struct mlx5_flow_data *data);
90 
91 struct mlx5_flow_parse;
92 
93 static void
94 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
95 		      unsigned int size);
96 
97 static int
98 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
99 
100 static int
101 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
102 
103 /* Hash RX queue types. */
104 enum hash_rxq_type {
105 	HASH_RXQ_TCPV4,
106 	HASH_RXQ_UDPV4,
107 	HASH_RXQ_IPV4,
108 	HASH_RXQ_TCPV6,
109 	HASH_RXQ_UDPV6,
110 	HASH_RXQ_IPV6,
111 	HASH_RXQ_ETH,
112 };
113 
114 /* Initialization data for hash RX queue. */
115 struct hash_rxq_init {
116 	uint64_t hash_fields; /* Fields that participate in the hash. */
117 	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
118 	unsigned int flow_priority; /* Flow priority to use. */
119 	unsigned int ip_version; /* Internet protocol. */
120 };
121 
122 /* Initialization data for hash RX queues. */
123 const struct hash_rxq_init hash_rxq_init[] = {
124 	[HASH_RXQ_TCPV4] = {
125 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
126 				IBV_RX_HASH_DST_IPV4 |
127 				IBV_RX_HASH_SRC_PORT_TCP |
128 				IBV_RX_HASH_DST_PORT_TCP),
129 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
130 		.flow_priority = 1,
131 		.ip_version = MLX5_IPV4,
132 	},
133 	[HASH_RXQ_UDPV4] = {
134 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
135 				IBV_RX_HASH_DST_IPV4 |
136 				IBV_RX_HASH_SRC_PORT_UDP |
137 				IBV_RX_HASH_DST_PORT_UDP),
138 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
139 		.flow_priority = 1,
140 		.ip_version = MLX5_IPV4,
141 	},
142 	[HASH_RXQ_IPV4] = {
143 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
144 				IBV_RX_HASH_DST_IPV4),
145 		.dpdk_rss_hf = (ETH_RSS_IPV4 |
146 				ETH_RSS_FRAG_IPV4),
147 		.flow_priority = 2,
148 		.ip_version = MLX5_IPV4,
149 	},
150 	[HASH_RXQ_TCPV6] = {
151 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
152 				IBV_RX_HASH_DST_IPV6 |
153 				IBV_RX_HASH_SRC_PORT_TCP |
154 				IBV_RX_HASH_DST_PORT_TCP),
155 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
156 		.flow_priority = 1,
157 		.ip_version = MLX5_IPV6,
158 	},
159 	[HASH_RXQ_UDPV6] = {
160 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
161 				IBV_RX_HASH_DST_IPV6 |
162 				IBV_RX_HASH_SRC_PORT_UDP |
163 				IBV_RX_HASH_DST_PORT_UDP),
164 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
165 		.flow_priority = 1,
166 		.ip_version = MLX5_IPV6,
167 	},
168 	[HASH_RXQ_IPV6] = {
169 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
170 				IBV_RX_HASH_DST_IPV6),
171 		.dpdk_rss_hf = (ETH_RSS_IPV6 |
172 				ETH_RSS_FRAG_IPV6),
173 		.flow_priority = 2,
174 		.ip_version = MLX5_IPV6,
175 	},
176 	[HASH_RXQ_ETH] = {
177 		.hash_fields = 0,
178 		.dpdk_rss_hf = 0,
179 		.flow_priority = 3,
180 	},
181 };
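
/*
 * Note: lower flow_priority values take precedence in Verbs, so the more
 * specific L4 flows (priority 1) are matched before the L3 flows (priority 2)
 * and the plain Ethernet flow (priority 3).
 */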
182 
183 /* Number of entries in hash_rxq_init[]. */
184 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
185 
186 /** Structure for holding counter stats. */
187 struct mlx5_flow_counter_stats {
188 	uint64_t hits; /**< Number of packets matched by the rule. */
189 	uint64_t bytes; /**< Number of bytes matched by the rule. */
190 };
191 
192 /** Structure for Drop queue. */
193 struct mlx5_hrxq_drop {
194 	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
195 	struct ibv_qp *qp; /**< Verbs queue pair. */
196 	struct ibv_wq *wq; /**< Verbs work queue. */
197 	struct ibv_cq *cq; /**< Verbs completion queue. */
198 };
199 
200 /* Flow structures. */
201 struct mlx5_flow {
202 	uint64_t hash_fields; /**< Fields that participate in the hash. */
203 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
204 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
205 	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
206 };
207 
208 /* Drop flow structures. */
209 struct mlx5_flow_drop {
210 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
211 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
212 };
213 
214 struct rte_flow {
215 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
216 	uint32_t mark:1; /**< Set if the flow is marked. */
217 	uint32_t drop:1; /**< Drop queue. */
218 	struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
219 	uint16_t (*queues)[]; /**< Queue indexes to use. */
220 	uint8_t rss_key[40]; /**< Copy of the RSS key. */
221 	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
222 	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
223 	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
224 	/**< Flow with Rx queue. */
225 };
226 
227 /** Static initializer for items. */
228 #define ITEMS(...) \
229 	(const enum rte_flow_item_type []){ \
230 		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
231 	}
232 
233 /** Structure to generate a simple graph of layers supported by the NIC. */
234 struct mlx5_flow_items {
235 	/** List of possible actions for these items. */
236 	const enum rte_flow_action_type *const actions;
237 	/** Bit-masks corresponding to the possibilities for the item. */
238 	const void *mask;
239 	/**
240 	 * Default bit-masks to use when item->mask is not provided. When
241 	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
242 	 * used instead.
243 	 */
244 	const void *default_mask;
245 	/** Bit-masks size in bytes. */
246 	const unsigned int mask_sz;
247 	/**
248 	 * Conversion function from rte_flow to NIC specific flow.
249 	 *
250 	 * @param item
251 	 *   rte_flow item to convert.
252 	 * @param default_mask
253 	 *   Default bit-masks to use when item->mask is not provided.
254 	 * @param data
255 	 *   Internal structure to store the conversion.
256 	 *
257 	 * @return
258 	 *   0 on success, a negative errno value otherwise and rte_errno is
259 	 *   set.
260 	 */
261 	int (*convert)(const struct rte_flow_item *item,
262 		       const void *default_mask,
263 		       struct mlx5_flow_data *data);
264 	/** Size in bytes of the destination structure. */
265 	const unsigned int dst_sz;
266 	/** List of possible following items.  */
267 	const enum rte_flow_item_type *const items;
268 };
269 
270 /** Valid action for this PMD. */
271 static const enum rte_flow_action_type valid_actions[] = {
272 	RTE_FLOW_ACTION_TYPE_DROP,
273 	RTE_FLOW_ACTION_TYPE_QUEUE,
274 	RTE_FLOW_ACTION_TYPE_MARK,
275 	RTE_FLOW_ACTION_TYPE_FLAG,
276 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
277 	RTE_FLOW_ACTION_TYPE_COUNT,
278 #endif
279 	RTE_FLOW_ACTION_TYPE_END,
280 };
281 
282 /** Graph of supported items and associated actions. */
283 static const struct mlx5_flow_items mlx5_flow_items[] = {
284 	[RTE_FLOW_ITEM_TYPE_END] = {
285 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
286 			       RTE_FLOW_ITEM_TYPE_VXLAN),
287 	},
288 	[RTE_FLOW_ITEM_TYPE_ETH] = {
289 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
290 			       RTE_FLOW_ITEM_TYPE_IPV4,
291 			       RTE_FLOW_ITEM_TYPE_IPV6),
292 		.actions = valid_actions,
293 		.mask = &(const struct rte_flow_item_eth){
294 			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
295 			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
296 			.type = -1,
297 		},
298 		.default_mask = &rte_flow_item_eth_mask,
299 		.mask_sz = sizeof(struct rte_flow_item_eth),
300 		.convert = mlx5_flow_create_eth,
301 		.dst_sz = sizeof(struct ibv_flow_spec_eth),
302 	},
303 	[RTE_FLOW_ITEM_TYPE_VLAN] = {
304 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
305 			       RTE_FLOW_ITEM_TYPE_IPV6),
306 		.actions = valid_actions,
307 		.mask = &(const struct rte_flow_item_vlan){
308 			.tci = -1,
309 		},
310 		.default_mask = &rte_flow_item_vlan_mask,
311 		.mask_sz = sizeof(struct rte_flow_item_vlan),
312 		.convert = mlx5_flow_create_vlan,
313 		.dst_sz = 0,
314 	},
315 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
316 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
317 			       RTE_FLOW_ITEM_TYPE_TCP),
318 		.actions = valid_actions,
319 		.mask = &(const struct rte_flow_item_ipv4){
320 			.hdr = {
321 				.src_addr = -1,
322 				.dst_addr = -1,
323 				.type_of_service = -1,
324 				.next_proto_id = -1,
325 			},
326 		},
327 		.default_mask = &rte_flow_item_ipv4_mask,
328 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
329 		.convert = mlx5_flow_create_ipv4,
330 		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
331 	},
332 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
333 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
334 			       RTE_FLOW_ITEM_TYPE_TCP),
335 		.actions = valid_actions,
336 		.mask = &(const struct rte_flow_item_ipv6){
337 			.hdr = {
338 				.src_addr = {
339 					0xff, 0xff, 0xff, 0xff,
340 					0xff, 0xff, 0xff, 0xff,
341 					0xff, 0xff, 0xff, 0xff,
342 					0xff, 0xff, 0xff, 0xff,
343 				},
344 				.dst_addr = {
345 					0xff, 0xff, 0xff, 0xff,
346 					0xff, 0xff, 0xff, 0xff,
347 					0xff, 0xff, 0xff, 0xff,
348 					0xff, 0xff, 0xff, 0xff,
349 				},
350 				.vtc_flow = -1,
351 				.proto = -1,
352 				.hop_limits = -1,
353 			},
354 		},
355 		.default_mask = &rte_flow_item_ipv6_mask,
356 		.mask_sz = sizeof(struct rte_flow_item_ipv6),
357 		.convert = mlx5_flow_create_ipv6,
358 		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
359 	},
360 	[RTE_FLOW_ITEM_TYPE_UDP] = {
361 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
362 		.actions = valid_actions,
363 		.mask = &(const struct rte_flow_item_udp){
364 			.hdr = {
365 				.src_port = -1,
366 				.dst_port = -1,
367 			},
368 		},
369 		.default_mask = &rte_flow_item_udp_mask,
370 		.mask_sz = sizeof(struct rte_flow_item_udp),
371 		.convert = mlx5_flow_create_udp,
372 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
373 	},
374 	[RTE_FLOW_ITEM_TYPE_TCP] = {
375 		.actions = valid_actions,
376 		.mask = &(const struct rte_flow_item_tcp){
377 			.hdr = {
378 				.src_port = -1,
379 				.dst_port = -1,
380 			},
381 		},
382 		.default_mask = &rte_flow_item_tcp_mask,
383 		.mask_sz = sizeof(struct rte_flow_item_tcp),
384 		.convert = mlx5_flow_create_tcp,
385 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
386 	},
387 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
388 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
389 		.actions = valid_actions,
390 		.mask = &(const struct rte_flow_item_vxlan){
391 			.vni = "\xff\xff\xff",
392 		},
393 		.default_mask = &rte_flow_item_vxlan_mask,
394 		.mask_sz = sizeof(struct rte_flow_item_vxlan),
395 		.convert = mlx5_flow_create_vxlan,
396 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
397 	},
398 };
399 
400 /** Structure to pass to the conversion function. */
401 struct mlx5_flow_parse {
402 	uint32_t inner; /**< Set once VXLAN is encountered. */
403 	uint32_t create:1;
404 	/**< Whether resources should remain after a validate. */
405 	uint32_t drop:1; /**< Target is a drop queue. */
406 	uint32_t mark:1; /**< Mark is present in the flow. */
407 	uint32_t count:1; /**< Count is present in the flow. */
408 	uint32_t mark_id; /**< Mark identifier. */
409 	struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
410 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
411 	uint8_t rss_key[40]; /**< Copy of the RSS key. */
412 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
413 	struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
414 	struct {
415 		struct ibv_flow_attr *ibv_attr;
416 		/**< Pointer to Verbs attributes. */
417 		unsigned int offset;
418 		/**< Current position or total size of the attribute. */
419 	} queue[RTE_DIM(hash_rxq_init)];
420 };
421 
422 static const struct rte_flow_ops mlx5_flow_ops = {
423 	.validate = mlx5_flow_validate,
424 	.create = mlx5_flow_create,
425 	.destroy = mlx5_flow_destroy,
426 	.flush = mlx5_flow_flush,
427 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
428 	.query = mlx5_flow_query,
429 #else
430 	.query = NULL,
431 #endif
432 	.isolate = mlx5_flow_isolate,
433 };
434 
435 /* Convert FDIR request to Generic flow. */
436 struct mlx5_fdir {
437 	struct rte_flow_attr attr;
438 	struct rte_flow_action actions[2];
439 	struct rte_flow_item items[4];
440 	struct rte_flow_item_eth l2;
441 	struct rte_flow_item_eth l2_mask;
442 	union {
443 		struct rte_flow_item_ipv4 ipv4;
444 		struct rte_flow_item_ipv6 ipv6;
445 	} l3;
446 	union {
447 		struct rte_flow_item_ipv4 ipv4;
448 		struct rte_flow_item_ipv6 ipv6;
449 	} l3_mask;
450 	union {
451 		struct rte_flow_item_udp udp;
452 		struct rte_flow_item_tcp tcp;
453 	} l4;
454 	union {
455 		struct rte_flow_item_udp udp;
456 		struct rte_flow_item_tcp tcp;
457 	} l4_mask;
458 	struct rte_flow_action_queue queue;
459 };
460 
461 /* Verbs specification header. */
462 struct ibv_spec_header {
463 	enum ibv_flow_spec_type type;
464 	uint16_t size;
465 };
466 
467 /**
468  * Check support for a given item.
469  *
470  * @param item[in]
471  *   Item specification.
472  * @param mask[in]
473  *   Bit-masks covering supported fields to compare with spec, last and mask in
474  *   \item.
475  * @param size
476  *   Bit-Mask size in bytes.
477  *
478  * @return
479  *   0 on success, a negative errno value otherwise and rte_errno is set.
480  */
481 static int
482 mlx5_flow_item_validate(const struct rte_flow_item *item,
483 			const uint8_t *mask, unsigned int size)
484 {
485 	if (!item->spec && (item->mask || item->last)) {
486 		rte_errno = EINVAL;
487 		return -rte_errno;
488 	}
489 	if (item->spec && !item->mask) {
490 		unsigned int i;
491 		const uint8_t *spec = item->spec;
492 
493 		for (i = 0; i < size; ++i)
494 			if ((spec[i] | mask[i]) != mask[i]) {
495 				rte_errno = EINVAL;
496 				return -rte_errno;
497 			}
498 	}
499 	if (item->last && !item->mask) {
500 		unsigned int i;
501 		const uint8_t *spec = item->last;
502 
503 		for (i = 0; i < size; ++i)
504 			if ((spec[i] | mask[i]) != mask[i]) {
505 				rte_errno = EINVAL;
506 				return -rte_errno;
507 			}
508 	}
509 	if (item->mask) {
510 		unsigned int i;
511 		const uint8_t *spec = item->mask;
512 
513 		for (i = 0; i < size; ++i)
514 			if ((spec[i] | mask[i]) != mask[i]) {
515 				rte_errno = EINVAL;
516 				return -rte_errno;
517 			}
518 	}
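	/*
	 * When both spec and last are provided, they must be identical once
	 * the applicable mask is applied: ranges over masked bits are not
	 * supported.
	 */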
519 	if (item->spec && item->last) {
520 		uint8_t spec[size];
521 		uint8_t last[size];
522 		const uint8_t *apply = mask;
523 		unsigned int i;
524 		int ret;
525 
526 		if (item->mask)
527 			apply = item->mask;
528 		for (i = 0; i < size; ++i) {
529 			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
530 			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
531 		}
532 		ret = memcmp(spec, last, size);
533 		if (ret != 0) {
534 			rte_errno = EINVAL;
535 			return -rte_errno;
536 		}
537 	}
538 	return 0;
539 }
540 
541 /**
542  * Validate flow rule attributes.
543  *
544  * @param[in] attr
545  *   Flow rule attributes.
546  * @param[out] error
547  *   Perform verbose error reporting if not NULL.
548  *
549  * @return
550  *   0 on success, a negative errno value otherwise and rte_errno is set.
551  */
552 static int
553 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
554 			     struct rte_flow_error *error)
555 {
556 	if (attr->group) {
557 		rte_flow_error_set(error, ENOTSUP,
558 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
559 				   NULL,
560 				   "groups are not supported");
561 		return -rte_errno;
562 	}
563 	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
564 		rte_flow_error_set(error, ENOTSUP,
565 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
566 				   NULL,
567 				   "priorities are not supported");
568 		return -rte_errno;
569 	}
570 	if (attr->egress) {
571 		rte_flow_error_set(error, ENOTSUP,
572 				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
573 				   NULL,
574 				   "egress is not supported");
575 		return -rte_errno;
576 	}
577 	if (!attr->ingress) {
578 		rte_flow_error_set(error, ENOTSUP,
579 				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
580 				   NULL,
581 				   "only ingress is supported");
582 		return -rte_errno;
583 	}
584 	return 0;
585 }
586 
587 /**
588  * Extract the requested actions into the parser.
589  *
590  * @param dev
591  *   Pointer to Ethernet device.
592  * @param[in] actions
593  *   Associated actions (list terminated by the END action).
594  * @param[out] error
595  *   Perform verbose error reporting if not NULL.
596  * @param[in, out] parser
597  *   Internal parser structure.
598  *
599  * @return
600  *   0 on success, a negative errno value otherwise and rte_errno is set.
601  */
602 static int
603 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
604 			  const struct rte_flow_action actions[],
605 			  struct rte_flow_error *error,
606 			  struct mlx5_flow_parse *parser)
607 {
608 	enum { FATE = 1, MARK = 2, COUNT = 4, };
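	/*
	 * Each bit of "overlap" records an action category already parsed;
	 * a second fate (drop/queue/RSS), mark/flag or count action in the
	 * same rule is rejected as overlapping.
	 */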
609 	uint32_t overlap = 0;
610 	struct priv *priv = dev->data->dev_private;
611 
612 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
613 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
614 			continue;
615 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
616 			if (overlap & FATE)
617 				goto exit_action_overlap;
618 			overlap |= FATE;
619 			parser->drop = 1;
620 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
621 			const struct rte_flow_action_queue *queue =
622 				(const struct rte_flow_action_queue *)
623 				actions->conf;
624 
625 			if (overlap & FATE)
626 				goto exit_action_overlap;
627 			overlap |= FATE;
628 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
629 				goto exit_action_not_supported;
630 			parser->queues[0] = queue->index;
631 			parser->rss_conf = (struct rte_flow_action_rss){
632 				.queue_num = 1,
633 				.queue = parser->queues,
634 			};
635 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
636 			const struct rte_flow_action_rss *rss =
637 				(const struct rte_flow_action_rss *)
638 				actions->conf;
639 			const uint8_t *rss_key;
640 			uint32_t rss_key_len;
641 			uint16_t n;
642 
643 			if (overlap & FATE)
644 				goto exit_action_overlap;
645 			overlap |= FATE;
646 			if (rss->func &&
647 			    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
648 				rte_flow_error_set(error, EINVAL,
649 						   RTE_FLOW_ERROR_TYPE_ACTION,
650 						   actions,
651 						   "the only supported RSS hash"
652 						   " function is Toeplitz");
653 				return -rte_errno;
654 			}
655 			if (rss->types & MLX5_RSS_HF_MASK) {
656 				rte_flow_error_set(error, EINVAL,
657 						   RTE_FLOW_ERROR_TYPE_ACTION,
658 						   actions,
659 						   "unsupported RSS type"
660 						   " requested");
661 				return -rte_errno;
662 			}
663 			if (rss->key_len) {
664 				rss_key_len = rss->key_len;
665 				rss_key = rss->key;
666 			} else {
667 				rss_key_len = rss_hash_default_key_len;
668 				rss_key = rss_hash_default_key;
669 			}
670 			if (rss_key_len != RTE_DIM(parser->rss_key)) {
671 				rte_flow_error_set(error, EINVAL,
672 						   RTE_FLOW_ERROR_TYPE_ACTION,
673 						   actions,
674 						   "RSS hash key must be"
675 						   " exactly 40 bytes long");
676 				return -rte_errno;
677 			}
678 			if (!rss->queue_num) {
679 				rte_flow_error_set(error, EINVAL,
680 						   RTE_FLOW_ERROR_TYPE_ACTION,
681 						   actions,
682 						   "no valid queues");
683 				return -rte_errno;
684 			}
685 			if (rss->queue_num > RTE_DIM(parser->queues)) {
686 				rte_flow_error_set(error, EINVAL,
687 						   RTE_FLOW_ERROR_TYPE_ACTION,
688 						   actions,
689 						   "too many queues for RSS"
690 						   " context");
691 				return -rte_errno;
692 			}
693 			for (n = 0; n < rss->queue_num; ++n) {
694 				if (rss->queue[n] >= priv->rxqs_n) {
695 					rte_flow_error_set(error, EINVAL,
696 						   RTE_FLOW_ERROR_TYPE_ACTION,
697 						   actions,
698 						   "queue id > number of"
699 						   " queues");
700 					return -rte_errno;
701 				}
702 			}
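			/*
			 * Copy the key and queue list into parser-owned
			 * storage so the flow does not keep references to
			 * user memory.
			 */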
703 			parser->rss_conf = (struct rte_flow_action_rss){
704 				.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
705 				.types = rss->types,
706 				.key_len = rss_key_len,
707 				.queue_num = rss->queue_num,
708 				.key = memcpy(parser->rss_key, rss_key,
709 					      sizeof(*rss_key) * rss_key_len),
710 				.queue = memcpy(parser->queues, rss->queue,
711 						sizeof(*rss->queue) *
712 						rss->queue_num),
713 			};
714 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
715 			const struct rte_flow_action_mark *mark =
716 				(const struct rte_flow_action_mark *)
717 				actions->conf;
718 
719 			if (overlap & MARK)
720 				goto exit_action_overlap;
721 			overlap |= MARK;
722 			if (!mark) {
723 				rte_flow_error_set(error, EINVAL,
724 						   RTE_FLOW_ERROR_TYPE_ACTION,
725 						   actions,
726 						   "mark must be defined");
727 				return -rte_errno;
728 			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
729 				rte_flow_error_set(error, ENOTSUP,
730 						   RTE_FLOW_ERROR_TYPE_ACTION,
731 						   actions,
732 						   "mark must be between 0"
733 						   " and 16777199");
734 				return -rte_errno;
735 			}
736 			parser->mark = 1;
737 			parser->mark_id = mark->id;
738 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
739 			if (overlap & MARK)
740 				goto exit_action_overlap;
741 			overlap |= MARK;
742 			parser->mark = 1;
743 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
744 			   priv->config.flow_counter_en) {
745 			if (overlap & COUNT)
746 				goto exit_action_overlap;
747 			overlap |= COUNT;
748 			parser->count = 1;
749 		} else {
750 			goto exit_action_not_supported;
751 		}
752 	}
753 	/* When fate is unknown, drop traffic. */
754 	if (!(overlap & FATE))
755 		parser->drop = 1;
756 	if (parser->drop && parser->mark)
757 		parser->mark = 0;
758 	if (!parser->rss_conf.queue_num && !parser->drop) {
759 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
760 				   NULL, "no valid action");
761 		return -rte_errno;
762 	}
763 	return 0;
764 exit_action_not_supported:
765 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
766 			   actions, "action not supported");
767 	return -rte_errno;
768 exit_action_overlap:
769 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
770 			   actions, "overlapping actions are not supported");
771 	return -rte_errno;
772 }
773 
774 /**
775  * Validate items.
776  *
777  * @param[in] items
778  *   Pattern specification (list terminated by the END pattern item).
779  * @param[out] error
780  *   Perform verbose error reporting if not NULL.
781  * @param[in, out] parser
782  *   Internal parser structure.
783  *
784  * @return
785  *   0 on success, a negative errno value otherwise and rte_errno is set.
786  */
787 static int
788 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
789 				 struct rte_flow_error *error,
790 				 struct mlx5_flow_parse *parser)
791 {
792 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
793 	unsigned int i;
794 	int ret = 0;
795 
796 	/* Initialise the offsets to start after verbs attribute. */
797 	for (i = 0; i != hash_rxq_init_n; ++i)
798 		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
799 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
800 		const struct mlx5_flow_items *token = NULL;
801 		unsigned int n;
802 
803 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
804 			continue;
805 		for (i = 0;
806 		     cur_item->items &&
807 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
808 		     ++i) {
809 			if (cur_item->items[i] == items->type) {
810 				token = &mlx5_flow_items[items->type];
811 				break;
812 			}
813 		}
814 		if (!token) {
815 			ret = -ENOTSUP;
816 			goto exit_item_not_supported;
817 		}
818 		cur_item = token;
819 		ret = mlx5_flow_item_validate(items,
820 					      (const uint8_t *)cur_item->mask,
821 					      cur_item->mask_sz);
822 		if (ret)
823 			goto exit_item_not_supported;
824 		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
825 			if (parser->inner) {
826 				rte_flow_error_set(error, ENOTSUP,
827 						   RTE_FLOW_ERROR_TYPE_ITEM,
828 						   items,
829 						   "cannot recognize multiple"
830 						   " VXLAN encapsulations");
831 				return -rte_errno;
832 			}
833 			parser->inner = IBV_FLOW_SPEC_INNER;
834 		}
835 		if (parser->drop) {
836 			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
837 		} else {
838 			for (n = 0; n != hash_rxq_init_n; ++n)
839 				parser->queue[n].offset += cur_item->dst_sz;
840 		}
841 	}
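	/*
	 * Reserve additional room for the drop, tag (mark/flag) and counter
	 * specifications appended after the pattern items.
	 */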
842 	if (parser->drop) {
843 		parser->queue[HASH_RXQ_ETH].offset +=
844 			sizeof(struct ibv_flow_spec_action_drop);
845 	}
846 	if (parser->mark) {
847 		for (i = 0; i != hash_rxq_init_n; ++i)
848 			parser->queue[i].offset +=
849 				sizeof(struct ibv_flow_spec_action_tag);
850 	}
851 	if (parser->count) {
852 		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
853 
854 		for (i = 0; i != hash_rxq_init_n; ++i)
855 			parser->queue[i].offset += size;
856 	}
857 	return 0;
858 exit_item_not_supported:
859 	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
860 				  items, "item not supported");
861 }
862 
863 /**
864  * Allocate memory space to store verbs flow attributes.
865  *
866  * @param[in] size
867  *   Number of bytes to allocate.
868  * @param[out] error
869  *   Perform verbose error reporting if not NULL.
870  *
871  * @return
872  *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
873  */
874 static struct ibv_flow_attr *
875 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
876 {
877 	struct ibv_flow_attr *ibv_attr;
878 
879 	ibv_attr = rte_calloc(__func__, 1, size, 0);
880 	if (!ibv_attr) {
881 		rte_flow_error_set(error, ENOMEM,
882 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
883 				   NULL,
884 				   "cannot allocate verbs spec attributes");
885 		return NULL;
886 	}
887 	return ibv_attr;
888 }
889 
890 /**
891  * Give inner packet matching a higher priority than the non-inner
892  * matching.
893  *
894  * @param[in, out] parser
895  *   Internal parser structure.
896  * @param attr
897  *   User flow attribute.
898  */
899 static void
900 mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
901 			  const struct rte_flow_attr *attr)
902 {
903 	unsigned int i;
904 
905 	if (parser->drop) {
906 		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
907 			attr->priority +
908 			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
909 		return;
910 	}
911 	for (i = 0; i != hash_rxq_init_n; ++i) {
912 		if (parser->queue[i].ibv_attr) {
913 			parser->queue[i].ibv_attr->priority =
914 				attr->priority +
915 				hash_rxq_init[i].flow_priority -
916 				(parser->inner ? 1 : 0);
917 		}
918 	}
919 }
920 
921 /**
922  * Finalise verbs flow attributes.
923  *
924  * @param[in, out] parser
925  *   Internal parser structure.
926  */
927 static void
928 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
929 {
930 	const unsigned int ipv4 =
931 		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
932 	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
933 	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
934 	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
935 	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
936 	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
937 	unsigned int i;
938 
939 	/* A single queue without RSS types only needs the L2 flow. */
940 	if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {
941 		for (i = 0; i != hash_rxq_init_n; ++i) {
942 			if (i == HASH_RXQ_ETH)
943 				continue;
944 			rte_free(parser->queue[i].ibv_attr);
945 			parser->queue[i].ibv_attr = NULL;
946 		}
947 		return;
948 	}
949 	if (parser->layer == HASH_RXQ_ETH) {
950 		goto fill;
951 	} else {
952 		/*
953 		 * This layer becomes useless as the pattern defines layers
954 		 * underneath it.
955 		 */
956 		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
957 		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
958 	}
959 	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
960 	for (i = ohmin; i != (ohmax + 1); ++i) {
961 		if (!parser->queue[i].ibv_attr)
962 			continue;
963 		rte_free(parser->queue[i].ibv_attr);
964 		parser->queue[i].ibv_attr = NULL;
965 	}
966 	/* Remove impossible flow according to the RSS configuration. */
967 	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
968 	    parser->rss_conf.types) {
969 		/* Remove any other flow. */
970 		for (i = hmin; i != (hmax + 1); ++i) {
971 			if ((i == parser->layer) ||
972 			     (!parser->queue[i].ibv_attr))
973 				continue;
974 			rte_free(parser->queue[i].ibv_attr);
975 			parser->queue[i].ibv_attr = NULL;
976 		}
977 	} else  if (!parser->queue[ip].ibv_attr) {
978 		/* no RSS possible with the current configuration. */
979 		parser->rss_conf.queue_num = 1;
980 		return;
981 	}
982 fill:
983 	/*
984 	 * Fill missing layers in verbs specifications, or compute the correct
985 	 * offset to allocate the memory space for the attributes and
986 	 * specifications.
987 	 */
988 	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
989 		union {
990 			struct ibv_flow_spec_ipv4_ext ipv4;
991 			struct ibv_flow_spec_ipv6 ipv6;
992 			struct ibv_flow_spec_tcp_udp udp_tcp;
993 		} specs;
994 		void *dst;
995 		uint16_t size;
996 
997 		if (i == parser->layer)
998 			continue;
999 		if (parser->layer == HASH_RXQ_ETH) {
1000 			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1001 				size = sizeof(struct ibv_flow_spec_ipv4_ext);
1002 				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1003 					.type = IBV_FLOW_SPEC_IPV4_EXT,
1004 					.size = size,
1005 				};
1006 			} else {
1007 				size = sizeof(struct ibv_flow_spec_ipv6);
1008 				specs.ipv6 = (struct ibv_flow_spec_ipv6){
1009 					.type = IBV_FLOW_SPEC_IPV6,
1010 					.size = size,
1011 				};
1012 			}
1013 			if (parser->queue[i].ibv_attr) {
1014 				dst = (void *)((uintptr_t)
1015 					       parser->queue[i].ibv_attr +
1016 					       parser->queue[i].offset);
1017 				memcpy(dst, &specs, size);
1018 				++parser->queue[i].ibv_attr->num_of_specs;
1019 			}
1020 			parser->queue[i].offset += size;
1021 		}
1022 		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1023 		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1024 			size = sizeof(struct ibv_flow_spec_tcp_udp);
1025 			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1026 				.type = ((i == HASH_RXQ_UDPV4 ||
1027 					  i == HASH_RXQ_UDPV6) ?
1028 					 IBV_FLOW_SPEC_UDP :
1029 					 IBV_FLOW_SPEC_TCP),
1030 				.size = size,
1031 			};
1032 			if (parser->queue[i].ibv_attr) {
1033 				dst = (void *)((uintptr_t)
1034 					       parser->queue[i].ibv_attr +
1035 					       parser->queue[i].offset);
1036 				memcpy(dst, &specs, size);
1037 				++parser->queue[i].ibv_attr->num_of_specs;
1038 			}
1039 			parser->queue[i].offset += size;
1040 		}
1041 	}
1042 }
1043 
1044 /**
1045  * Validate and convert a flow supported by the NIC.
1046  *
1047  * @param dev
1048  *   Pointer to Ethernet device.
1049  * @param[in] attr
1050  *   Flow rule attributes.
1051  * @param[in] pattern
1052  *   Pattern specification (list terminated by the END pattern item).
1053  * @param[in] actions
1054  *   Associated actions (list terminated by the END action).
1055  * @param[out] error
1056  *   Perform verbose error reporting if not NULL.
1057  * @param[in, out] parser
1058  *   Internal parser structure.
1059  *
1060  * @return
1061  *   0 on success, a negative errno value otherwise and rte_errno is set.
1062  */
1063 static int
1064 mlx5_flow_convert(struct rte_eth_dev *dev,
1065 		  const struct rte_flow_attr *attr,
1066 		  const struct rte_flow_item items[],
1067 		  const struct rte_flow_action actions[],
1068 		  struct rte_flow_error *error,
1069 		  struct mlx5_flow_parse *parser)
1070 {
1071 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1072 	unsigned int i;
1073 	int ret;
1074 
1075 	/* First step. Validate the attributes, items and actions. */
1076 	*parser = (struct mlx5_flow_parse){
1077 		.create = parser->create,
1078 		.layer = HASH_RXQ_ETH,
1079 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
1080 	};
1081 	ret = mlx5_flow_convert_attributes(attr, error);
1082 	if (ret)
1083 		return ret;
1084 	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1085 	if (ret)
1086 		return ret;
1087 	ret = mlx5_flow_convert_items_validate(items, error, parser);
1088 	if (ret)
1089 		return ret;
1090 	mlx5_flow_convert_finalise(parser);
1091 	/*
1092 	 * Second step.
1093 	 * Allocate the memory space to store verbs specifications.
1094 	 */
1095 	if (parser->drop) {
1096 		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1097 
1098 		parser->queue[HASH_RXQ_ETH].ibv_attr =
1099 			mlx5_flow_convert_allocate(offset, error);
1100 		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1101 			goto exit_enomem;
1102 		parser->queue[HASH_RXQ_ETH].offset =
1103 			sizeof(struct ibv_flow_attr);
1104 	} else {
1105 		for (i = 0; i != hash_rxq_init_n; ++i) {
1106 			unsigned int offset;
1107 
1108 			if (!(parser->rss_conf.types &
1109 			      hash_rxq_init[i].dpdk_rss_hf) &&
1110 			    (i != HASH_RXQ_ETH))
1111 				continue;
1112 			offset = parser->queue[i].offset;
1113 			parser->queue[i].ibv_attr =
1114 				mlx5_flow_convert_allocate(offset, error);
1115 			if (!parser->queue[i].ibv_attr)
1116 				goto exit_enomem;
1117 			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1118 		}
1119 	}
1120 	/* Third step. Conversion parse, fill the specifications. */
1121 	parser->inner = 0;
1122 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1123 		struct mlx5_flow_data data = {
1124 			.parser = parser,
1125 			.error = error,
1126 		};
1127 
1128 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1129 			continue;
1130 		cur_item = &mlx5_flow_items[items->type];
1131 		ret = cur_item->convert(items,
1132 					(cur_item->default_mask ?
1133 					 cur_item->default_mask :
1134 					 cur_item->mask),
1135 					 &data);
1136 		if (ret)
1137 			goto exit_free;
1138 	}
1139 	if (parser->mark)
1140 		mlx5_flow_create_flag_mark(parser, parser->mark_id);
1141 	if (parser->count && parser->create) {
1142 		mlx5_flow_create_count(dev, parser);
1143 		if (!parser->cs)
1144 			goto exit_count_error;
1145 	}
1146 	/*
1147 	 * Last step. Complete the missing specifications to match the RSS
1148 	 * configuration.
1149 	 */
1150 	if (!parser->drop)
1151 		mlx5_flow_convert_finalise(parser);
1152 	mlx5_flow_update_priority(parser, attr);
1153 exit_free:
1154 	/* When only validation is requested, all resources must be released. */
1155 	if (!parser->create) {
1156 		for (i = 0; i != hash_rxq_init_n; ++i) {
1157 			if (parser->queue[i].ibv_attr) {
1158 				rte_free(parser->queue[i].ibv_attr);
1159 				parser->queue[i].ibv_attr = NULL;
1160 			}
1161 		}
1162 	}
1163 	return ret;
1164 exit_enomem:
1165 	for (i = 0; i != hash_rxq_init_n; ++i) {
1166 		if (parser->queue[i].ibv_attr) {
1167 			rte_free(parser->queue[i].ibv_attr);
1168 			parser->queue[i].ibv_attr = NULL;
1169 		}
1170 	}
1171 	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1172 			   NULL, "cannot allocate verbs spec attributes");
1173 	return -rte_errno;
1174 exit_count_error:
1175 	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1176 			   NULL, "cannot create counter");
1177 	return -rte_errno;
1178 }
1179 
1180 /**
1181  * Copy the specification created into the flow.
1182  *
1183  * @param parser
1184  *   Internal parser structure.
1185  * @param src
1186  *   Specification to copy.
1187  * @param size
1188  *   Size in bytes of the specification to copy.
1189  */
1190 static void
1191 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1192 		      unsigned int size)
1193 {
1194 	unsigned int i;
1195 	void *dst;
1196 
1197 	for (i = 0; i != hash_rxq_init_n; ++i) {
1198 		if (!parser->queue[i].ibv_attr)
1199 			continue;
1200 		/* Specification must be the same l3 type or none. */
1201 		if (parser->layer == HASH_RXQ_ETH ||
1202 		    (hash_rxq_init[parser->layer].ip_version ==
1203 		     hash_rxq_init[i].ip_version) ||
1204 		    (hash_rxq_init[i].ip_version == 0)) {
1205 			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1206 					parser->queue[i].offset);
1207 			memcpy(dst, src, size);
1208 			++parser->queue[i].ibv_attr->num_of_specs;
1209 			parser->queue[i].offset += size;
1210 		}
1211 	}
1212 }
1213 
1214 /**
1215  * Convert Ethernet item to Verbs specification.
1216  *
1217  * @param item[in]
1218  *   Item specification.
1219  * @param default_mask[in]
1220  *   Default bit-masks to use when item->mask is not provided.
1221  * @param data[in, out]
1222  *   User structure.
1223  *
1224  * @return
1225  *   0 on success, a negative errno value otherwise and rte_errno is set.
1226  */
1227 static int
1228 mlx5_flow_create_eth(const struct rte_flow_item *item,
1229 		     const void *default_mask,
1230 		     struct mlx5_flow_data *data)
1231 {
1232 	const struct rte_flow_item_eth *spec = item->spec;
1233 	const struct rte_flow_item_eth *mask = item->mask;
1234 	struct mlx5_flow_parse *parser = data->parser;
1235 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1236 	struct ibv_flow_spec_eth eth = {
1237 		.type = parser->inner | IBV_FLOW_SPEC_ETH,
1238 		.size = eth_size,
1239 	};
1240 
1241 	/* Don't update layer for the inner pattern. */
1242 	if (!parser->inner)
1243 		parser->layer = HASH_RXQ_ETH;
1244 	if (spec) {
1245 		unsigned int i;
1246 
1247 		if (!mask)
1248 			mask = default_mask;
1249 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1250 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1251 		eth.val.ether_type = spec->type;
1252 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1253 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1254 		eth.mask.ether_type = mask->type;
1255 		/* Remove unwanted bits from values. */
1256 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1257 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1258 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
1259 		}
1260 		eth.val.ether_type &= eth.mask.ether_type;
1261 	}
1262 	mlx5_flow_create_copy(parser, &eth, eth_size);
1263 	return 0;
1264 }
1265 
1266 /**
1267  * Convert VLAN item to Verbs specification.
1268  *
1269  * @param item[in]
1270  *   Item specification.
1271  * @param default_mask[in]
1272  *   Default bit-masks to use when item->mask is not provided.
1273  * @param data[in, out]
1274  *   User structure.
1275  *
1276  * @return
1277  *   0 on success, a negative errno value otherwise and rte_errno is set.
1278  */
1279 static int
1280 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1281 		      const void *default_mask,
1282 		      struct mlx5_flow_data *data)
1283 {
1284 	const struct rte_flow_item_vlan *spec = item->spec;
1285 	const struct rte_flow_item_vlan *mask = item->mask;
1286 	struct mlx5_flow_parse *parser = data->parser;
1287 	struct ibv_flow_spec_eth *eth;
1288 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1289 
1290 	if (spec) {
1291 		unsigned int i;
1292 		if (!mask)
1293 			mask = default_mask;
1294 
1295 		for (i = 0; i != hash_rxq_init_n; ++i) {
1296 			if (!parser->queue[i].ibv_attr)
1297 				continue;
1298 
1299 			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1300 				       parser->queue[i].offset - eth_size);
1301 			eth->val.vlan_tag = spec->tci;
1302 			eth->mask.vlan_tag = mask->tci;
1303 			eth->val.vlan_tag &= eth->mask.vlan_tag;
1304 			/*
1305 			 * From the Verbs perspective, an empty VLAN mask is
1306 			 * equivalent to a packet without a VLAN layer.
1307 			 */
1308 			if (!eth->mask.vlan_tag)
1309 				goto error;
1310 		}
1311 		return 0;
1312 	}
1313 error:
1314 	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1315 				  item, "VLAN cannot be empty");
1316 }
1317 
1318 /**
1319  * Convert IPv4 item to Verbs specification.
1320  *
1321  * @param item[in]
1322  *   Item specification.
1323  * @param default_mask[in]
1324  *   Default bit-masks to use when item->mask is not provided.
1325  * @param data[in, out]
1326  *   User structure.
1327  *
1328  * @return
1329  *   0 on success, a negative errno value otherwise and rte_errno is set.
1330  */
1331 static int
1332 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1333 		      const void *default_mask,
1334 		      struct mlx5_flow_data *data)
1335 {
1336 	const struct rte_flow_item_ipv4 *spec = item->spec;
1337 	const struct rte_flow_item_ipv4 *mask = item->mask;
1338 	struct mlx5_flow_parse *parser = data->parser;
1339 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1340 	struct ibv_flow_spec_ipv4_ext ipv4 = {
1341 		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1342 		.size = ipv4_size,
1343 	};
1344 
1345 	/* Don't update layer for the inner pattern. */
1346 	if (!parser->inner)
1347 		parser->layer = HASH_RXQ_IPV4;
1348 	if (spec) {
1349 		if (!mask)
1350 			mask = default_mask;
1351 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1352 			.src_ip = spec->hdr.src_addr,
1353 			.dst_ip = spec->hdr.dst_addr,
1354 			.proto = spec->hdr.next_proto_id,
1355 			.tos = spec->hdr.type_of_service,
1356 		};
1357 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1358 			.src_ip = mask->hdr.src_addr,
1359 			.dst_ip = mask->hdr.dst_addr,
1360 			.proto = mask->hdr.next_proto_id,
1361 			.tos = mask->hdr.type_of_service,
1362 		};
1363 		/* Remove unwanted bits from values. */
1364 		ipv4.val.src_ip &= ipv4.mask.src_ip;
1365 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1366 		ipv4.val.proto &= ipv4.mask.proto;
1367 		ipv4.val.tos &= ipv4.mask.tos;
1368 	}
1369 	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1370 	return 0;
1371 }
1372 
1373 /**
1374  * Convert IPv6 item to Verbs specification.
1375  *
1376  * @param item[in]
1377  *   Item specification.
1378  * @param default_mask[in]
1379  *   Default bit-masks to use when item->mask is not provided.
1380  * @param data[in, out]
1381  *   User structure.
1382  *
1383  * @return
1384  *   0 on success, a negative errno value otherwise and rte_errno is set.
1385  */
1386 static int
1387 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1388 		      const void *default_mask,
1389 		      struct mlx5_flow_data *data)
1390 {
1391 	const struct rte_flow_item_ipv6 *spec = item->spec;
1392 	const struct rte_flow_item_ipv6 *mask = item->mask;
1393 	struct mlx5_flow_parse *parser = data->parser;
1394 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1395 	struct ibv_flow_spec_ipv6 ipv6 = {
1396 		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
1397 		.size = ipv6_size,
1398 	};
1399 
1400 	/* Don't update layer for the inner pattern. */
1401 	if (!parser->inner)
1402 		parser->layer = HASH_RXQ_IPV6;
1403 	if (spec) {
1404 		unsigned int i;
1405 		uint32_t vtc_flow_val;
1406 		uint32_t vtc_flow_mask;
1407 
1408 		if (!mask)
1409 			mask = default_mask;
1410 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1411 		       RTE_DIM(ipv6.val.src_ip));
1412 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1413 		       RTE_DIM(ipv6.val.dst_ip));
1414 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1415 		       RTE_DIM(ipv6.mask.src_ip));
1416 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1417 		       RTE_DIM(ipv6.mask.dst_ip));
1418 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1419 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1420 		ipv6.val.flow_label =
1421 			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1422 					 IPV6_HDR_FL_SHIFT);
1423 		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1424 					 IPV6_HDR_TC_SHIFT;
1425 		ipv6.val.next_hdr = spec->hdr.proto;
1426 		ipv6.val.hop_limit = spec->hdr.hop_limits;
1427 		ipv6.mask.flow_label =
1428 			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1429 					 IPV6_HDR_FL_SHIFT);
1430 		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1431 					  IPV6_HDR_TC_SHIFT;
1432 		ipv6.mask.next_hdr = mask->hdr.proto;
1433 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
1434 		/* Remove unwanted bits from values. */
1435 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1436 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1437 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1438 		}
1439 		ipv6.val.flow_label &= ipv6.mask.flow_label;
1440 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1441 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1442 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1443 	}
1444 	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1445 	return 0;
1446 }
1447 
1448 /**
1449  * Convert UDP item to Verbs specification.
1450  *
1451  * @param item[in]
1452  *   Item specification.
1453  * @param default_mask[in]
1454  *   Default bit-masks to use when item->mask is not provided.
1455  * @param data[in, out]
1456  *   User structure.
1457  *
1458  * @return
1459  *   0 on success, a negative errno value otherwise and rte_errno is set.
1460  */
1461 static int
1462 mlx5_flow_create_udp(const struct rte_flow_item *item,
1463 		     const void *default_mask,
1464 		     struct mlx5_flow_data *data)
1465 {
1466 	const struct rte_flow_item_udp *spec = item->spec;
1467 	const struct rte_flow_item_udp *mask = item->mask;
1468 	struct mlx5_flow_parse *parser = data->parser;
1469 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1470 	struct ibv_flow_spec_tcp_udp udp = {
1471 		.type = parser->inner | IBV_FLOW_SPEC_UDP,
1472 		.size = udp_size,
1473 	};
1474 
1475 	/* Don't update layer for the inner pattern. */
1476 	if (!parser->inner) {
1477 		if (parser->layer == HASH_RXQ_IPV4)
1478 			parser->layer = HASH_RXQ_UDPV4;
1479 		else
1480 			parser->layer = HASH_RXQ_UDPV6;
1481 	}
1482 	if (spec) {
1483 		if (!mask)
1484 			mask = default_mask;
1485 		udp.val.dst_port = spec->hdr.dst_port;
1486 		udp.val.src_port = spec->hdr.src_port;
1487 		udp.mask.dst_port = mask->hdr.dst_port;
1488 		udp.mask.src_port = mask->hdr.src_port;
1489 		/* Remove unwanted bits from values. */
1490 		udp.val.src_port &= udp.mask.src_port;
1491 		udp.val.dst_port &= udp.mask.dst_port;
1492 	}
1493 	mlx5_flow_create_copy(parser, &udp, udp_size);
1494 	return 0;
1495 }
1496 
1497 /**
1498  * Convert TCP item to Verbs specification.
1499  *
1500  * @param item[in]
1501  *   Item specification.
1502  * @param default_mask[in]
1503  *   Default bit-masks to use when item->mask is not provided.
1504  * @param data[in, out]
1505  *   User structure.
1506  *
1507  * @return
1508  *   0 on success, a negative errno value otherwise and rte_errno is set.
1509  */
1510 static int
1511 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1512 		     const void *default_mask,
1513 		     struct mlx5_flow_data *data)
1514 {
1515 	const struct rte_flow_item_tcp *spec = item->spec;
1516 	const struct rte_flow_item_tcp *mask = item->mask;
1517 	struct mlx5_flow_parse *parser = data->parser;
1518 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1519 	struct ibv_flow_spec_tcp_udp tcp = {
1520 		.type = parser->inner | IBV_FLOW_SPEC_TCP,
1521 		.size = tcp_size,
1522 	};
1523 
1524 	/* Don't update layer for the inner pattern. */
1525 	if (!parser->inner) {
1526 		if (parser->layer == HASH_RXQ_IPV4)
1527 			parser->layer = HASH_RXQ_TCPV4;
1528 		else
1529 			parser->layer = HASH_RXQ_TCPV6;
1530 	}
1531 	if (spec) {
1532 		if (!mask)
1533 			mask = default_mask;
1534 		tcp.val.dst_port = spec->hdr.dst_port;
1535 		tcp.val.src_port = spec->hdr.src_port;
1536 		tcp.mask.dst_port = mask->hdr.dst_port;
1537 		tcp.mask.src_port = mask->hdr.src_port;
1538 		/* Remove unwanted bits from values. */
1539 		tcp.val.src_port &= tcp.mask.src_port;
1540 		tcp.val.dst_port &= tcp.mask.dst_port;
1541 	}
1542 	mlx5_flow_create_copy(parser, &tcp, tcp_size);
1543 	return 0;
1544 }
1545 
1546 /**
1547  * Convert VXLAN item to Verbs specification.
1548  *
1549  * @param item[in]
1550  *   Item specification.
1551  * @param default_mask[in]
1552  *   Default bit-masks to use when item->mask is not provided.
1553  * @param data[in, out]
1554  *   User structure.
1555  *
1556  * @return
1557  *   0 on success, a negative errno value otherwise and rte_errno is set.
1558  */
1559 static int
1560 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1561 		       const void *default_mask,
1562 		       struct mlx5_flow_data *data)
1563 {
1564 	const struct rte_flow_item_vxlan *spec = item->spec;
1565 	const struct rte_flow_item_vxlan *mask = item->mask;
1566 	struct mlx5_flow_parse *parser = data->parser;
1567 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1568 	struct ibv_flow_spec_tunnel vxlan = {
1569 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1570 		.size = size,
1571 	};
1572 	union vni {
1573 		uint32_t vlan_id;
1574 		uint8_t vni[4];
1575 	} id;
1576 
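	/*
	 * The 24-bit VNI is copied into bytes 1-3 of the union (byte 0 is
	 * cleared) so it can be read back as the 32-bit tunnel_id expected
	 * by the Verbs specification.
	 */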
1577 	id.vni[0] = 0;
1578 	parser->inner = IBV_FLOW_SPEC_INNER;
1579 	if (spec) {
1580 		if (!mask)
1581 			mask = default_mask;
1582 		memcpy(&id.vni[1], spec->vni, 3);
1583 		vxlan.val.tunnel_id = id.vlan_id;
1584 		memcpy(&id.vni[1], mask->vni, 3);
1585 		vxlan.mask.tunnel_id = id.vlan_id;
1586 		/* Remove unwanted bits from values. */
1587 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1588 	}
1589 	/*
1590 	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1591 	 * layer is defined in the Verbs specification, it is interpreted as a
1592 	 * wildcard and all packets will match this rule; if it follows a full
1593 	 * stack layer (e.g. eth / ipv4 / udp), all packets matching the
1594 	 * preceding layers will also match this rule.
1595 	 * To avoid such a situation, VNI 0 is currently refused.
1596 	 */
1597 	if (!vxlan.val.tunnel_id)
1598 		return rte_flow_error_set(data->error, EINVAL,
1599 					  RTE_FLOW_ERROR_TYPE_ITEM,
1600 					  item,
1601 					  "VxLAN vni cannot be 0");
1602 	mlx5_flow_create_copy(parser, &vxlan, size);
1603 	return 0;
1604 }
1605 
1606 /**
1607  * Convert mark/flag action to Verbs specification.
1608  *
1609  * @param parser
1610  *   Internal parser structure.
1611  * @param mark_id
1612  *   Mark identifier.
1613  *
1614  * @return
1615  *   0 on success, a negative errno value otherwise and rte_errno is set.
1616  */
1617 static int
1618 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1619 {
1620 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1621 	struct ibv_flow_spec_action_tag tag = {
1622 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1623 		.size = size,
1624 		.tag_id = mlx5_flow_mark_set(mark_id),
1625 	};
1626 
1627 	assert(parser->mark);
1628 	mlx5_flow_create_copy(parser, &tag, size);
1629 	return 0;
1630 }
1631 
1632 /**
1633  * Convert count action to Verbs specification.
1634  *
1635  * @param dev
1636  *   Pointer to Ethernet device.
1637  * @param parser
1638  *   Pointer to MLX5 flow parser structure.
1639  *
1640  * @return
1641  *   0 on success, a negative errno value otherwise and rte_errno is set.
1642  */
1643 static int
1644 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1645 		       struct mlx5_flow_parse *parser __rte_unused)
1646 {
1647 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1648 	struct priv *priv = dev->data->dev_private;
1649 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1650 	struct ibv_counter_set_init_attr init_attr = {0};
1651 	struct ibv_flow_spec_counter_action counter = {
1652 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
1653 		.size = size,
1654 		.counter_set_handle = 0,
1655 	};
1656 
1657 	init_attr.counter_set_id = 0;
1658 	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1659 	if (!parser->cs) {
1660 		rte_errno = EINVAL;
1661 		return -rte_errno;
1662 	}
1663 	counter.counter_set_handle = parser->cs->handle;
1664 	mlx5_flow_create_copy(parser, &counter, size);
1665 #endif
1666 	return 0;
1667 }
1668 
1669 /**
1670  * Complete flow rule creation with a drop queue.
1671  *
1672  * @param dev
1673  *   Pointer to Ethernet device.
1674  * @param parser
1675  *   Internal parser structure.
1676  * @param flow
1677  *   Pointer to the rte_flow.
1678  * @param[out] error
1679  *   Perform verbose error reporting if not NULL.
1680  *
1681  * @return
1682  *   0 on success, a negative errno value otherwise and rte_errno is set.
1683  */
1684 static int
1685 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1686 				   struct mlx5_flow_parse *parser,
1687 				   struct rte_flow *flow,
1688 				   struct rte_flow_error *error)
1689 {
1690 	struct priv *priv = dev->data->dev_private;
1691 	struct ibv_flow_spec_action_drop *drop;
1692 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1693 
1694 	assert(priv->pd);
1695 	assert(priv->ctx);
1696 	flow->drop = 1;
1697 	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1698 			parser->queue[HASH_RXQ_ETH].offset);
1699 	*drop = (struct ibv_flow_spec_action_drop){
1700 			.type = IBV_FLOW_SPEC_ACTION_DROP,
1701 			.size = size,
1702 	};
1703 	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1704 	parser->queue[HASH_RXQ_ETH].offset += size;
1705 	flow->frxq[HASH_RXQ_ETH].ibv_attr =
1706 		parser->queue[HASH_RXQ_ETH].ibv_attr;
1707 	if (parser->count)
1708 		flow->cs = parser->cs;
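	/* Only instantiate the Verbs flow if the port is already started. */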
1709 	if (!priv->dev->data->dev_started)
1710 		return 0;
1711 	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1712 	flow->frxq[HASH_RXQ_ETH].ibv_flow =
1713 		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1714 				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
1715 	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1716 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1717 				   NULL, "flow rule creation failure");
1718 		goto error;
1719 	}
1720 	return 0;
1721 error:
1722 	assert(flow);
1723 	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1724 		claim_zero(mlx5_glue->destroy_flow
1725 			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1726 		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1727 	}
1728 	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1729 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1730 		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1731 	}
1732 	if (flow->cs) {
1733 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1734 		flow->cs = NULL;
1735 		parser->cs = NULL;
1736 	}
1737 	return -rte_errno;
1738 }
1739 
1740 /**
1741  * Create hash Rx queues when RSS is enabled.
1742  *
1743  * @param dev
1744  *   Pointer to Ethernet device.
1745  * @param parser
1746  *   Internal parser structure.
1747  * @param flow
1748  *   Pointer to the rte_flow.
1749  * @param[out] error
1750  *   Perform verbose error reporting if not NULL.
1751  *
1752  * @return
1753  *   0 on success, a negative errno value otherwise and rte_errno is set.
1754  */
1755 static int
1756 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1757 				  struct mlx5_flow_parse *parser,
1758 				  struct rte_flow *flow,
1759 				  struct rte_flow_error *error)
1760 {
1761 	struct priv *priv = dev->data->dev_private;
1762 	unsigned int i;
1763 
1764 	for (i = 0; i != hash_rxq_init_n; ++i) {
1765 		uint64_t hash_fields;
1766 
1767 		if (!parser->queue[i].ibv_attr)
1768 			continue;
1769 		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1770 		parser->queue[i].ibv_attr = NULL;
1771 		hash_fields = hash_rxq_init[i].hash_fields;
1772 		if (!priv->dev->data->dev_started)
1773 			continue;
1774 		flow->frxq[i].hrxq =
1775 			mlx5_hrxq_get(dev,
1776 				      parser->rss_conf.key,
1777 				      parser->rss_conf.key_len,
1778 				      hash_fields,
1779 				      parser->rss_conf.queue,
1780 				      parser->rss_conf.queue_num);
1781 		if (flow->frxq[i].hrxq)
1782 			continue;
1783 		flow->frxq[i].hrxq =
1784 			mlx5_hrxq_new(dev,
1785 				      parser->rss_conf.key,
1786 				      parser->rss_conf.key_len,
1787 				      hash_fields,
1788 				      parser->rss_conf.queue,
1789 				      parser->rss_conf.queue_num);
1790 		if (!flow->frxq[i].hrxq) {
1791 			return rte_flow_error_set(error, ENOMEM,
1792 						  RTE_FLOW_ERROR_TYPE_HANDLE,
1793 						  NULL,
1794 						  "cannot create hash rxq");
1795 		}
1796 	}
1797 	return 0;
1798 }
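
/*
 * Illustrative sketch (not part of the driver): the RSS path above is
 * typically exercised by an application action list such as:
 *
 *   uint16_t queues[] = { 0, 1, 2, 3 };
 *   struct rte_flow_action_rss rss = {
 *           .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
 *           .types = ETH_RSS_IP | ETH_RSS_UDP,
 *           .queue_num = RTE_DIM(queues),
 *           .queue = queues,
 *   };
 *   const struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *
 * Leaving .key/.key_len zeroed is assumed to select the default RSS key.
 * One hash Rx queue is then looked up or created per enabled protocol
 * layer through mlx5_hrxq_get()/mlx5_hrxq_new().
 */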
1799 
1800 /**
1801  * Complete flow rule creation.
1802  *
1803  * @param dev
1804  *   Pointer to Ethernet device.
1805  * @param parser
1806  *   Internal parser structure.
1807  * @param flow
1808  *   Pointer to the rte_flow.
1809  * @param[out] error
1810  *   Perform verbose error reporting if not NULL.
1811  *
1812  * @return
1813  *   0 on success, a negative errno value otherwise and rte_errno is set.
1814  */
1815 static int
1816 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1817 			      struct mlx5_flow_parse *parser,
1818 			      struct rte_flow *flow,
1819 			      struct rte_flow_error *error)
1820 {
1821 	struct priv *priv = dev->data->dev_private;
1822 	int ret;
1823 	unsigned int i;
1824 	unsigned int flows_n = 0;
1825 
1826 	assert(priv->pd);
1827 	assert(priv->ctx);
1828 	assert(!parser->drop);
1829 	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1830 	if (ret)
1831 		goto error;
1832 	if (parser->count)
1833 		flow->cs = parser->cs;
1834 	if (!priv->dev->data->dev_started)
1835 		return 0;
1836 	for (i = 0; i != hash_rxq_init_n; ++i) {
1837 		if (!flow->frxq[i].hrxq)
1838 			continue;
1839 		flow->frxq[i].ibv_flow =
1840 			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1841 					       flow->frxq[i].ibv_attr);
1842 		if (!flow->frxq[i].ibv_flow) {
1843 			rte_flow_error_set(error, ENOMEM,
1844 					   RTE_FLOW_ERROR_TYPE_HANDLE,
1845 					   NULL, "flow rule creation failure");
1846 			goto error;
1847 		}
1848 		++flows_n;
1849 		DRV_LOG(DEBUG, "port %u %p type %u QP %p ibv_flow %p",
1850 			dev->data->port_id,
1851 			(void *)flow, i,
1852 			(void *)flow->frxq[i].hrxq->qp,
1853 			(void *)flow->frxq[i].ibv_flow);
1854 	}
1855 	if (!flows_n) {
1856 		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1857 				   NULL, "internal error in flow creation");
1858 		goto error;
1859 	}
1860 	for (i = 0; i != parser->rss_conf.queue_num; ++i) {
1861 		struct mlx5_rxq_data *q =
1862 			(*priv->rxqs)[parser->rss_conf.queue[i]];
1863 
1864 		q->mark |= parser->mark;
1865 	}
1866 	return 0;
1867 error:
1868 	ret = rte_errno; /* Save rte_errno before cleanup. */
1869 	assert(flow);
1870 	for (i = 0; i != hash_rxq_init_n; ++i) {
1871 		if (flow->frxq[i].ibv_flow) {
1872 			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1873 
1874 			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1875 		}
1876 		if (flow->frxq[i].hrxq)
1877 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1878 		if (flow->frxq[i].ibv_attr)
1879 			rte_free(flow->frxq[i].ibv_attr);
1880 	}
1881 	if (flow->cs) {
1882 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1883 		flow->cs = NULL;
1884 		parser->cs = NULL;
1885 	}
1886 	rte_errno = ret; /* Restore rte_errno. */
1887 	return -rte_errno;
1888 }
1889 
1890 /**
1891  * Create a flow and register it in the given list.
1892  *
1893  * @param dev
1894  *   Pointer to Ethernet device.
1895  * @param list
1896  *   Pointer to a TAILQ flow list.
1897  * @param[in] attr
1898  *   Flow rule attributes.
1899  * @param[in] pattern
1900  *   Pattern specification (list terminated by the END pattern item).
1901  * @param[in] actions
1902  *   Associated actions (list terminated by the END action).
1903  * @param[out] error
1904  *   Perform verbose error reporting if not NULL.
1905  *
1906  * @return
1907  *   A flow on success, NULL otherwise and rte_errno is set.
1908  */
1909 static struct rte_flow *
1910 mlx5_flow_list_create(struct rte_eth_dev *dev,
1911 		      struct mlx5_flows *list,
1912 		      const struct rte_flow_attr *attr,
1913 		      const struct rte_flow_item items[],
1914 		      const struct rte_flow_action actions[],
1915 		      struct rte_flow_error *error)
1916 {
1917 	struct mlx5_flow_parse parser = { .create = 1, };
1918 	struct rte_flow *flow = NULL;
1919 	unsigned int i;
1920 	int ret;
1921 
1922 	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1923 	if (ret)
1924 		goto exit;
1925 	flow = rte_calloc(__func__, 1,
1926 			  sizeof(*flow) +
1927 			  parser.rss_conf.queue_num * sizeof(uint16_t),
1928 			  0);
1929 	if (!flow) {
1930 		rte_flow_error_set(error, ENOMEM,
1931 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1932 				   NULL,
1933 				   "cannot allocate flow memory");
1934 		return NULL;
1935 	}
1936 	/* Copy configuration. */
1937 	flow->queues = (uint16_t (*)[])(flow + 1);
1938 	flow->rss_conf = (struct rte_flow_action_rss){
1939 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
1940 		.types = parser.rss_conf.types,
1941 		.key_len = parser.rss_conf.key_len,
1942 		.queue_num = parser.rss_conf.queue_num,
1943 		.key = memcpy(flow->rss_key, parser.rss_conf.key,
1944 			      sizeof(*parser.rss_conf.key) *
1945 			      parser.rss_conf.key_len),
1946 		.queue = memcpy(flow->queues, parser.rss_conf.queue,
1947 				sizeof(*parser.rss_conf.queue) *
1948 				parser.rss_conf.queue_num),
1949 	};
1950 	flow->mark = parser.mark;
1951 	/* Finalize the flow. */
1952 	if (parser.drop)
1953 		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1954 							 error);
1955 	else
1956 		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1957 	if (ret)
1958 		goto exit;
1959 	TAILQ_INSERT_TAIL(list, flow, next);
1960 	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
1961 		(void *)flow);
1962 	return flow;
1963 exit:
1964 	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
1965 		error->message);
1966 	for (i = 0; i != hash_rxq_init_n; ++i) {
1967 		if (parser.queue[i].ibv_attr)
1968 			rte_free(parser.queue[i].ibv_attr);
1969 	}
1970 	rte_free(flow);
1971 	return NULL;
1972 }
1973 
1974 /**
1975  * Validate a flow supported by the NIC.
1976  *
1977  * @see rte_flow_validate()
1978  * @see rte_flow_ops
1979  */
1980 int
1981 mlx5_flow_validate(struct rte_eth_dev *dev,
1982 		   const struct rte_flow_attr *attr,
1983 		   const struct rte_flow_item items[],
1984 		   const struct rte_flow_action actions[],
1985 		   struct rte_flow_error *error)
1986 {
1987 	struct mlx5_flow_parse parser = { .create = 0, };
1988 
1989 	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1990 }
1991 
1992 /**
1993  * Create a flow.
1994  *
1995  * @see rte_flow_create()
1996  * @see rte_flow_ops
1997  */
1998 struct rte_flow *
1999 mlx5_flow_create(struct rte_eth_dev *dev,
2000 		 const struct rte_flow_attr *attr,
2001 		 const struct rte_flow_item items[],
2002 		 const struct rte_flow_action actions[],
2003 		 struct rte_flow_error *error)
2004 {
2005 	struct priv *priv = dev->data->dev_private;
2006 
2007 	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2008 				     error);
2009 }
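
/*
 * Illustrative usage sketch (assumptions: "port_id" refers to a configured
 * mlx5 port and Rx queue 0 exists; not part of the driver). Applications
 * reach mlx5_flow_validate()/mlx5_flow_create() through the generic API:
 *
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action_queue queue = { .index = 0 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error error;
 *   struct rte_flow *flow = NULL;
 *
 *   if (!rte_flow_validate(port_id, &attr, pattern, actions, &error))
 *           flow = rte_flow_create(port_id, &attr, pattern, actions,
 *                                  &error);
 *
 * The returned handle is later released with rte_flow_destroy() or
 * implicitly by rte_flow_flush(), both of which end up in
 * mlx5_flow_list_destroy() below.
 */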
2010 
2011 /**
2012  * Destroy a flow in a list.
2013  *
2014  * @param dev
2015  *   Pointer to Ethernet device.
2016  * @param list
2017  *   Pointer to a TAILQ flow list.
2018  * @param[in] flow
2019  *   Flow to destroy.
2020  */
2021 static void
2022 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2023 		       struct rte_flow *flow)
2024 {
2025 	struct priv *priv = dev->data->dev_private;
2026 	unsigned int i;
2027 
2028 	if (flow->drop || !flow->mark)
2029 		goto free;
2030 	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2031 		struct rte_flow *tmp;
2032 		int mark = 0;
2033 
2034 		/*
2035 		 * To remove the mark from the queue, the queue must not be
2036 		 * present in any other marked flow (RSS or not).
2037 		 */
2038 		TAILQ_FOREACH(tmp, list, next) {
2039 			unsigned int j;
2040 			uint16_t *tqs = NULL;
2041 			uint16_t tq_n = 0;
2042 
2043 			if (!tmp->mark)
2044 				continue;
2045 			for (j = 0; j != hash_rxq_init_n; ++j) {
2046 				if (!tmp->frxq[j].hrxq)
2047 					continue;
2048 				tqs = tmp->frxq[j].hrxq->ind_table->queues;
2049 				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2050 			}
2051 			if (!tq_n)
2052 				continue;
2053 			for (j = 0; (j != tq_n) && !mark; j++)
2054 				if (tqs[j] == (*flow->queues)[i])
2055 					mark = 1;
2056 		}
2057 		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2058 	}
2059 free:
2060 	if (flow->drop) {
2061 		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2062 			claim_zero(mlx5_glue->destroy_flow
2063 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2064 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2065 	} else {
2066 		for (i = 0; i != hash_rxq_init_n; ++i) {
2067 			struct mlx5_flow *frxq = &flow->frxq[i];
2068 
2069 			if (frxq->ibv_flow)
2070 				claim_zero(mlx5_glue->destroy_flow
2071 					   (frxq->ibv_flow));
2072 			if (frxq->hrxq)
2073 				mlx5_hrxq_release(dev, frxq->hrxq);
2074 			if (frxq->ibv_attr)
2075 				rte_free(frxq->ibv_attr);
2076 		}
2077 	}
2078 	if (flow->cs) {
2079 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2080 		flow->cs = NULL;
2081 	}
2082 	TAILQ_REMOVE(list, flow, next);
2083 	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2084 		(void *)flow);
2085 	rte_free(flow);
2086 }
2087 
2088 /**
2089  * Destroy all flows.
2090  *
2091  * @param dev
2092  *   Pointer to Ethernet device.
2093  * @param list
2094  *   Pointer to a TAILQ flow list.
2095  */
2096 void
2097 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2098 {
2099 	while (!TAILQ_EMPTY(list)) {
2100 		struct rte_flow *flow;
2101 
2102 		flow = TAILQ_FIRST(list);
2103 		mlx5_flow_list_destroy(dev, list, flow);
2104 	}
2105 }
2106 
2107 /**
2108  * Create drop queue.
2109  *
2110  * @param dev
2111  *   Pointer to Ethernet device.
2112  *
2113  * @return
2114  *   0 on success, a negative errno value otherwise and rte_errno is set.
2115  */
2116 int
2117 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2118 {
2119 	struct priv *priv = dev->data->dev_private;
2120 	struct mlx5_hrxq_drop *fdq = NULL;
2121 
2122 	assert(priv->pd);
2123 	assert(priv->ctx);
2124 	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2125 	if (!fdq) {
2126 		DRV_LOG(WARNING,
2127 			"port %u cannot allocate memory for drop queue",
2128 			dev->data->port_id);
2129 		rte_errno = ENOMEM;
2130 		return -rte_errno;
2131 	}
2132 	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2133 	if (!fdq->cq) {
2134 		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2135 			dev->data->port_id);
2136 		rte_errno = errno;
2137 		goto error;
2138 	}
2139 	fdq->wq = mlx5_glue->create_wq
2140 		(priv->ctx,
2141 		 &(struct ibv_wq_init_attr){
2142 			.wq_type = IBV_WQT_RQ,
2143 			.max_wr = 1,
2144 			.max_sge = 1,
2145 			.pd = priv->pd,
2146 			.cq = fdq->cq,
2147 		 });
2148 	if (!fdq->wq) {
2149 		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2150 			dev->data->port_id);
2151 		rte_errno = errno;
2152 		goto error;
2153 	}
2154 	fdq->ind_table = mlx5_glue->create_rwq_ind_table
2155 		(priv->ctx,
2156 		 &(struct ibv_rwq_ind_table_init_attr){
2157 			.log_ind_tbl_size = 0,
2158 			.ind_tbl = &fdq->wq,
2159 			.comp_mask = 0,
2160 		 });
2161 	if (!fdq->ind_table) {
2162 		DRV_LOG(WARNING,
2163 			"port %u cannot allocate indirection table for drop"
2164 			" queue",
2165 			dev->data->port_id);
2166 		rte_errno = errno;
2167 		goto error;
2168 	}
2169 	fdq->qp = mlx5_glue->create_qp_ex
2170 		(priv->ctx,
2171 		 &(struct ibv_qp_init_attr_ex){
2172 			.qp_type = IBV_QPT_RAW_PACKET,
2173 			.comp_mask =
2174 				IBV_QP_INIT_ATTR_PD |
2175 				IBV_QP_INIT_ATTR_IND_TABLE |
2176 				IBV_QP_INIT_ATTR_RX_HASH,
2177 			.rx_hash_conf = (struct ibv_rx_hash_conf){
2178 				.rx_hash_function =
2179 					IBV_RX_HASH_FUNC_TOEPLITZ,
2180 				.rx_hash_key_len = rss_hash_default_key_len,
2181 				.rx_hash_key = rss_hash_default_key,
2182 				.rx_hash_fields_mask = 0,
2183 				},
2184 			.rwq_ind_tbl = fdq->ind_table,
2185 			.pd = priv->pd
2186 		 });
2187 	if (!fdq->qp) {
2188 		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2189 			dev->data->port_id);
2190 		rte_errno = errno;
2191 		goto error;
2192 	}
2193 	priv->flow_drop_queue = fdq;
2194 	return 0;
2195 error:
2196 	if (fdq->qp)
2197 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2198 	if (fdq->ind_table)
2199 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2200 	if (fdq->wq)
2201 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2202 	if (fdq->cq)
2203 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2204 	if (fdq)
2205 		rte_free(fdq);
2206 	priv->flow_drop_queue = NULL;
2207 	return -rte_errno;
2208 }
2209 
2210 /**
2211  * Delete drop queue.
2212  *
2213  * @param dev
2214  *   Pointer to Ethernet device.
2215  */
2216 void
2217 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2218 {
2219 	struct priv *priv = dev->data->dev_private;
2220 	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2221 
2222 	if (!fdq)
2223 		return;
2224 	if (fdq->qp)
2225 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2226 	if (fdq->ind_table)
2227 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2228 	if (fdq->wq)
2229 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2230 	if (fdq->cq)
2231 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2232 	rte_free(fdq);
2233 	priv->flow_drop_queue = NULL;
2234 }
2235 
2236 /**
2237  * Remove all flows.
2238  *
2239  * @param dev
2240  *   Pointer to Ethernet device.
2241  * @param list
2242  *   Pointer to a TAILQ flow list.
2243  */
2244 void
2245 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2246 {
2247 	struct priv *priv = dev->data->dev_private;
2248 	struct rte_flow *flow;
2249 
2250 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2251 		unsigned int i;
2252 		struct mlx5_ind_table_ibv *ind_tbl = NULL;
2253 
2254 		if (flow->drop) {
2255 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2256 				continue;
2257 			claim_zero(mlx5_glue->destroy_flow
2258 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2259 			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2260 			DRV_LOG(DEBUG, "port %u flow %p removed",
2261 				dev->data->port_id, (void *)flow);
2262 			/* Next flow. */
2263 			continue;
2264 		}
2265 		/* Verify the flow has not already been cleaned. */
2266 		for (i = 0; i != hash_rxq_init_n; ++i) {
2267 			if (!flow->frxq[i].ibv_flow)
2268 				continue;
2269 			/*
2270 			 * Indirection table may be necessary to remove the
2271 			 * flags in the Rx queues.
2272 			 * This helps to speed up the process by avoiding
2273 			 * another loop.
2274 			 */
2275 			ind_tbl = flow->frxq[i].hrxq->ind_table;
2276 			break;
2277 		}
2278 		if (i == hash_rxq_init_n)
2279 			return;
2280 		if (flow->mark) {
2281 			assert(ind_tbl);
2282 			for (i = 0; i != ind_tbl->queues_n; ++i)
2283 				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2284 		}
2285 		for (i = 0; i != hash_rxq_init_n; ++i) {
2286 			if (!flow->frxq[i].ibv_flow)
2287 				continue;
2288 			claim_zero(mlx5_glue->destroy_flow
2289 				   (flow->frxq[i].ibv_flow));
2290 			flow->frxq[i].ibv_flow = NULL;
2291 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2292 			flow->frxq[i].hrxq = NULL;
2293 		}
2294 		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2295 			(void *)flow);
2296 	}
2297 }
2298 
2299 /**
2300  * Add all flows.
2301  *
2302  * @param dev
2303  *   Pointer to Ethernet device.
2304  * @param list
2305  *   Pointer to a TAILQ flow list.
2306  *
2307  * @return
2308  *   0 on success, a negative errno value otherwise and rte_errno is set.
2309  */
2310 int
2311 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2312 {
2313 	struct priv *priv = dev->data->dev_private;
2314 	struct rte_flow *flow;
2315 
2316 	TAILQ_FOREACH(flow, list, next) {
2317 		unsigned int i;
2318 
2319 		if (flow->drop) {
2320 			flow->frxq[HASH_RXQ_ETH].ibv_flow =
2321 				mlx5_glue->create_flow
2322 				(priv->flow_drop_queue->qp,
2323 				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2324 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2325 				DRV_LOG(DEBUG,
2326 					"port %u flow %p cannot be applied",
2327 					dev->data->port_id, (void *)flow);
2328 				rte_errno = EINVAL;
2329 				return -rte_errno;
2330 			}
2331 			DRV_LOG(DEBUG, "port %u flow %p applied",
2332 				dev->data->port_id, (void *)flow);
2333 			/* Next flow. */
2334 			continue;
2335 		}
2336 		for (i = 0; i != hash_rxq_init_n; ++i) {
2337 			if (!flow->frxq[i].ibv_attr)
2338 				continue;
2339 			flow->frxq[i].hrxq =
2340 				mlx5_hrxq_get(dev, flow->rss_conf.key,
2341 					      flow->rss_conf.key_len,
2342 					      hash_rxq_init[i].hash_fields,
2343 					      flow->rss_conf.queue,
2344 					      flow->rss_conf.queue_num);
2345 			if (flow->frxq[i].hrxq)
2346 				goto flow_create;
2347 			flow->frxq[i].hrxq =
2348 				mlx5_hrxq_new(dev, flow->rss_conf.key,
2349 					      flow->rss_conf.key_len,
2350 					      hash_rxq_init[i].hash_fields,
2351 					      flow->rss_conf.queue,
2352 					      flow->rss_conf.queue_num);
2353 			if (!flow->frxq[i].hrxq) {
2354 				DRV_LOG(DEBUG,
2355 					"port %u flow %p cannot be applied",
2356 					dev->data->port_id, (void *)flow);
2357 				rte_errno = EINVAL;
2358 				return -rte_errno;
2359 			}
2360 flow_create:
2361 			flow->frxq[i].ibv_flow =
2362 				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2363 						       flow->frxq[i].ibv_attr);
2364 			if (!flow->frxq[i].ibv_flow) {
2365 				DRV_LOG(DEBUG,
2366 					"port %u flow %p cannot be applied",
2367 					dev->data->port_id, (void *)flow);
2368 				rte_errno = EINVAL;
2369 				return -rte_errno;
2370 			}
2371 			DRV_LOG(DEBUG, "port %u flow %p applied",
2372 				dev->data->port_id, (void *)flow);
2373 		}
2374 		if (!flow->mark)
2375 			continue;
2376 		for (i = 0; i != flow->rss_conf.queue_num; ++i)
2377 			(*priv->rxqs)[flow->rss_conf.queue[i]]->mark = 1;
2378 	}
2379 	return 0;
2380 }
2381 
2382 /**
2383  * Verify the flow list is empty
2384  * Verify the flow list is empty.
2385  *
2386  * @param dev
2387  *   Pointer to Ethernet device.
2388  *
2389  * @return The number of flows not released.
2390 int
2391 mlx5_flow_verify(struct rte_eth_dev *dev)
2392 {
2393 	struct priv *priv = dev->data->dev_private;
2394 	struct rte_flow *flow;
2395 	int ret = 0;
2396 
2397 	TAILQ_FOREACH(flow, &priv->flows, next) {
2398 		DRV_LOG(DEBUG, "port %u flow %p still referenced",
2399 			dev->data->port_id, (void *)flow);
2400 		++ret;
2401 	}
2402 	return ret;
2403 }
2404 
2405 /**
2406  * Enable a control flow configured from the control plane.
2407  *
2408  * @param dev
2409  *   Pointer to Ethernet device.
2410  * @param eth_spec
2411  *   An Ethernet flow spec to apply.
2412  * @param eth_mask
2413  *   An Ethernet flow mask to apply.
2414  * @param vlan_spec
2415  *   A VLAN flow spec to apply.
2416  * @param vlan_mask
2417  *   A VLAN flow mask to apply.
2418  *
2419  * @return
2420  *   0 on success, a negative errno value otherwise and rte_errno is set.
2421  */
2422 int
2423 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2424 		    struct rte_flow_item_eth *eth_spec,
2425 		    struct rte_flow_item_eth *eth_mask,
2426 		    struct rte_flow_item_vlan *vlan_spec,
2427 		    struct rte_flow_item_vlan *vlan_mask)
2428 {
2429 	struct priv *priv = dev->data->dev_private;
2430 	const struct rte_flow_attr attr = {
2431 		.ingress = 1,
2432 		.priority = MLX5_CTRL_FLOW_PRIORITY,
2433 	};
2434 	struct rte_flow_item items[] = {
2435 		{
2436 			.type = RTE_FLOW_ITEM_TYPE_ETH,
2437 			.spec = eth_spec,
2438 			.last = NULL,
2439 			.mask = eth_mask,
2440 		},
2441 		{
2442 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2443 				RTE_FLOW_ITEM_TYPE_END,
2444 			.spec = vlan_spec,
2445 			.last = NULL,
2446 			.mask = vlan_mask,
2447 		},
2448 		{
2449 			.type = RTE_FLOW_ITEM_TYPE_END,
2450 		},
2451 	};
2452 	uint16_t queue[priv->reta_idx_n];
2453 	struct rte_flow_action_rss action_rss = {
2454 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2455 		.types = priv->rss_conf.rss_hf,
2456 		.key_len = priv->rss_conf.rss_key_len,
2457 		.queue_num = priv->reta_idx_n,
2458 		.key = priv->rss_conf.rss_key,
2459 		.queue = queue,
2460 	};
2461 	struct rte_flow_action actions[] = {
2462 		{
2463 			.type = RTE_FLOW_ACTION_TYPE_RSS,
2464 			.conf = &action_rss,
2465 		},
2466 		{
2467 			.type = RTE_FLOW_ACTION_TYPE_END,
2468 		},
2469 	};
2470 	struct rte_flow *flow;
2471 	struct rte_flow_error error;
2472 	unsigned int i;
2473 
2474 	if (!priv->reta_idx_n) {
2475 		rte_errno = EINVAL;
2476 		return -rte_errno;
2477 	}
2478 	for (i = 0; i != priv->reta_idx_n; ++i)
2479 		queue[i] = (*priv->reta_idx)[i];
2480 	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2481 				     actions, &error);
2482 	if (!flow)
2483 		return -rte_errno;
2484 	return 0;
2485 }
2486 
2487 /**
2488  * Enable a control flow configured from the control plane.
2489  *
2490  * @param dev
2491  *   Pointer to Ethernet device.
2492  * @param eth_spec
2493  *   An Ethernet flow spec to apply.
2494  * @param eth_mask
2495  *   An Ethernet flow mask to apply.
2496  *
2497  * @return
2498  *   0 on success, a negative errno value otherwise and rte_errno is set.
2499  */
2500 int
2501 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2502 	       struct rte_flow_item_eth *eth_spec,
2503 	       struct rte_flow_item_eth *eth_mask)
2504 {
2505 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2506 }
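
/*
 * Illustrative sketch (not part of the driver): control flows are normally
 * installed when traffic is enabled, e.g. to accept broadcast frames:
 *
 *   struct rte_flow_item_eth bcast_spec = {
 *           .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *   };
 *   struct rte_flow_item_eth bcast_mask = {
 *           .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *   };
 *
 *   claim_zero(mlx5_ctrl_flow(dev, &bcast_spec, &bcast_mask));
 *
 * The resulting rule is stored in priv->ctrl_flows and relies on the RSS
 * action built by mlx5_ctrl_flow_vlan() over the whole indirection table.
 */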
2507 
2508 /**
2509  * Destroy a flow.
2510  *
2511  * @see rte_flow_destroy()
2512  * @see rte_flow_ops
2513  */
2514 int
2515 mlx5_flow_destroy(struct rte_eth_dev *dev,
2516 		  struct rte_flow *flow,
2517 		  struct rte_flow_error *error __rte_unused)
2518 {
2519 	struct priv *priv = dev->data->dev_private;
2520 
2521 	mlx5_flow_list_destroy(dev, &priv->flows, flow);
2522 	return 0;
2523 }
2524 
2525 /**
2526  * Destroy all flows.
2527  *
2528  * @see rte_flow_flush()
2529  * @see rte_flow_ops
2530  */
2531 int
2532 mlx5_flow_flush(struct rte_eth_dev *dev,
2533 		struct rte_flow_error *error __rte_unused)
2534 {
2535 	struct priv *priv = dev->data->dev_private;
2536 
2537 	mlx5_flow_list_flush(dev, &priv->flows);
2538 	return 0;
2539 }
2540 
2541 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2542 /**
2543  * Query flow counter.
2544  *
2545  * @param cs The counter set to query.
2546  * @param[in, out] counter_stats Last recorded values, used for deltas.
2547  * @param[out] query_count Returned counter statistics (hits and bytes).
2548  * @param[out] error Perform verbose error reporting if not NULL.
2549  *
2550  * @return
2551  *   0 on success, a negative errno value otherwise and rte_errno is set.
2552  */
2553 static int
2554 mlx5_flow_query_count(struct ibv_counter_set *cs,
2555 		      struct mlx5_flow_counter_stats *counter_stats,
2556 		      struct rte_flow_query_count *query_count,
2557 		      struct rte_flow_error *error)
2558 {
2559 	uint64_t counters[2];
2560 	struct ibv_query_counter_set_attr query_cs_attr = {
2561 		.cs = cs,
2562 		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2563 	};
2564 	struct ibv_counter_set_data query_out = {
2565 		.out = counters,
2566 		.outlen = 2 * sizeof(uint64_t),
2567 	};
2568 	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2569 
2570 	if (err)
2571 		return rte_flow_error_set(error, err,
2572 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2573 					  NULL,
2574 					  "cannot read counter");
2575 	query_count->hits_set = 1;
2576 	query_count->bytes_set = 1;
2577 	query_count->hits = counters[0] - counter_stats->hits;
2578 	query_count->bytes = counters[1] - counter_stats->bytes;
2579 	if (query_count->reset) {
2580 		counter_stats->hits = counters[0];
2581 		counter_stats->bytes = counters[1];
2582 	}
2583 	return 0;
2584 }
2585 
2586 /**
2587  * Query a flows.
2588  * Query a flow.
2589  * @see rte_flow_query()
2590  * @see rte_flow_ops
2591  */
2592 int
2593 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
2594 		struct rte_flow *flow,
2595 		enum rte_flow_action_type action __rte_unused,
2596 		void *data,
2597 		struct rte_flow_error *error)
2598 {
2599 	if (flow->cs) {
2600 		int ret;
2601 
2602 		ret = mlx5_flow_query_count(flow->cs,
2603 					    &flow->counter_stats,
2604 					    (struct rte_flow_query_count *)data,
2605 					    error);
2606 		if (ret)
2607 			return ret;
2608 	} else {
2609 		return rte_flow_error_set(error, EINVAL,
2610 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2611 					  NULL,
2612 					  "no counter found for flow");
2613 	}
2614 	return 0;
2615 }
2616 #endif
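
/*
 * Illustrative sketch (assumption, not part of the driver): with counter
 * support compiled in, an application reads the statistics of a counted
 * flow through rte_flow_query(), whose action argument is an enum in this
 * DPDK revision, matching the callback above:
 *
 *   struct rte_flow_query_count stats = { .reset = 1 };
 *   struct rte_flow_error error;
 *
 *   if (!rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *                       &stats, &error) && stats.hits_set)
 *           use(stats.hits, stats.bytes);
 *
 * "port_id", "flow" and "use()" are placeholders; the flow must have been
 * created with a COUNT action for flow->cs to be set.
 */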
2617 
2618 /**
2619  * Isolated mode.
2620  *
2621  * @see rte_flow_isolate()
2622  * @see rte_flow_ops
2623  */
2624 int
2625 mlx5_flow_isolate(struct rte_eth_dev *dev,
2626 		  int enable,
2627 		  struct rte_flow_error *error)
2628 {
2629 	struct priv *priv = dev->data->dev_private;
2630 
2631 	if (dev->data->dev_started) {
2632 		rte_flow_error_set(error, EBUSY,
2633 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2634 				   NULL,
2635 				   "port must be stopped first");
2636 		return -rte_errno;
2637 	}
2638 	priv->isolated = !!enable;
2639 	if (enable)
2640 		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2641 	else
2642 		priv->dev->dev_ops = &mlx5_dev_ops;
2643 	return 0;
2644 }
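
/*
 * Illustrative sketch (not part of the driver): isolated mode must be
 * selected while the port is stopped, before rte_eth_dev_start():
 *
 *   struct rte_flow_error error;
 *
 *   if (rte_flow_isolate(port_id, 1, &error))
 *           rte_exit(EXIT_FAILURE, "cannot enable isolated mode: %s\n",
 *                    error.message ? error.message : "(no message)");
 *
 * Once enabled, only traffic matching explicitly created flow rules is
 * received, hence the switch to the reduced mlx5_dev_ops_isolate table.
 */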
2645 
2646 /**
2647  * Convert a flow director filter to a generic flow.
2648  *
2649  * @param dev
2650  *   Pointer to Ethernet device.
2651  * @param fdir_filter
2652  *   Flow director filter to convert.
2653  * @param attributes
2654  *   Generic flow parameters structure.
2655  *
2656  * @return
2657  *   0 on success, a negative errno value otherwise and rte_errno is set.
2658  */
2659 static int
2660 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2661 			 const struct rte_eth_fdir_filter *fdir_filter,
2662 			 struct mlx5_fdir *attributes)
2663 {
2664 	struct priv *priv = dev->data->dev_private;
2665 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
2666 	const struct rte_eth_fdir_masks *mask =
2667 		&dev->data->dev_conf.fdir_conf.mask;
2668 
2669 	/* Validate queue number. */
2670 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2671 		DRV_LOG(ERR, "port %u invalid queue number %d",
2672 			dev->data->port_id, fdir_filter->action.rx_queue);
2673 		rte_errno = EINVAL;
2674 		return -rte_errno;
2675 	}
2676 	attributes->attr.ingress = 1;
2677 	attributes->items[0] = (struct rte_flow_item) {
2678 		.type = RTE_FLOW_ITEM_TYPE_ETH,
2679 		.spec = &attributes->l2,
2680 		.mask = &attributes->l2_mask,
2681 	};
2682 	switch (fdir_filter->action.behavior) {
2683 	case RTE_ETH_FDIR_ACCEPT:
2684 		attributes->actions[0] = (struct rte_flow_action){
2685 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
2686 			.conf = &attributes->queue,
2687 		};
2688 		break;
2689 	case RTE_ETH_FDIR_REJECT:
2690 		attributes->actions[0] = (struct rte_flow_action){
2691 			.type = RTE_FLOW_ACTION_TYPE_DROP,
2692 		};
2693 		break;
2694 	default:
2695 		DRV_LOG(ERR, "port %u invalid behavior %d",
2696 			dev->data->port_id,
2697 			fdir_filter->action.behavior);
2698 		rte_errno = ENOTSUP;
2699 		return -rte_errno;
2700 	}
2701 	attributes->queue.index = fdir_filter->action.rx_queue;
2702 	/* Handle L3. */
2703 	switch (fdir_filter->input.flow_type) {
2704 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2705 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2706 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2707 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2708 			.src_addr = input->flow.ip4_flow.src_ip,
2709 			.dst_addr = input->flow.ip4_flow.dst_ip,
2710 			.time_to_live = input->flow.ip4_flow.ttl,
2711 			.type_of_service = input->flow.ip4_flow.tos,
2712 			.next_proto_id = input->flow.ip4_flow.proto,
2713 		};
2714 		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
2715 			.src_addr = mask->ipv4_mask.src_ip,
2716 			.dst_addr = mask->ipv4_mask.dst_ip,
2717 			.time_to_live = mask->ipv4_mask.ttl,
2718 			.type_of_service = mask->ipv4_mask.tos,
2719 			.next_proto_id = mask->ipv4_mask.proto,
2720 		};
2721 		attributes->items[1] = (struct rte_flow_item){
2722 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2723 			.spec = &attributes->l3,
2724 			.mask = &attributes->l3_mask,
2725 		};
2726 		break;
2727 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2728 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2729 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2730 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2731 			.hop_limits = input->flow.ipv6_flow.hop_limits,
2732 			.proto = input->flow.ipv6_flow.proto,
2733 		};
2734 
2735 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2736 		       input->flow.ipv6_flow.src_ip,
2737 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2738 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2739 		       input->flow.ipv6_flow.dst_ip,
2740 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2741 		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
2742 		       mask->ipv6_mask.src_ip,
2743 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
2744 		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
2745 		       mask->ipv6_mask.dst_ip,
2746 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
2747 		attributes->items[1] = (struct rte_flow_item){
2748 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2749 			.spec = &attributes->l3,
2750 			.mask = &attributes->l3_mask,
2751 		};
2752 		break;
2753 	default:
2754 		DRV_LOG(ERR, "port %u invalid flow type %d",
2755 			dev->data->port_id, fdir_filter->input.flow_type);
2756 		rte_errno = ENOTSUP;
2757 		return -rte_errno;
2758 	}
2759 	/* Handle L4. */
2760 	switch (fdir_filter->input.flow_type) {
2761 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2762 		attributes->l4.udp.hdr = (struct udp_hdr){
2763 			.src_port = input->flow.udp4_flow.src_port,
2764 			.dst_port = input->flow.udp4_flow.dst_port,
2765 		};
2766 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
2767 			.src_port = mask->src_port_mask,
2768 			.dst_port = mask->dst_port_mask,
2769 		};
2770 		attributes->items[2] = (struct rte_flow_item){
2771 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2772 			.spec = &attributes->l4,
2773 			.mask = &attributes->l4_mask,
2774 		};
2775 		break;
2776 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2777 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2778 			.src_port = input->flow.tcp4_flow.src_port,
2779 			.dst_port = input->flow.tcp4_flow.dst_port,
2780 		};
2781 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2782 			.src_port = mask->src_port_mask,
2783 			.dst_port = mask->dst_port_mask,
2784 		};
2785 		attributes->items[2] = (struct rte_flow_item){
2786 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2787 			.spec = &attributes->l4,
2788 			.mask = &attributes->l4_mask,
2789 		};
2790 		break;
2791 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2792 		attributes->l4.udp.hdr = (struct udp_hdr){
2793 			.src_port = input->flow.udp6_flow.src_port,
2794 			.dst_port = input->flow.udp6_flow.dst_port,
2795 		};
2796 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
2797 			.src_port = mask->src_port_mask,
2798 			.dst_port = mask->dst_port_mask,
2799 		};
2800 		attributes->items[2] = (struct rte_flow_item){
2801 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2802 			.spec = &attributes->l4,
2803 			.mask = &attributes->l4_mask,
2804 		};
2805 		break;
2806 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2807 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2808 			.src_port = input->flow.tcp6_flow.src_port,
2809 			.dst_port = input->flow.tcp6_flow.dst_port,
2810 		};
2811 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2812 			.src_port = mask->src_port_mask,
2813 			.dst_port = mask->dst_port_mask,
2814 		};
2815 		attributes->items[2] = (struct rte_flow_item){
2816 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2817 			.spec = &attributes->l4,
2818 			.mask = &attributes->l4_mask,
2819 		};
2820 		break;
2821 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2822 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2823 		break;
2824 	default:
2825 		DRV_LOG(ERR, "port %u invalid flow type %d",
2826 			dev->data->port_id, fdir_filter->input.flow_type);
2827 		rte_errno = ENOTSUP;
2828 		return -rte_errno;
2829 	}
2830 	return 0;
2831 }
2832 
2833 /**
2834  * Add new flow director filter and store it in list.
2835  *
2836  * @param dev
2837  *   Pointer to Ethernet device.
2838  * @param fdir_filter
2839  *   Flow director filter to add.
2840  *
2841  * @return
2842  *   0 on success, a negative errno value otherwise and rte_errno is set.
2843  */
2844 static int
2845 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
2846 		     const struct rte_eth_fdir_filter *fdir_filter)
2847 {
2848 	struct priv *priv = dev->data->dev_private;
2849 	struct mlx5_fdir attributes = {
2850 		.attr.group = 0,
2851 		.l2_mask = {
2852 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2853 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2854 			.type = 0,
2855 		},
2856 	};
2857 	struct mlx5_flow_parse parser = {
2858 		.layer = HASH_RXQ_ETH,
2859 	};
2860 	struct rte_flow_error error;
2861 	struct rte_flow *flow;
2862 	int ret;
2863 
2864 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2865 	if (ret)
2866 		return ret;
2867 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2868 				attributes.actions, &error, &parser);
2869 	if (ret)
2870 		return ret;
2871 	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
2872 				     attributes.items, attributes.actions,
2873 				     &error);
2874 	if (flow) {
2875 		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
2876 			(void *)flow);
2877 		return 0;
2878 	}
2879 	return -rte_errno;
2880 }
2881 
2882 /**
2883  * Delete specific filter.
2884  *
2885  * @param dev
2886  *   Pointer to Ethernet device.
2887  * @param fdir_filter
2888  *   Filter to be deleted.
2889  *
2890  * @return
2891  *   0 on success, a negative errno value otherwise and rte_errno is set.
2892  */
2893 static int
2894 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
2895 			const struct rte_eth_fdir_filter *fdir_filter)
2896 {
2897 	struct priv *priv = dev->data->dev_private;
2898 	struct mlx5_fdir attributes = {
2899 		.attr.group = 0,
2900 	};
2901 	struct mlx5_flow_parse parser = {
2902 		.create = 1,
2903 		.layer = HASH_RXQ_ETH,
2904 	};
2905 	struct rte_flow_error error;
2906 	struct rte_flow *flow;
2907 	unsigned int i;
2908 	int ret;
2909 
2910 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2911 	if (ret)
2912 		return ret;
2913 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2914 				attributes.actions, &error, &parser);
2915 	if (ret)
2916 		goto exit;
2917 	/*
2918 	 * Special case for the drop action: its specification is only
2919 	 * appended when a flow is actually created, so it is missing here
2920 	 * and must be re-added before comparing against existing flows.
2921 	 */
2922 	if (parser.drop) {
2923 		struct ibv_flow_spec_action_drop *drop;
2924 
2925 		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2926 				parser.queue[HASH_RXQ_ETH].offset);
2927 		*drop = (struct ibv_flow_spec_action_drop){
2928 			.type = IBV_FLOW_SPEC_ACTION_DROP,
2929 			.size = sizeof(struct ibv_flow_spec_action_drop),
2930 		};
2931 		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2932 	}
2933 	TAILQ_FOREACH(flow, &priv->flows, next) {
2934 		struct ibv_flow_attr *attr;
2935 		struct ibv_spec_header *attr_h;
2936 		void *spec;
2937 		struct ibv_flow_attr *flow_attr;
2938 		struct ibv_spec_header *flow_h;
2939 		void *flow_spec;
2940 		unsigned int specs_n;
2941 
2942 		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2943 		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2944 		/* Compare first the attributes. */
2945 		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2946 			continue;
2947 		if (attr->num_of_specs == 0)
2948 			continue;
2949 		spec = (void *)((uintptr_t)attr +
2950 				sizeof(struct ibv_flow_attr));
2951 		flow_spec = (void *)((uintptr_t)flow_attr +
2952 				     sizeof(struct ibv_flow_attr));
2953 		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2954 		for (i = 0; i != specs_n; ++i) {
2955 			attr_h = spec;
2956 			flow_h = flow_spec;
2957 			if (memcmp(spec, flow_spec,
2958 				   RTE_MIN(attr_h->size, flow_h->size)))
2959 				goto wrong_flow;
2960 			spec = (void *)((uintptr_t)spec + attr_h->size);
2961 			flow_spec = (void *)((uintptr_t)flow_spec +
2962 					     flow_h->size);
2963 		}
2964 		/* At this point, the flows match. */
2965 		break;
2966 wrong_flow:
2967 		/* The flow does not match. */
2968 		continue;
2969 	}
2970 	ret = rte_errno; /* Save rte_errno before cleanup. */
2971 	if (flow)
2972 		mlx5_flow_list_destroy(dev, &priv->flows, flow);
2973 exit:
2974 	for (i = 0; i != hash_rxq_init_n; ++i) {
2975 		if (parser.queue[i].ibv_attr)
2976 			rte_free(parser.queue[i].ibv_attr);
2977 	}
2978 	rte_errno = ret; /* Restore rte_errno. */
2979 	return -rte_errno;
2980 }
2981 
2982 /**
2983  * Update queue for specific filter.
2984  * Update a flow director filter (delete then re-add it).
2985  * @param dev
2986  *   Pointer to Ethernet device.
2987  * @param fdir_filter
2988  *   Filter to be updated.
2989  *
2990  * @return
2991  *   0 on success, a negative errno value otherwise and rte_errno is set.
2992  */
2993 static int
2994 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
2995 			const struct rte_eth_fdir_filter *fdir_filter)
2996 {
2997 	int ret;
2998 
2999 	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3000 	if (ret)
3001 		return ret;
3002 	return mlx5_fdir_filter_add(dev, fdir_filter);
3003 }
3004 
3005 /**
3006  * Flush all filters.
3007  *
3008  * @param dev
3009  *   Pointer to Ethernet device.
3010  */
3011 static void
3012 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3013 {
3014 	struct priv *priv = dev->data->dev_private;
3015 
3016 	mlx5_flow_list_flush(dev, &priv->flows);
3017 }
3018 
3019 /**
3020  * Get flow director information.
3021  *
3022  * @param dev
3023  *   Pointer to Ethernet device.
3024  * @param[out] fdir_info
3025  *   Resulting flow director information.
3026  */
3027 static void
3028 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3029 {
3030 	struct priv *priv = dev->data->dev_private;
3031 	struct rte_eth_fdir_masks *mask =
3032 		&priv->dev->data->dev_conf.fdir_conf.mask;
3033 
3034 	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3035 	fdir_info->guarant_spc = 0;
3036 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3037 	fdir_info->max_flexpayload = 0;
3038 	fdir_info->flow_types_mask[0] = 0;
3039 	fdir_info->flex_payload_unit = 0;
3040 	fdir_info->max_flex_payload_segment_num = 0;
3041 	fdir_info->flex_payload_limit = 0;
3042 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3043 }
3044 
3045 /**
3046  * Deal with flow director operations.
3047  *
3048  * @param dev
3049  *   Pointer to Ethernet device.
3050  * @param filter_op
3051  *   Operation to perform.
3052  * @param arg
3053  *   Pointer to operation-specific structure.
3054  *
3055  * @return
3056  *   0 on success, a negative errno value otherwise and rte_errno is set.
3057  */
3058 static int
3059 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3060 		    void *arg)
3061 {
3062 	struct priv *priv = dev->data->dev_private;
3063 	enum rte_fdir_mode fdir_mode =
3064 		priv->dev->data->dev_conf.fdir_conf.mode;
3065 
3066 	if (filter_op == RTE_ETH_FILTER_NOP)
3067 		return 0;
3068 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3069 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3070 		DRV_LOG(ERR, "port %u flow director mode %d not supported",
3071 			dev->data->port_id, fdir_mode);
3072 		rte_errno = EINVAL;
3073 		return -rte_errno;
3074 	}
3075 	switch (filter_op) {
3076 	case RTE_ETH_FILTER_ADD:
3077 		return mlx5_fdir_filter_add(dev, arg);
3078 	case RTE_ETH_FILTER_UPDATE:
3079 		return mlx5_fdir_filter_update(dev, arg);
3080 	case RTE_ETH_FILTER_DELETE:
3081 		return mlx5_fdir_filter_delete(dev, arg);
3082 	case RTE_ETH_FILTER_FLUSH:
3083 		mlx5_fdir_filter_flush(dev);
3084 		break;
3085 	case RTE_ETH_FILTER_INFO:
3086 		mlx5_fdir_info_get(dev, arg);
3087 		break;
3088 	default:
3089 		DRV_LOG(DEBUG, "port %u unknown operation %u",
3090 			dev->data->port_id, filter_op);
3091 		rte_errno = EINVAL;
3092 		return -rte_errno;
3093 	}
3094 	return 0;
3095 }
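
/*
 * Illustrative sketch (assumptions: perfect mode is configured in
 * dev_conf.fdir_conf and Rx queue 3 exists; not part of the driver). A
 * legacy flow director request reaching mlx5_fdir_ctrl_func() could be:
 *
 *   struct rte_eth_fdir_filter filter = {
 *           .input = {
 *                   .flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *                   .flow.udp4_flow.dst_port = rte_cpu_to_be_16(4789),
 *           },
 *           .action = {
 *                   .behavior = RTE_ETH_FDIR_ACCEPT,
 *                   .rx_queue = 3,
 *           },
 *   };
 *
 *   rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                           RTE_ETH_FILTER_ADD, &filter);
 *
 * The filter is translated by mlx5_fdir_filter_convert() into generic
 * rte_flow items/actions and instantiated via mlx5_flow_list_create().
 */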
3096 
3097 /**
3098  * Manage filter operations.
3099  *
3100  * @param dev
3101  *   Pointer to Ethernet device structure.
3102  * @param filter_type
3103  *   Filter type.
3104  * @param filter_op
3105  *   Operation to perform.
3106  * @param arg
3107  *   Pointer to operation-specific structure.
3108  *
3109  * @return
3110  *   0 on success, a negative errno value otherwise and rte_errno is set.
3111  */
3112 int
3113 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3114 		     enum rte_filter_type filter_type,
3115 		     enum rte_filter_op filter_op,
3116 		     void *arg)
3117 {
3118 	switch (filter_type) {
3119 	case RTE_ETH_FILTER_GENERIC:
3120 		if (filter_op != RTE_ETH_FILTER_GET) {
3121 			rte_errno = EINVAL;
3122 			return -rte_errno;
3123 		}
3124 		*(const void **)arg = &mlx5_flow_ops;
3125 		return 0;
3126 	case RTE_ETH_FILTER_FDIR:
3127 		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3128 	default:
3129 		DRV_LOG(ERR, "port %u filter type (%d) not supported",
3130 			dev->data->port_id, filter_type);
3131 		rte_errno = ENOTSUP;
3132 		return -rte_errno;
3133 	}
3134 	return 0;
3135 }
3136