xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision 76e9a55b5b8248f5b48d3819a7c878fa11d68726)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9 
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19 
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28 
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33 
34 /* Define minimal priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 4
36 
37 /* Internet Protocol versions. */
38 #define MLX5_IPV4 4
39 #define MLX5_IPV6 6
40 
41 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
42 struct ibv_flow_spec_counter_action {
43 	int dummy;
44 };
45 #endif
46 
47 /* Dev ops structure defined in mlx5.c */
48 extern const struct eth_dev_ops mlx5_dev_ops;
49 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
50 
51 /** Structure given to the conversion functions. */
52 struct mlx5_flow_data {
53 	struct mlx5_flow_parse *parser; /**< Parser context. */
54 	struct rte_flow_error *error; /**< Error context. */
55 };
56 
57 static int
58 mlx5_flow_create_eth(const struct rte_flow_item *item,
59 		     const void *default_mask,
60 		     struct mlx5_flow_data *data);
61 
62 static int
63 mlx5_flow_create_vlan(const struct rte_flow_item *item,
64 		      const void *default_mask,
65 		      struct mlx5_flow_data *data);
66 
67 static int
68 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
69 		      const void *default_mask,
70 		      struct mlx5_flow_data *data);
71 
72 static int
73 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
74 		      const void *default_mask,
75 		      struct mlx5_flow_data *data);
76 
77 static int
78 mlx5_flow_create_udp(const struct rte_flow_item *item,
79 		     const void *default_mask,
80 		     struct mlx5_flow_data *data);
81 
82 static int
83 mlx5_flow_create_tcp(const struct rte_flow_item *item,
84 		     const void *default_mask,
85 		     struct mlx5_flow_data *data);
86 
87 static int
88 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
89 		       const void *default_mask,
90 		       struct mlx5_flow_data *data);
91 
92 struct mlx5_flow_parse;
93 
94 static void
95 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
96 		      unsigned int size);
97 
98 static int
99 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
100 
101 static int
102 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
103 
104 /* Hash RX queue types. */
105 enum hash_rxq_type {
106 	HASH_RXQ_TCPV4,
107 	HASH_RXQ_UDPV4,
108 	HASH_RXQ_IPV4,
109 	HASH_RXQ_TCPV6,
110 	HASH_RXQ_UDPV6,
111 	HASH_RXQ_IPV6,
112 	HASH_RXQ_ETH,
113 };
114 
115 /* Initialization data for hash RX queue. */
116 struct hash_rxq_init {
117 	uint64_t hash_fields; /* Fields that participate in the hash. */
118 	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
119 	unsigned int flow_priority; /* Flow priority to use. */
120 	unsigned int ip_version; /* Internet protocol. */
121 };
122 
123 /* Initialization data for hash RX queues. */
124 const struct hash_rxq_init hash_rxq_init[] = {
125 	[HASH_RXQ_TCPV4] = {
126 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
127 				IBV_RX_HASH_DST_IPV4 |
128 				IBV_RX_HASH_SRC_PORT_TCP |
129 				IBV_RX_HASH_DST_PORT_TCP),
130 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
131 		.flow_priority = 1,
132 		.ip_version = MLX5_IPV4,
133 	},
134 	[HASH_RXQ_UDPV4] = {
135 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
136 				IBV_RX_HASH_DST_IPV4 |
137 				IBV_RX_HASH_SRC_PORT_UDP |
138 				IBV_RX_HASH_DST_PORT_UDP),
139 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
140 		.flow_priority = 1,
141 		.ip_version = MLX5_IPV4,
142 	},
143 	[HASH_RXQ_IPV4] = {
144 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
145 				IBV_RX_HASH_DST_IPV4),
146 		.dpdk_rss_hf = (ETH_RSS_IPV4 |
147 				ETH_RSS_FRAG_IPV4),
148 		.flow_priority = 2,
149 		.ip_version = MLX5_IPV4,
150 	},
151 	[HASH_RXQ_TCPV6] = {
152 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
153 				IBV_RX_HASH_DST_IPV6 |
154 				IBV_RX_HASH_SRC_PORT_TCP |
155 				IBV_RX_HASH_DST_PORT_TCP),
156 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
157 		.flow_priority = 1,
158 		.ip_version = MLX5_IPV6,
159 	},
160 	[HASH_RXQ_UDPV6] = {
161 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
162 				IBV_RX_HASH_DST_IPV6 |
163 				IBV_RX_HASH_SRC_PORT_UDP |
164 				IBV_RX_HASH_DST_PORT_UDP),
165 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
166 		.flow_priority = 1,
167 		.ip_version = MLX5_IPV6,
168 	},
169 	[HASH_RXQ_IPV6] = {
170 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
171 				IBV_RX_HASH_DST_IPV6),
172 		.dpdk_rss_hf = (ETH_RSS_IPV6 |
173 				ETH_RSS_FRAG_IPV6),
174 		.flow_priority = 2,
175 		.ip_version = MLX5_IPV6,
176 	},
177 	[HASH_RXQ_ETH] = {
178 		.hash_fields = 0,
179 		.dpdk_rss_hf = 0,
180 		.flow_priority = 3,
181 	},
182 };
183 
184 /* Number of entries in hash_rxq_init[]. */
185 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
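/*
 * Illustrative sketch (not part of the driver): given the RSS types requested
 * through an rte_flow RSS action, the relevant entries of hash_rxq_init[] are
 * simply those whose dpdk_rss_hf bits intersect the request, which is how the
 * parser selects them below:
 *
 *	uint64_t types = ETH_RSS_NONFRAG_IPV4_UDP | ETH_RSS_IPV4;
 *	unsigned int i;
 *
 *	for (i = 0; i != hash_rxq_init_n; ++i)
 *		if (types & hash_rxq_init[i].dpdk_rss_hf)
 *			use_hash_rxq(i);
 *
 * Here HASH_RXQ_UDPV4 and HASH_RXQ_IPV4 would be kept; use_hash_rxq() is a
 * hypothetical placeholder.
 */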
186 
187 /** Structure for holding counter stats. */
188 struct mlx5_flow_counter_stats {
189 	uint64_t hits; /**< Number of packets matched by the rule. */
190 	uint64_t bytes; /**< Number of bytes matched by the rule. */
191 };
192 
193 /** Structure for Drop queue. */
194 struct mlx5_hrxq_drop {
195 	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
196 	struct ibv_qp *qp; /**< Verbs queue pair. */
197 	struct ibv_wq *wq; /**< Verbs work queue. */
198 	struct ibv_cq *cq; /**< Verbs completion queue. */
199 };
200 
201 /* Flows structures. */
202 struct mlx5_flow {
203 	uint64_t hash_fields; /**< Fields that participate in the hash. */
204 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
205 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
206 	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
207 };
208 
209 /* Drop flows structures. */
210 struct mlx5_flow_drop {
211 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
212 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
213 };
214 
215 struct rte_flow {
216 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
217 	uint32_t mark:1; /**< Set if the flow is marked. */
218 	uint32_t drop:1; /**< Drop queue. */
219 	struct rte_flow_action_rss rss_conf; /**< RSS configuration */
220 	uint16_t (*queues)[]; /**< Queues indexes to use. */
221 	uint8_t rss_key[40]; /**< copy of the RSS key. */
222 	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
223 	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
224 	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
225 	/**< Flow with Rx queue. */
226 };
227 
228 /** Static initializer for items. */
229 #define ITEMS(...) \
230 	(const enum rte_flow_item_type []){ \
231 		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
232 	}
233 
234 /** Structure to generate a simple graph of layers supported by the NIC. */
235 struct mlx5_flow_items {
236 	/** List of possible actions for these items. */
237 	const enum rte_flow_action_type *const actions;
238 	/** Bit-masks corresponding to the possibilities for the item. */
239 	const void *mask;
240 	/**
241 	 * Default bit-masks to use when item->mask is not provided. When
242 	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
243 	 * used instead.
244 	 */
245 	const void *default_mask;
246 	/** Bit-masks size in bytes. */
247 	const unsigned int mask_sz;
248 	/**
249 	 * Conversion function from rte_flow to NIC specific flow.
250 	 *
251 	 * @param item
252 	 *   rte_flow item to convert.
253 	 * @param default_mask
254 	 *   Default bit-masks to use when item->mask is not provided.
255 	 * @param data
256 	 *   Internal structure to store the conversion.
257 	 *
258 	 * @return
259 	 *   0 on success, a negative errno value otherwise and rte_errno is
260 	 *   set.
261 	 */
262 	int (*convert)(const struct rte_flow_item *item,
263 		       const void *default_mask,
264 		       struct mlx5_flow_data *data);
265 	/** Size in bytes of the destination structure. */
266 	const unsigned int dst_sz;
267 	/** List of possible following items.  */
268 	const enum rte_flow_item_type *const items;
269 };
270 
271 /** Valid action for this PMD. */
272 static const enum rte_flow_action_type valid_actions[] = {
273 	RTE_FLOW_ACTION_TYPE_DROP,
274 	RTE_FLOW_ACTION_TYPE_QUEUE,
275 	RTE_FLOW_ACTION_TYPE_MARK,
276 	RTE_FLOW_ACTION_TYPE_FLAG,
277 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
278 	RTE_FLOW_ACTION_TYPE_COUNT,
279 #endif
280 	RTE_FLOW_ACTION_TYPE_END,
281 };
282 
283 /** Graph of supported items and associated actions. */
284 static const struct mlx5_flow_items mlx5_flow_items[] = {
285 	[RTE_FLOW_ITEM_TYPE_END] = {
286 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
287 			       RTE_FLOW_ITEM_TYPE_VXLAN),
288 	},
289 	[RTE_FLOW_ITEM_TYPE_ETH] = {
290 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
291 			       RTE_FLOW_ITEM_TYPE_IPV4,
292 			       RTE_FLOW_ITEM_TYPE_IPV6),
293 		.actions = valid_actions,
294 		.mask = &(const struct rte_flow_item_eth){
295 			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
296 			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
297 			.type = -1,
298 		},
299 		.default_mask = &rte_flow_item_eth_mask,
300 		.mask_sz = sizeof(struct rte_flow_item_eth),
301 		.convert = mlx5_flow_create_eth,
302 		.dst_sz = sizeof(struct ibv_flow_spec_eth),
303 	},
304 	[RTE_FLOW_ITEM_TYPE_VLAN] = {
305 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
306 			       RTE_FLOW_ITEM_TYPE_IPV6),
307 		.actions = valid_actions,
308 		.mask = &(const struct rte_flow_item_vlan){
309 			.tci = -1,
310 			.inner_type = -1,
311 		},
312 		.default_mask = &rte_flow_item_vlan_mask,
313 		.mask_sz = sizeof(struct rte_flow_item_vlan),
314 		.convert = mlx5_flow_create_vlan,
315 		.dst_sz = 0,
316 	},
317 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
318 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
319 			       RTE_FLOW_ITEM_TYPE_TCP),
320 		.actions = valid_actions,
321 		.mask = &(const struct rte_flow_item_ipv4){
322 			.hdr = {
323 				.src_addr = -1,
324 				.dst_addr = -1,
325 				.type_of_service = -1,
326 				.next_proto_id = -1,
327 			},
328 		},
329 		.default_mask = &rte_flow_item_ipv4_mask,
330 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
331 		.convert = mlx5_flow_create_ipv4,
332 		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
333 	},
334 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
335 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
336 			       RTE_FLOW_ITEM_TYPE_TCP),
337 		.actions = valid_actions,
338 		.mask = &(const struct rte_flow_item_ipv6){
339 			.hdr = {
340 				.src_addr = {
341 					0xff, 0xff, 0xff, 0xff,
342 					0xff, 0xff, 0xff, 0xff,
343 					0xff, 0xff, 0xff, 0xff,
344 					0xff, 0xff, 0xff, 0xff,
345 				},
346 				.dst_addr = {
347 					0xff, 0xff, 0xff, 0xff,
348 					0xff, 0xff, 0xff, 0xff,
349 					0xff, 0xff, 0xff, 0xff,
350 					0xff, 0xff, 0xff, 0xff,
351 				},
352 				.vtc_flow = -1,
353 				.proto = -1,
354 				.hop_limits = -1,
355 			},
356 		},
357 		.default_mask = &rte_flow_item_ipv6_mask,
358 		.mask_sz = sizeof(struct rte_flow_item_ipv6),
359 		.convert = mlx5_flow_create_ipv6,
360 		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
361 	},
362 	[RTE_FLOW_ITEM_TYPE_UDP] = {
363 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
364 		.actions = valid_actions,
365 		.mask = &(const struct rte_flow_item_udp){
366 			.hdr = {
367 				.src_port = -1,
368 				.dst_port = -1,
369 			},
370 		},
371 		.default_mask = &rte_flow_item_udp_mask,
372 		.mask_sz = sizeof(struct rte_flow_item_udp),
373 		.convert = mlx5_flow_create_udp,
374 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
375 	},
376 	[RTE_FLOW_ITEM_TYPE_TCP] = {
377 		.actions = valid_actions,
378 		.mask = &(const struct rte_flow_item_tcp){
379 			.hdr = {
380 				.src_port = -1,
381 				.dst_port = -1,
382 			},
383 		},
384 		.default_mask = &rte_flow_item_tcp_mask,
385 		.mask_sz = sizeof(struct rte_flow_item_tcp),
386 		.convert = mlx5_flow_create_tcp,
387 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
388 	},
389 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
390 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
391 		.actions = valid_actions,
392 		.mask = &(const struct rte_flow_item_vxlan){
393 			.vni = "\xff\xff\xff",
394 		},
395 		.default_mask = &rte_flow_item_vxlan_mask,
396 		.mask_sz = sizeof(struct rte_flow_item_vxlan),
397 		.convert = mlx5_flow_create_vxlan,
398 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
399 	},
400 };
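/*
 * Illustrative sketch (not part of the driver): patterns are validated by
 * walking this graph from the RTE_FLOW_ITEM_TYPE_END entry, each item having
 * to appear in the .items list of its predecessor. For instance:
 *
 *	eth / ipv4 / udp / vxlan / eth	is accepted
 *	(END -> ETH -> IPV4 -> UDP -> VXLAN -> ETH),
 *
 *	eth / udp			is rejected
 *	(UDP is not listed after ETH).
 */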
401 
402 /** Structure to pass to the conversion function. */
403 struct mlx5_flow_parse {
404 	uint32_t inner; /**< Set once VXLAN is encountered. */
405 	uint32_t create:1;
406 	/**< Whether resources should remain after a validate. */
407 	uint32_t drop:1; /**< Target is a drop queue. */
408 	uint32_t mark:1; /**< Mark is present in the flow. */
409 	uint32_t count:1; /**< Count is present in the flow. */
410 	uint32_t mark_id; /**< Mark identifier. */
411 	struct rte_flow_action_rss rss_conf; /**< RSS configuration */
412 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
413 	uint8_t rss_key[40]; /**< copy of the RSS key. */
414 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
415 	struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
416 	struct {
417 		struct ibv_flow_attr *ibv_attr;
418 		/**< Pointer to Verbs attributes. */
419 		unsigned int offset;
420 		/**< Current position or total size of the attribute. */
421 	} queue[RTE_DIM(hash_rxq_init)];
422 };
423 
424 static const struct rte_flow_ops mlx5_flow_ops = {
425 	.validate = mlx5_flow_validate,
426 	.create = mlx5_flow_create,
427 	.destroy = mlx5_flow_destroy,
428 	.flush = mlx5_flow_flush,
429 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
430 	.query = mlx5_flow_query,
431 #else
432 	.query = NULL,
433 #endif
434 	.isolate = mlx5_flow_isolate,
435 };
436 
437 /* Convert FDIR request to Generic flow. */
438 struct mlx5_fdir {
439 	struct rte_flow_attr attr;
440 	struct rte_flow_action actions[2];
441 	struct rte_flow_item items[4];
442 	struct rte_flow_item_eth l2;
443 	struct rte_flow_item_eth l2_mask;
444 	union {
445 		struct rte_flow_item_ipv4 ipv4;
446 		struct rte_flow_item_ipv6 ipv6;
447 	} l3;
448 	union {
449 		struct rte_flow_item_ipv4 ipv4;
450 		struct rte_flow_item_ipv6 ipv6;
451 	} l3_mask;
452 	union {
453 		struct rte_flow_item_udp udp;
454 		struct rte_flow_item_tcp tcp;
455 	} l4;
456 	union {
457 		struct rte_flow_item_udp udp;
458 		struct rte_flow_item_tcp tcp;
459 	} l4_mask;
460 	struct rte_flow_action_queue queue;
461 };
462 
463 /* Verbs specification header. */
464 struct ibv_spec_header {
465 	enum ibv_flow_spec_type type;
466 	uint16_t size;
467 };
468 
469 /**
470  * Check support for a given item.
471  *
472  * @param item[in]
473  *   Item specification.
474  * @param mask[in]
475  *   Bit-masks covering supported fields to compare with spec, last and mask in
476  *   \item.
477  * @param size
478  *   Bit-mask size in bytes.
479  *
480  * @return
481  *   0 on success, a negative errno value otherwise and rte_errno is set.
482  */
483 static int
484 mlx5_flow_item_validate(const struct rte_flow_item *item,
485 			const uint8_t *mask, unsigned int size)
486 {
487 	if (!item->spec && (item->mask || item->last)) {
488 		rte_errno = EINVAL;
489 		return -rte_errno;
490 	}
491 	if (item->spec && !item->mask) {
492 		unsigned int i;
493 		const uint8_t *spec = item->spec;
494 
495 		for (i = 0; i < size; ++i)
496 			if ((spec[i] | mask[i]) != mask[i]) {
497 				rte_errno = EINVAL;
498 				return -rte_errno;
499 			}
500 	}
501 	if (item->last && !item->mask) {
502 		unsigned int i;
503 		const uint8_t *spec = item->last;
504 
505 		for (i = 0; i < size; ++i)
506 			if ((spec[i] | mask[i]) != mask[i]) {
507 				rte_errno = EINVAL;
508 				return -rte_errno;
509 			}
510 	}
511 	if (item->mask) {
512 		unsigned int i;
513 		const uint8_t *spec = item->spec;
514 
515 		for (i = 0; i < size; ++i)
516 			if ((spec[i] | mask[i]) != mask[i]) {
517 				rte_errno = EINVAL;
518 				return -rte_errno;
519 			}
520 	}
521 	if (item->spec && item->last) {
522 		uint8_t spec[size];
523 		uint8_t last[size];
524 		const uint8_t *apply = mask;
525 		unsigned int i;
526 		int ret;
527 
528 		if (item->mask)
529 			apply = item->mask;
530 		for (i = 0; i < size; ++i) {
531 			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
532 			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
533 		}
534 		ret = memcmp(spec, last, size);
535 		if (ret != 0) {
536 			rte_errno = EINVAL;
537 			return -rte_errno;
538 		}
539 	}
540 	return 0;
541 }
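/*
 * Illustrative sketch (not part of the driver): with the IPv4 mask declared
 * in mlx5_flow_items[] (source/destination addresses, ToS and protocol fully
 * maskable), an item whose spec only sets hdr.src_addr passes the checks
 * above, while one setting hdr.hdr_checksum fails because, for those bytes,
 * (spec[i] | mask[i]) != mask[i]. Also note that item->last is only accepted
 * when, once the applied mask is taken into account, it equals item->spec:
 * ranges are not supported by this implementation.
 */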
542 
543 /**
544  * Extract attribute to the parser.
545  *
546  * @param[in] attr
547  *   Flow rule attributes.
548  * @param[out] error
549  *   Perform verbose error reporting if not NULL.
550  *
551  * @return
552  *   0 on success, a negative errno value otherwise and rte_errno is set.
553  */
554 static int
555 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
556 			     struct rte_flow_error *error)
557 {
558 	if (attr->group) {
559 		rte_flow_error_set(error, ENOTSUP,
560 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
561 				   NULL,
562 				   "groups are not supported");
563 		return -rte_errno;
564 	}
565 	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
566 		rte_flow_error_set(error, ENOTSUP,
567 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
568 				   NULL,
569 				   "priorities are not supported");
570 		return -rte_errno;
571 	}
572 	if (attr->egress) {
573 		rte_flow_error_set(error, ENOTSUP,
574 				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
575 				   NULL,
576 				   "egress is not supported");
577 		return -rte_errno;
578 	}
579 	if (attr->transfer) {
580 		rte_flow_error_set(error, ENOTSUP,
581 				   RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
582 				   NULL,
583 				   "transfer is not supported");
584 		return -rte_errno;
585 	}
586 	if (!attr->ingress) {
587 		rte_flow_error_set(error, ENOTSUP,
588 				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
589 				   NULL,
590 				   "only ingress is supported");
591 		return -rte_errno;
592 	}
593 	return 0;
594 }
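/*
 * Illustrative sketch (not part of the driver): the only attributes accepted
 * by the function above describe an ingress rule in group 0 with the default
 * priority (or MLX5_CTRL_FLOW_PRIORITY), e.g.:
 *
 *	struct rte_flow_attr attr = {
 *		.group = 0,
 *		.priority = 0,
 *		.ingress = 1,
 *	};
 */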
595 
596 /**
597  * Extract actions request to the parser.
598  *
599  * @param dev
600  *   Pointer to Ethernet device.
601  * @param[in] actions
602  *   Associated actions (list terminated by the END action).
603  * @param[out] error
604  *   Perform verbose error reporting if not NULL.
605  * @param[in, out] parser
606  *   Internal parser structure.
607  *
608  * @return
609  *   0 on success, a negative errno value otherwise and rte_errno is set.
610  */
611 static int
612 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
613 			  const struct rte_flow_action actions[],
614 			  struct rte_flow_error *error,
615 			  struct mlx5_flow_parse *parser)
616 {
617 	enum { FATE = 1, MARK = 2, COUNT = 4, };
618 	uint32_t overlap = 0;
619 	struct priv *priv = dev->data->dev_private;
620 
621 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
622 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
623 			continue;
624 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
625 			if (overlap & FATE)
626 				goto exit_action_overlap;
627 			overlap |= FATE;
628 			parser->drop = 1;
629 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
630 			const struct rte_flow_action_queue *queue =
631 				(const struct rte_flow_action_queue *)
632 				actions->conf;
633 
634 			if (overlap & FATE)
635 				goto exit_action_overlap;
636 			overlap |= FATE;
637 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
638 				goto exit_action_not_supported;
639 			parser->queues[0] = queue->index;
640 			parser->rss_conf = (struct rte_flow_action_rss){
641 				.queue_num = 1,
642 				.queue = parser->queues,
643 			};
644 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
645 			const struct rte_flow_action_rss *rss =
646 				(const struct rte_flow_action_rss *)
647 				actions->conf;
648 			const uint8_t *rss_key;
649 			uint32_t rss_key_len;
650 			uint16_t n;
651 
652 			if (overlap & FATE)
653 				goto exit_action_overlap;
654 			overlap |= FATE;
655 			if (rss->func &&
656 			    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
657 				rte_flow_error_set(error, EINVAL,
658 						   RTE_FLOW_ERROR_TYPE_ACTION,
659 						   actions,
660 						   "the only supported RSS hash"
661 						   " function is Toeplitz");
662 				return -rte_errno;
663 			}
664 			if (rss->level) {
665 				rte_flow_error_set(error, EINVAL,
666 						   RTE_FLOW_ERROR_TYPE_ACTION,
667 						   actions,
668 						   "a nonzero RSS encapsulation"
669 						   " level is not supported");
670 				return -rte_errno;
671 			}
672 			if (rss->types & MLX5_RSS_HF_MASK) {
673 				rte_flow_error_set(error, EINVAL,
674 						   RTE_FLOW_ERROR_TYPE_ACTION,
675 						   actions,
676 						   "unsupported RSS type"
677 						   " requested");
678 				return -rte_errno;
679 			}
680 			if (rss->key_len) {
681 				rss_key_len = rss->key_len;
682 				rss_key = rss->key;
683 			} else {
684 				rss_key_len = rss_hash_default_key_len;
685 				rss_key = rss_hash_default_key;
686 			}
687 			if (rss_key_len != RTE_DIM(parser->rss_key)) {
688 				rte_flow_error_set(error, EINVAL,
689 						   RTE_FLOW_ERROR_TYPE_ACTION,
690 						   actions,
691 						   "RSS hash key must be"
692 						   " exactly 40 bytes long");
693 				return -rte_errno;
694 			}
695 			if (!rss->queue_num) {
696 				rte_flow_error_set(error, EINVAL,
697 						   RTE_FLOW_ERROR_TYPE_ACTION,
698 						   actions,
699 						   "no valid queues");
700 				return -rte_errno;
701 			}
702 			if (rss->queue_num > RTE_DIM(parser->queues)) {
703 				rte_flow_error_set(error, EINVAL,
704 						   RTE_FLOW_ERROR_TYPE_ACTION,
705 						   actions,
706 						   "too many queues for RSS"
707 						   " context");
708 				return -rte_errno;
709 			}
710 			for (n = 0; n < rss->queue_num; ++n) {
711 				if (rss->queue[n] >= priv->rxqs_n) {
712 					rte_flow_error_set(error, EINVAL,
713 						   RTE_FLOW_ERROR_TYPE_ACTION,
714 						   actions,
715 						   "queue id > number of"
716 						   " queues");
717 					return -rte_errno;
718 				}
719 			}
720 			parser->rss_conf = (struct rte_flow_action_rss){
721 				.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
722 				.level = 0,
723 				.types = rss->types,
724 				.key_len = rss_key_len,
725 				.queue_num = rss->queue_num,
726 				.key = memcpy(parser->rss_key, rss_key,
727 					      sizeof(*rss_key) * rss_key_len),
728 				.queue = memcpy(parser->queues, rss->queue,
729 						sizeof(*rss->queue) *
730 						rss->queue_num),
731 			};
732 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
733 			const struct rte_flow_action_mark *mark =
734 				(const struct rte_flow_action_mark *)
735 				actions->conf;
736 
737 			if (overlap & MARK)
738 				goto exit_action_overlap;
739 			overlap |= MARK;
740 			if (!mark) {
741 				rte_flow_error_set(error, EINVAL,
742 						   RTE_FLOW_ERROR_TYPE_ACTION,
743 						   actions,
744 						   "mark must be defined");
745 				return -rte_errno;
746 			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
747 				rte_flow_error_set(error, ENOTSUP,
748 						   RTE_FLOW_ERROR_TYPE_ACTION,
749 						   actions,
750 						   "mark must be between 0"
751 						   " and 16777199");
752 				return -rte_errno;
753 			}
754 			parser->mark = 1;
755 			parser->mark_id = mark->id;
756 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
757 			if (overlap & MARK)
758 				goto exit_action_overlap;
759 			overlap |= MARK;
760 			parser->mark = 1;
761 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
762 			   priv->config.flow_counter_en) {
763 			if (overlap & COUNT)
764 				goto exit_action_overlap;
765 			overlap |= COUNT;
766 			parser->count = 1;
767 		} else {
768 			goto exit_action_not_supported;
769 		}
770 	}
771 	/* When fate is unknown, drop traffic. */
772 	if (!(overlap & FATE))
773 		parser->drop = 1;
774 	if (parser->drop && parser->mark)
775 		parser->mark = 0;
776 	if (!parser->rss_conf.queue_num && !parser->drop) {
777 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
778 				   NULL, "no valid action");
779 		return -rte_errno;
780 	}
781 	return 0;
782 exit_action_not_supported:
783 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
784 			   actions, "action not supported");
785 	return -rte_errno;
786 exit_action_overlap:
787 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
788 			   actions, "overlapping actions are not supported");
789 	return -rte_errno;
790 }
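/*
 * Illustrative sketch (not part of the driver): an RSS action accepted by the
 * function above, assuming at least two configured Rx queues. A key, when
 * provided, must be exactly 40 bytes; with key_len == 0 the default Toeplitz
 * key is used instead:
 *
 *	static uint16_t queues[] = { 0, 1 };
 *	static const struct rte_flow_action_rss rss = {
 *		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *		.level = 0,
 *		.types = ETH_RSS_NONFRAG_IPV4_TCP,
 *		.key_len = 0,
 *		.key = NULL,
 *		.queue_num = 2,
 *		.queue = queues,
 *	};
 *	const struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 */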
791 
792 /**
793  * Validate items.
794  *
795  * @param[in] items
796  *   Pattern specification (list terminated by the END pattern item).
797  * @param[out] error
798  *   Perform verbose error reporting if not NULL.
799  * @param[in, out] parser
800  *   Internal parser structure.
801  *
802  * @return
803  *   0 on success, a negative errno value otherwise and rte_errno is set.
804  */
805 static int
806 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
807 				 struct rte_flow_error *error,
808 				 struct mlx5_flow_parse *parser)
809 {
810 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
811 	unsigned int i;
812 	int ret = 0;
813 
814 	/* Initialise the offsets to start after the Verbs attribute. */
815 	for (i = 0; i != hash_rxq_init_n; ++i)
816 		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
817 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
818 		const struct mlx5_flow_items *token = NULL;
819 		unsigned int n;
820 
821 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
822 			continue;
823 		for (i = 0;
824 		     cur_item->items &&
825 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
826 		     ++i) {
827 			if (cur_item->items[i] == items->type) {
828 				token = &mlx5_flow_items[items->type];
829 				break;
830 			}
831 		}
832 		if (!token) {
833 			ret = -ENOTSUP;
834 			goto exit_item_not_supported;
835 		}
836 		cur_item = token;
837 		ret = mlx5_flow_item_validate(items,
838 					      (const uint8_t *)cur_item->mask,
839 					      cur_item->mask_sz);
840 		if (ret)
841 			goto exit_item_not_supported;
842 		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
843 			if (parser->inner) {
844 				rte_flow_error_set(error, ENOTSUP,
845 						   RTE_FLOW_ERROR_TYPE_ITEM,
846 						   items,
847 						   "cannot recognize multiple"
848 						   " VXLAN encapsulations");
849 				return -rte_errno;
850 			}
851 			parser->inner = IBV_FLOW_SPEC_INNER;
852 		}
853 		if (parser->drop) {
854 			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
855 		} else {
856 			for (n = 0; n != hash_rxq_init_n; ++n)
857 				parser->queue[n].offset += cur_item->dst_sz;
858 		}
859 	}
860 	if (parser->drop) {
861 		parser->queue[HASH_RXQ_ETH].offset +=
862 			sizeof(struct ibv_flow_spec_action_drop);
863 	}
864 	if (parser->mark) {
865 		for (i = 0; i != hash_rxq_init_n; ++i)
866 			parser->queue[i].offset +=
867 				sizeof(struct ibv_flow_spec_action_tag);
868 	}
869 	if (parser->count) {
870 		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
871 
872 		for (i = 0; i != hash_rxq_init_n; ++i)
873 			parser->queue[i].offset += size;
874 	}
875 	return 0;
876 exit_item_not_supported:
877 	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
878 				  items, "item not supported");
879 }
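/*
 * Illustrative sketch (not part of the driver): for a non-drop rule matching
 * eth / ipv4 / udp with a mark action, each parser->queue[i].offset computed
 * by the function above ends up as:
 *
 *	sizeof(struct ibv_flow_attr) +
 *	sizeof(struct ibv_flow_spec_eth) +
 *	sizeof(struct ibv_flow_spec_ipv4_ext) +
 *	sizeof(struct ibv_flow_spec_tcp_udp) +
 *	sizeof(struct ibv_flow_spec_action_tag)
 *
 * mlx5_flow_convert() later uses these offsets, possibly grown by
 * mlx5_flow_convert_finalise(), as the allocation size of the Verbs
 * attributes.
 */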
880 
881 /**
882  * Allocate memory space to store verbs flow attributes.
883  *
884  * @param[in] size
885  *   Amount of bytes to allocate.
886  * @param[out] error
887  *   Perform verbose error reporting if not NULL.
888  *
889  * @return
890  *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
891  */
892 static struct ibv_flow_attr *
893 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
894 {
895 	struct ibv_flow_attr *ibv_attr;
896 
897 	ibv_attr = rte_calloc(__func__, 1, size, 0);
898 	if (!ibv_attr) {
899 		rte_flow_error_set(error, ENOMEM,
900 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
901 				   NULL,
902 				   "cannot allocate verbs spec attributes");
903 		return NULL;
904 	}
905 	return ibv_attr;
906 }
907 
908 /**
909  * Give inner packet matching a higher priority than the non-inner
910  * matching.
911  *
912  * @param[in, out] parser
913  *   Internal parser structure.
914  * @param attr
915  *   User flow attribute.
916  */
917 static void
918 mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
919 			  const struct rte_flow_attr *attr)
920 {
921 	unsigned int i;
922 
923 	if (parser->drop) {
924 		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
925 			attr->priority +
926 			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
927 		return;
928 	}
929 	for (i = 0; i != hash_rxq_init_n; ++i) {
930 		if (parser->queue[i].ibv_attr) {
931 			parser->queue[i].ibv_attr->priority =
932 				attr->priority +
933 				hash_rxq_init[i].flow_priority -
934 				(parser->inner ? 1 : 0);
935 		}
936 	}
937 }
938 
939 /**
940  * Finalise verbs flow attributes.
941  *
942  * @param[in, out] parser
943  *   Internal parser structure.
944  */
945 static void
946 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
947 {
948 	const unsigned int ipv4 =
949 		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
950 	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
951 	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
952 	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
953 	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
954 	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
955 	unsigned int i;
956 
957 	/* Remove any other flow not matching the pattern. */
958 	if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {
959 		for (i = 0; i != hash_rxq_init_n; ++i) {
960 			if (i == HASH_RXQ_ETH)
961 				continue;
962 			rte_free(parser->queue[i].ibv_attr);
963 			parser->queue[i].ibv_attr = NULL;
964 		}
965 		return;
966 	}
967 	if (parser->layer == HASH_RXQ_ETH) {
968 		goto fill;
969 	} else {
970 		/*
971 		 * This layer becomes useless as the pattern defines deeper
972 		 * layers.
973 		 */
974 		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
975 		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
976 	}
977 	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
978 	for (i = ohmin; i != (ohmax + 1); ++i) {
979 		if (!parser->queue[i].ibv_attr)
980 			continue;
981 		rte_free(parser->queue[i].ibv_attr);
982 		parser->queue[i].ibv_attr = NULL;
983 	}
984 	/* Remove impossible flow according to the RSS configuration. */
985 	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
986 	    parser->rss_conf.types) {
987 		/* Remove any other flow. */
988 		for (i = hmin; i != (hmax + 1); ++i) {
989 			if ((i == parser->layer) ||
990 			     (!parser->queue[i].ibv_attr))
991 				continue;
992 			rte_free(parser->queue[i].ibv_attr);
993 			parser->queue[i].ibv_attr = NULL;
994 		}
995 	} else if (!parser->queue[ip].ibv_attr) {
996 		/* No RSS is possible with the current configuration. */
997 		parser->rss_conf.queue_num = 1;
998 		return;
999 	}
1000 fill:
1001 	/*
1002 	 * Fill missing layers in verbs specifications, or compute the correct
1003 	 * offset to allocate the memory space for the attributes and
1004 	 * specifications.
1005 	 */
1006 	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1007 		union {
1008 			struct ibv_flow_spec_ipv4_ext ipv4;
1009 			struct ibv_flow_spec_ipv6 ipv6;
1010 			struct ibv_flow_spec_tcp_udp udp_tcp;
1011 		} specs;
1012 		void *dst;
1013 		uint16_t size;
1014 
1015 		if (i == parser->layer)
1016 			continue;
1017 		if (parser->layer == HASH_RXQ_ETH) {
1018 			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1019 				size = sizeof(struct ibv_flow_spec_ipv4_ext);
1020 				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1021 					.type = IBV_FLOW_SPEC_IPV4_EXT,
1022 					.size = size,
1023 				};
1024 			} else {
1025 				size = sizeof(struct ibv_flow_spec_ipv6);
1026 				specs.ipv6 = (struct ibv_flow_spec_ipv6){
1027 					.type = IBV_FLOW_SPEC_IPV6,
1028 					.size = size,
1029 				};
1030 			}
1031 			if (parser->queue[i].ibv_attr) {
1032 				dst = (void *)((uintptr_t)
1033 					       parser->queue[i].ibv_attr +
1034 					       parser->queue[i].offset);
1035 				memcpy(dst, &specs, size);
1036 				++parser->queue[i].ibv_attr->num_of_specs;
1037 			}
1038 			parser->queue[i].offset += size;
1039 		}
1040 		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1041 		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1042 			size = sizeof(struct ibv_flow_spec_tcp_udp);
1043 			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1044 				.type = ((i == HASH_RXQ_UDPV4 ||
1045 					  i == HASH_RXQ_UDPV6) ?
1046 					 IBV_FLOW_SPEC_UDP :
1047 					 IBV_FLOW_SPEC_TCP),
1048 				.size = size,
1049 			};
1050 			if (parser->queue[i].ibv_attr) {
1051 				dst = (void *)((uintptr_t)
1052 					       parser->queue[i].ibv_attr +
1053 					       parser->queue[i].offset);
1054 				memcpy(dst, &specs, size);
1055 				++parser->queue[i].ibv_attr->num_of_specs;
1056 			}
1057 			parser->queue[i].offset += size;
1058 		}
1059 	}
1060 }
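/*
 * Illustrative sketch (not part of the driver): for a pattern ending in
 * eth / ipv4 / tcp with .types = ETH_RSS_NONFRAG_IPV4_TCP, the final call to
 * this function releases the HASH_RXQ_ETH attribute (a deeper layer is
 * defined) and any attribute not matching the TCPv4 layer, leaving only the
 * HASH_RXQ_TCPV4 specification for the flow.
 */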
1061 
1062 /**
1063  * Validate and convert a flow supported by the NIC.
1064  *
1065  * @param dev
1066  *   Pointer to Ethernet device.
1067  * @param[in] attr
1068  *   Flow rule attributes.
1069  * @param[in] pattern
1070  *   Pattern specification (list terminated by the END pattern item).
1071  * @param[in] actions
1072  *   Associated actions (list terminated by the END action).
1073  * @param[out] error
1074  *   Perform verbose error reporting if not NULL.
1075  * @param[in, out] parser
1076  *   Internal parser structure.
1077  *
1078  * @return
1079  *   0 on success, a negative errno value otherwise and rte_errno is set.
1080  */
1081 static int
1082 mlx5_flow_convert(struct rte_eth_dev *dev,
1083 		  const struct rte_flow_attr *attr,
1084 		  const struct rte_flow_item items[],
1085 		  const struct rte_flow_action actions[],
1086 		  struct rte_flow_error *error,
1087 		  struct mlx5_flow_parse *parser)
1088 {
1089 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1090 	unsigned int i;
1091 	int ret;
1092 
1093 	/* First step. Validate the attributes, items and actions. */
1094 	*parser = (struct mlx5_flow_parse){
1095 		.create = parser->create,
1096 		.layer = HASH_RXQ_ETH,
1097 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
1098 	};
1099 	ret = mlx5_flow_convert_attributes(attr, error);
1100 	if (ret)
1101 		return ret;
1102 	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1103 	if (ret)
1104 		return ret;
1105 	ret = mlx5_flow_convert_items_validate(items, error, parser);
1106 	if (ret)
1107 		return ret;
1108 	mlx5_flow_convert_finalise(parser);
1109 	/*
1110 	 * Second step.
1111 	 * Allocate the memory space to store verbs specifications.
1112 	 */
1113 	if (parser->drop) {
1114 		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1115 
1116 		parser->queue[HASH_RXQ_ETH].ibv_attr =
1117 			mlx5_flow_convert_allocate(offset, error);
1118 		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1119 			goto exit_enomem;
1120 		parser->queue[HASH_RXQ_ETH].offset =
1121 			sizeof(struct ibv_flow_attr);
1122 	} else {
1123 		for (i = 0; i != hash_rxq_init_n; ++i) {
1124 			unsigned int offset;
1125 
1126 			if (!(parser->rss_conf.types &
1127 			      hash_rxq_init[i].dpdk_rss_hf) &&
1128 			    (i != HASH_RXQ_ETH))
1129 				continue;
1130 			offset = parser->queue[i].offset;
1131 			parser->queue[i].ibv_attr =
1132 				mlx5_flow_convert_allocate(offset, error);
1133 			if (!parser->queue[i].ibv_attr)
1134 				goto exit_enomem;
1135 			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1136 		}
1137 	}
1138 	/* Third step. Conversion parse, fill the specifications. */
1139 	parser->inner = 0;
1140 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1141 		struct mlx5_flow_data data = {
1142 			.parser = parser,
1143 			.error = error,
1144 		};
1145 
1146 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1147 			continue;
1148 		cur_item = &mlx5_flow_items[items->type];
1149 		ret = cur_item->convert(items,
1150 					(cur_item->default_mask ?
1151 					 cur_item->default_mask :
1152 					 cur_item->mask),
1153 					 &data);
1154 		if (ret)
1155 			goto exit_free;
1156 	}
1157 	if (parser->mark)
1158 		mlx5_flow_create_flag_mark(parser, parser->mark_id);
1159 	if (parser->count && parser->create) {
1160 		mlx5_flow_create_count(dev, parser);
1161 		if (!parser->cs)
1162 			goto exit_count_error;
1163 	}
1164 	/*
1165 	 * Last step. Complete missing specification to reach the RSS
1166 	 * configuration.
1167 	 */
1168 	if (!parser->drop)
1169 		mlx5_flow_convert_finalise(parser);
1170 	mlx5_flow_update_priority(parser, attr);
1171 exit_free:
1172 	/* When only validation is requested, release all the resources. */
1173 	if (!parser->create) {
1174 		for (i = 0; i != hash_rxq_init_n; ++i) {
1175 			if (parser->queue[i].ibv_attr) {
1176 				rte_free(parser->queue[i].ibv_attr);
1177 				parser->queue[i].ibv_attr = NULL;
1178 			}
1179 		}
1180 	}
1181 	return ret;
1182 exit_enomem:
1183 	for (i = 0; i != hash_rxq_init_n; ++i) {
1184 		if (parser->queue[i].ibv_attr) {
1185 			rte_free(parser->queue[i].ibv_attr);
1186 			parser->queue[i].ibv_attr = NULL;
1187 		}
1188 	}
1189 	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1190 			   NULL, "cannot allocate verbs spec attributes");
1191 	return -rte_errno;
1192 exit_count_error:
1193 	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1194 			   NULL, "cannot create counter");
1195 	return -rte_errno;
1196 }
1197 
1198 /**
1199  * Copy the created specification to each active Verbs flow attribute.
1200  *
1201  * @param parser
1202  *   Internal parser structure.
1203  * @param src
1204  *   Created specification.
1205  * @param size
1206  *   Size in bytes of the specification to copy.
1207  */
1208 static void
1209 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1210 		      unsigned int size)
1211 {
1212 	unsigned int i;
1213 	void *dst;
1214 
1215 	for (i = 0; i != hash_rxq_init_n; ++i) {
1216 		if (!parser->queue[i].ibv_attr)
1217 			continue;
1218 		/* Specification must be the same l3 type or none. */
1219 		if (parser->layer == HASH_RXQ_ETH ||
1220 		    (hash_rxq_init[parser->layer].ip_version ==
1221 		     hash_rxq_init[i].ip_version) ||
1222 		    (hash_rxq_init[i].ip_version == 0)) {
1223 			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1224 					parser->queue[i].offset);
1225 			memcpy(dst, src, size);
1226 			++parser->queue[i].ibv_attr->num_of_specs;
1227 			parser->queue[i].offset += size;
1228 		}
1229 	}
1230 }
1231 
1232 /**
1233  * Convert Ethernet item to Verbs specification.
1234  *
1235  * @param item[in]
1236  *   Item specification.
1237  * @param default_mask[in]
1238  *   Default bit-masks to use when item->mask is not provided.
1239  * @param data[in, out]
1240  *   User structure.
1241  *
1242  * @return
1243  *   0 on success, a negative errno value otherwise and rte_errno is set.
1244  */
1245 static int
1246 mlx5_flow_create_eth(const struct rte_flow_item *item,
1247 		     const void *default_mask,
1248 		     struct mlx5_flow_data *data)
1249 {
1250 	const struct rte_flow_item_eth *spec = item->spec;
1251 	const struct rte_flow_item_eth *mask = item->mask;
1252 	struct mlx5_flow_parse *parser = data->parser;
1253 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1254 	struct ibv_flow_spec_eth eth = {
1255 		.type = parser->inner | IBV_FLOW_SPEC_ETH,
1256 		.size = eth_size,
1257 	};
1258 
1259 	/* Don't update layer for the inner pattern. */
1260 	if (!parser->inner)
1261 		parser->layer = HASH_RXQ_ETH;
1262 	if (spec) {
1263 		unsigned int i;
1264 
1265 		if (!mask)
1266 			mask = default_mask;
1267 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1268 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1269 		eth.val.ether_type = spec->type;
1270 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1271 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1272 		eth.mask.ether_type = mask->type;
1273 		/* Remove unwanted bits from values. */
1274 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1275 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1276 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
1277 		}
1278 		eth.val.ether_type &= eth.mask.ether_type;
1279 	}
1280 	mlx5_flow_create_copy(parser, &eth, eth_size);
1281 	return 0;
1282 }
1283 
1284 /**
1285  * Convert VLAN item to Verbs specification.
1286  *
1287  * @param item[in]
1288  *   Item specification.
1289  * @param default_mask[in]
1290  *   Default bit-masks to use when item->mask is not provided.
1291  * @param data[in, out]
1292  *   User structure.
1293  *
1294  * @return
1295  *   0 on success, a negative errno value otherwise and rte_errno is set.
1296  */
1297 static int
1298 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1299 		      const void *default_mask,
1300 		      struct mlx5_flow_data *data)
1301 {
1302 	const struct rte_flow_item_vlan *spec = item->spec;
1303 	const struct rte_flow_item_vlan *mask = item->mask;
1304 	struct mlx5_flow_parse *parser = data->parser;
1305 	struct ibv_flow_spec_eth *eth;
1306 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1307 	const char *msg = "VLAN cannot be empty";
1308 
1309 	if (spec) {
1310 		unsigned int i;
1311 		if (!mask)
1312 			mask = default_mask;
1313 
1314 		for (i = 0; i != hash_rxq_init_n; ++i) {
1315 			if (!parser->queue[i].ibv_attr)
1316 				continue;
1317 
1318 			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1319 				       parser->queue[i].offset - eth_size);
1320 			eth->val.vlan_tag = spec->tci;
1321 			eth->mask.vlan_tag = mask->tci;
1322 			eth->val.vlan_tag &= eth->mask.vlan_tag;
1323 			/*
1324 			 * From the Verbs perspective, an empty VLAN is
1325 			 * equivalent to a packet without a VLAN layer.
1326 			 */
1327 			if (!eth->mask.vlan_tag)
1328 				goto error;
1329 			/* Outer TPID cannot be matched. */
1330 			if (eth->mask.ether_type) {
1331 				msg = "VLAN TPID matching is not supported";
1332 				goto error;
1333 			}
1334 			eth->val.ether_type = spec->inner_type;
1335 			eth->mask.ether_type = mask->inner_type;
1336 			eth->val.ether_type &= eth->mask.ether_type;
1337 		}
1338 		return 0;
1339 	}
1340 error:
1341 	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1342 				  item, msg);
1343 }
1344 
1345 /**
1346  * Convert IPv4 item to Verbs specification.
1347  *
1348  * @param item[in]
1349  *   Item specification.
1350  * @param default_mask[in]
1351  *   Default bit-masks to use when item->mask is not provided.
1352  * @param data[in, out]
1353  *   User structure.
1354  *
1355  * @return
1356  *   0 on success, a negative errno value otherwise and rte_errno is set.
1357  */
1358 static int
1359 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1360 		      const void *default_mask,
1361 		      struct mlx5_flow_data *data)
1362 {
1363 	const struct rte_flow_item_ipv4 *spec = item->spec;
1364 	const struct rte_flow_item_ipv4 *mask = item->mask;
1365 	struct mlx5_flow_parse *parser = data->parser;
1366 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1367 	struct ibv_flow_spec_ipv4_ext ipv4 = {
1368 		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1369 		.size = ipv4_size,
1370 	};
1371 
1372 	/* Don't update layer for the inner pattern. */
1373 	if (!parser->inner)
1374 		parser->layer = HASH_RXQ_IPV4;
1375 	if (spec) {
1376 		if (!mask)
1377 			mask = default_mask;
1378 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1379 			.src_ip = spec->hdr.src_addr,
1380 			.dst_ip = spec->hdr.dst_addr,
1381 			.proto = spec->hdr.next_proto_id,
1382 			.tos = spec->hdr.type_of_service,
1383 		};
1384 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1385 			.src_ip = mask->hdr.src_addr,
1386 			.dst_ip = mask->hdr.dst_addr,
1387 			.proto = mask->hdr.next_proto_id,
1388 			.tos = mask->hdr.type_of_service,
1389 		};
1390 		/* Remove unwanted bits from values. */
1391 		ipv4.val.src_ip &= ipv4.mask.src_ip;
1392 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1393 		ipv4.val.proto &= ipv4.mask.proto;
1394 		ipv4.val.tos &= ipv4.mask.tos;
1395 	}
1396 	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1397 	return 0;
1398 }
1399 
1400 /**
1401  * Convert IPv6 item to Verbs specification.
1402  *
1403  * @param item[in]
1404  *   Item specification.
1405  * @param default_mask[in]
1406  *   Default bit-masks to use when item->mask is not provided.
1407  * @param data[in, out]
1408  *   User structure.
1409  *
1410  * @return
1411  *   0 on success, a negative errno value otherwise and rte_errno is set.
1412  */
1413 static int
1414 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1415 		      const void *default_mask,
1416 		      struct mlx5_flow_data *data)
1417 {
1418 	const struct rte_flow_item_ipv6 *spec = item->spec;
1419 	const struct rte_flow_item_ipv6 *mask = item->mask;
1420 	struct mlx5_flow_parse *parser = data->parser;
1421 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1422 	struct ibv_flow_spec_ipv6 ipv6 = {
1423 		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
1424 		.size = ipv6_size,
1425 	};
1426 
1427 	/* Don't update layer for the inner pattern. */
1428 	if (!parser->inner)
1429 		parser->layer = HASH_RXQ_IPV6;
1430 	if (spec) {
1431 		unsigned int i;
1432 		uint32_t vtc_flow_val;
1433 		uint32_t vtc_flow_mask;
1434 
1435 		if (!mask)
1436 			mask = default_mask;
1437 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1438 		       RTE_DIM(ipv6.val.src_ip));
1439 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1440 		       RTE_DIM(ipv6.val.dst_ip));
1441 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1442 		       RTE_DIM(ipv6.mask.src_ip));
1443 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1444 		       RTE_DIM(ipv6.mask.dst_ip));
1445 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1446 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1447 		ipv6.val.flow_label =
1448 			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1449 					 IPV6_HDR_FL_SHIFT);
1450 		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1451 					 IPV6_HDR_TC_SHIFT;
1452 		ipv6.val.next_hdr = spec->hdr.proto;
1453 		ipv6.val.hop_limit = spec->hdr.hop_limits;
1454 		ipv6.mask.flow_label =
1455 			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1456 					 IPV6_HDR_FL_SHIFT);
1457 		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1458 					  IPV6_HDR_TC_SHIFT;
1459 		ipv6.mask.next_hdr = mask->hdr.proto;
1460 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
1461 		/* Remove unwanted bits from values. */
1462 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1463 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1464 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1465 		}
1466 		ipv6.val.flow_label &= ipv6.mask.flow_label;
1467 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1468 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1469 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1470 	}
1471 	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1472 	return 0;
1473 }
1474 
1475 /**
1476  * Convert UDP item to Verbs specification.
1477  *
1478  * @param item[in]
1479  *   Item specification.
1480  * @param default_mask[in]
1481  *   Default bit-masks to use when item->mask is not provided.
1482  * @param data[in, out]
1483  *   User structure.
1484  *
1485  * @return
1486  *   0 on success, a negative errno value otherwise and rte_errno is set.
1487  */
1488 static int
1489 mlx5_flow_create_udp(const struct rte_flow_item *item,
1490 		     const void *default_mask,
1491 		     struct mlx5_flow_data *data)
1492 {
1493 	const struct rte_flow_item_udp *spec = item->spec;
1494 	const struct rte_flow_item_udp *mask = item->mask;
1495 	struct mlx5_flow_parse *parser = data->parser;
1496 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1497 	struct ibv_flow_spec_tcp_udp udp = {
1498 		.type = parser->inner | IBV_FLOW_SPEC_UDP,
1499 		.size = udp_size,
1500 	};
1501 
1502 	/* Don't update layer for the inner pattern. */
1503 	if (!parser->inner) {
1504 		if (parser->layer == HASH_RXQ_IPV4)
1505 			parser->layer = HASH_RXQ_UDPV4;
1506 		else
1507 			parser->layer = HASH_RXQ_UDPV6;
1508 	}
1509 	if (spec) {
1510 		if (!mask)
1511 			mask = default_mask;
1512 		udp.val.dst_port = spec->hdr.dst_port;
1513 		udp.val.src_port = spec->hdr.src_port;
1514 		udp.mask.dst_port = mask->hdr.dst_port;
1515 		udp.mask.src_port = mask->hdr.src_port;
1516 		/* Remove unwanted bits from values. */
1517 		udp.val.src_port &= udp.mask.src_port;
1518 		udp.val.dst_port &= udp.mask.dst_port;
1519 	}
1520 	mlx5_flow_create_copy(parser, &udp, udp_size);
1521 	return 0;
1522 }
1523 
1524 /**
1525  * Convert TCP item to Verbs specification.
1526  *
1527  * @param item[in]
1528  *   Item specification.
1529  * @param default_mask[in]
1530  *   Default bit-masks to use when item->mask is not provided.
1531  * @param data[in, out]
1532  *   User structure.
1533  *
1534  * @return
1535  *   0 on success, a negative errno value otherwise and rte_errno is set.
1536  */
1537 static int
1538 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1539 		     const void *default_mask,
1540 		     struct mlx5_flow_data *data)
1541 {
1542 	const struct rte_flow_item_tcp *spec = item->spec;
1543 	const struct rte_flow_item_tcp *mask = item->mask;
1544 	struct mlx5_flow_parse *parser = data->parser;
1545 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1546 	struct ibv_flow_spec_tcp_udp tcp = {
1547 		.type = parser->inner | IBV_FLOW_SPEC_TCP,
1548 		.size = tcp_size,
1549 	};
1550 
1551 	/* Don't update layer for the inner pattern. */
1552 	if (!parser->inner) {
1553 		if (parser->layer == HASH_RXQ_IPV4)
1554 			parser->layer = HASH_RXQ_TCPV4;
1555 		else
1556 			parser->layer = HASH_RXQ_TCPV6;
1557 	}
1558 	if (spec) {
1559 		if (!mask)
1560 			mask = default_mask;
1561 		tcp.val.dst_port = spec->hdr.dst_port;
1562 		tcp.val.src_port = spec->hdr.src_port;
1563 		tcp.mask.dst_port = mask->hdr.dst_port;
1564 		tcp.mask.src_port = mask->hdr.src_port;
1565 		/* Remove unwanted bits from values. */
1566 		tcp.val.src_port &= tcp.mask.src_port;
1567 		tcp.val.dst_port &= tcp.mask.dst_port;
1568 	}
1569 	mlx5_flow_create_copy(parser, &tcp, tcp_size);
1570 	return 0;
1571 }
1572 
1573 /**
1574  * Convert VXLAN item to Verbs specification.
1575  *
1576  * @param item[in]
1577  *   Item specification.
1578  * @param default_mask[in]
1579  *   Default bit-masks to use when item->mask is not provided.
1580  * @param data[in, out]
1581  *   User structure.
1582  *
1583  * @return
1584  *   0 on success, a negative errno value otherwise and rte_errno is set.
1585  */
1586 static int
1587 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1588 		       const void *default_mask,
1589 		       struct mlx5_flow_data *data)
1590 {
1591 	const struct rte_flow_item_vxlan *spec = item->spec;
1592 	const struct rte_flow_item_vxlan *mask = item->mask;
1593 	struct mlx5_flow_parse *parser = data->parser;
1594 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1595 	struct ibv_flow_spec_tunnel vxlan = {
1596 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1597 		.size = size,
1598 	};
1599 	union vni {
1600 		uint32_t vlan_id;
1601 		uint8_t vni[4];
1602 	} id;
1603 
1604 	id.vni[0] = 0;
1605 	parser->inner = IBV_FLOW_SPEC_INNER;
1606 	if (spec) {
1607 		if (!mask)
1608 			mask = default_mask;
1609 		memcpy(&id.vni[1], spec->vni, 3);
1610 		vxlan.val.tunnel_id = id.vlan_id;
1611 		memcpy(&id.vni[1], mask->vni, 3);
1612 		vxlan.mask.tunnel_id = id.vlan_id;
1613 		/* Remove unwanted bits from values. */
1614 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1615 	}
1616 	/*
1617 	 * Tunnel ID 0 is equivalent to not adding a VXLAN layer: if only this
1618 	 * layer is defined in the Verbs specification, it is interpreted as a
1619 	 * wildcard and all packets will match this rule; if it follows a full
1620 	 * stack of layers (e.g. eth / ipv4 / udp), all packets matching the
1621 	 * preceding layers will also match this rule.
1622 	 * To avoid such a situation, VNI 0 is currently refused.
1623 	 */
1624 	if (!vxlan.val.tunnel_id)
1625 		return rte_flow_error_set(data->error, EINVAL,
1626 					  RTE_FLOW_ERROR_TYPE_ITEM,
1627 					  item,
1628 					  "VxLAN vni cannot be 0");
1629 	mlx5_flow_create_copy(parser, &vxlan, size);
1630 	return 0;
1631 }
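/*
 * Illustrative sketch (not part of the driver): the 24-bit VNI of the pattern
 * item is copied into the three upper bytes of the id union while byte 0 is
 * cleared, e.g. for spec->vni = { 0x12, 0x34, 0x56 }:
 *
 *	id.vni = { 0x00, 0x12, 0x34, 0x56 }
 *
 * and vxlan.val.tunnel_id takes the resulting 32-bit value as laid out in
 * memory, masked by the tunnel_id built the same way from mask->vni.
 */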
1632 
1633 /**
1634  * Convert mark/flag action to Verbs specification.
1635  *
1636  * @param parser
1637  *   Internal parser structure.
1638  * @param mark_id
1639  *   Mark identifier.
1640  *
1641  * @return
1642  *   0 on success, a negative errno value otherwise and rte_errno is set.
1643  */
1644 static int
1645 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1646 {
1647 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1648 	struct ibv_flow_spec_action_tag tag = {
1649 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1650 		.size = size,
1651 		.tag_id = mlx5_flow_mark_set(mark_id),
1652 	};
1653 
1654 	assert(parser->mark);
1655 	mlx5_flow_create_copy(parser, &tag, size);
1656 	return 0;
1657 }
1658 
1659 /**
1660  * Convert count action to Verbs specification.
1661  *
1662  * @param dev
1663  *   Pointer to Ethernet device.
1664  * @param parser
1665  *   Pointer to MLX5 flow parser structure.
1666  *
1667  * @return
1668  *   0 on success, a negative errno value otherwise and rte_errno is set.
1669  */
1670 static int
1671 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1672 		       struct mlx5_flow_parse *parser __rte_unused)
1673 {
1674 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1675 	struct priv *priv = dev->data->dev_private;
1676 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1677 	struct ibv_counter_set_init_attr init_attr = {0};
1678 	struct ibv_flow_spec_counter_action counter = {
1679 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
1680 		.size = size,
1681 		.counter_set_handle = 0,
1682 	};
1683 
1684 	init_attr.counter_set_id = 0;
1685 	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1686 	if (!parser->cs) {
1687 		rte_errno = EINVAL;
1688 		return -rte_errno;
1689 	}
1690 	counter.counter_set_handle = parser->cs->handle;
1691 	mlx5_flow_create_copy(parser, &counter, size);
1692 #endif
1693 	return 0;
1694 }
1695 
1696 /**
1697  * Complete flow rule creation with a drop queue.
1698  *
1699  * @param dev
1700  *   Pointer to Ethernet device.
1701  * @param parser
1702  *   Internal parser structure.
1703  * @param flow
1704  *   Pointer to the rte_flow.
1705  * @param[out] error
1706  *   Perform verbose error reporting if not NULL.
1707  *
1708  * @return
1709  *   0 on success, a negative errno value otherwise and rte_errno is set.
1710  */
1711 static int
1712 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1713 				   struct mlx5_flow_parse *parser,
1714 				   struct rte_flow *flow,
1715 				   struct rte_flow_error *error)
1716 {
1717 	struct priv *priv = dev->data->dev_private;
1718 	struct ibv_flow_spec_action_drop *drop;
1719 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1720 
1721 	assert(priv->pd);
1722 	assert(priv->ctx);
1723 	flow->drop = 1;
1724 	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1725 			parser->queue[HASH_RXQ_ETH].offset);
1726 	*drop = (struct ibv_flow_spec_action_drop){
1727 			.type = IBV_FLOW_SPEC_ACTION_DROP,
1728 			.size = size,
1729 	};
1730 	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1731 	parser->queue[HASH_RXQ_ETH].offset += size;
1732 	flow->frxq[HASH_RXQ_ETH].ibv_attr =
1733 		parser->queue[HASH_RXQ_ETH].ibv_attr;
1734 	if (parser->count)
1735 		flow->cs = parser->cs;
1736 	if (!priv->dev->data->dev_started)
1737 		return 0;
1738 	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1739 	flow->frxq[HASH_RXQ_ETH].ibv_flow =
1740 		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1741 				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
1742 	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1743 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1744 				   NULL, "flow rule creation failure");
1745 		goto error;
1746 	}
1747 	return 0;
1748 error:
1749 	assert(flow);
1750 	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1751 		claim_zero(mlx5_glue->destroy_flow
1752 			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1753 		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1754 	}
1755 	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1756 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1757 		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1758 	}
1759 	if (flow->cs) {
1760 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1761 		flow->cs = NULL;
1762 		parser->cs = NULL;
1763 	}
1764 	return -rte_errno;
1765 }
1766 
1767 /**
1768  * Create hash Rx queues when RSS is enabled.
1769  *
1770  * @param dev
1771  *   Pointer to Ethernet device.
1772  * @param parser
1773  *   Internal parser structure.
1774  * @param flow
1775  *   Pointer to the rte_flow.
1776  * @param[out] error
1777  *   Perform verbose error reporting if not NULL.
1778  *
1779  * @return
1780  *   0 on success, a negative errno value otherwise and rte_errno is set.
1781  */
1782 static int
1783 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1784 				  struct mlx5_flow_parse *parser,
1785 				  struct rte_flow *flow,
1786 				  struct rte_flow_error *error)
1787 {
1788 	struct priv *priv = dev->data->dev_private;
1789 	unsigned int i;
1790 
1791 	for (i = 0; i != hash_rxq_init_n; ++i) {
1792 		uint64_t hash_fields;
1793 
1794 		if (!parser->queue[i].ibv_attr)
1795 			continue;
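		/* Hand the Verbs attribute over from the parser to the flow. */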
1796 		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1797 		parser->queue[i].ibv_attr = NULL;
1798 		hash_fields = hash_rxq_init[i].hash_fields;
1799 		if (!priv->dev->data->dev_started)
1800 			continue;
1801 		flow->frxq[i].hrxq =
1802 			mlx5_hrxq_get(dev,
1803 				      parser->rss_conf.key,
1804 				      parser->rss_conf.key_len,
1805 				      hash_fields,
1806 				      parser->rss_conf.queue,
1807 				      parser->rss_conf.queue_num);
1808 		if (flow->frxq[i].hrxq)
1809 			continue;
1810 		flow->frxq[i].hrxq =
1811 			mlx5_hrxq_new(dev,
1812 				      parser->rss_conf.key,
1813 				      parser->rss_conf.key_len,
1814 				      hash_fields,
1815 				      parser->rss_conf.queue,
1816 				      parser->rss_conf.queue_num);
1817 		if (!flow->frxq[i].hrxq) {
1818 			return rte_flow_error_set(error, ENOMEM,
1819 						  RTE_FLOW_ERROR_TYPE_HANDLE,
1820 						  NULL,
1821 						  "cannot create hash rxq");
1822 		}
1823 	}
1824 	return 0;
1825 }
1826 
1827 /**
1828  * Complete flow rule creation.
1829  *
1830  * @param dev
1831  *   Pointer to Ethernet device.
1832  * @param parser
1833  *   Internal parser structure.
1834  * @param flow
1835  *   Pointer to the rte_flow.
1836  * @param[out] error
1837  *   Perform verbose error reporting if not NULL.
1838  *
1839  * @return
1840  *   0 on success, a negative errno value otherwise and rte_errno is set.
1841  */
1842 static int
1843 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1844 			      struct mlx5_flow_parse *parser,
1845 			      struct rte_flow *flow,
1846 			      struct rte_flow_error *error)
1847 {
1848 	struct priv *priv = dev->data->dev_private;
1849 	int ret;
1850 	unsigned int i;
1851 	unsigned int flows_n = 0;
1852 
1853 	assert(priv->pd);
1854 	assert(priv->ctx);
1855 	assert(!parser->drop);
1856 	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1857 	if (ret)
1858 		goto error;
1859 	if (parser->count)
1860 		flow->cs = parser->cs;
1861 	if (!priv->dev->data->dev_started)
1862 		return 0;
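	/* Attach one Verbs flow to the QP of each populated hash Rx queue. */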
1863 	for (i = 0; i != hash_rxq_init_n; ++i) {
1864 		if (!flow->frxq[i].hrxq)
1865 			continue;
1866 		flow->frxq[i].ibv_flow =
1867 			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1868 					       flow->frxq[i].ibv_attr);
1869 		if (!flow->frxq[i].ibv_flow) {
1870 			rte_flow_error_set(error, ENOMEM,
1871 					   RTE_FLOW_ERROR_TYPE_HANDLE,
1872 					   NULL, "flow rule creation failure");
1873 			goto error;
1874 		}
1875 		++flows_n;
1876 		DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
1877 			dev->data->port_id,
1878 			(void *)flow, i,
1879 			(void *)flow->frxq[i].hrxq,
1880 			(void *)flow->frxq[i].ibv_flow);
1881 	}
1882 	if (!flows_n) {
1883 		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1884 				   NULL, "internal error in flow creation");
1885 		goto error;
1886 	}
1887 	for (i = 0; i != parser->rss_conf.queue_num; ++i) {
1888 		struct mlx5_rxq_data *q =
1889 			(*priv->rxqs)[parser->rss_conf.queue[i]];
1890 
1891 		q->mark |= parser->mark;
1892 	}
1893 	return 0;
1894 error:
1895 	ret = rte_errno; /* Save rte_errno before cleanup. */
1896 	assert(flow);
1897 	for (i = 0; i != hash_rxq_init_n; ++i) {
1898 		if (flow->frxq[i].ibv_flow) {
1899 			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1900 
1901 			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1902 		}
1903 		if (flow->frxq[i].hrxq)
1904 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1905 		if (flow->frxq[i].ibv_attr)
1906 			rte_free(flow->frxq[i].ibv_attr);
1907 	}
1908 	if (flow->cs) {
1909 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1910 		flow->cs = NULL;
1911 		parser->cs = NULL;
1912 	}
1913 	rte_errno = ret; /* Restore rte_errno. */
1914 	return -rte_errno;
1915 }
1916 
1917 /**
1918  * Convert a flow.
1919  *
1920  * @param dev
1921  *   Pointer to Ethernet device.
1922  * @param list
1923  *   Pointer to a TAILQ flow list.
1924  * @param[in] attr
1925  *   Flow rule attributes.
1926  * @param[in] pattern
1927  *   Pattern specification (list terminated by the END pattern item).
1928  * @param[in] actions
1929  *   Associated actions (list terminated by the END action).
1930  * @param[out] error
1931  *   Perform verbose error reporting if not NULL.
1932  *
1933  * @return
1934  *   A flow on success, NULL otherwise and rte_errno is set.
1935  */
1936 static struct rte_flow *
1937 mlx5_flow_list_create(struct rte_eth_dev *dev,
1938 		      struct mlx5_flows *list,
1939 		      const struct rte_flow_attr *attr,
1940 		      const struct rte_flow_item items[],
1941 		      const struct rte_flow_action actions[],
1942 		      struct rte_flow_error *error)
1943 {
1944 	struct mlx5_flow_parse parser = { .create = 1, };
1945 	struct rte_flow *flow = NULL;
1946 	unsigned int i;
1947 	int ret;
1948 
1949 	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1950 	if (ret)
1951 		goto exit;
1952 	flow = rte_calloc(__func__, 1,
1953 			  sizeof(*flow) +
1954 			  parser.rss_conf.queue_num * sizeof(uint16_t),
1955 			  0);
1956 	if (!flow) {
1957 		rte_flow_error_set(error, ENOMEM,
1958 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1959 				   NULL,
1960 				   "cannot allocate flow memory");
1961 		return NULL;
1962 	}
1963 	/* Copy configuration. */
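	/*
	 * The queue array is carved out of the same allocation, right after
	 * the flow structure (see the rte_calloc() size above).
	 */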
1964 	flow->queues = (uint16_t (*)[])(flow + 1);
1965 	flow->rss_conf = (struct rte_flow_action_rss){
1966 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
1967 		.level = 0,
1968 		.types = parser.rss_conf.types,
1969 		.key_len = parser.rss_conf.key_len,
1970 		.queue_num = parser.rss_conf.queue_num,
1971 		.key = memcpy(flow->rss_key, parser.rss_conf.key,
1972 			      sizeof(*parser.rss_conf.key) *
1973 			      parser.rss_conf.key_len),
1974 		.queue = memcpy(flow->queues, parser.rss_conf.queue,
1975 				sizeof(*parser.rss_conf.queue) *
1976 				parser.rss_conf.queue_num),
1977 	};
1978 	flow->mark = parser.mark;
1979 	/* Finalize the flow. */
1980 	if (parser.drop)
1981 		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1982 							 error);
1983 	else
1984 		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1985 	if (ret)
1986 		goto exit;
1987 	TAILQ_INSERT_TAIL(list, flow, next);
1988 	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
1989 		(void *)flow);
1990 	return flow;
1991 exit:
1992 	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
1993 		error->message);
1994 	for (i = 0; i != hash_rxq_init_n; ++i) {
1995 		if (parser.queue[i].ibv_attr)
1996 			rte_free(parser.queue[i].ibv_attr);
1997 	}
1998 	rte_free(flow);
1999 	return NULL;
2000 }
2001 
2002 /**
2003  * Validate a flow supported by the NIC.
2004  *
2005  * @see rte_flow_validate()
2006  * @see rte_flow_ops
2007  */
2008 int
2009 mlx5_flow_validate(struct rte_eth_dev *dev,
2010 		   const struct rte_flow_attr *attr,
2011 		   const struct rte_flow_item items[],
2012 		   const struct rte_flow_action actions[],
2013 		   struct rte_flow_error *error)
2014 {
2015 	struct mlx5_flow_parse parser = { .create = 0, };
2016 
2017 	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2018 }
2019 
2020 /**
2021  * Create a flow.
2022  *
2023  * @see rte_flow_create()
2024  * @see rte_flow_ops
2025  */
2026 struct rte_flow *
2027 mlx5_flow_create(struct rte_eth_dev *dev,
2028 		 const struct rte_flow_attr *attr,
2029 		 const struct rte_flow_item items[],
2030 		 const struct rte_flow_action actions[],
2031 		 struct rte_flow_error *error)
2032 {
2033 	struct priv *priv = dev->data->dev_private;
2034 
2035 	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2036 				     error);
2037 }
2038 
2039 /**
2040  * Destroy a flow in a list.
2041  *
2042  * @param dev
2043  *   Pointer to Ethernet device.
2044  * @param list
2045  *   Pointer to a TAILQ flow list.
2046  * @param[in] flow
2047  *   Flow to destroy.
2048  */
2049 static void
2050 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2051 		       struct rte_flow *flow)
2052 {
2053 	struct priv *priv = dev->data->dev_private;
2054 	unsigned int i;
2055 
2056 	if (flow->drop || !flow->mark)
2057 		goto free;
2058 	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2059 		struct rte_flow *tmp;
2060 		int mark = 0;
2061 
2062 		/*
2063 		 * To remove the mark from the queue, the queue must not be
2064 		 * present in any other marked flow (RSS or not).
2065 		 */
2066 		TAILQ_FOREACH(tmp, list, next) {
2067 			unsigned int j;
2068 			uint16_t *tqs = NULL;
2069 			uint16_t tq_n = 0;
2070 
2071 			if (!tmp->mark)
2072 				continue;
2073 			for (j = 0; j != hash_rxq_init_n; ++j) {
2074 				if (!tmp->frxq[j].hrxq)
2075 					continue;
2076 				tqs = tmp->frxq[j].hrxq->ind_table->queues;
2077 				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2078 			}
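			/*
			 * All hash Rx queues of a flow are built from the
			 * same RSS queue list, so the last indirection table
			 * found is representative.
			 */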
2079 			if (!tq_n)
2080 				continue;
2081 			for (j = 0; (j != tq_n) && !mark; j++)
2082 				if (tqs[j] == (*flow->queues)[i])
2083 					mark = 1;
2084 		}
2085 		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2086 	}
2087 free:
2088 	if (flow->drop) {
2089 		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2090 			claim_zero(mlx5_glue->destroy_flow
2091 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2092 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2093 	} else {
2094 		for (i = 0; i != hash_rxq_init_n; ++i) {
2095 			struct mlx5_flow *frxq = &flow->frxq[i];
2096 
2097 			if (frxq->ibv_flow)
2098 				claim_zero(mlx5_glue->destroy_flow
2099 					   (frxq->ibv_flow));
2100 			if (frxq->hrxq)
2101 				mlx5_hrxq_release(dev, frxq->hrxq);
2102 			if (frxq->ibv_attr)
2103 				rte_free(frxq->ibv_attr);
2104 		}
2105 	}
2106 	if (flow->cs) {
2107 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2108 		flow->cs = NULL;
2109 	}
2110 	TAILQ_REMOVE(list, flow, next);
2111 	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2112 		(void *)flow);
2113 	rte_free(flow);
2114 }
2115 
2116 /**
2117  * Destroy all flows.
2118  *
2119  * @param dev
2120  *   Pointer to Ethernet device.
2121  * @param list
2122  *   Pointer to a TAILQ flow list.
2123  */
2124 void
2125 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2126 {
2127 	while (!TAILQ_EMPTY(list)) {
2128 		struct rte_flow *flow;
2129 
2130 		flow = TAILQ_FIRST(list);
2131 		mlx5_flow_list_destroy(dev, list, flow);
2132 	}
2133 }
2134 
2135 /**
2136  * Create drop queue.
2137  *
2138  * @param dev
2139  *   Pointer to Ethernet device.
2140  *
2141  * @return
2142  *   0 on success, a negative errno value otherwise and rte_errno is set.
2143  */
2144 int
2145 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2146 {
2147 	struct priv *priv = dev->data->dev_private;
2148 	struct mlx5_hrxq_drop *fdq = NULL;
2149 
2150 	assert(priv->pd);
2151 	assert(priv->ctx);
2152 	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2153 	if (!fdq) {
2154 		DRV_LOG(WARNING,
2155 			"port %u cannot allocate memory for drop queue",
2156 			dev->data->port_id);
2157 		rte_errno = ENOMEM;
2158 		return -rte_errno;
2159 	}
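	/*
	 * Build the minimal Verbs resource chain for a drop target:
	 * CQ -> WQ -> indirection table -> hash QP.  Matched packets are
	 * steered to a queue that is never polled, which drops them.
	 */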
2160 	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2161 	if (!fdq->cq) {
2162 		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2163 			dev->data->port_id);
2164 		rte_errno = errno;
2165 		goto error;
2166 	}
2167 	fdq->wq = mlx5_glue->create_wq
2168 		(priv->ctx,
2169 		 &(struct ibv_wq_init_attr){
2170 			.wq_type = IBV_WQT_RQ,
2171 			.max_wr = 1,
2172 			.max_sge = 1,
2173 			.pd = priv->pd,
2174 			.cq = fdq->cq,
2175 		 });
2176 	if (!fdq->wq) {
2177 		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2178 			dev->data->port_id);
2179 		rte_errno = errno;
2180 		goto error;
2181 	}
2182 	fdq->ind_table = mlx5_glue->create_rwq_ind_table
2183 		(priv->ctx,
2184 		 &(struct ibv_rwq_ind_table_init_attr){
2185 			.log_ind_tbl_size = 0,
2186 			.ind_tbl = &fdq->wq,
2187 			.comp_mask = 0,
2188 		 });
2189 	if (!fdq->ind_table) {
2190 		DRV_LOG(WARNING,
2191 			"port %u cannot allocate indirection table for drop"
2192 			" queue",
2193 			dev->data->port_id);
2194 		rte_errno = errno;
2195 		goto error;
2196 	}
2197 	fdq->qp = mlx5_glue->create_qp_ex
2198 		(priv->ctx,
2199 		 &(struct ibv_qp_init_attr_ex){
2200 			.qp_type = IBV_QPT_RAW_PACKET,
2201 			.comp_mask =
2202 				IBV_QP_INIT_ATTR_PD |
2203 				IBV_QP_INIT_ATTR_IND_TABLE |
2204 				IBV_QP_INIT_ATTR_RX_HASH,
2205 			.rx_hash_conf = (struct ibv_rx_hash_conf){
2206 				.rx_hash_function =
2207 					IBV_RX_HASH_FUNC_TOEPLITZ,
2208 				.rx_hash_key_len = rss_hash_default_key_len,
2209 				.rx_hash_key = rss_hash_default_key,
2210 				.rx_hash_fields_mask = 0,
2211 				},
2212 			.rwq_ind_tbl = fdq->ind_table,
2213 			.pd = priv->pd
2214 		 });
2215 	if (!fdq->qp) {
2216 		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2217 			dev->data->port_id);
2218 		rte_errno = errno;
2219 		goto error;
2220 	}
2221 	priv->flow_drop_queue = fdq;
2222 	return 0;
2223 error:
2224 	if (fdq->qp)
2225 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2226 	if (fdq->ind_table)
2227 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2228 	if (fdq->wq)
2229 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2230 	if (fdq->cq)
2231 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2232 	if (fdq)
2233 		rte_free(fdq);
2234 	priv->flow_drop_queue = NULL;
2235 	return -rte_errno;
2236 }
2237 
2238 /**
2239  * Delete drop queue.
2240  *
2241  * @param dev
2242  *   Pointer to Ethernet device.
2243  */
2244 void
2245 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2246 {
2247 	struct priv *priv = dev->data->dev_private;
2248 	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2249 
2250 	if (!fdq)
2251 		return;
2252 	if (fdq->qp)
2253 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2254 	if (fdq->ind_table)
2255 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2256 	if (fdq->wq)
2257 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2258 	if (fdq->cq)
2259 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2260 	rte_free(fdq);
2261 	priv->flow_drop_queue = NULL;
2262 }
2263 
2264 /**
2265  * Remove all flows.
2266  *
2267  * @param dev
2268  *   Pointer to Ethernet device.
2269  * @param list
2270  *   Pointer to a TAILQ flow list.
2271  */
2272 void
2273 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2274 {
2275 	struct priv *priv = dev->data->dev_private;
2276 	struct rte_flow *flow;
2277 
2278 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2279 		unsigned int i;
2280 		struct mlx5_ind_table_ibv *ind_tbl = NULL;
2281 
2282 		if (flow->drop) {
2283 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2284 				continue;
2285 			claim_zero(mlx5_glue->destroy_flow
2286 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2287 			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2288 			DRV_LOG(DEBUG, "port %u flow %p removed",
2289 				dev->data->port_id, (void *)flow);
2290 			/* Next flow. */
2291 			continue;
2292 		}
2293 		/* Verify the flow has not already been cleaned. */
2294 		for (i = 0; i != hash_rxq_init_n; ++i) {
2295 			if (!flow->frxq[i].ibv_flow)
2296 				continue;
2297 			/*
2298 			 * The indirection table may be needed to clear the
2299 			 * mark flag on the Rx queues.
2300 			 * Grabbing it here avoids walking the queues in
2301 			 * another loop.
2302 			 */
2303 			ind_tbl = flow->frxq[i].hrxq->ind_table;
2304 			break;
2305 		}
2306 		if (i == hash_rxq_init_n)
2307 			return;
2308 		if (flow->mark) {
2309 			assert(ind_tbl);
2310 			for (i = 0; i != ind_tbl->queues_n; ++i)
2311 				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2312 		}
2313 		for (i = 0; i != hash_rxq_init_n; ++i) {
2314 			if (!flow->frxq[i].ibv_flow)
2315 				continue;
2316 			claim_zero(mlx5_glue->destroy_flow
2317 				   (flow->frxq[i].ibv_flow));
2318 			flow->frxq[i].ibv_flow = NULL;
2319 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2320 			flow->frxq[i].hrxq = NULL;
2321 		}
2322 		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2323 			(void *)flow);
2324 	}
2325 }
2326 
2327 /**
2328  * Add all flows.
2329  *
2330  * @param dev
2331  *   Pointer to Ethernet device.
2332  * @param list
2333  *   Pointer to a TAILQ flow list.
2334  *
2335  * @return
2336  *   0 on success, a negative errno value otherwise and rte_errno is set.
2337  */
2338 int
2339 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2340 {
2341 	struct priv *priv = dev->data->dev_private;
2342 	struct rte_flow *flow;
2343 
2344 	TAILQ_FOREACH(flow, list, next) {
2345 		unsigned int i;
2346 
2347 		if (flow->drop) {
2348 			flow->frxq[HASH_RXQ_ETH].ibv_flow =
2349 				mlx5_glue->create_flow
2350 				(priv->flow_drop_queue->qp,
2351 				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2352 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2353 				DRV_LOG(DEBUG,
2354 					"port %u flow %p cannot be applied",
2355 					dev->data->port_id, (void *)flow);
2356 				rte_errno = EINVAL;
2357 				return -rte_errno;
2358 			}
2359 			DRV_LOG(DEBUG, "port %u flow %p applied",
2360 				dev->data->port_id, (void *)flow);
2361 			/* Next flow. */
2362 			continue;
2363 		}
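		/*
		 * Non-drop flow: re-acquire (or recreate) a hash Rx queue for
		 * each stored attribute, then re-create the Verbs flow on its
		 * QP.
		 */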
2364 		for (i = 0; i != hash_rxq_init_n; ++i) {
2365 			if (!flow->frxq[i].ibv_attr)
2366 				continue;
2367 			flow->frxq[i].hrxq =
2368 				mlx5_hrxq_get(dev, flow->rss_conf.key,
2369 					      flow->rss_conf.key_len,
2370 					      hash_rxq_init[i].hash_fields,
2371 					      flow->rss_conf.queue,
2372 					      flow->rss_conf.queue_num);
2373 			if (flow->frxq[i].hrxq)
2374 				goto flow_create;
2375 			flow->frxq[i].hrxq =
2376 				mlx5_hrxq_new(dev, flow->rss_conf.key,
2377 					      flow->rss_conf.key_len,
2378 					      hash_rxq_init[i].hash_fields,
2379 					      flow->rss_conf.queue,
2380 					      flow->rss_conf.queue_num);
2381 			if (!flow->frxq[i].hrxq) {
2382 				DRV_LOG(DEBUG,
2383 					"port %u flow %p cannot be applied",
2384 					dev->data->port_id, (void *)flow);
2385 				rte_errno = EINVAL;
2386 				return -rte_errno;
2387 			}
2388 flow_create:
2389 			flow->frxq[i].ibv_flow =
2390 				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2391 						       flow->frxq[i].ibv_attr);
2392 			if (!flow->frxq[i].ibv_flow) {
2393 				DRV_LOG(DEBUG,
2394 					"port %u flow %p cannot be applied",
2395 					dev->data->port_id, (void *)flow);
2396 				rte_errno = EINVAL;
2397 				return -rte_errno;
2398 			}
2399 			DRV_LOG(DEBUG, "port %u flow %p applied",
2400 				dev->data->port_id, (void *)flow);
2401 		}
2402 		if (!flow->mark)
2403 			continue;
2404 		for (i = 0; i != flow->rss_conf.queue_num; ++i)
2405 			(*priv->rxqs)[flow->rss_conf.queue[i]]->mark = 1;
2406 	}
2407 	return 0;
2408 }
2409 
2410 /**
2411  * Verify the flow list is empty.
2412  *
2413  * @param dev
2414  *   Pointer to Ethernet device.
2415  *
2416  * @return The number of flows not released.
2417  */
2418 int
2419 mlx5_flow_verify(struct rte_eth_dev *dev)
2420 {
2421 	struct priv *priv = dev->data->dev_private;
2422 	struct rte_flow *flow;
2423 	int ret = 0;
2424 
2425 	TAILQ_FOREACH(flow, &priv->flows, next) {
2426 		DRV_LOG(DEBUG, "port %u flow %p still referenced",
2427 			dev->data->port_id, (void *)flow);
2428 		++ret;
2429 	}
2430 	return ret;
2431 }
2432 
2433 /**
2434  * Enable a control flow configured from the control plane.
2435  *
2436  * @param dev
2437  *   Pointer to Ethernet device.
2438  * @param eth_spec
2439  *   An Ethernet flow spec to apply.
2440  * @param eth_mask
2441  *   An Ethernet flow mask to apply.
2442  * @param vlan_spec
2443  *   A VLAN flow spec to apply.
2444  * @param vlan_mask
2445  *   A VLAN flow mask to apply.
2446  *
2447  * @return
2448  *   0 on success, a negative errno value otherwise and rte_errno is set.
2449  */
2450 int
2451 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2452 		    struct rte_flow_item_eth *eth_spec,
2453 		    struct rte_flow_item_eth *eth_mask,
2454 		    struct rte_flow_item_vlan *vlan_spec,
2455 		    struct rte_flow_item_vlan *vlan_mask)
2456 {
2457 	struct priv *priv = dev->data->dev_private;
2458 	const struct rte_flow_attr attr = {
2459 		.ingress = 1,
2460 		.priority = MLX5_CTRL_FLOW_PRIORITY,
2461 	};
2462 	struct rte_flow_item items[] = {
2463 		{
2464 			.type = RTE_FLOW_ITEM_TYPE_ETH,
2465 			.spec = eth_spec,
2466 			.last = NULL,
2467 			.mask = eth_mask,
2468 		},
2469 		{
2470 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2471 				RTE_FLOW_ITEM_TYPE_END,
2472 			.spec = vlan_spec,
2473 			.last = NULL,
2474 			.mask = vlan_mask,
2475 		},
2476 		{
2477 			.type = RTE_FLOW_ITEM_TYPE_END,
2478 		},
2479 	};
2480 	uint16_t queue[priv->reta_idx_n];
2481 	struct rte_flow_action_rss action_rss = {
2482 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2483 		.level = 0,
2484 		.types = priv->rss_conf.rss_hf,
2485 		.key_len = priv->rss_conf.rss_key_len,
2486 		.queue_num = priv->reta_idx_n,
2487 		.key = priv->rss_conf.rss_key,
2488 		.queue = queue,
2489 	};
2490 	struct rte_flow_action actions[] = {
2491 		{
2492 			.type = RTE_FLOW_ACTION_TYPE_RSS,
2493 			.conf = &action_rss,
2494 		},
2495 		{
2496 			.type = RTE_FLOW_ACTION_TYPE_END,
2497 		},
2498 	};
2499 	struct rte_flow *flow;
2500 	struct rte_flow_error error;
2501 	unsigned int i;
2502 
2503 	if (!priv->reta_idx_n) {
2504 		rte_errno = EINVAL;
2505 		return -rte_errno;
2506 	}
2507 	for (i = 0; i != priv->reta_idx_n; ++i)
2508 		queue[i] = (*priv->reta_idx)[i];
2509 	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2510 				     actions, &error);
2511 	if (!flow)
2512 		return -rte_errno;
2513 	return 0;
2514 }
2515 
2516 /**
2517  * Enable a control flow configured from the control plane.
2518  *
2519  * @param dev
2520  *   Pointer to Ethernet device.
2521  * @param eth_spec
2522  *   An Ethernet flow spec to apply.
2523  * @param eth_mask
2524  *   An Ethernet flow mask to apply.
2525  *
2526  * @return
2527  *   0 on success, a negative errno value otherwise and rte_errno is set.
2528  */
2529 int
2530 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2531 	       struct rte_flow_item_eth *eth_spec,
2532 	       struct rte_flow_item_eth *eth_mask)
2533 {
2534 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2535 }
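/*
 * Illustrative sketch (mirrors how the control path typically uses it):
 *
 *   struct rte_flow_item_eth bcast = {
 *       .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *   };
 *
 *   mlx5_ctrl_flow(dev, &bcast, &bcast);
 */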
2536 
2537 /**
2538  * Destroy a flow.
2539  *
2540  * @see rte_flow_destroy()
2541  * @see rte_flow_ops
2542  */
2543 int
2544 mlx5_flow_destroy(struct rte_eth_dev *dev,
2545 		  struct rte_flow *flow,
2546 		  struct rte_flow_error *error __rte_unused)
2547 {
2548 	struct priv *priv = dev->data->dev_private;
2549 
2550 	mlx5_flow_list_destroy(dev, &priv->flows, flow);
2551 	return 0;
2552 }
2553 
2554 /**
2555  * Destroy all flows.
2556  *
2557  * @see rte_flow_flush()
2558  * @see rte_flow_ops
2559  */
2560 int
2561 mlx5_flow_flush(struct rte_eth_dev *dev,
2562 		struct rte_flow_error *error __rte_unused)
2563 {
2564 	struct priv *priv = dev->data->dev_private;
2565 
2566 	mlx5_flow_list_flush(dev, &priv->flows);
2567 	return 0;
2568 }
2569 
2570 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2571 /**
2572  * Query flow counter.
2573  *
2574  * @param cs
2575  *   The counter set to query.
2576  * @param counter_stats
2577  *   Last counter snapshot; deltas are reported in @p query_count.
2578  *
2579  * @return
2580  *   0 on success, a negative errno value otherwise and rte_errno is set.
2581  */
2582 static int
2583 mlx5_flow_query_count(struct ibv_counter_set *cs,
2584 		      struct mlx5_flow_counter_stats *counter_stats,
2585 		      struct rte_flow_query_count *query_count,
2586 		      struct rte_flow_error *error)
2587 {
2588 	uint64_t counters[2];
2589 	struct ibv_query_counter_set_attr query_cs_attr = {
2590 		.cs = cs,
2591 		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2592 	};
2593 	struct ibv_counter_set_data query_out = {
2594 		.out = counters,
2595 		.outlen = 2 * sizeof(uint64_t),
2596 	};
2597 	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2598 
2599 	if (err)
2600 		return rte_flow_error_set(error, err,
2601 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2602 					  NULL,
2603 					  "cannot read counter");
2604 	query_count->hits_set = 1;
2605 	query_count->bytes_set = 1;
2606 	query_count->hits = counters[0] - counter_stats->hits;
2607 	query_count->bytes = counters[1] - counter_stats->bytes;
2608 	if (query_count->reset) {
2609 		counter_stats->hits = counters[0];
2610 		counter_stats->bytes = counters[1];
2611 	}
2612 	return 0;
2613 }
2614 
2615 /**
2616  * Query a flow.
2617  *
2618  * @see rte_flow_query()
2619  * @see rte_flow_ops
2620  */
2621 int
2622 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
2623 		struct rte_flow *flow,
2624 		enum rte_flow_action_type action __rte_unused,
2625 		void *data,
2626 		struct rte_flow_error *error)
2627 {
2628 	if (flow->cs) {
2629 		int ret;
2630 
2631 		ret = mlx5_flow_query_count(flow->cs,
2632 					    &flow->counter_stats,
2633 					    (struct rte_flow_query_count *)data,
2634 					    error);
2635 		if (ret)
2636 			return ret;
2637 	} else {
2638 		return rte_flow_error_set(error, EINVAL,
2639 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2640 					  NULL,
2641 					  "no counter found for flow");
2642 	}
2643 	return 0;
2644 }
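/*
 * Illustrative sketch of the application side (not part of this driver):
 *
 *   struct rte_flow_query_count qc = { .reset = 1 };
 *   struct rte_flow_error err;
 *
 *   if (rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *                      &qc, &err) == 0 && qc.hits_set)
 *       printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n", qc.hits, qc.bytes);
 */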
2645 #endif
2646 
2647 /**
2648  * Isolated mode.
2649  *
2650  * @see rte_flow_isolate()
2651  * @see rte_flow_ops
2652  */
2653 int
2654 mlx5_flow_isolate(struct rte_eth_dev *dev,
2655 		  int enable,
2656 		  struct rte_flow_error *error)
2657 {
2658 	struct priv *priv = dev->data->dev_private;
2659 
2660 	if (dev->data->dev_started) {
2661 		rte_flow_error_set(error, EBUSY,
2662 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2663 				   NULL,
2664 				   "port must be stopped first");
2665 		return -rte_errno;
2666 	}
2667 	priv->isolated = !!enable;
2668 	if (enable)
2669 		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2670 	else
2671 		priv->dev->dev_ops = &mlx5_dev_ops;
2672 	return 0;
2673 }
2674 
2675 /**
2676  * Convert a flow director filter to a generic flow.
2677  *
2678  * @param dev
2679  *   Pointer to Ethernet device.
2680  * @param fdir_filter
2681  *   Flow director filter to add.
2682  * @param attributes
2683  *   Generic flow parameters structure.
2684  *
2685  * @return
2686  *   0 on success, a negative errno value otherwise and rte_errno is set.
2687  */
2688 static int
2689 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2690 			 const struct rte_eth_fdir_filter *fdir_filter,
2691 			 struct mlx5_fdir *attributes)
2692 {
2693 	struct priv *priv = dev->data->dev_private;
2694 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
2695 	const struct rte_eth_fdir_masks *mask =
2696 		&dev->data->dev_conf.fdir_conf.mask;
2697 
2698 	/* Validate queue number. */
2699 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2700 		DRV_LOG(ERR, "port %u invalid queue number %d",
2701 			dev->data->port_id, fdir_filter->action.rx_queue);
2702 		rte_errno = EINVAL;
2703 		return -rte_errno;
2704 	}
2705 	attributes->attr.ingress = 1;
2706 	attributes->items[0] = (struct rte_flow_item) {
2707 		.type = RTE_FLOW_ITEM_TYPE_ETH,
2708 		.spec = &attributes->l2,
2709 		.mask = &attributes->l2_mask,
2710 	};
2711 	switch (fdir_filter->action.behavior) {
2712 	case RTE_ETH_FDIR_ACCEPT:
2713 		attributes->actions[0] = (struct rte_flow_action){
2714 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
2715 			.conf = &attributes->queue,
2716 		};
2717 		break;
2718 	case RTE_ETH_FDIR_REJECT:
2719 		attributes->actions[0] = (struct rte_flow_action){
2720 			.type = RTE_FLOW_ACTION_TYPE_DROP,
2721 		};
2722 		break;
2723 	default:
2724 		DRV_LOG(ERR, "port %u invalid behavior %d",
2725 			dev->data->port_id,
2726 			fdir_filter->action.behavior);
2727 		rte_errno = ENOTSUP;
2728 		return -rte_errno;
2729 	}
2730 	attributes->queue.index = fdir_filter->action.rx_queue;
2731 	/* Handle L3. */
2732 	switch (fdir_filter->input.flow_type) {
2733 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2734 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2735 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2736 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2737 			.src_addr = input->flow.ip4_flow.src_ip,
2738 			.dst_addr = input->flow.ip4_flow.dst_ip,
2739 			.time_to_live = input->flow.ip4_flow.ttl,
2740 			.type_of_service = input->flow.ip4_flow.tos,
2741 			.next_proto_id = input->flow.ip4_flow.proto,
2742 		};
2743 		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
2744 			.src_addr = mask->ipv4_mask.src_ip,
2745 			.dst_addr = mask->ipv4_mask.dst_ip,
2746 			.time_to_live = mask->ipv4_mask.ttl,
2747 			.type_of_service = mask->ipv4_mask.tos,
2748 			.next_proto_id = mask->ipv4_mask.proto,
2749 		};
2750 		attributes->items[1] = (struct rte_flow_item){
2751 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2752 			.spec = &attributes->l3,
2753 			.mask = &attributes->l3_mask,
2754 		};
2755 		break;
2756 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2757 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2758 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2759 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2760 			.hop_limits = input->flow.ipv6_flow.hop_limits,
2761 			.proto = input->flow.ipv6_flow.proto,
2762 		};
2763 
2764 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2765 		       input->flow.ipv6_flow.src_ip,
2766 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2767 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2768 		       input->flow.ipv6_flow.dst_ip,
2769 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2770 		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
2771 		       mask->ipv6_mask.src_ip,
2772 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
2773 		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
2774 		       mask->ipv6_mask.dst_ip,
2775 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
2776 		attributes->items[1] = (struct rte_flow_item){
2777 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2778 			.spec = &attributes->l3,
2779 			.mask = &attributes->l3_mask,
2780 		};
2781 		break;
2782 	default:
2783 		DRV_LOG(ERR, "port %u invalid flow type %d",
2784 			dev->data->port_id, fdir_filter->input.flow_type);
2785 		rte_errno = ENOTSUP;
2786 		return -rte_errno;
2787 	}
2788 	/* Handle L4. */
2789 	switch (fdir_filter->input.flow_type) {
2790 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2791 		attributes->l4.udp.hdr = (struct udp_hdr){
2792 			.src_port = input->flow.udp4_flow.src_port,
2793 			.dst_port = input->flow.udp4_flow.dst_port,
2794 		};
2795 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
2796 			.src_port = mask->src_port_mask,
2797 			.dst_port = mask->dst_port_mask,
2798 		};
2799 		attributes->items[2] = (struct rte_flow_item){
2800 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2801 			.spec = &attributes->l4,
2802 			.mask = &attributes->l4_mask,
2803 		};
2804 		break;
2805 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2806 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2807 			.src_port = input->flow.tcp4_flow.src_port,
2808 			.dst_port = input->flow.tcp4_flow.dst_port,
2809 		};
2810 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2811 			.src_port = mask->src_port_mask,
2812 			.dst_port = mask->dst_port_mask,
2813 		};
2814 		attributes->items[2] = (struct rte_flow_item){
2815 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2816 			.spec = &attributes->l4,
2817 			.mask = &attributes->l4_mask,
2818 		};
2819 		break;
2820 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2821 		attributes->l4.udp.hdr = (struct udp_hdr){
2822 			.src_port = input->flow.udp6_flow.src_port,
2823 			.dst_port = input->flow.udp6_flow.dst_port,
2824 		};
2825 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
2826 			.src_port = mask->src_port_mask,
2827 			.dst_port = mask->dst_port_mask,
2828 		};
2829 		attributes->items[2] = (struct rte_flow_item){
2830 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2831 			.spec = &attributes->l4,
2832 			.mask = &attributes->l4_mask,
2833 		};
2834 		break;
2835 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2836 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2837 			.src_port = input->flow.tcp6_flow.src_port,
2838 			.dst_port = input->flow.tcp6_flow.dst_port,
2839 		};
2840 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2841 			.src_port = mask->src_port_mask,
2842 			.dst_port = mask->dst_port_mask,
2843 		};
2844 		attributes->items[2] = (struct rte_flow_item){
2845 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2846 			.spec = &attributes->l4,
2847 			.mask = &attributes->l4_mask,
2848 		};
2849 		break;
2850 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2851 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2852 		break;
2853 	default:
2854 		DRV_LOG(ERR, "port %u invalid flow type %d",
2855 			dev->data->port_id, fdir_filter->input.flow_type);
2856 		rte_errno = ENOTSUP;
2857 		return -rte_errno;
2858 	}
2859 	return 0;
2860 }
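/*
 * For example, an RTE_ETH_FLOW_NONFRAG_IPV4_UDP filter with an accept action
 * is converted into the generic pattern ETH / IPV4 / UDP terminated by END,
 * with a single QUEUE action, and then goes through the regular rte_flow
 * conversion path.
 */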
2861 
2862 /**
2863  * Add new flow director filter and store it in list.
2864  *
2865  * @param dev
2866  *   Pointer to Ethernet device.
2867  * @param fdir_filter
2868  *   Flow director filter to add.
2869  *
2870  * @return
2871  *   0 on success, a negative errno value otherwise and rte_errno is set.
2872  */
2873 static int
2874 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
2875 		     const struct rte_eth_fdir_filter *fdir_filter)
2876 {
2877 	struct priv *priv = dev->data->dev_private;
2878 	struct mlx5_fdir attributes = {
2879 		.attr.group = 0,
2880 		.l2_mask = {
2881 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2882 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2883 			.type = 0,
2884 		},
2885 	};
2886 	struct mlx5_flow_parse parser = {
2887 		.layer = HASH_RXQ_ETH,
2888 	};
2889 	struct rte_flow_error error;
2890 	struct rte_flow *flow;
2891 	int ret;
2892 
2893 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2894 	if (ret)
2895 		return ret;
2896 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2897 				attributes.actions, &error, &parser);
2898 	if (ret)
2899 		return ret;
2900 	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
2901 				     attributes.items, attributes.actions,
2902 				     &error);
2903 	if (flow) {
2904 		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
2905 			(void *)flow);
2906 		return 0;
2907 	}
2908 	return -rte_errno;
2909 }
2910 
2911 /**
2912  * Delete specific filter.
2913  *
2914  * @param dev
2915  *   Pointer to Ethernet device.
2916  * @param fdir_filter
2917  *   Filter to be deleted.
2918  *
2919  * @return
2920  *   0 on success, a negative errno value otherwise and rte_errno is set.
2921  */
2922 static int
2923 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
2924 			const struct rte_eth_fdir_filter *fdir_filter)
2925 {
2926 	struct priv *priv = dev->data->dev_private;
2927 	struct mlx5_fdir attributes = {
2928 		.attr.group = 0,
2929 	};
2930 	struct mlx5_flow_parse parser = {
2931 		.create = 1,
2932 		.layer = HASH_RXQ_ETH,
2933 	};
2934 	struct rte_flow_error error;
2935 	struct rte_flow *flow;
2936 	unsigned int i;
2937 	int ret;
2938 
2939 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2940 	if (ret)
2941 		return ret;
2942 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2943 				attributes.actions, &error, &parser);
2944 	if (ret)
2945 		goto exit;
2946 	/*
2947 	 * Special case for the drop action, whose specification is only
2948 	 * appended when a flow is actually created; it is therefore missing
2949 	 * here and must be re-added for the comparison below to match.
2950 	 */
2951 	if (parser.drop) {
2952 		struct ibv_flow_spec_action_drop *drop;
2953 
2954 		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2955 				parser.queue[HASH_RXQ_ETH].offset);
2956 		*drop = (struct ibv_flow_spec_action_drop){
2957 			.type = IBV_FLOW_SPEC_ACTION_DROP,
2958 			.size = sizeof(struct ibv_flow_spec_action_drop),
2959 		};
2960 		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2961 	}
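	/*
	 * Look for an already installed flow whose Verbs attribute and
	 * specification list match the freshly converted filter byte for
	 * byte; only that flow is destroyed.
	 */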
2962 	TAILQ_FOREACH(flow, &priv->flows, next) {
2963 		struct ibv_flow_attr *attr;
2964 		struct ibv_spec_header *attr_h;
2965 		void *spec;
2966 		struct ibv_flow_attr *flow_attr;
2967 		struct ibv_spec_header *flow_h;
2968 		void *flow_spec;
2969 		unsigned int specs_n;
2970 
2971 		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2972 		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2973 		/* Compare first the attributes. */
2974 		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2975 			continue;
2976 		if (attr->num_of_specs == 0)
2977 			continue;
2978 		spec = (void *)((uintptr_t)attr +
2979 				sizeof(struct ibv_flow_attr));
2980 		flow_spec = (void *)((uintptr_t)flow_attr +
2981 				     sizeof(struct ibv_flow_attr));
2982 		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2983 		for (i = 0; i != specs_n; ++i) {
2984 			attr_h = spec;
2985 			flow_h = flow_spec;
2986 			if (memcmp(spec, flow_spec,
2987 				   RTE_MIN(attr_h->size, flow_h->size)))
2988 				goto wrong_flow;
2989 			spec = (void *)((uintptr_t)spec + attr_h->size);
2990 			flow_spec = (void *)((uintptr_t)flow_spec +
2991 					     flow_h->size);
2992 		}
2993 		/* At this point, the flow matches. */
2994 		break;
2995 wrong_flow:
2996 		/* The flow does not match. */
2997 		continue;
2998 	}
2999 	ret = rte_errno; /* Save rte_errno before cleanup. */
3000 	if (flow)
3001 		mlx5_flow_list_destroy(dev, &priv->flows, flow);
3002 exit:
3003 	for (i = 0; i != hash_rxq_init_n; ++i) {
3004 		if (parser.queue[i].ibv_attr)
3005 			rte_free(parser.queue[i].ibv_attr);
3006 	}
3007 	rte_errno = ret; /* Restore rte_errno. */
3008 	return -rte_errno;
3009 }
3010 
3011 /**
3012  * Update queue for specific filter.
3013  *
3014  * @param dev
3015  *   Pointer to Ethernet device.
3016  * @param fdir_filter
3017  *   Filter to be updated.
3018  *
3019  * @return
3020  *   0 on success, a negative errno value otherwise and rte_errno is set.
3021  */
3022 static int
3023 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3024 			const struct rte_eth_fdir_filter *fdir_filter)
3025 {
3026 	int ret;
3027 
3028 	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3029 	if (ret)
3030 		return ret;
3031 	return mlx5_fdir_filter_add(dev, fdir_filter);
3032 }
3033 
3034 /**
3035  * Flush all filters.
3036  *
3037  * @param dev
3038  *   Pointer to Ethernet device.
3039  */
3040 static void
3041 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3042 {
3043 	struct priv *priv = dev->data->dev_private;
3044 
3045 	mlx5_flow_list_flush(dev, &priv->flows);
3046 }
3047 
3048 /**
3049  * Get flow director information.
3050  *
3051  * @param dev
3052  *   Pointer to Ethernet device.
3053  * @param[out] fdir_info
3054  *   Resulting flow director information.
3055  */
3056 static void
3057 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3058 {
3059 	struct priv *priv = dev->data->dev_private;
3060 	struct rte_eth_fdir_masks *mask =
3061 		&priv->dev->data->dev_conf.fdir_conf.mask;
3062 
3063 	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3064 	fdir_info->guarant_spc = 0;
3065 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3066 	fdir_info->max_flexpayload = 0;
3067 	fdir_info->flow_types_mask[0] = 0;
3068 	fdir_info->flex_payload_unit = 0;
3069 	fdir_info->max_flex_payload_segment_num = 0;
3070 	fdir_info->flex_payload_limit = 0;
3071 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3072 }
3073 
3074 /**
3075  * Deal with flow director operations.
3076  *
3077  * @param dev
3078  *   Pointer to Ethernet device.
3079  * @param filter_op
3080  *   Operation to perform.
3081  * @param arg
3082  *   Pointer to operation-specific structure.
3083  *
3084  * @return
3085  *   0 on success, a negative errno value otherwise and rte_errno is set.
3086  */
3087 static int
3088 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3089 		    void *arg)
3090 {
3091 	struct priv *priv = dev->data->dev_private;
3092 	enum rte_fdir_mode fdir_mode =
3093 		priv->dev->data->dev_conf.fdir_conf.mode;
3094 
3095 	if (filter_op == RTE_ETH_FILTER_NOP)
3096 		return 0;
3097 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3098 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3099 		DRV_LOG(ERR, "port %u flow director mode %d not supported",
3100 			dev->data->port_id, fdir_mode);
3101 		rte_errno = EINVAL;
3102 		return -rte_errno;
3103 	}
3104 	switch (filter_op) {
3105 	case RTE_ETH_FILTER_ADD:
3106 		return mlx5_fdir_filter_add(dev, arg);
3107 	case RTE_ETH_FILTER_UPDATE:
3108 		return mlx5_fdir_filter_update(dev, arg);
3109 	case RTE_ETH_FILTER_DELETE:
3110 		return mlx5_fdir_filter_delete(dev, arg);
3111 	case RTE_ETH_FILTER_FLUSH:
3112 		mlx5_fdir_filter_flush(dev);
3113 		break;
3114 	case RTE_ETH_FILTER_INFO:
3115 		mlx5_fdir_info_get(dev, arg);
3116 		break;
3117 	default:
3118 		DRV_LOG(DEBUG, "port %u unknown operation %u",
3119 			dev->data->port_id, filter_op);
3120 		rte_errno = EINVAL;
3121 		return -rte_errno;
3122 	}
3123 	return 0;
3124 }
3125 
3126 /**
3127  * Manage filter operations.
3128  *
3129  * @param dev
3130  *   Pointer to Ethernet device structure.
3131  * @param filter_type
3132  *   Filter type.
3133  * @param filter_op
3134  *   Operation to perform.
3135  * @param arg
3136  *   Pointer to operation-specific structure.
3137  *
3138  * @return
3139  *   0 on success, a negative errno value otherwise and rte_errno is set.
3140  */
3141 int
3142 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3143 		     enum rte_filter_type filter_type,
3144 		     enum rte_filter_op filter_op,
3145 		     void *arg)
3146 {
3147 	switch (filter_type) {
3148 	case RTE_ETH_FILTER_GENERIC:
3149 		if (filter_op != RTE_ETH_FILTER_GET) {
3150 			rte_errno = EINVAL;
3151 			return -rte_errno;
3152 		}
3153 		*(const void **)arg = &mlx5_flow_ops;
3154 		return 0;
3155 	case RTE_ETH_FILTER_FDIR:
3156 		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3157 	default:
3158 		DRV_LOG(ERR, "port %u filter type (%d) not supported",
3159 			dev->data->port_id, filter_type);
3160 		rte_errno = ENOTSUP;
3161 		return -rte_errno;
3162 	}
3163 	return 0;
3164 }
3165