xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision fd5baf09cdf9170e0f92a112fd0ef19c29649330)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <sys/queue.h>
7 #include <string.h>
8 
9 /* Verbs header. */
10 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
11 #ifdef PEDANTIC
12 #pragma GCC diagnostic ignored "-Wpedantic"
13 #endif
14 #include <infiniband/verbs.h>
15 #ifdef PEDANTIC
16 #pragma GCC diagnostic error "-Wpedantic"
17 #endif
18 
19 #include <rte_ethdev_driver.h>
20 #include <rte_flow.h>
21 #include <rte_flow_driver.h>
22 #include <rte_malloc.h>
23 #include <rte_ip.h>
24 
25 #include "mlx5.h"
26 #include "mlx5_defs.h"
27 #include "mlx5_prm.h"
28 #include "mlx5_glue.h"
29 
30 /* Define minimal priority for control plane flows. */
31 #define MLX5_CTRL_FLOW_PRIORITY 4
32 
33 /* Internet Protocol versions. */
34 #define MLX5_IPV4 4
35 #define MLX5_IPV6 6
36 
37 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
38 struct ibv_flow_spec_counter_action {
39 	int dummy;
40 };
41 #endif
42 
43 /* Dev ops structure defined in mlx5.c */
44 extern const struct eth_dev_ops mlx5_dev_ops;
45 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
46 
47 /** Structure given to the conversion functions. */
48 struct mlx5_flow_data {
49 	struct mlx5_flow_parse *parser; /**< Parser context. */
50 	struct rte_flow_error *error; /**< Error context. */
51 };
52 
53 static int
54 mlx5_flow_create_eth(const struct rte_flow_item *item,
55 		     const void *default_mask,
56 		     struct mlx5_flow_data *data);
57 
58 static int
59 mlx5_flow_create_vlan(const struct rte_flow_item *item,
60 		      const void *default_mask,
61 		      struct mlx5_flow_data *data);
62 
63 static int
64 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
65 		      const void *default_mask,
66 		      struct mlx5_flow_data *data);
67 
68 static int
69 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
70 		      const void *default_mask,
71 		      struct mlx5_flow_data *data);
72 
73 static int
74 mlx5_flow_create_udp(const struct rte_flow_item *item,
75 		     const void *default_mask,
76 		     struct mlx5_flow_data *data);
77 
78 static int
79 mlx5_flow_create_tcp(const struct rte_flow_item *item,
80 		     const void *default_mask,
81 		     struct mlx5_flow_data *data);
82 
83 static int
84 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
85 		       const void *default_mask,
86 		       struct mlx5_flow_data *data);
87 
88 struct mlx5_flow_parse;
89 
90 static void
91 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
92 		      unsigned int size);
93 
94 static int
95 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
96 
97 static int
98 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
99 
100 /* Hash RX queue types. */
101 enum hash_rxq_type {
102 	HASH_RXQ_TCPV4,
103 	HASH_RXQ_UDPV4,
104 	HASH_RXQ_IPV4,
105 	HASH_RXQ_TCPV6,
106 	HASH_RXQ_UDPV6,
107 	HASH_RXQ_IPV6,
108 	HASH_RXQ_ETH,
109 };
110 
111 /* Initialization data for hash RX queue. */
112 struct hash_rxq_init {
113 	uint64_t hash_fields; /* Fields that participate in the hash. */
114 	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
115 	unsigned int flow_priority; /* Flow priority to use. */
116 	unsigned int ip_version; /* Internet protocol. */
117 };
118 
119 /* Initialization data for hash RX queues. */
120 const struct hash_rxq_init hash_rxq_init[] = {
121 	[HASH_RXQ_TCPV4] = {
122 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
123 				IBV_RX_HASH_DST_IPV4 |
124 				IBV_RX_HASH_SRC_PORT_TCP |
125 				IBV_RX_HASH_DST_PORT_TCP),
126 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
127 		.flow_priority = 1,
128 		.ip_version = MLX5_IPV4,
129 	},
130 	[HASH_RXQ_UDPV4] = {
131 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
132 				IBV_RX_HASH_DST_IPV4 |
133 				IBV_RX_HASH_SRC_PORT_UDP |
134 				IBV_RX_HASH_DST_PORT_UDP),
135 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
136 		.flow_priority = 1,
137 		.ip_version = MLX5_IPV4,
138 	},
139 	[HASH_RXQ_IPV4] = {
140 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
141 				IBV_RX_HASH_DST_IPV4),
142 		.dpdk_rss_hf = (ETH_RSS_IPV4 |
143 				ETH_RSS_FRAG_IPV4),
144 		.flow_priority = 2,
145 		.ip_version = MLX5_IPV4,
146 	},
147 	[HASH_RXQ_TCPV6] = {
148 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
149 				IBV_RX_HASH_DST_IPV6 |
150 				IBV_RX_HASH_SRC_PORT_TCP |
151 				IBV_RX_HASH_DST_PORT_TCP),
152 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
153 		.flow_priority = 1,
154 		.ip_version = MLX5_IPV6,
155 	},
156 	[HASH_RXQ_UDPV6] = {
157 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
158 				IBV_RX_HASH_DST_IPV6 |
159 				IBV_RX_HASH_SRC_PORT_UDP |
160 				IBV_RX_HASH_DST_PORT_UDP),
161 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
162 		.flow_priority = 1,
163 		.ip_version = MLX5_IPV6,
164 	},
165 	[HASH_RXQ_IPV6] = {
166 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
167 				IBV_RX_HASH_DST_IPV6),
168 		.dpdk_rss_hf = (ETH_RSS_IPV6 |
169 				ETH_RSS_FRAG_IPV6),
170 		.flow_priority = 2,
171 		.ip_version = MLX5_IPV6,
172 	},
173 	[HASH_RXQ_ETH] = {
174 		.hash_fields = 0,
175 		.dpdk_rss_hf = 0,
176 		.flow_priority = 3,
177 	},
178 };
179 
180 /* Number of entries in hash_rxq_init[]. */
181 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
182 
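/*
 * Illustrative sketch (not part of the original revision): how the table
 * above maps a DPDK rss_hf request to Verbs hash fields. The helper name and
 * the MLX5_FLOW_EXAMPLES guard are hypothetical; the parser performs similar
 * per-entry checks when selecting which hash Rx queues to populate.
 */
#ifdef MLX5_FLOW_EXAMPLES
static uint64_t
mlx5_flow_example_hash_fields(uint64_t rss_hf)
{
	uint64_t hash_fields = 0;
	unsigned int i;

	/* Collect the Verbs hash fields of every matching entry. */
	for (i = 0; i != hash_rxq_init_n; ++i)
		if (hash_rxq_init[i].dpdk_rss_hf & rss_hf)
			hash_fields |= hash_rxq_init[i].hash_fields;
	return hash_fields;
}
#endif
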
183 /** Structure for holding counter stats. */
184 struct mlx5_flow_counter_stats {
185 	uint64_t hits; /**< Number of packets matched by the rule. */
186 	uint64_t bytes; /**< Number of bytes matched by the rule. */
187 };
188 
189 /** Structure for Drop queue. */
190 struct mlx5_hrxq_drop {
191 	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
192 	struct ibv_qp *qp; /**< Verbs queue pair. */
193 	struct ibv_wq *wq; /**< Verbs work queue. */
194 	struct ibv_cq *cq; /**< Verbs completion queue. */
195 };
196 
197 /* Flow structures. */
198 struct mlx5_flow {
199 	uint64_t hash_fields; /**< Fields that participate in the hash. */
200 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
201 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
202 	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
203 };
204 
205 /* Drop flow structures. */
206 struct mlx5_flow_drop {
207 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
208 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
209 };
210 
211 struct rte_flow {
212 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
213 	uint32_t mark:1; /**< Set if the flow is marked. */
214 	uint32_t drop:1; /**< Drop queue. */
215 	uint16_t queues_n; /**< Number of entries in queues[]. */
216 	uint16_t (*queues)[]; /**< Queues indexes to use. */
217 	struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
218 	uint8_t rss_key[40]; /**< copy of the RSS key. */
219 	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
220 	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
221 	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
222 	/**< Flow with Rx queue. */
223 };
224 
225 /** Static initializer for items. */
226 #define ITEMS(...) \
227 	(const enum rte_flow_item_type []){ \
228 		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
229 	}
230 
231 /** Structure to generate a simple graph of layers supported by the NIC. */
232 struct mlx5_flow_items {
233 	/** List of possible actions for these items. */
234 	const enum rte_flow_action_type *const actions;
235 	/** Bit-masks corresponding to the possibilities for the item. */
236 	const void *mask;
237 	/**
238 	 * Default bit-masks to use when item->mask is not provided. When
239 	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
240 	 * used instead.
241 	 */
242 	const void *default_mask;
243 	/** Bit-masks size in bytes. */
244 	const unsigned int mask_sz;
245 	/**
246 	 * Conversion function from rte_flow to NIC specific flow.
247 	 *
248 	 * @param item
249 	 *   rte_flow item to convert.
250 	 * @param default_mask
251 	 *   Default bit-masks to use when item->mask is not provided.
252 	 * @param data
253 	 *   Internal structure to store the conversion.
254 	 *
255 	 * @return
256 	 *   0 on success, a negative errno value otherwise and rte_errno is
257 	 *   set.
258 	 */
259 	int (*convert)(const struct rte_flow_item *item,
260 		       const void *default_mask,
261 		       struct mlx5_flow_data *data);
262 	/** Size in bytes of the destination structure. */
263 	const unsigned int dst_sz;
264 	/** List of possible following items.  */
265 	const enum rte_flow_item_type *const items;
266 };
267 
268 /** Valid actions for this PMD. */
269 static const enum rte_flow_action_type valid_actions[] = {
270 	RTE_FLOW_ACTION_TYPE_DROP,
271 	RTE_FLOW_ACTION_TYPE_QUEUE,
272 	RTE_FLOW_ACTION_TYPE_MARK,
273 	RTE_FLOW_ACTION_TYPE_FLAG,
274 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
275 	RTE_FLOW_ACTION_TYPE_COUNT,
276 #endif
277 	RTE_FLOW_ACTION_TYPE_END,
278 };
279 
280 /** Graph of supported items and associated actions. */
281 static const struct mlx5_flow_items mlx5_flow_items[] = {
282 	[RTE_FLOW_ITEM_TYPE_END] = {
283 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
284 			       RTE_FLOW_ITEM_TYPE_VXLAN),
285 	},
286 	[RTE_FLOW_ITEM_TYPE_ETH] = {
287 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
288 			       RTE_FLOW_ITEM_TYPE_IPV4,
289 			       RTE_FLOW_ITEM_TYPE_IPV6),
290 		.actions = valid_actions,
291 		.mask = &(const struct rte_flow_item_eth){
292 			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
293 			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
294 			.type = -1,
295 		},
296 		.default_mask = &rte_flow_item_eth_mask,
297 		.mask_sz = sizeof(struct rte_flow_item_eth),
298 		.convert = mlx5_flow_create_eth,
299 		.dst_sz = sizeof(struct ibv_flow_spec_eth),
300 	},
301 	[RTE_FLOW_ITEM_TYPE_VLAN] = {
302 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
303 			       RTE_FLOW_ITEM_TYPE_IPV6),
304 		.actions = valid_actions,
305 		.mask = &(const struct rte_flow_item_vlan){
306 			.tci = -1,
307 		},
308 		.default_mask = &rte_flow_item_vlan_mask,
309 		.mask_sz = sizeof(struct rte_flow_item_vlan),
310 		.convert = mlx5_flow_create_vlan,
311 		.dst_sz = 0,
312 	},
313 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
314 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
315 			       RTE_FLOW_ITEM_TYPE_TCP),
316 		.actions = valid_actions,
317 		.mask = &(const struct rte_flow_item_ipv4){
318 			.hdr = {
319 				.src_addr = -1,
320 				.dst_addr = -1,
321 				.type_of_service = -1,
322 				.next_proto_id = -1,
323 			},
324 		},
325 		.default_mask = &rte_flow_item_ipv4_mask,
326 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
327 		.convert = mlx5_flow_create_ipv4,
328 		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
329 	},
330 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
331 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
332 			       RTE_FLOW_ITEM_TYPE_TCP),
333 		.actions = valid_actions,
334 		.mask = &(const struct rte_flow_item_ipv6){
335 			.hdr = {
336 				.src_addr = {
337 					0xff, 0xff, 0xff, 0xff,
338 					0xff, 0xff, 0xff, 0xff,
339 					0xff, 0xff, 0xff, 0xff,
340 					0xff, 0xff, 0xff, 0xff,
341 				},
342 				.dst_addr = {
343 					0xff, 0xff, 0xff, 0xff,
344 					0xff, 0xff, 0xff, 0xff,
345 					0xff, 0xff, 0xff, 0xff,
346 					0xff, 0xff, 0xff, 0xff,
347 				},
348 				.vtc_flow = -1,
349 				.proto = -1,
350 				.hop_limits = -1,
351 			},
352 		},
353 		.default_mask = &rte_flow_item_ipv6_mask,
354 		.mask_sz = sizeof(struct rte_flow_item_ipv6),
355 		.convert = mlx5_flow_create_ipv6,
356 		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
357 	},
358 	[RTE_FLOW_ITEM_TYPE_UDP] = {
359 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
360 		.actions = valid_actions,
361 		.mask = &(const struct rte_flow_item_udp){
362 			.hdr = {
363 				.src_port = -1,
364 				.dst_port = -1,
365 			},
366 		},
367 		.default_mask = &rte_flow_item_udp_mask,
368 		.mask_sz = sizeof(struct rte_flow_item_udp),
369 		.convert = mlx5_flow_create_udp,
370 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
371 	},
372 	[RTE_FLOW_ITEM_TYPE_TCP] = {
373 		.actions = valid_actions,
374 		.mask = &(const struct rte_flow_item_tcp){
375 			.hdr = {
376 				.src_port = -1,
377 				.dst_port = -1,
378 			},
379 		},
380 		.default_mask = &rte_flow_item_tcp_mask,
381 		.mask_sz = sizeof(struct rte_flow_item_tcp),
382 		.convert = mlx5_flow_create_tcp,
383 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
384 	},
385 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
386 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
387 		.actions = valid_actions,
388 		.mask = &(const struct rte_flow_item_vxlan){
389 			.vni = "\xff\xff\xff",
390 		},
391 		.default_mask = &rte_flow_item_vxlan_mask,
392 		.mask_sz = sizeof(struct rte_flow_item_vxlan),
393 		.convert = mlx5_flow_create_vxlan,
394 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
395 	},
396 };
397 
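/*
 * Illustrative sketch (not part of the original revision): how the graph
 * above encodes which pattern item may follow which. The helper name and the
 * MLX5_FLOW_EXAMPLES guard are hypothetical;
 * mlx5_flow_convert_items_validate() below performs the real traversal.
 */
#ifdef MLX5_FLOW_EXAMPLES
static int
mlx5_flow_example_item_allowed(enum rte_flow_item_type cur,
			       enum rte_flow_item_type next)
{
	const enum rte_flow_item_type *t = mlx5_flow_items[cur].items;

	/* .items lists the valid successors, terminated by END. */
	for (; t && *t != RTE_FLOW_ITEM_TYPE_END; ++t)
		if (*t == next)
			return 1;
	return 0;
}
/* E.g. ETH -> IPV4 returns 1, ETH -> UDP returns 0. */
#endif
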
398 /** Structure to pass to the conversion function. */
399 struct mlx5_flow_parse {
400 	uint32_t inner; /**< Set once VXLAN is encountered. */
401 	uint32_t create:1;
402 	/**< Whether resources should remain after a validate. */
403 	uint32_t drop:1; /**< Target is a drop queue. */
404 	uint32_t mark:1; /**< Mark is present in the flow. */
405 	uint32_t count:1; /**< Count is present in the flow. */
406 	uint32_t mark_id; /**< Mark identifier. */
407 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
408 	uint16_t queues_n; /**< Number of entries in queues[]. */
409 	struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
410 	uint8_t rss_key[40]; /**< copy of the RSS key. */
411 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
412 	struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
413 	struct {
414 		struct ibv_flow_attr *ibv_attr;
415 		/**< Pointer to Verbs attributes. */
416 		unsigned int offset;
417 		/**< Current position or total size of the attribute. */
418 	} queue[RTE_DIM(hash_rxq_init)];
419 };
420 
421 static const struct rte_flow_ops mlx5_flow_ops = {
422 	.validate = mlx5_flow_validate,
423 	.create = mlx5_flow_create,
424 	.destroy = mlx5_flow_destroy,
425 	.flush = mlx5_flow_flush,
426 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
427 	.query = mlx5_flow_query,
428 #else
429 	.query = NULL,
430 #endif
431 	.isolate = mlx5_flow_isolate,
432 };
433 
434 /* Convert FDIR request to Generic flow. */
435 struct mlx5_fdir {
436 	struct rte_flow_attr attr;
437 	struct rte_flow_action actions[2];
438 	struct rte_flow_item items[4];
439 	struct rte_flow_item_eth l2;
440 	struct rte_flow_item_eth l2_mask;
441 	union {
442 		struct rte_flow_item_ipv4 ipv4;
443 		struct rte_flow_item_ipv6 ipv6;
444 	} l3;
445 	union {
446 		struct rte_flow_item_udp udp;
447 		struct rte_flow_item_tcp tcp;
448 	} l4;
449 	struct rte_flow_action_queue queue;
450 };
451 
452 /* Verbs specification header. */
453 struct ibv_spec_header {
454 	enum ibv_flow_spec_type type;
455 	uint16_t size;
456 };
457 
458 /**
459  * Check support for a given item.
460  *
461  * @param item[in]
462  *   Item specification.
463  * @param mask[in]
464  *   Bit-masks covering supported fields to compare with spec, last and mask in
465  *   \item.
466  * @param size
467  *   Bit-Mask size in bytes.
468  *
469  * @return
470  *   0 on success, a negative errno value otherwise and rte_errno is set.
471  */
472 static int
473 mlx5_flow_item_validate(const struct rte_flow_item *item,
474 			const uint8_t *mask, unsigned int size)
475 {
476 	if (!item->spec && (item->mask || item->last)) {
477 		rte_errno = EINVAL;
478 		return -rte_errno;
479 	}
480 	if (item->spec && !item->mask) {
481 		unsigned int i;
482 		const uint8_t *spec = item->spec;
483 
484 		for (i = 0; i < size; ++i)
485 			if ((spec[i] | mask[i]) != mask[i]) {
486 				rte_errno = EINVAL;
487 				return -rte_errno;
488 			}
489 	}
490 	if (item->last && !item->mask) {
491 		unsigned int i;
492 		const uint8_t *spec = item->last;
493 
494 		for (i = 0; i < size; ++i)
495 			if ((spec[i] | mask[i]) != mask[i]) {
496 				rte_errno = EINVAL;
497 				return -rte_errno;
498 			}
499 	}
500 	if (item->mask) {
501 		unsigned int i;
502 		const uint8_t *spec = item->spec;
503 
504 		for (i = 0; i < size; ++i)
505 			if ((spec[i] | mask[i]) != mask[i]) {
506 				rte_errno = EINVAL;
507 				return -rte_errno;
508 			}
509 	}
510 	if (item->spec && item->last) {
511 		uint8_t spec[size];
512 		uint8_t last[size];
513 		const uint8_t *apply = mask;
514 		unsigned int i;
515 		int ret;
516 
517 		if (item->mask)
518 			apply = item->mask;
519 		for (i = 0; i < size; ++i) {
520 			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
521 			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
522 		}
523 		ret = memcmp(spec, last, size);
524 		if (ret != 0) {
525 			rte_errno = EINVAL;
526 			return -rte_errno;
527 		}
528 	}
529 	return 0;
530 }
531 
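/*
 * Illustrative sketch (not part of the original revision): an IPv4 item
 * rejected by the check above because its spec sets a field (TTL) that the
 * supported mask in mlx5_flow_items[] does not cover. The helper name and
 * the MLX5_FLOW_EXAMPLES guard are hypothetical.
 */
#ifdef MLX5_FLOW_EXAMPLES
static int
mlx5_flow_example_reject_ttl(void)
{
	static const struct rte_flow_item_ipv4 spec = {
		.hdr = {
			.dst_addr = RTE_BE32(0xc0a80001), /* 192.168.0.1 */
			.time_to_live = 64, /* Outside the supported mask. */
		},
	};
	struct rte_flow_item item = {
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.spec = &spec,
	};

	/* Expected to fail with rte_errno set to EINVAL. */
	return mlx5_flow_item_validate
		(&item,
		 (const uint8_t *)
		 mlx5_flow_items[RTE_FLOW_ITEM_TYPE_IPV4].mask,
		 mlx5_flow_items[RTE_FLOW_ITEM_TYPE_IPV4].mask_sz);
}
#endif
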
532 /**
533  * Copy the RSS configuration from the user-provided one; if rss_conf is
534  * NULL, use the driver default.
535  *
536  * @param parser
537  *   Internal parser structure.
538  * @param rss_conf
539  *   User RSS configuration to save.
540  *
541  * @return
542  *   0 on success, a negative errno value otherwise and rte_errno is set.
543  */
544 static int
545 mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
546 			   const struct rte_eth_rss_conf *rss_conf)
547 {
548 	/*
549 	 * This function is also called at the beginning of
550 	 * mlx5_flow_convert_actions() to initialize the parser with the
551 	 * device default RSS configuration.
552 	 */
553 	if (rss_conf) {
554 		if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
555 			rte_errno = EINVAL;
556 			return -rte_errno;
557 		}
558 		if (rss_conf->rss_key_len != 40) {
559 			rte_errno = EINVAL;
560 			return -rte_errno;
561 		}
562 		if (rss_conf->rss_key_len && rss_conf->rss_key) {
563 			parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
564 			memcpy(parser->rss_key, rss_conf->rss_key,
565 			       rss_conf->rss_key_len);
566 			parser->rss_conf.rss_key = parser->rss_key;
567 		}
568 		parser->rss_conf.rss_hf = rss_conf->rss_hf;
569 	}
570 	return 0;
571 }
572 
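/*
 * Illustrative sketch (not part of the original revision): the shape of an
 * RSS configuration matching the checks above, i.e. a 40-byte key and hash
 * types taken from the dpdk_rss_hf values of hash_rxq_init[] (assuming they
 * stay within the device-supported set). The key content, variable names and
 * the MLX5_FLOW_EXAMPLES guard are hypothetical.
 */
#ifdef MLX5_FLOW_EXAMPLES
static uint8_t mlx5_flow_example_rss_key[40] = { 0x6d, 0x5a };

static const struct rte_eth_rss_conf mlx5_flow_example_rss_conf = {
	.rss_key = mlx5_flow_example_rss_key,
	.rss_key_len = 40,
	.rss_hf = ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV4_UDP,
};
#endif
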
573 /**
574  * Extract attribute to the parser.
575  *
576  * @param[in] attr
577  *   Flow rule attributes.
578  * @param[out] error
579  *   Perform verbose error reporting if not NULL.
580  *
581  * @return
582  *   0 on success, a negative errno value otherwise and rte_errno is set.
583  */
584 static int
585 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
586 			     struct rte_flow_error *error)
587 {
588 	if (attr->group) {
589 		rte_flow_error_set(error, ENOTSUP,
590 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
591 				   NULL,
592 				   "groups are not supported");
593 		return -rte_errno;
594 	}
595 	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
596 		rte_flow_error_set(error, ENOTSUP,
597 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
598 				   NULL,
599 				   "priorities are not supported");
600 		return -rte_errno;
601 	}
602 	if (attr->egress) {
603 		rte_flow_error_set(error, ENOTSUP,
604 				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
605 				   NULL,
606 				   "egress is not supported");
607 		return -rte_errno;
608 	}
609 	if (!attr->ingress) {
610 		rte_flow_error_set(error, ENOTSUP,
611 				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
612 				   NULL,
613 				   "only ingress is supported");
614 		return -rte_errno;
615 	}
616 	return 0;
617 }
618 
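/*
 * Illustrative sketch (not part of the original revision): the only
 * attribute combination accepted above is group 0, ingress only, with either
 * priority 0 or MLX5_CTRL_FLOW_PRIORITY. The variable name and the
 * MLX5_FLOW_EXAMPLES guard are hypothetical.
 */
#ifdef MLX5_FLOW_EXAMPLES
static const struct rte_flow_attr mlx5_flow_example_attr = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
};
#endif
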
619 /**
620  * Extract actions request to the parser.
621  *
622  * @param dev
623  *   Pointer to Ethernet device.
624  * @param[in] actions
625  *   Associated actions (list terminated by the END action).
626  * @param[out] error
627  *   Perform verbose error reporting if not NULL.
628  * @param[in, out] parser
629  *   Internal parser structure.
630  *
631  * @return
632  *   0 on success, a negative errno value otherwise and rte_errno is set.
633  */
634 static int
635 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
636 			  const struct rte_flow_action actions[],
637 			  struct rte_flow_error *error,
638 			  struct mlx5_flow_parse *parser)
639 {
640 	struct priv *priv = dev->data->dev_private;
641 	int ret;
642 
643 	/*
644 	 * Add the default RSS configuration; Verbs needs it to create a QP
645 	 * even when no RSS is requested.
646 	 */
647 	ret = mlx5_flow_convert_rss_conf(parser,
648 					 (const struct rte_eth_rss_conf *)
649 					 &priv->rss_conf);
650 	if (ret)
651 		return ret;
652 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
653 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
654 			continue;
655 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
656 			parser->drop = 1;
657 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
658 			const struct rte_flow_action_queue *queue =
659 				(const struct rte_flow_action_queue *)
660 				actions->conf;
661 			uint16_t n;
662 			uint16_t found = 0;
663 
664 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
665 				goto exit_action_not_supported;
666 			for (n = 0; n < parser->queues_n; ++n) {
667 				if (parser->queues[n] == queue->index) {
668 					found = 1;
669 					break;
670 				}
671 			}
672 			if (parser->queues_n > 1 && !found) {
673 				rte_flow_error_set(error, ENOTSUP,
674 					   RTE_FLOW_ERROR_TYPE_ACTION,
675 					   actions,
676 					   "queue action not in RSS queues");
677 				return -rte_errno;
678 			}
679 			if (!found) {
680 				parser->queues_n = 1;
681 				parser->queues[0] = queue->index;
682 			}
683 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
684 			const struct rte_flow_action_rss *rss =
685 				(const struct rte_flow_action_rss *)
686 				actions->conf;
687 			uint16_t n;
688 
689 			if (!rss || !rss->num) {
690 				rte_flow_error_set(error, EINVAL,
691 						   RTE_FLOW_ERROR_TYPE_ACTION,
692 						   actions,
693 						   "no valid queues");
694 				return -rte_errno;
695 			}
696 			if (parser->queues_n == 1) {
697 				uint16_t found = 0;
698 
699 				assert(parser->queues_n);
700 				for (n = 0; n < rss->num; ++n) {
701 					if (parser->queues[0] ==
702 					    rss->queue[n]) {
703 						found = 1;
704 						break;
705 					}
706 				}
707 				if (!found) {
708 					rte_flow_error_set(error, ENOTSUP,
709 						   RTE_FLOW_ERROR_TYPE_ACTION,
710 						   actions,
711 						   "queue action not in RSS"
712 						   " queues");
713 					return -rte_errno;
714 				}
715 			}
716 			for (n = 0; n < rss->num; ++n) {
717 				if (rss->queue[n] >= priv->rxqs_n) {
718 					rte_flow_error_set(error, EINVAL,
719 						   RTE_FLOW_ERROR_TYPE_ACTION,
720 						   actions,
721 						   "queue id > number of"
722 						   " queues");
723 					return -rte_errno;
724 				}
725 			}
726 			for (n = 0; n < rss->num; ++n)
727 				parser->queues[n] = rss->queue[n];
728 			parser->queues_n = rss->num;
729 			if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) {
730 				rte_flow_error_set(error, EINVAL,
731 						   RTE_FLOW_ERROR_TYPE_ACTION,
732 						   actions,
733 						   "wrong RSS configuration");
734 				return -rte_errno;
735 			}
736 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
737 			const struct rte_flow_action_mark *mark =
738 				(const struct rte_flow_action_mark *)
739 				actions->conf;
740 
741 			if (!mark) {
742 				rte_flow_error_set(error, EINVAL,
743 						   RTE_FLOW_ERROR_TYPE_ACTION,
744 						   actions,
745 						   "mark must be defined");
746 				return -rte_errno;
747 			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
748 				rte_flow_error_set(error, ENOTSUP,
749 						   RTE_FLOW_ERROR_TYPE_ACTION,
750 						   actions,
751 						   "mark must be between 0"
752 						   " and 16777199");
753 				return -rte_errno;
754 			}
755 			parser->mark = 1;
756 			parser->mark_id = mark->id;
757 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
758 			parser->mark = 1;
759 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
760 			   priv->config.flow_counter_en) {
761 			parser->count = 1;
762 		} else {
763 			goto exit_action_not_supported;
764 		}
765 	}
766 	if (parser->drop && parser->mark)
767 		parser->mark = 0;
768 	if (!parser->queues_n && !parser->drop) {
769 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
770 				   NULL, "no valid action");
771 		return -rte_errno;
772 	}
773 	return 0;
774 exit_action_not_supported:
775 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
776 			   actions, "action not supported");
777 	return -rte_errno;
778 }
779 
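/*
 * Illustrative sketch (not part of the original revision): an
 * application-side pattern/action list that the conversion above accepts,
 * built through the public rte_flow API. The helper name, the queue index,
 * the destination address and the MLX5_FLOW_EXAMPLES guard are hypothetical
 * placeholders.
 */
#ifdef MLX5_FLOW_EXAMPLES
static struct rte_flow *
mlx5_flow_example_create(uint16_t port_id, uint16_t queue_idx,
			 struct rte_flow_error *error)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_ipv4 ipv4_spec = {
		.hdr = { .dst_addr = RTE_BE32(0xc0a80001) }, /* 192.168.0.1 */
	};
	struct rte_flow_item_ipv4 ipv4_mask = {
		.hdr = { .dst_addr = RTE_BE32(0xffffffff) },
	};
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &ipv4_spec,
			.mask = &ipv4_mask,
		},
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = queue_idx };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	/* Routes IPv4 packets for 192.168.0.1 to the given Rx queue. */
	return rte_flow_create(port_id, &attr, pattern, actions, error);
}
#endif
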
780 /**
781  * Validate items.
782  *
783  * @param[in] items
784  *   Pattern specification (list terminated by the END pattern item).
785  * @param[out] error
786  *   Perform verbose error reporting if not NULL.
787  * @param[in, out] parser
788  *   Internal parser structure.
789  *
790  * @return
791  *   0 on success, a negative errno value otherwise and rte_errno is set.
792  */
793 static int
794 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
795 				 struct rte_flow_error *error,
796 				 struct mlx5_flow_parse *parser)
797 {
798 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
799 	unsigned int i;
800 	int ret = 0;
801 
802 	/* Initialise the offsets to start after verbs attribute. */
803 	for (i = 0; i != hash_rxq_init_n; ++i)
804 		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
805 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
806 		const struct mlx5_flow_items *token = NULL;
807 		unsigned int n;
808 
809 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
810 			continue;
811 		for (i = 0;
812 		     cur_item->items &&
813 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
814 		     ++i) {
815 			if (cur_item->items[i] == items->type) {
816 				token = &mlx5_flow_items[items->type];
817 				break;
818 			}
819 		}
820 		if (!token)
821 			goto exit_item_not_supported;
822 		cur_item = token;
823 		ret = mlx5_flow_item_validate(items,
824 					      (const uint8_t *)cur_item->mask,
825 					      cur_item->mask_sz);
826 		if (ret)
827 			goto exit_item_not_supported;
828 		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
829 			if (parser->inner) {
830 				rte_flow_error_set(error, ENOTSUP,
831 						   RTE_FLOW_ERROR_TYPE_ITEM,
832 						   items,
833 						   "cannot recognize multiple"
834 						   " VXLAN encapsulations");
835 				return -rte_errno;
836 			}
837 			parser->inner = IBV_FLOW_SPEC_INNER;
838 		}
839 		if (parser->drop) {
840 			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
841 		} else {
842 			for (n = 0; n != hash_rxq_init_n; ++n)
843 				parser->queue[n].offset += cur_item->dst_sz;
844 		}
845 	}
846 	if (parser->drop) {
847 		parser->queue[HASH_RXQ_ETH].offset +=
848 			sizeof(struct ibv_flow_spec_action_drop);
849 	}
850 	if (parser->mark) {
851 		for (i = 0; i != hash_rxq_init_n; ++i)
852 			parser->queue[i].offset +=
853 				sizeof(struct ibv_flow_spec_action_tag);
854 	}
855 	if (parser->count) {
856 		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
857 
858 		for (i = 0; i != hash_rxq_init_n; ++i)
859 			parser->queue[i].offset += size;
860 	}
861 	return 0;
862 exit_item_not_supported:
863 	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
864 				  items, "item not supported");
865 }
866 
867 /**
868  * Allocate memory space to store verbs flow attributes.
869  *
870  * @param[in] size
871  *   Amount of bytes to allocate.
872  * @param[out] error
873  *   Perform verbose error reporting if not NULL.
874  *
875  * @return
876  *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
877  */
878 static struct ibv_flow_attr *
879 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
880 {
881 	struct ibv_flow_attr *ibv_attr;
882 
883 	ibv_attr = rte_calloc(__func__, 1, size, 0);
884 	if (!ibv_attr) {
885 		rte_flow_error_set(error, ENOMEM,
886 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
887 				   NULL,
888 				   "cannot allocate verbs spec attributes");
889 		return NULL;
890 	}
891 	return ibv_attr;
892 }
893 
894 /**
895  * Give inner packet matching a higher priority than non-inner
896  * matching.
897  *
898  * @param[in, out] parser
899  *   Internal parser structure.
900  * @param attr
901  *   User flow attribute.
902  */
903 static void
904 mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
905 			  const struct rte_flow_attr *attr)
906 {
907 	unsigned int i;
908 
909 	if (parser->drop) {
910 		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
911 			attr->priority +
912 			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
913 		return;
914 	}
915 	for (i = 0; i != hash_rxq_init_n; ++i) {
916 		if (parser->queue[i].ibv_attr) {
917 			parser->queue[i].ibv_attr->priority =
918 				attr->priority +
919 				hash_rxq_init[i].flow_priority -
920 				(parser->inner ? 1 : 0);
921 		}
922 	}
923 }
924 
925 /**
926  * Finalise verbs flow attributes.
927  *
928  * @param[in, out] parser
929  *   Internal parser structure.
930  */
931 static void
932 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
933 {
934 	const unsigned int ipv4 =
935 		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
936 	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
937 	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
938 	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
939 	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
940 	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
941 	unsigned int i;
942 
943 	/* Remove any other flow not matching the pattern. */
944 	if (parser->queues_n == 1 && !parser->rss_conf.rss_hf) {
945 		for (i = 0; i != hash_rxq_init_n; ++i) {
946 			if (i == HASH_RXQ_ETH)
947 				continue;
948 			rte_free(parser->queue[i].ibv_attr);
949 			parser->queue[i].ibv_attr = NULL;
950 		}
951 		return;
952 	}
953 	if (parser->layer == HASH_RXQ_ETH) {
954 		goto fill;
955 	} else {
956 		/*
957 		 * This layer becomes useless as the pattern defines more
958 		 * specific layers.
959 		 */
960 		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
961 		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
962 	}
963 	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
964 	for (i = ohmin; i != (ohmax + 1); ++i) {
965 		if (!parser->queue[i].ibv_attr)
966 			continue;
967 		rte_free(parser->queue[i].ibv_attr);
968 		parser->queue[i].ibv_attr = NULL;
969 	}
970 	/* Remove impossible flow according to the RSS configuration. */
971 	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
972 	    parser->rss_conf.rss_hf) {
973 		/* Remove any other flow. */
974 		for (i = hmin; i != (hmax + 1); ++i) {
975 			if ((i == parser->layer) ||
976 			     (!parser->queue[i].ibv_attr))
977 				continue;
978 			rte_free(parser->queue[i].ibv_attr);
979 			parser->queue[i].ibv_attr = NULL;
980 		}
981 	} else if (!parser->queue[ip].ibv_attr) {
982 		/* No RSS possible with the current configuration. */
983 		parser->queues_n = 1;
984 		return;
985 	}
986 fill:
987 	/*
988 	 * Fill missing layers in verbs specifications, or compute the correct
989 	 * offset to allocate the memory space for the attributes and
990 	 * specifications.
991 	 */
992 	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
993 		union {
994 			struct ibv_flow_spec_ipv4_ext ipv4;
995 			struct ibv_flow_spec_ipv6 ipv6;
996 			struct ibv_flow_spec_tcp_udp udp_tcp;
997 		} specs;
998 		void *dst;
999 		uint16_t size;
1000 
1001 		if (i == parser->layer)
1002 			continue;
1003 		if (parser->layer == HASH_RXQ_ETH) {
1004 			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1005 				size = sizeof(struct ibv_flow_spec_ipv4_ext);
1006 				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1007 					.type = IBV_FLOW_SPEC_IPV4_EXT,
1008 					.size = size,
1009 				};
1010 			} else {
1011 				size = sizeof(struct ibv_flow_spec_ipv6);
1012 				specs.ipv6 = (struct ibv_flow_spec_ipv6){
1013 					.type = IBV_FLOW_SPEC_IPV6,
1014 					.size = size,
1015 				};
1016 			}
1017 			if (parser->queue[i].ibv_attr) {
1018 				dst = (void *)((uintptr_t)
1019 					       parser->queue[i].ibv_attr +
1020 					       parser->queue[i].offset);
1021 				memcpy(dst, &specs, size);
1022 				++parser->queue[i].ibv_attr->num_of_specs;
1023 			}
1024 			parser->queue[i].offset += size;
1025 		}
1026 		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1027 		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1028 			size = sizeof(struct ibv_flow_spec_tcp_udp);
1029 			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1030 				.type = ((i == HASH_RXQ_UDPV4 ||
1031 					  i == HASH_RXQ_UDPV6) ?
1032 					 IBV_FLOW_SPEC_UDP :
1033 					 IBV_FLOW_SPEC_TCP),
1034 				.size = size,
1035 			};
1036 			if (parser->queue[i].ibv_attr) {
1037 				dst = (void *)((uintptr_t)
1038 					       parser->queue[i].ibv_attr +
1039 					       parser->queue[i].offset);
1040 				memcpy(dst, &specs, size);
1041 				++parser->queue[i].ibv_attr->num_of_specs;
1042 			}
1043 			parser->queue[i].offset += size;
1044 		}
1045 	}
1046 }
1047 
1048 /**
1049  * Validate and convert a flow supported by the NIC.
1050  *
1051  * @param dev
1052  *   Pointer to Ethernet device.
1053  * @param[in] attr
1054  *   Flow rule attributes.
1055  * @param[in] pattern
1056  *   Pattern specification (list terminated by the END pattern item).
1057  * @param[in] actions
1058  *   Associated actions (list terminated by the END action).
1059  * @param[out] error
1060  *   Perform verbose error reporting if not NULL.
1061  * @param[in, out] parser
1062  *   Internal parser structure.
1063  *
1064  * @return
1065  *   0 on success, a negative errno value otherwise and rte_errno is set.
1066  */
1067 static int
1068 mlx5_flow_convert(struct rte_eth_dev *dev,
1069 		  const struct rte_flow_attr *attr,
1070 		  const struct rte_flow_item items[],
1071 		  const struct rte_flow_action actions[],
1072 		  struct rte_flow_error *error,
1073 		  struct mlx5_flow_parse *parser)
1074 {
1075 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1076 	unsigned int i;
1077 	int ret;
1078 
1079 	/* First step. Validate the attributes, items and actions. */
1080 	*parser = (struct mlx5_flow_parse){
1081 		.create = parser->create,
1082 		.layer = HASH_RXQ_ETH,
1083 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
1084 	};
1085 	ret = mlx5_flow_convert_attributes(attr, error);
1086 	if (ret)
1087 		return ret;
1088 	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1089 	if (ret)
1090 		return ret;
1091 	ret = mlx5_flow_convert_items_validate(items, error, parser);
1092 	if (ret)
1093 		return ret;
1094 	mlx5_flow_convert_finalise(parser);
1095 	/*
1096 	 * Second step.
1097 	 * Allocate the memory space to store verbs specifications.
1098 	 */
1099 	if (parser->drop) {
1100 		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1101 
1102 		parser->queue[HASH_RXQ_ETH].ibv_attr =
1103 			mlx5_flow_convert_allocate(offset, error);
1104 		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1105 			goto exit_enomem;
1106 		parser->queue[HASH_RXQ_ETH].offset =
1107 			sizeof(struct ibv_flow_attr);
1108 	} else {
1109 		for (i = 0; i != hash_rxq_init_n; ++i) {
1110 			unsigned int offset;
1111 
1112 			if (!(parser->rss_conf.rss_hf &
1113 			      hash_rxq_init[i].dpdk_rss_hf) &&
1114 			    (i != HASH_RXQ_ETH))
1115 				continue;
1116 			offset = parser->queue[i].offset;
1117 			parser->queue[i].ibv_attr =
1118 				mlx5_flow_convert_allocate(offset, error);
1119 			if (!parser->queue[i].ibv_attr)
1120 				goto exit_enomem;
1121 			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1122 		}
1123 	}
1124 	/* Third step. Parse the pattern and fill the specifications. */
1125 	parser->inner = 0;
1126 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1127 		struct mlx5_flow_data data = {
1128 			.parser = parser,
1129 			.error = error,
1130 		};
1131 
1132 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1133 			continue;
1134 		cur_item = &mlx5_flow_items[items->type];
1135 		ret = cur_item->convert(items,
1136 					(cur_item->default_mask ?
1137 					 cur_item->default_mask :
1138 					 cur_item->mask),
1139 					 &data);
1140 		if (ret)
1141 			goto exit_free;
1142 	}
1143 	if (parser->mark)
1144 		mlx5_flow_create_flag_mark(parser, parser->mark_id);
1145 	if (parser->count && parser->create) {
1146 		mlx5_flow_create_count(dev, parser);
1147 		if (!parser->cs)
1148 			goto exit_count_error;
1149 	}
1150 	/*
1151 	 * Last step. Complete missing specification to reach the RSS
1152 	 * configuration.
1153 	 */
1154 	if (!parser->drop)
1155 		mlx5_flow_convert_finalise(parser);
1156 	mlx5_flow_update_priority(parser, attr);
1157 exit_free:
1158 	/* Only verification is expected, all resources should be released. */
1159 	if (!parser->create) {
1160 		for (i = 0; i != hash_rxq_init_n; ++i) {
1161 			if (parser->queue[i].ibv_attr) {
1162 				rte_free(parser->queue[i].ibv_attr);
1163 				parser->queue[i].ibv_attr = NULL;
1164 			}
1165 		}
1166 	}
1167 	return ret;
1168 exit_enomem:
1169 	for (i = 0; i != hash_rxq_init_n; ++i) {
1170 		if (parser->queue[i].ibv_attr) {
1171 			rte_free(parser->queue[i].ibv_attr);
1172 			parser->queue[i].ibv_attr = NULL;
1173 		}
1174 	}
1175 	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1176 			   NULL, "cannot allocate verbs spec attributes");
1177 	return -rte_errno;
1178 exit_count_error:
1179 	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1180 			   NULL, "cannot create counter");
1181 	return -rte_errno;
1182 }
1183 
1184 /**
1185  * Copy the specification created into the flow.
1186  *
1187  * @param parser
1188  *   Internal parser structure.
1189  * @param src
1190  *   Create specification.
1191  * @param size
1192  *   Size in bytes of the specification to copy.
1193  */
1194 static void
1195 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1196 		      unsigned int size)
1197 {
1198 	unsigned int i;
1199 	void *dst;
1200 
1201 	for (i = 0; i != hash_rxq_init_n; ++i) {
1202 		if (!parser->queue[i].ibv_attr)
1203 			continue;
1204 		/* Specification must be the same l3 type or none. */
1205 		/* Specification must be the same L3 type or none. */
1206 		    (hash_rxq_init[parser->layer].ip_version ==
1207 		     hash_rxq_init[i].ip_version) ||
1208 		    (hash_rxq_init[i].ip_version == 0)) {
1209 			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1210 					parser->queue[i].offset);
1211 			memcpy(dst, src, size);
1212 			++parser->queue[i].ibv_attr->num_of_specs;
1213 			parser->queue[i].offset += size;
1214 		}
1215 	}
1216 }
1217 
1218 /**
1219  * Convert Ethernet item to Verbs specification.
1220  *
1221  * @param item[in]
1222  *   Item specification.
1223  * @param default_mask[in]
1224  *   Default bit-masks to use when item->mask is not provided.
1225  * @param data[in, out]
1226  *   User structure.
1227  *
1228  * @return
1229  *   0 on success, a negative errno value otherwise and rte_errno is set.
1230  */
1231 static int
1232 mlx5_flow_create_eth(const struct rte_flow_item *item,
1233 		     const void *default_mask,
1234 		     struct mlx5_flow_data *data)
1235 {
1236 	const struct rte_flow_item_eth *spec = item->spec;
1237 	const struct rte_flow_item_eth *mask = item->mask;
1238 	struct mlx5_flow_parse *parser = data->parser;
1239 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1240 	struct ibv_flow_spec_eth eth = {
1241 		.type = parser->inner | IBV_FLOW_SPEC_ETH,
1242 		.size = eth_size,
1243 	};
1244 
1245 	/* Don't update layer for the inner pattern. */
1246 	if (!parser->inner)
1247 		parser->layer = HASH_RXQ_ETH;
1248 	if (spec) {
1249 		unsigned int i;
1250 
1251 		if (!mask)
1252 			mask = default_mask;
1253 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1254 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1255 		eth.val.ether_type = spec->type;
1256 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1257 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1258 		eth.mask.ether_type = mask->type;
1259 		/* Remove unwanted bits from values. */
1260 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1261 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1262 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
1263 		}
1264 		eth.val.ether_type &= eth.mask.ether_type;
1265 	}
1266 	mlx5_flow_create_copy(parser, &eth, eth_size);
1267 	return 0;
1268 }
1269 
1270 /**
1271  * Convert VLAN item to Verbs specification.
1272  *
1273  * @param item[in]
1274  *   Item specification.
1275  * @param default_mask[in]
1276  *   Default bit-masks to use when item->mask is not provided.
1277  * @param data[in, out]
1278  *   User structure.
1279  *
1280  * @return
1281  *   0 on success, a negative errno value otherwise and rte_errno is set.
1282  */
1283 static int
1284 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1285 		      const void *default_mask,
1286 		      struct mlx5_flow_data *data)
1287 {
1288 	const struct rte_flow_item_vlan *spec = item->spec;
1289 	const struct rte_flow_item_vlan *mask = item->mask;
1290 	struct mlx5_flow_parse *parser = data->parser;
1291 	struct ibv_flow_spec_eth *eth;
1292 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1293 
1294 	if (spec) {
1295 		unsigned int i;
1296 		if (!mask)
1297 			mask = default_mask;
1298 
1299 		for (i = 0; i != hash_rxq_init_n; ++i) {
1300 			if (!parser->queue[i].ibv_attr)
1301 				continue;
1302 
1303 			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1304 				       parser->queue[i].offset - eth_size);
1305 			eth->val.vlan_tag = spec->tci;
1306 			eth->mask.vlan_tag = mask->tci;
1307 			eth->val.vlan_tag &= eth->mask.vlan_tag;
1308 			/*
1309 			 * From the Verbs perspective, an empty VLAN is
1310 			 * equivalent to a packet without a VLAN layer.
1311 			 */
1312 			if (!eth->mask.vlan_tag)
1313 				goto error;
1314 		}
1315 		return 0;
1316 	}
1317 error:
1318 	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1319 				  item, "VLAN cannot be empty");
1320 }
1321 
1322 /**
1323  * Convert IPv4 item to Verbs specification.
1324  *
1325  * @param item[in]
1326  *   Item specification.
1327  * @param default_mask[in]
1328  *   Default bit-masks to use when item->mask is not provided.
1329  * @param data[in, out]
1330  *   User structure.
1331  *
1332  * @return
1333  *   0 on success, a negative errno value otherwise and rte_errno is set.
1334  */
1335 static int
1336 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1337 		      const void *default_mask,
1338 		      struct mlx5_flow_data *data)
1339 {
1340 	const struct rte_flow_item_ipv4 *spec = item->spec;
1341 	const struct rte_flow_item_ipv4 *mask = item->mask;
1342 	struct mlx5_flow_parse *parser = data->parser;
1343 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1344 	struct ibv_flow_spec_ipv4_ext ipv4 = {
1345 		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1346 		.size = ipv4_size,
1347 	};
1348 
1349 	/* Don't update layer for the inner pattern. */
1350 	if (!parser->inner)
1351 		parser->layer = HASH_RXQ_IPV4;
1352 	if (spec) {
1353 		if (!mask)
1354 			mask = default_mask;
1355 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1356 			.src_ip = spec->hdr.src_addr,
1357 			.dst_ip = spec->hdr.dst_addr,
1358 			.proto = spec->hdr.next_proto_id,
1359 			.tos = spec->hdr.type_of_service,
1360 		};
1361 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1362 			.src_ip = mask->hdr.src_addr,
1363 			.dst_ip = mask->hdr.dst_addr,
1364 			.proto = mask->hdr.next_proto_id,
1365 			.tos = mask->hdr.type_of_service,
1366 		};
1367 		/* Remove unwanted bits from values. */
1368 		ipv4.val.src_ip &= ipv4.mask.src_ip;
1369 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1370 		ipv4.val.proto &= ipv4.mask.proto;
1371 		ipv4.val.tos &= ipv4.mask.tos;
1372 	}
1373 	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1374 	return 0;
1375 }
1376 
1377 /**
1378  * Convert IPv6 item to Verbs specification.
1379  *
1380  * @param item[in]
1381  *   Item specification.
1382  * @param default_mask[in]
1383  *   Default bit-masks to use when item->mask is not provided.
1384  * @param data[in, out]
1385  *   User structure.
1386  *
1387  * @return
1388  *   0 on success, a negative errno value otherwise and rte_errno is set.
1389  */
1390 static int
1391 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1392 		      const void *default_mask,
1393 		      struct mlx5_flow_data *data)
1394 {
1395 	const struct rte_flow_item_ipv6 *spec = item->spec;
1396 	const struct rte_flow_item_ipv6 *mask = item->mask;
1397 	struct mlx5_flow_parse *parser = data->parser;
1398 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1399 	struct ibv_flow_spec_ipv6 ipv6 = {
1400 		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
1401 		.size = ipv6_size,
1402 	};
1403 
1404 	/* Don't update layer for the inner pattern. */
1405 	if (!parser->inner)
1406 		parser->layer = HASH_RXQ_IPV6;
1407 	if (spec) {
1408 		unsigned int i;
1409 		uint32_t vtc_flow_val;
1410 		uint32_t vtc_flow_mask;
1411 
1412 		if (!mask)
1413 			mask = default_mask;
1414 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1415 		       RTE_DIM(ipv6.val.src_ip));
1416 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1417 		       RTE_DIM(ipv6.val.dst_ip));
1418 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1419 		       RTE_DIM(ipv6.mask.src_ip));
1420 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1421 		       RTE_DIM(ipv6.mask.dst_ip));
1422 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1423 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1424 		ipv6.val.flow_label =
1425 			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1426 					 IPV6_HDR_FL_SHIFT);
1427 		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1428 					 IPV6_HDR_TC_SHIFT;
1429 		ipv6.val.next_hdr = spec->hdr.proto;
1430 		ipv6.val.hop_limit = spec->hdr.hop_limits;
1431 		ipv6.mask.flow_label =
1432 			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1433 					 IPV6_HDR_FL_SHIFT);
1434 		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1435 					  IPV6_HDR_TC_SHIFT;
1436 		ipv6.mask.next_hdr = mask->hdr.proto;
1437 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
1438 		/* Remove unwanted bits from values. */
1439 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1440 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1441 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1442 		}
1443 		ipv6.val.flow_label &= ipv6.mask.flow_label;
1444 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1445 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1446 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1447 	}
1448 	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1449 	return 0;
1450 }
1451 
1452 /**
1453  * Convert UDP item to Verbs specification.
1454  *
1455  * @param item[in]
1456  *   Item specification.
1457  * @param default_mask[in]
1458  *   Default bit-masks to use when item->mask is not provided.
1459  * @param data[in, out]
1460  *   User structure.
1461  *
1462  * @return
1463  *   0 on success, a negative errno value otherwise and rte_errno is set.
1464  */
1465 static int
1466 mlx5_flow_create_udp(const struct rte_flow_item *item,
1467 		     const void *default_mask,
1468 		     struct mlx5_flow_data *data)
1469 {
1470 	const struct rte_flow_item_udp *spec = item->spec;
1471 	const struct rte_flow_item_udp *mask = item->mask;
1472 	struct mlx5_flow_parse *parser = data->parser;
1473 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1474 	struct ibv_flow_spec_tcp_udp udp = {
1475 		.type = parser->inner | IBV_FLOW_SPEC_UDP,
1476 		.size = udp_size,
1477 	};
1478 
1479 	/* Don't update layer for the inner pattern. */
1480 	if (!parser->inner) {
1481 		if (parser->layer == HASH_RXQ_IPV4)
1482 			parser->layer = HASH_RXQ_UDPV4;
1483 		else
1484 			parser->layer = HASH_RXQ_UDPV6;
1485 	}
1486 	if (spec) {
1487 		if (!mask)
1488 			mask = default_mask;
1489 		udp.val.dst_port = spec->hdr.dst_port;
1490 		udp.val.src_port = spec->hdr.src_port;
1491 		udp.mask.dst_port = mask->hdr.dst_port;
1492 		udp.mask.src_port = mask->hdr.src_port;
1493 		/* Remove unwanted bits from values. */
1494 		udp.val.src_port &= udp.mask.src_port;
1495 		udp.val.dst_port &= udp.mask.dst_port;
1496 	}
1497 	mlx5_flow_create_copy(parser, &udp, udp_size);
1498 	return 0;
1499 }
1500 
1501 /**
1502  * Convert TCP item to Verbs specification.
1503  *
1504  * @param item[in]
1505  *   Item specification.
1506  * @param default_mask[in]
1507  *   Default bit-masks to use when item->mask is not provided.
1508  * @param data[in, out]
1509  *   User structure.
1510  *
1511  * @return
1512  *   0 on success, a negative errno value otherwise and rte_errno is set.
1513  */
1514 static int
1515 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1516 		     const void *default_mask,
1517 		     struct mlx5_flow_data *data)
1518 {
1519 	const struct rte_flow_item_tcp *spec = item->spec;
1520 	const struct rte_flow_item_tcp *mask = item->mask;
1521 	struct mlx5_flow_parse *parser = data->parser;
1522 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1523 	struct ibv_flow_spec_tcp_udp tcp = {
1524 		.type = parser->inner | IBV_FLOW_SPEC_TCP,
1525 		.size = tcp_size,
1526 	};
1527 
1528 	/* Don't update layer for the inner pattern. */
1529 	if (!parser->inner) {
1530 		if (parser->layer == HASH_RXQ_IPV4)
1531 			parser->layer = HASH_RXQ_TCPV4;
1532 		else
1533 			parser->layer = HASH_RXQ_TCPV6;
1534 	}
1535 	if (spec) {
1536 		if (!mask)
1537 			mask = default_mask;
1538 		tcp.val.dst_port = spec->hdr.dst_port;
1539 		tcp.val.src_port = spec->hdr.src_port;
1540 		tcp.mask.dst_port = mask->hdr.dst_port;
1541 		tcp.mask.src_port = mask->hdr.src_port;
1542 		/* Remove unwanted bits from values. */
1543 		tcp.val.src_port &= tcp.mask.src_port;
1544 		tcp.val.dst_port &= tcp.mask.dst_port;
1545 	}
1546 	mlx5_flow_create_copy(parser, &tcp, tcp_size);
1547 	return 0;
1548 }
1549 
1550 /**
1551  * Convert VXLAN item to Verbs specification.
1552  *
1553  * @param item[in]
1554  *   Item specification.
1555  * @param default_mask[in]
1556  *   Default bit-masks to use when item->mask is not provided.
1557  * @param data[in, out]
1558  *   User structure.
1559  *
1560  * @return
1561  *   0 on success, a negative errno value otherwise and rte_errno is set.
1562  */
1563 static int
1564 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1565 		       const void *default_mask,
1566 		       struct mlx5_flow_data *data)
1567 {
1568 	const struct rte_flow_item_vxlan *spec = item->spec;
1569 	const struct rte_flow_item_vxlan *mask = item->mask;
1570 	struct mlx5_flow_parse *parser = data->parser;
1571 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1572 	struct ibv_flow_spec_tunnel vxlan = {
1573 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1574 		.size = size,
1575 	};
1576 	union vni {
1577 		uint32_t vlan_id;
1578 		uint8_t vni[4];
1579 	} id;
1580 
1581 	id.vni[0] = 0;
1582 	parser->inner = IBV_FLOW_SPEC_INNER;
1583 	if (spec) {
1584 		if (!mask)
1585 			mask = default_mask;
1586 		memcpy(&id.vni[1], spec->vni, 3);
1587 		vxlan.val.tunnel_id = id.vlan_id;
1588 		memcpy(&id.vni[1], mask->vni, 3);
1589 		vxlan.mask.tunnel_id = id.vlan_id;
1590 		/* Remove unwanted bits from values. */
1591 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1592 	}
1593 	/*
1594 	 * Tunnel ID 0 is equivalent to not adding a VXLAN layer: if only this
1595 	 * layer is defined in the Verbs specification, it is interpreted as a
1596 	 * wildcard and all packets will match this rule; if it follows a full
1597 	 * stack layer (e.g. eth / ipv4 / udp), all packets matching the
1598 	 * preceding layers will also match this rule.
1599 	 * To avoid such a situation, VNI 0 is currently refused.
1600 	 */
1601 	if (!vxlan.val.tunnel_id)
1602 		return rte_flow_error_set(data->error, EINVAL,
1603 					  RTE_FLOW_ERROR_TYPE_ITEM,
1604 					  item,
1605 					  "VxLAN vni cannot be 0");
1606 	mlx5_flow_create_copy(parser, &vxlan, size);
1607 	return 0;
1608 }
1609 
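/*
 * Illustrative sketch (not part of the original revision): a VXLAN item spec
 * with the non-zero VNI required above; VNI 42 is an arbitrary value, and the
 * variable name and MLX5_FLOW_EXAMPLES guard are hypothetical.
 */
#ifdef MLX5_FLOW_EXAMPLES
static const struct rte_flow_item_vxlan mlx5_flow_example_vxlan = {
	.vni = { 0x00, 0x00, 0x2a }, /* 24-bit VNI 42 in network order. */
};
#endif
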
1610 /**
1611  * Convert mark/flag action to Verbs specification.
1612  *
1613  * @param parser
1614  *   Internal parser structure.
1615  * @param mark_id
1616  *   Mark identifier.
1617  *
1618  * @return
1619  *   0 on success, a negative errno value otherwise and rte_errno is set.
1620  */
1621 static int
1622 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1623 {
1624 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1625 	struct ibv_flow_spec_action_tag tag = {
1626 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1627 		.size = size,
1628 		.tag_id = mlx5_flow_mark_set(mark_id),
1629 	};
1630 
1631 	assert(parser->mark);
1632 	mlx5_flow_create_copy(parser, &tag, size);
1633 	return 0;
1634 }
1635 
1636 /**
1637  * Convert count action to Verbs specification.
1638  *
1639  * @param dev
1640  *   Pointer to Ethernet device.
1641  * @param parser
1642  *   Pointer to MLX5 flow parser structure.
1643  *
1644  * @return
1645  *   0 on success, a negative errno value otherwise and rte_errno is set.
1646  */
1647 static int
1648 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1649 		       struct mlx5_flow_parse *parser __rte_unused)
1650 {
1651 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1652 	struct priv *priv = dev->data->dev_private;
1653 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1654 	struct ibv_counter_set_init_attr init_attr = {0};
1655 	struct ibv_flow_spec_counter_action counter = {
1656 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
1657 		.size = size,
1658 		.counter_set_handle = 0,
1659 	};
1660 
1661 	init_attr.counter_set_id = 0;
1662 	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1663 	if (!parser->cs) {
1664 		rte_errno = EINVAL;
1665 		return -rte_errno;
1666 	}
1667 	counter.counter_set_handle = parser->cs->handle;
1668 	mlx5_flow_create_copy(parser, &counter, size);
1669 #endif
1670 	return 0;
1671 }
1672 
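/*
 * Illustrative sketch (not part of the original revision): how an
 * application can read the counter attached above, through the public
 * rte_flow query API. The helper name and the MLX5_FLOW_EXAMPLES guard are
 * hypothetical.
 */
#ifdef MLX5_FLOW_EXAMPLES
static int
mlx5_flow_example_read_count(uint16_t port_id, struct rte_flow *flow,
			     uint64_t *hits, uint64_t *bytes)
{
	struct rte_flow_query_count query = { .reset = 0 };
	struct rte_flow_error error;
	int ret;

	ret = rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
			     &query, &error);
	if (ret)
		return ret;
	*hits = query.hits;
	*bytes = query.bytes;
	return 0;
}
#endif
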
1673 /**
1674  * Complete flow rule creation with a drop queue.
1675  *
1676  * @param dev
1677  *   Pointer to Ethernet device.
1678  * @param parser
1679  *   Internal parser structure.
1680  * @param flow
1681  *   Pointer to the rte_flow.
1682  * @param[out] error
1683  *   Perform verbose error reporting if not NULL.
1684  *
1685  * @return
1686  *   0 on success, a negative errno value otherwise and rte_errno is set.
1687  */
1688 static int
1689 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1690 				   struct mlx5_flow_parse *parser,
1691 				   struct rte_flow *flow,
1692 				   struct rte_flow_error *error)
1693 {
1694 	struct priv *priv = dev->data->dev_private;
1695 	struct ibv_flow_spec_action_drop *drop;
1696 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1697 
1698 	assert(priv->pd);
1699 	assert(priv->ctx);
1700 	flow->drop = 1;
1701 	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1702 			parser->queue[HASH_RXQ_ETH].offset);
1703 	*drop = (struct ibv_flow_spec_action_drop){
1704 			.type = IBV_FLOW_SPEC_ACTION_DROP,
1705 			.size = size,
1706 	};
1707 	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1708 	parser->queue[HASH_RXQ_ETH].offset += size;
1709 	flow->frxq[HASH_RXQ_ETH].ibv_attr =
1710 		parser->queue[HASH_RXQ_ETH].ibv_attr;
1711 	if (parser->count)
1712 		flow->cs = parser->cs;
1713 	if (!priv->dev->data->dev_started)
1714 		return 0;
1715 	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1716 	flow->frxq[HASH_RXQ_ETH].ibv_flow =
1717 		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1718 				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
1719 	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1720 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1721 				   NULL, "flow rule creation failure");
1722 		goto error;
1723 	}
1724 	return 0;
1725 error:
1726 	assert(flow);
1727 	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1728 		claim_zero(mlx5_glue->destroy_flow
1729 			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1730 		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1731 	}
1732 	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1733 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1734 		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1735 	}
1736 	if (flow->cs) {
1737 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1738 		flow->cs = NULL;
1739 		parser->cs = NULL;
1740 	}
1741 	return -rte_errno;
1742 }
1743 
1744 /**
1745  * Create hash Rx queues when RSS is enabled.
1746  *
1747  * @param dev
1748  *   Pointer to Ethernet device.
1749  * @param parser
1750  *   Internal parser structure.
1751  * @param flow
1752  *   Pointer to the rte_flow.
1753  * @param[out] error
1754  *   Perform verbose error reporting if not NULL.
1755  *
1756  * @return
1757  *   0 on success, a negative errno value otherwise and rte_errno is set.
1758  */
1759 static int
1760 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1761 				  struct mlx5_flow_parse *parser,
1762 				  struct rte_flow *flow,
1763 				  struct rte_flow_error *error)
1764 {
1765 	struct priv *priv = dev->data->dev_private;
1766 	unsigned int i;
1767 
1768 	for (i = 0; i != hash_rxq_init_n; ++i) {
1769 		uint64_t hash_fields;
1770 
1771 		if (!parser->queue[i].ibv_attr)
1772 			continue;
1773 		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1774 		parser->queue[i].ibv_attr = NULL;
1775 		hash_fields = hash_rxq_init[i].hash_fields;
1776 		if (!priv->dev->data->dev_started)
1777 			continue;
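		/*
		 * Reuse an existing hash Rx queue matching this RSS
		 * configuration, otherwise create a new one.
		 */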
1778 		flow->frxq[i].hrxq =
1779 			mlx5_hrxq_get(dev,
1780 				      parser->rss_conf.rss_key,
1781 				      parser->rss_conf.rss_key_len,
1782 				      hash_fields,
1783 				      parser->queues,
1784 				      parser->queues_n);
1785 		if (flow->frxq[i].hrxq)
1786 			continue;
1787 		flow->frxq[i].hrxq =
1788 			mlx5_hrxq_new(dev,
1789 				      parser->rss_conf.rss_key,
1790 				      parser->rss_conf.rss_key_len,
1791 				      hash_fields,
1792 				      parser->queues,
1793 				      parser->queues_n);
1794 		if (!flow->frxq[i].hrxq) {
1795 			return rte_flow_error_set(error, ENOMEM,
1796 						  RTE_FLOW_ERROR_TYPE_HANDLE,
1797 						  NULL,
1798 						  "cannot create hash rxq");
1799 		}
1800 	}
1801 	return 0;
1802 }
1803 
1804 /**
1805  * Complete flow rule creation.
1806  *
1807  * @param dev
1808  *   Pointer to Ethernet device.
1809  * @param parser
1810  *   Internal parser structure.
1811  * @param flow
1812  *   Pointer to the rte_flow.
1813  * @param[out] error
1814  *   Perform verbose error reporting if not NULL.
1815  *
1816  * @return
1817  *   0 on success, a negative errno value otherwise and rte_errno is set.
1818  */
1819 static int
1820 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1821 			      struct mlx5_flow_parse *parser,
1822 			      struct rte_flow *flow,
1823 			      struct rte_flow_error *error)
1824 {
1825 	struct priv *priv = dev->data->dev_private;
1826 	int ret;
1827 	unsigned int i;
1828 	unsigned int flows_n = 0;
1829 
1830 	assert(priv->pd);
1831 	assert(priv->ctx);
1832 	assert(!parser->drop);
1833 	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1834 	if (ret)
1835 		goto error;
1836 	if (parser->count)
1837 		flow->cs = parser->cs;
1838 	if (!priv->dev->data->dev_started)
1839 		return 0;
1840 	for (i = 0; i != hash_rxq_init_n; ++i) {
1841 		if (!flow->frxq[i].hrxq)
1842 			continue;
1843 		flow->frxq[i].ibv_flow =
1844 			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1845 					       flow->frxq[i].ibv_attr);
1846 		if (!flow->frxq[i].ibv_flow) {
1847 			rte_flow_error_set(error, ENOMEM,
1848 					   RTE_FLOW_ERROR_TYPE_HANDLE,
1849 					   NULL, "flow rule creation failure");
1850 			goto error;
1851 		}
1852 		++flows_n;
1853 		DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
1854 			dev->data->port_id,
1855 			(void *)flow, i,
1856 			(void *)flow->frxq[i].hrxq,
1857 			(void *)flow->frxq[i].ibv_flow);
1858 	}
1859 	if (!flows_n) {
1860 		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1861 				   NULL, "internal error in flow creation");
1862 		goto error;
1863 	}
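	/*
	 * OR the mark flag so a queue already marked by another flow keeps
	 * its flag even when this flow does not request marking.
	 */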
1864 	for (i = 0; i != parser->queues_n; ++i) {
1865 		struct mlx5_rxq_data *q =
1866 			(*priv->rxqs)[parser->queues[i]];
1867 
1868 		q->mark |= parser->mark;
1869 	}
1870 	return 0;
1871 error:
1872 	ret = rte_errno; /* Save rte_errno before cleanup. */
1873 	assert(flow);
1874 	for (i = 0; i != hash_rxq_init_n; ++i) {
1875 		if (flow->frxq[i].ibv_flow) {
1876 			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1877 
1878 			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1879 		}
1880 		if (flow->frxq[i].hrxq)
1881 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1882 		if (flow->frxq[i].ibv_attr)
1883 			rte_free(flow->frxq[i].ibv_attr);
1884 	}
1885 	if (flow->cs) {
1886 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1887 		flow->cs = NULL;
1888 		parser->cs = NULL;
1889 	}
1890 	rte_errno = ret; /* Restore rte_errno. */
1891 	return -rte_errno;
1892 }
1893 
1894 /**
 * Create a flow from its description and register it in the given list.
1896  *
1897  * @param dev
1898  *   Pointer to Ethernet device.
1899  * @param list
1900  *   Pointer to a TAILQ flow list.
1901  * @param[in] attr
1902  *   Flow rule attributes.
1903  * @param[in] pattern
1904  *   Pattern specification (list terminated by the END pattern item).
1905  * @param[in] actions
1906  *   Associated actions (list terminated by the END action).
1907  * @param[out] error
1908  *   Perform verbose error reporting if not NULL.
1909  *
1910  * @return
1911  *   A flow on success, NULL otherwise and rte_errno is set.
1912  */
1913 static struct rte_flow *
1914 mlx5_flow_list_create(struct rte_eth_dev *dev,
1915 		      struct mlx5_flows *list,
1916 		      const struct rte_flow_attr *attr,
1917 		      const struct rte_flow_item items[],
1918 		      const struct rte_flow_action actions[],
1919 		      struct rte_flow_error *error)
1920 {
1921 	struct mlx5_flow_parse parser = { .create = 1, };
1922 	struct rte_flow *flow = NULL;
1923 	unsigned int i;
1924 	int ret;
1925 
1926 	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1927 	if (ret)
1928 		goto exit;
1929 	flow = rte_calloc(__func__, 1,
1930 			  sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1931 			  0);
1932 	if (!flow) {
1933 		rte_flow_error_set(error, ENOMEM,
1934 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1935 				   NULL,
1936 				   "cannot allocate flow memory");
1937 		return NULL;
1938 	}
1939 	/* Copy queues configuration. */
1940 	flow->queues = (uint16_t (*)[])(flow + 1);
1941 	memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1942 	flow->queues_n = parser.queues_n;
1943 	flow->mark = parser.mark;
1944 	/* Copy RSS configuration. */
1945 	flow->rss_conf = parser.rss_conf;
1946 	flow->rss_conf.rss_key = flow->rss_key;
1947 	memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
	/* Finalize the flow. */
1949 	if (parser.drop)
1950 		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1951 							 error);
1952 	else
1953 		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1954 	if (ret)
1955 		goto exit;
1956 	TAILQ_INSERT_TAIL(list, flow, next);
1957 	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
1958 		(void *)flow);
1959 	return flow;
1960 exit:
1961 	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
1962 		error->message);
1963 	for (i = 0; i != hash_rxq_init_n; ++i) {
1964 		if (parser.queue[i].ibv_attr)
1965 			rte_free(parser.queue[i].ibv_attr);
1966 	}
1967 	rte_free(flow);
1968 	return NULL;
1969 }
1970 
1971 /**
1972  * Validate a flow supported by the NIC.
1973  *
1974  * @see rte_flow_validate()
1975  * @see rte_flow_ops
1976  */
1977 int
1978 mlx5_flow_validate(struct rte_eth_dev *dev,
1979 		   const struct rte_flow_attr *attr,
1980 		   const struct rte_flow_item items[],
1981 		   const struct rte_flow_action actions[],
1982 		   struct rte_flow_error *error)
1983 {
1984 	struct mlx5_flow_parse parser = { .create = 0, };
1985 
1986 	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1987 }
1988 
1989 /**
1990  * Create a flow.
1991  *
1992  * @see rte_flow_create()
1993  * @see rte_flow_ops
1994  */
1995 struct rte_flow *
1996 mlx5_flow_create(struct rte_eth_dev *dev,
1997 		 const struct rte_flow_attr *attr,
1998 		 const struct rte_flow_item items[],
1999 		 const struct rte_flow_action actions[],
2000 		 struct rte_flow_error *error)
2001 {
2002 	struct priv *priv = dev->data->dev_private;
2003 
2004 	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2005 				     error);
2006 }
2007 
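/*
 * Usage note: a minimal sketch (not part of the driver) of how an
 * application might reach this entry point through the generic rte_flow
 * API; "port_id" and the single Rx queue index are assumptions.
 *
 * struct rte_flow_attr attr = { .ingress = 1 };
 * struct rte_flow_item pattern[] = {
 *	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *	{ .type = RTE_FLOW_ITEM_TYPE_END },
 * };
 * struct rte_flow_action_queue queue = { .index = 0 };
 * struct rte_flow_action actions[] = {
 *	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *	{ .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * struct rte_flow_error err;
 * struct rte_flow *f = rte_flow_create(port_id, &attr, pattern, actions, &err);
 */
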
2008 /**
2009  * Destroy a flow in a list.
2010  *
2011  * @param dev
2012  *   Pointer to Ethernet device.
2013  * @param list
2014  *   Pointer to a TAILQ flow list.
2015  * @param[in] flow
2016  *   Flow to destroy.
2017  */
2018 static void
2019 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2020 		       struct rte_flow *flow)
2021 {
2022 	struct priv *priv = dev->data->dev_private;
2023 	unsigned int i;
2024 
2025 	if (flow->drop || !flow->mark)
2026 		goto free;
2027 	for (i = 0; i != flow->queues_n; ++i) {
2028 		struct rte_flow *tmp;
2029 		int mark = 0;
2030 
2031 		/*
2032 		 * To remove the mark from the queue, the queue must not be
2033 		 * present in any other marked flow (RSS or not).
2034 		 */
2035 		TAILQ_FOREACH(tmp, list, next) {
2036 			unsigned int j;
2037 			uint16_t *tqs = NULL;
2038 			uint16_t tq_n = 0;
2039 
2040 			if (!tmp->mark)
2041 				continue;
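			/*
			 * All hash Rx queues of a given flow share the same
			 * set of target queues, so the last indirection
			 * table found is enough for the check below.
			 */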
2042 			for (j = 0; j != hash_rxq_init_n; ++j) {
2043 				if (!tmp->frxq[j].hrxq)
2044 					continue;
2045 				tqs = tmp->frxq[j].hrxq->ind_table->queues;
2046 				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2047 			}
2048 			if (!tq_n)
2049 				continue;
2050 			for (j = 0; (j != tq_n) && !mark; j++)
2051 				if (tqs[j] == (*flow->queues)[i])
2052 					mark = 1;
2053 		}
2054 		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2055 	}
2056 free:
2057 	if (flow->drop) {
2058 		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2059 			claim_zero(mlx5_glue->destroy_flow
2060 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2061 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2062 	} else {
2063 		for (i = 0; i != hash_rxq_init_n; ++i) {
2064 			struct mlx5_flow *frxq = &flow->frxq[i];
2065 
2066 			if (frxq->ibv_flow)
2067 				claim_zero(mlx5_glue->destroy_flow
2068 					   (frxq->ibv_flow));
2069 			if (frxq->hrxq)
2070 				mlx5_hrxq_release(dev, frxq->hrxq);
2071 			if (frxq->ibv_attr)
2072 				rte_free(frxq->ibv_attr);
2073 		}
2074 	}
2075 	if (flow->cs) {
2076 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2077 		flow->cs = NULL;
2078 	}
2079 	TAILQ_REMOVE(list, flow, next);
2080 	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2081 		(void *)flow);
2082 	rte_free(flow);
2083 }
2084 
2085 /**
2086  * Destroy all flows.
2087  *
2088  * @param dev
2089  *   Pointer to Ethernet device.
2090  * @param list
2091  *   Pointer to a TAILQ flow list.
2092  */
2093 void
2094 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2095 {
2096 	while (!TAILQ_EMPTY(list)) {
2097 		struct rte_flow *flow;
2098 
2099 		flow = TAILQ_FIRST(list);
2100 		mlx5_flow_list_destroy(dev, list, flow);
2101 	}
2102 }
2103 
2104 /**
2105  * Create drop queue.
2106  *
2107  * @param dev
2108  *   Pointer to Ethernet device.
2109  *
2110  * @return
2111  *   0 on success, a negative errno value otherwise and rte_errno is set.
2112  */
2113 int
2114 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2115 {
2116 	struct priv *priv = dev->data->dev_private;
2117 	struct mlx5_hrxq_drop *fdq = NULL;
2118 
2119 	assert(priv->pd);
2120 	assert(priv->ctx);
2121 	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2122 	if (!fdq) {
2123 		DRV_LOG(WARNING,
2124 			"port %u cannot allocate memory for drop queue",
2125 			dev->data->port_id);
2126 		rte_errno = ENOMEM;
2127 		return -rte_errno;
2128 	}
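	/*
	 * Verbs objects are created in dependency order:
	 * CQ -> WQ -> indirection table -> hash QP.
	 */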
2129 	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2130 	if (!fdq->cq) {
2131 		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2132 			dev->data->port_id);
2133 		rte_errno = errno;
2134 		goto error;
2135 	}
2136 	fdq->wq = mlx5_glue->create_wq
2137 		(priv->ctx,
2138 		 &(struct ibv_wq_init_attr){
2139 			.wq_type = IBV_WQT_RQ,
2140 			.max_wr = 1,
2141 			.max_sge = 1,
2142 			.pd = priv->pd,
2143 			.cq = fdq->cq,
2144 		 });
2145 	if (!fdq->wq) {
2146 		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2147 			dev->data->port_id);
2148 		rte_errno = errno;
2149 		goto error;
2150 	}
2151 	fdq->ind_table = mlx5_glue->create_rwq_ind_table
2152 		(priv->ctx,
2153 		 &(struct ibv_rwq_ind_table_init_attr){
2154 			.log_ind_tbl_size = 0,
2155 			.ind_tbl = &fdq->wq,
2156 			.comp_mask = 0,
2157 		 });
2158 	if (!fdq->ind_table) {
2159 		DRV_LOG(WARNING,
2160 			"port %u cannot allocate indirection table for drop"
2161 			" queue",
2162 			dev->data->port_id);
2163 		rte_errno = errno;
2164 		goto error;
2165 	}
2166 	fdq->qp = mlx5_glue->create_qp_ex
2167 		(priv->ctx,
2168 		 &(struct ibv_qp_init_attr_ex){
2169 			.qp_type = IBV_QPT_RAW_PACKET,
2170 			.comp_mask =
2171 				IBV_QP_INIT_ATTR_PD |
2172 				IBV_QP_INIT_ATTR_IND_TABLE |
2173 				IBV_QP_INIT_ATTR_RX_HASH,
2174 			.rx_hash_conf = (struct ibv_rx_hash_conf){
2175 				.rx_hash_function =
2176 					IBV_RX_HASH_FUNC_TOEPLITZ,
2177 				.rx_hash_key_len = rss_hash_default_key_len,
2178 				.rx_hash_key = rss_hash_default_key,
2179 				.rx_hash_fields_mask = 0,
2180 				},
2181 			.rwq_ind_tbl = fdq->ind_table,
2182 			.pd = priv->pd
2183 		 });
2184 	if (!fdq->qp) {
2185 		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2186 			dev->data->port_id);
2187 		rte_errno = errno;
2188 		goto error;
2189 	}
2190 	priv->flow_drop_queue = fdq;
2191 	return 0;
2192 error:
2193 	if (fdq->qp)
2194 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2195 	if (fdq->ind_table)
2196 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2197 	if (fdq->wq)
2198 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2199 	if (fdq->cq)
2200 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2201 	if (fdq)
2202 		rte_free(fdq);
2203 	priv->flow_drop_queue = NULL;
2204 	return -rte_errno;
2205 }
2206 
2207 /**
2208  * Delete drop queue.
2209  *
2210  * @param dev
2211  *   Pointer to Ethernet device.
2212  */
2213 void
2214 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2215 {
2216 	struct priv *priv = dev->data->dev_private;
2217 	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2218 
2219 	if (!fdq)
2220 		return;
2221 	if (fdq->qp)
2222 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2223 	if (fdq->ind_table)
2224 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2225 	if (fdq->wq)
2226 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2227 	if (fdq->cq)
2228 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2229 	rte_free(fdq);
2230 	priv->flow_drop_queue = NULL;
2231 }
2232 
2233 /**
2234  * Remove all flows.
2235  *
2236  * @param dev
2237  *   Pointer to Ethernet device.
2238  * @param list
2239  *   Pointer to a TAILQ flow list.
2240  */
2241 void
2242 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2243 {
2244 	struct priv *priv = dev->data->dev_private;
2245 	struct rte_flow *flow;
2246 
2247 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2248 		unsigned int i;
2249 		struct mlx5_ind_table_ibv *ind_tbl = NULL;
2250 
2251 		if (flow->drop) {
2252 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2253 				continue;
2254 			claim_zero(mlx5_glue->destroy_flow
2255 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2256 			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2257 			DRV_LOG(DEBUG, "port %u flow %p removed",
2258 				dev->data->port_id, (void *)flow);
2259 			/* Next flow. */
2260 			continue;
2261 		}
2262 		/* Verify the flow has not already been cleaned. */
2263 		for (i = 0; i != hash_rxq_init_n; ++i) {
2264 			if (!flow->frxq[i].ibv_flow)
2265 				continue;
2266 			/*
			 * The indirection table may be needed to clear the
			 * mark flag on the Rx queues; keeping it here avoids
			 * another loop to retrieve it.
2271 			 */
2272 			ind_tbl = flow->frxq[i].hrxq->ind_table;
2273 			break;
2274 		}
2275 		if (i == hash_rxq_init_n)
2276 			return;
2277 		if (flow->mark) {
2278 			assert(ind_tbl);
2279 			for (i = 0; i != ind_tbl->queues_n; ++i)
2280 				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2281 		}
2282 		for (i = 0; i != hash_rxq_init_n; ++i) {
2283 			if (!flow->frxq[i].ibv_flow)
2284 				continue;
2285 			claim_zero(mlx5_glue->destroy_flow
2286 				   (flow->frxq[i].ibv_flow));
2287 			flow->frxq[i].ibv_flow = NULL;
2288 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2289 			flow->frxq[i].hrxq = NULL;
2290 		}
2291 		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2292 			(void *)flow);
2293 	}
2294 }
2295 
2296 /**
2297  * Add all flows.
2298  *
2299  * @param dev
2300  *   Pointer to Ethernet device.
2301  * @param list
2302  *   Pointer to a TAILQ flow list.
2303  *
2304  * @return
2305  *   0 on success, a negative errno value otherwise and rte_errno is set.
2306  */
2307 int
2308 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2309 {
2310 	struct priv *priv = dev->data->dev_private;
2311 	struct rte_flow *flow;
2312 
2313 	TAILQ_FOREACH(flow, list, next) {
2314 		unsigned int i;
2315 
2316 		if (flow->drop) {
2317 			flow->frxq[HASH_RXQ_ETH].ibv_flow =
2318 				mlx5_glue->create_flow
2319 				(priv->flow_drop_queue->qp,
2320 				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2321 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2322 				DRV_LOG(DEBUG,
2323 					"port %u flow %p cannot be applied",
2324 					dev->data->port_id, (void *)flow);
2325 				rte_errno = EINVAL;
2326 				return -rte_errno;
2327 			}
2328 			DRV_LOG(DEBUG, "port %u flow %p applied",
2329 				dev->data->port_id, (void *)flow);
2330 			/* Next flow. */
2331 			continue;
2332 		}
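		/*
		 * Re-create the hash Rx queues and Verbs flows that were
		 * released by mlx5_flow_stop().
		 */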
2333 		for (i = 0; i != hash_rxq_init_n; ++i) {
2334 			if (!flow->frxq[i].ibv_attr)
2335 				continue;
2336 			flow->frxq[i].hrxq =
2337 				mlx5_hrxq_get(dev, flow->rss_conf.rss_key,
2338 					      flow->rss_conf.rss_key_len,
2339 					      hash_rxq_init[i].hash_fields,
2340 					      (*flow->queues),
2341 					      flow->queues_n);
2342 			if (flow->frxq[i].hrxq)
2343 				goto flow_create;
2344 			flow->frxq[i].hrxq =
2345 				mlx5_hrxq_new(dev, flow->rss_conf.rss_key,
2346 					      flow->rss_conf.rss_key_len,
2347 					      hash_rxq_init[i].hash_fields,
2348 					      (*flow->queues),
2349 					      flow->queues_n);
2350 			if (!flow->frxq[i].hrxq) {
2351 				DRV_LOG(DEBUG,
2352 					"port %u flow %p cannot be applied",
2353 					dev->data->port_id, (void *)flow);
2354 				rte_errno = EINVAL;
2355 				return -rte_errno;
2356 			}
2357 flow_create:
2358 			flow->frxq[i].ibv_flow =
2359 				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2360 						       flow->frxq[i].ibv_attr);
2361 			if (!flow->frxq[i].ibv_flow) {
2362 				DRV_LOG(DEBUG,
2363 					"port %u flow %p cannot be applied",
2364 					dev->data->port_id, (void *)flow);
2365 				rte_errno = EINVAL;
2366 				return -rte_errno;
2367 			}
2368 			DRV_LOG(DEBUG, "port %u flow %p applied",
2369 				dev->data->port_id, (void *)flow);
2370 		}
2371 		if (!flow->mark)
2372 			continue;
2373 		for (i = 0; i != flow->queues_n; ++i)
2374 			(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2375 	}
2376 	return 0;
2377 }
2378 
2379 /**
 * Verify the flow list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of flows not released.
 */
2387 int
2388 mlx5_flow_verify(struct rte_eth_dev *dev)
2389 {
2390 	struct priv *priv = dev->data->dev_private;
2391 	struct rte_flow *flow;
2392 	int ret = 0;
2393 
2394 	TAILQ_FOREACH(flow, &priv->flows, next) {
2395 		DRV_LOG(DEBUG, "port %u flow %p still referenced",
2396 			dev->data->port_id, (void *)flow);
2397 		++ret;
2398 	}
2399 	return ret;
2400 }
2401 
2402 /**
2403  * Enable a control flow configured from the control plane.
2404  *
2405  * @param dev
2406  *   Pointer to Ethernet device.
2407  * @param eth_spec
2408  *   An Ethernet flow spec to apply.
2409  * @param eth_mask
2410  *   An Ethernet flow mask to apply.
2411  * @param vlan_spec
2412  *   A VLAN flow spec to apply.
2413  * @param vlan_mask
2414  *   A VLAN flow mask to apply.
2415  *
2416  * @return
2417  *   0 on success, a negative errno value otherwise and rte_errno is set.
2418  */
2419 int
2420 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2421 		    struct rte_flow_item_eth *eth_spec,
2422 		    struct rte_flow_item_eth *eth_mask,
2423 		    struct rte_flow_item_vlan *vlan_spec,
2424 		    struct rte_flow_item_vlan *vlan_mask)
2425 {
2426 	struct priv *priv = dev->data->dev_private;
2427 	const struct rte_flow_attr attr = {
2428 		.ingress = 1,
2429 		.priority = MLX5_CTRL_FLOW_PRIORITY,
2430 	};
2431 	struct rte_flow_item items[] = {
2432 		{
2433 			.type = RTE_FLOW_ITEM_TYPE_ETH,
2434 			.spec = eth_spec,
2435 			.last = NULL,
2436 			.mask = eth_mask,
2437 		},
2438 		{
2439 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2440 				RTE_FLOW_ITEM_TYPE_END,
2441 			.spec = vlan_spec,
2442 			.last = NULL,
2443 			.mask = vlan_mask,
2444 		},
2445 		{
2446 			.type = RTE_FLOW_ITEM_TYPE_END,
2447 		},
2448 	};
2449 	struct rte_flow_action actions[] = {
2450 		{
2451 			.type = RTE_FLOW_ACTION_TYPE_RSS,
2452 		},
2453 		{
2454 			.type = RTE_FLOW_ACTION_TYPE_END,
2455 		},
2456 	};
2457 	struct rte_flow *flow;
2458 	struct rte_flow_error error;
2459 	unsigned int i;
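	/*
	 * The "local" variant lays out the same fields as struct
	 * rte_flow_action_rss, with the flexible queue array given fixed
	 * storage so the action configuration can be built on the stack.
	 */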
2460 	union {
2461 		struct rte_flow_action_rss rss;
2462 		struct {
2463 			const struct rte_eth_rss_conf *rss_conf;
2464 			uint16_t num;
2465 			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2466 		} local;
2467 	} action_rss;
2468 
2469 	if (!priv->reta_idx_n) {
2470 		rte_errno = EINVAL;
2471 		return -rte_errno;
2472 	}
2473 	for (i = 0; i != priv->reta_idx_n; ++i)
2474 		action_rss.local.queue[i] = (*priv->reta_idx)[i];
2475 	action_rss.local.rss_conf = &priv->rss_conf;
2476 	action_rss.local.num = priv->reta_idx_n;
2477 	actions[0].conf = (const void *)&action_rss.rss;
2478 	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2479 				     actions, &error);
2480 	if (!flow)
2481 		return -rte_errno;
2482 	return 0;
2483 }
2484 
2485 /**
 * Enable a control flow configured from the control plane.
2487  *
2488  * @param dev
2489  *   Pointer to Ethernet device.
2490  * @param eth_spec
2491  *   An Ethernet flow spec to apply.
2492  * @param eth_mask
2493  *   An Ethernet flow mask to apply.
2494  *
2495  * @return
2496  *   0 on success, a negative errno value otherwise and rte_errno is set.
2497  */
2498 int
2499 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2500 	       struct rte_flow_item_eth *eth_spec,
2501 	       struct rte_flow_item_eth *eth_mask)
2502 {
2503 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2504 }
2505 
2506 /**
2507  * Destroy a flow.
2508  *
2509  * @see rte_flow_destroy()
2510  * @see rte_flow_ops
2511  */
2512 int
2513 mlx5_flow_destroy(struct rte_eth_dev *dev,
2514 		  struct rte_flow *flow,
2515 		  struct rte_flow_error *error __rte_unused)
2516 {
2517 	struct priv *priv = dev->data->dev_private;
2518 
2519 	mlx5_flow_list_destroy(dev, &priv->flows, flow);
2520 	return 0;
2521 }
2522 
2523 /**
2524  * Destroy all flows.
2525  *
2526  * @see rte_flow_flush()
2527  * @see rte_flow_ops
2528  */
2529 int
2530 mlx5_flow_flush(struct rte_eth_dev *dev,
2531 		struct rte_flow_error *error __rte_unused)
2532 {
2533 	struct priv *priv = dev->data->dev_private;
2534 
2535 	mlx5_flow_list_flush(dev, &priv->flows);
2536 	return 0;
2537 }
2538 
2539 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2540 /**
2541  * Query flow counter.
2542  *
 * @param cs
 *   The counter set to query.
 * @param[in,out] counter_stats
 *   Last counter values read, used as the baseline for the reported delta.
 * @param[out] query_count
 *   Resulting hits/bytes statistics.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
2547  *
2548  * @return
2549  *   0 on success, a negative errno value otherwise and rte_errno is set.
2550  */
2551 static int
2552 mlx5_flow_query_count(struct ibv_counter_set *cs,
2553 		      struct mlx5_flow_counter_stats *counter_stats,
2554 		      struct rte_flow_query_count *query_count,
2555 		      struct rte_flow_error *error)
2556 {
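	/*
	 * The counter set is expected to return packets (hits) in
	 * counters[0] and bytes in counters[1].
	 */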
2557 	uint64_t counters[2];
2558 	struct ibv_query_counter_set_attr query_cs_attr = {
2559 		.cs = cs,
2560 		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2561 	};
2562 	struct ibv_counter_set_data query_out = {
2563 		.out = counters,
2564 		.outlen = 2 * sizeof(uint64_t),
2565 	};
2566 	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2567 
2568 	if (err)
2569 		return rte_flow_error_set(error, err,
2570 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2571 					  NULL,
2572 					  "cannot read counter");
2573 	query_count->hits_set = 1;
2574 	query_count->bytes_set = 1;
2575 	query_count->hits = counters[0] - counter_stats->hits;
2576 	query_count->bytes = counters[1] - counter_stats->bytes;
2577 	if (query_count->reset) {
2578 		counter_stats->hits = counters[0];
2579 		counter_stats->bytes = counters[1];
2580 	}
2581 	return 0;
2582 }
2583 
2584 /**
 * Query a flow.
2586  *
2587  * @see rte_flow_query()
2588  * @see rte_flow_ops
2589  */
2590 int
2591 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
2592 		struct rte_flow *flow,
2593 		enum rte_flow_action_type action __rte_unused,
2594 		void *data,
2595 		struct rte_flow_error *error)
2596 {
2597 	if (flow->cs) {
2598 		int ret;
2599 
2600 		ret = mlx5_flow_query_count(flow->cs,
2601 					    &flow->counter_stats,
2602 					    (struct rte_flow_query_count *)data,
2603 					    error);
2604 		if (ret)
2605 			return ret;
2606 	} else {
2607 		return rte_flow_error_set(error, EINVAL,
2608 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2609 					  NULL,
2610 					  "no counter found for flow");
2611 	}
2612 	return 0;
2613 }
2614 #endif
2615 
2616 /**
2617  * Isolated mode.
2618  *
2619  * @see rte_flow_isolate()
2620  * @see rte_flow_ops
2621  */
2622 int
2623 mlx5_flow_isolate(struct rte_eth_dev *dev,
2624 		  int enable,
2625 		  struct rte_flow_error *error)
2626 {
2627 	struct priv *priv = dev->data->dev_private;
2628 
2629 	if (dev->data->dev_started) {
2630 		rte_flow_error_set(error, EBUSY,
2631 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2632 				   NULL,
2633 				   "port must be stopped first");
2634 		return -rte_errno;
2635 	}
2636 	priv->isolated = !!enable;
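	/*
	 * Switch to the restricted ops table while isolated mode is
	 * enabled, restore the default one otherwise.
	 */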
2637 	if (enable)
2638 		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2639 	else
2640 		priv->dev->dev_ops = &mlx5_dev_ops;
2641 	return 0;
2642 }
2643 
2644 /**
2645  * Convert a flow director filter to a generic flow.
2646  *
2647  * @param dev
2648  *   Pointer to Ethernet device.
2649  * @param fdir_filter
2650  *   Flow director filter to add.
2651  * @param attributes
2652  *   Generic flow parameters structure.
2653  *
2654  * @return
2655  *   0 on success, a negative errno value otherwise and rte_errno is set.
2656  */
2657 static int
2658 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2659 			 const struct rte_eth_fdir_filter *fdir_filter,
2660 			 struct mlx5_fdir *attributes)
2661 {
2662 	struct priv *priv = dev->data->dev_private;
2663 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
2664 
2665 	/* Validate queue number. */
2666 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2667 		DRV_LOG(ERR, "port %u invalid queue number %d",
2668 			dev->data->port_id, fdir_filter->action.rx_queue);
2669 		rte_errno = EINVAL;
2670 		return -rte_errno;
2671 	}
2672 	attributes->attr.ingress = 1;
2673 	attributes->items[0] = (struct rte_flow_item) {
2674 		.type = RTE_FLOW_ITEM_TYPE_ETH,
2675 		.spec = &attributes->l2,
2676 		.mask = &attributes->l2_mask,
2677 	};
2678 	switch (fdir_filter->action.behavior) {
2679 	case RTE_ETH_FDIR_ACCEPT:
2680 		attributes->actions[0] = (struct rte_flow_action){
2681 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
2682 			.conf = &attributes->queue,
2683 		};
2684 		break;
2685 	case RTE_ETH_FDIR_REJECT:
2686 		attributes->actions[0] = (struct rte_flow_action){
2687 			.type = RTE_FLOW_ACTION_TYPE_DROP,
2688 		};
2689 		break;
2690 	default:
2691 		DRV_LOG(ERR, "port %u invalid behavior %d",
2692 			dev->data->port_id,
2693 			fdir_filter->action.behavior);
2694 		rte_errno = ENOTSUP;
2695 		return -rte_errno;
2696 	}
2697 	attributes->queue.index = fdir_filter->action.rx_queue;
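	/* Translate the flow director flow type into L3/L4 pattern items. */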
2698 	switch (fdir_filter->input.flow_type) {
2699 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2700 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2701 			.src_addr = input->flow.udp4_flow.ip.src_ip,
2702 			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
2703 			.time_to_live = input->flow.udp4_flow.ip.ttl,
2704 			.type_of_service = input->flow.udp4_flow.ip.tos,
2705 			.next_proto_id = input->flow.udp4_flow.ip.proto,
2706 		};
2707 		attributes->l4.udp.hdr = (struct udp_hdr){
2708 			.src_port = input->flow.udp4_flow.src_port,
2709 			.dst_port = input->flow.udp4_flow.dst_port,
2710 		};
2711 		attributes->items[1] = (struct rte_flow_item){
2712 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2713 			.spec = &attributes->l3,
2714 			.mask = &attributes->l3,
2715 		};
2716 		attributes->items[2] = (struct rte_flow_item){
2717 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2718 			.spec = &attributes->l4,
2719 			.mask = &attributes->l4,
2720 		};
2721 		break;
2722 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2723 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2724 			.src_addr = input->flow.tcp4_flow.ip.src_ip,
2725 			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2726 			.time_to_live = input->flow.tcp4_flow.ip.ttl,
2727 			.type_of_service = input->flow.tcp4_flow.ip.tos,
2728 			.next_proto_id = input->flow.tcp4_flow.ip.proto,
2729 		};
2730 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2731 			.src_port = input->flow.tcp4_flow.src_port,
2732 			.dst_port = input->flow.tcp4_flow.dst_port,
2733 		};
2734 		attributes->items[1] = (struct rte_flow_item){
2735 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2736 			.spec = &attributes->l3,
2737 			.mask = &attributes->l3,
2738 		};
2739 		attributes->items[2] = (struct rte_flow_item){
2740 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2741 			.spec = &attributes->l4,
2742 			.mask = &attributes->l4,
2743 		};
2744 		break;
2745 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2746 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2747 			.src_addr = input->flow.ip4_flow.src_ip,
2748 			.dst_addr = input->flow.ip4_flow.dst_ip,
2749 			.time_to_live = input->flow.ip4_flow.ttl,
2750 			.type_of_service = input->flow.ip4_flow.tos,
2751 			.next_proto_id = input->flow.ip4_flow.proto,
2752 		};
2753 		attributes->items[1] = (struct rte_flow_item){
2754 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2755 			.spec = &attributes->l3,
2756 			.mask = &attributes->l3,
2757 		};
2758 		break;
2759 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2760 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2761 			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
2762 			.proto = input->flow.udp6_flow.ip.proto,
2763 		};
2764 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2765 		       input->flow.udp6_flow.ip.src_ip,
2766 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2767 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2768 		       input->flow.udp6_flow.ip.dst_ip,
2769 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2770 		attributes->l4.udp.hdr = (struct udp_hdr){
2771 			.src_port = input->flow.udp6_flow.src_port,
2772 			.dst_port = input->flow.udp6_flow.dst_port,
2773 		};
2774 		attributes->items[1] = (struct rte_flow_item){
2775 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2776 			.spec = &attributes->l3,
2777 			.mask = &attributes->l3,
2778 		};
2779 		attributes->items[2] = (struct rte_flow_item){
2780 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2781 			.spec = &attributes->l4,
2782 			.mask = &attributes->l4,
2783 		};
2784 		break;
2785 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2786 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2787 			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2788 			.proto = input->flow.tcp6_flow.ip.proto,
2789 		};
2790 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2791 		       input->flow.tcp6_flow.ip.src_ip,
2792 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2793 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2794 		       input->flow.tcp6_flow.ip.dst_ip,
2795 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2796 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2797 			.src_port = input->flow.tcp6_flow.src_port,
2798 			.dst_port = input->flow.tcp6_flow.dst_port,
2799 		};
2800 		attributes->items[1] = (struct rte_flow_item){
2801 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2802 			.spec = &attributes->l3,
2803 			.mask = &attributes->l3,
2804 		};
2805 		attributes->items[2] = (struct rte_flow_item){
2806 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2807 			.spec = &attributes->l4,
2808 			.mask = &attributes->l4,
2809 		};
2810 		break;
2811 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2812 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2813 			.hop_limits = input->flow.ipv6_flow.hop_limits,
2814 			.proto = input->flow.ipv6_flow.proto,
2815 		};
2816 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2817 		       input->flow.ipv6_flow.src_ip,
2818 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2819 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2820 		       input->flow.ipv6_flow.dst_ip,
2821 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2822 		attributes->items[1] = (struct rte_flow_item){
2823 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2824 			.spec = &attributes->l3,
2825 			.mask = &attributes->l3,
2826 		};
2827 		break;
2828 	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
2830 			dev->data->port_id, fdir_filter->input.flow_type);
2831 		rte_errno = ENOTSUP;
2832 		return -rte_errno;
2833 	}
2834 	return 0;
2835 }
2836 
2837 /**
 * Add a new flow director filter and store it in the list.
2839  *
2840  * @param dev
2841  *   Pointer to Ethernet device.
2842  * @param fdir_filter
2843  *   Flow director filter to add.
2844  *
2845  * @return
2846  *   0 on success, a negative errno value otherwise and rte_errno is set.
2847  */
2848 static int
2849 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
2850 		     const struct rte_eth_fdir_filter *fdir_filter)
2851 {
2852 	struct priv *priv = dev->data->dev_private;
2853 	struct mlx5_fdir attributes = {
2854 		.attr.group = 0,
2855 		.l2_mask = {
2856 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2857 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2858 			.type = 0,
2859 		},
2860 	};
2861 	struct mlx5_flow_parse parser = {
2862 		.layer = HASH_RXQ_ETH,
2863 	};
2864 	struct rte_flow_error error;
2865 	struct rte_flow *flow;
2866 	int ret;
2867 
2868 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2869 	if (ret)
2870 		return ret;
2871 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2872 				attributes.actions, &error, &parser);
2873 	if (ret)
2874 		return ret;
2875 	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
2876 				     attributes.items, attributes.actions,
2877 				     &error);
2878 	if (flow) {
2879 		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
2880 			(void *)flow);
2881 		return 0;
2882 	}
2883 	return -rte_errno;
2884 }
2885 
2886 /**
2887  * Delete specific filter.
2888  *
2889  * @param dev
2890  *   Pointer to Ethernet device.
2891  * @param fdir_filter
2892  *   Filter to be deleted.
2893  *
2894  * @return
2895  *   0 on success, a negative errno value otherwise and rte_errno is set.
2896  */
2897 static int
2898 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
2899 			const struct rte_eth_fdir_filter *fdir_filter)
2900 {
2901 	struct priv *priv = dev->data->dev_private;
2902 	struct mlx5_fdir attributes = {
2903 		.attr.group = 0,
2904 	};
2905 	struct mlx5_flow_parse parser = {
2906 		.create = 1,
2907 		.layer = HASH_RXQ_ETH,
2908 	};
2909 	struct rte_flow_error error;
2910 	struct rte_flow *flow;
2911 	unsigned int i;
2912 	int ret;
2913 
2914 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2915 	if (ret)
2916 		return ret;
2917 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2918 				attributes.actions, &error, &parser);
2919 	if (ret)
2920 		goto exit;
2921 	/*
	 * Special case for the drop action, which is only added to the
	 * specifications when a flow is actually created. The drop
	 * specification is therefore missing here and must be appended
	 * before comparing flows.
2925 	 */
2926 	if (parser.drop) {
2927 		struct ibv_flow_spec_action_drop *drop;
2928 
2929 		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2930 				parser.queue[HASH_RXQ_ETH].offset);
2931 		*drop = (struct ibv_flow_spec_action_drop){
2932 			.type = IBV_FLOW_SPEC_ACTION_DROP,
2933 			.size = sizeof(struct ibv_flow_spec_action_drop),
2934 		};
2935 		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2936 	}
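	/*
	 * Look for a flow whose Verbs attribute and specifications match
	 * the converted filter, comparing them spec by spec.
	 */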
2937 	TAILQ_FOREACH(flow, &priv->flows, next) {
2938 		struct ibv_flow_attr *attr;
2939 		struct ibv_spec_header *attr_h;
2940 		void *spec;
2941 		struct ibv_flow_attr *flow_attr;
2942 		struct ibv_spec_header *flow_h;
2943 		void *flow_spec;
2944 		unsigned int specs_n;
2945 
2946 		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2947 		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2948 		/* Compare first the attributes. */
2949 		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2950 			continue;
2951 		if (attr->num_of_specs == 0)
2952 			continue;
2953 		spec = (void *)((uintptr_t)attr +
2954 				sizeof(struct ibv_flow_attr));
2955 		flow_spec = (void *)((uintptr_t)flow_attr +
2956 				     sizeof(struct ibv_flow_attr));
2957 		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2958 		for (i = 0; i != specs_n; ++i) {
2959 			attr_h = spec;
2960 			flow_h = flow_spec;
2961 			if (memcmp(spec, flow_spec,
2962 				   RTE_MIN(attr_h->size, flow_h->size)))
2963 				goto wrong_flow;
2964 			spec = (void *)((uintptr_t)spec + attr_h->size);
2965 			flow_spec = (void *)((uintptr_t)flow_spec +
2966 					     flow_h->size);
2967 		}
		/* At this point, the flows match. */
2969 		break;
2970 wrong_flow:
2971 		/* The flow does not match. */
2972 		continue;
2973 	}
2974 	ret = rte_errno; /* Save rte_errno before cleanup. */
2975 	if (flow)
2976 		mlx5_flow_list_destroy(dev, &priv->flows, flow);
2977 exit:
2978 	for (i = 0; i != hash_rxq_init_n; ++i) {
2979 		if (parser.queue[i].ibv_attr)
2980 			rte_free(parser.queue[i].ibv_attr);
2981 	}
2982 	rte_errno = ret; /* Restore rte_errno. */
2983 	return -rte_errno;
2984 }
2985 
2986 /**
2987  * Update queue for specific filter.
2988  *
2989  * @param dev
2990  *   Pointer to Ethernet device.
2991  * @param fdir_filter
2992  *   Filter to be updated.
2993  *
2994  * @return
2995  *   0 on success, a negative errno value otherwise and rte_errno is set.
2996  */
2997 static int
2998 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
2999 			const struct rte_eth_fdir_filter *fdir_filter)
3000 {
3001 	int ret;
3002 
3003 	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3004 	if (ret)
3005 		return ret;
3006 	return mlx5_fdir_filter_add(dev, fdir_filter);
3007 }
3008 
3009 /**
3010  * Flush all filters.
3011  *
3012  * @param dev
3013  *   Pointer to Ethernet device.
3014  */
3015 static void
3016 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3017 {
3018 	struct priv *priv = dev->data->dev_private;
3019 
3020 	mlx5_flow_list_flush(dev, &priv->flows);
3021 }
3022 
3023 /**
3024  * Get flow director information.
3025  *
3026  * @param dev
3027  *   Pointer to Ethernet device.
3028  * @param[out] fdir_info
3029  *   Resulting flow director information.
3030  */
3031 static void
3032 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3033 {
3034 	struct priv *priv = dev->data->dev_private;
3035 	struct rte_eth_fdir_masks *mask =
3036 		&priv->dev->data->dev_conf.fdir_conf.mask;
3037 
3038 	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3039 	fdir_info->guarant_spc = 0;
3040 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3041 	fdir_info->max_flexpayload = 0;
3042 	fdir_info->flow_types_mask[0] = 0;
3043 	fdir_info->flex_payload_unit = 0;
3044 	fdir_info->max_flex_payload_segment_num = 0;
3045 	fdir_info->flex_payload_limit = 0;
3046 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3047 }
3048 
3049 /**
3050  * Deal with flow director operations.
3051  *
3052  * @param dev
3053  *   Pointer to Ethernet device.
3054  * @param filter_op
3055  *   Operation to perform.
3056  * @param arg
3057  *   Pointer to operation-specific structure.
3058  *
3059  * @return
3060  *   0 on success, a negative errno value otherwise and rte_errno is set.
3061  */
3062 static int
3063 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3064 		    void *arg)
3065 {
3066 	struct priv *priv = dev->data->dev_private;
3067 	enum rte_fdir_mode fdir_mode =
3068 		priv->dev->data->dev_conf.fdir_conf.mode;
3069 
3070 	if (filter_op == RTE_ETH_FILTER_NOP)
3071 		return 0;
3072 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3073 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3074 		DRV_LOG(ERR, "port %u flow director mode %d not supported",
3075 			dev->data->port_id, fdir_mode);
3076 		rte_errno = EINVAL;
3077 		return -rte_errno;
3078 	}
3079 	switch (filter_op) {
3080 	case RTE_ETH_FILTER_ADD:
3081 		return mlx5_fdir_filter_add(dev, arg);
3082 	case RTE_ETH_FILTER_UPDATE:
3083 		return mlx5_fdir_filter_update(dev, arg);
3084 	case RTE_ETH_FILTER_DELETE:
3085 		return mlx5_fdir_filter_delete(dev, arg);
3086 	case RTE_ETH_FILTER_FLUSH:
3087 		mlx5_fdir_filter_flush(dev);
3088 		break;
3089 	case RTE_ETH_FILTER_INFO:
3090 		mlx5_fdir_info_get(dev, arg);
3091 		break;
3092 	default:
3093 		DRV_LOG(DEBUG, "port %u unknown operation %u",
3094 			dev->data->port_id, filter_op);
3095 		rte_errno = EINVAL;
3096 		return -rte_errno;
3097 	}
3098 	return 0;
3099 }
3100 
3101 /**
3102  * Manage filter operations.
3103  *
3104  * @param dev
3105  *   Pointer to Ethernet device structure.
3106  * @param filter_type
3107  *   Filter type.
3108  * @param filter_op
3109  *   Operation to perform.
3110  * @param arg
3111  *   Pointer to operation-specific structure.
3112  *
3113  * @return
3114  *   0 on success, a negative errno value otherwise and rte_errno is set.
3115  */
3116 int
3117 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3118 		     enum rte_filter_type filter_type,
3119 		     enum rte_filter_op filter_op,
3120 		     void *arg)
3121 {
3122 	switch (filter_type) {
3123 	case RTE_ETH_FILTER_GENERIC:
3124 		if (filter_op != RTE_ETH_FILTER_GET) {
3125 			rte_errno = EINVAL;
3126 			return -rte_errno;
3127 		}
3128 		*(const void **)arg = &mlx5_flow_ops;
3129 		return 0;
3130 	case RTE_ETH_FILTER_FDIR:
3131 		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3132 	default:
3133 		DRV_LOG(ERR, "port %u filter type (%d) not supported",
3134 			dev->data->port_id, filter_type);
3135 		rte_errno = ENOTSUP;
3136 		return -rte_errno;
3137 	}
3138 	return 0;
3139 }
3140