xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision e58638c32411b7ae60ed4dea131728faee962327)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9 
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19 
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28 
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33 
34 /* Define minimal priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 4
36 
37 /* Internet Protocol versions. */
38 #define MLX5_IPV4 4
39 #define MLX5_IPV6 6
40 
41 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
42 struct ibv_flow_spec_counter_action {
43 	int dummy;
44 };
45 #endif
46 
47 /* Dev ops structure defined in mlx5.c */
48 extern const struct eth_dev_ops mlx5_dev_ops;
49 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
50 
51 /** Structure given to the conversion functions. */
52 struct mlx5_flow_data {
53 	struct mlx5_flow_parse *parser; /**< Parser context. */
54 	struct rte_flow_error *error; /**< Error context. */
55 };
56 
57 static int
58 mlx5_flow_create_eth(const struct rte_flow_item *item,
59 		     const void *default_mask,
60 		     struct mlx5_flow_data *data);
61 
62 static int
63 mlx5_flow_create_vlan(const struct rte_flow_item *item,
64 		      const void *default_mask,
65 		      struct mlx5_flow_data *data);
66 
67 static int
68 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
69 		      const void *default_mask,
70 		      struct mlx5_flow_data *data);
71 
72 static int
73 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
74 		      const void *default_mask,
75 		      struct mlx5_flow_data *data);
76 
77 static int
78 mlx5_flow_create_udp(const struct rte_flow_item *item,
79 		     const void *default_mask,
80 		     struct mlx5_flow_data *data);
81 
82 static int
83 mlx5_flow_create_tcp(const struct rte_flow_item *item,
84 		     const void *default_mask,
85 		     struct mlx5_flow_data *data);
86 
87 static int
88 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
89 		       const void *default_mask,
90 		       struct mlx5_flow_data *data);
91 
92 struct mlx5_flow_parse;
93 
94 static void
95 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
96 		      unsigned int size);
97 
98 static int
99 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
100 
101 static int
102 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
103 
104 /* Hash RX queue types. */
105 enum hash_rxq_type {
106 	HASH_RXQ_TCPV4,
107 	HASH_RXQ_UDPV4,
108 	HASH_RXQ_IPV4,
109 	HASH_RXQ_TCPV6,
110 	HASH_RXQ_UDPV6,
111 	HASH_RXQ_IPV6,
112 	HASH_RXQ_ETH,
113 };
114 
115 /* Initialization data for hash RX queue. */
116 struct hash_rxq_init {
117 	uint64_t hash_fields; /* Fields that participate in the hash. */
118 	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
119 	unsigned int flow_priority; /* Flow priority to use. */
120 	unsigned int ip_version; /* Internet protocol. */
121 };
122 
123 /* Initialization data for hash RX queues. */
124 const struct hash_rxq_init hash_rxq_init[] = {
125 	[HASH_RXQ_TCPV4] = {
126 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
127 				IBV_RX_HASH_DST_IPV4 |
128 				IBV_RX_HASH_SRC_PORT_TCP |
129 				IBV_RX_HASH_DST_PORT_TCP),
130 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
131 		.flow_priority = 1,
132 		.ip_version = MLX5_IPV4,
133 	},
134 	[HASH_RXQ_UDPV4] = {
135 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
136 				IBV_RX_HASH_DST_IPV4 |
137 				IBV_RX_HASH_SRC_PORT_UDP |
138 				IBV_RX_HASH_DST_PORT_UDP),
139 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
140 		.flow_priority = 1,
141 		.ip_version = MLX5_IPV4,
142 	},
143 	[HASH_RXQ_IPV4] = {
144 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
145 				IBV_RX_HASH_DST_IPV4),
146 		.dpdk_rss_hf = (ETH_RSS_IPV4 |
147 				ETH_RSS_FRAG_IPV4),
148 		.flow_priority = 2,
149 		.ip_version = MLX5_IPV4,
150 	},
151 	[HASH_RXQ_TCPV6] = {
152 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
153 				IBV_RX_HASH_DST_IPV6 |
154 				IBV_RX_HASH_SRC_PORT_TCP |
155 				IBV_RX_HASH_DST_PORT_TCP),
156 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
157 		.flow_priority = 1,
158 		.ip_version = MLX5_IPV6,
159 	},
160 	[HASH_RXQ_UDPV6] = {
161 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
162 				IBV_RX_HASH_DST_IPV6 |
163 				IBV_RX_HASH_SRC_PORT_UDP |
164 				IBV_RX_HASH_DST_PORT_UDP),
165 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
166 		.flow_priority = 1,
167 		.ip_version = MLX5_IPV6,
168 	},
169 	[HASH_RXQ_IPV6] = {
170 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
171 				IBV_RX_HASH_DST_IPV6),
172 		.dpdk_rss_hf = (ETH_RSS_IPV6 |
173 				ETH_RSS_FRAG_IPV6),
174 		.flow_priority = 2,
175 		.ip_version = MLX5_IPV6,
176 	},
177 	[HASH_RXQ_ETH] = {
178 		.hash_fields = 0,
179 		.dpdk_rss_hf = 0,
180 		.flow_priority = 3,
181 	},
182 };
183 
184 /* Number of entries in hash_rxq_init[]. */
185 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
186 
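/*
 * Illustrative sketch, not used by the driver: shows how the dpdk_rss_hf
 * field of hash_rxq_init[] can be matched against an application-provided
 * RSS "types" bit-mask to count the hash Rx queue kinds it enables. The
 * helper name is hypothetical.
 */
static unsigned int __rte_unused
example_count_enabled_hash_rxqs(uint64_t rss_types)
{
	unsigned int i;
	unsigned int n = 0;

	for (i = 0; i != hash_rxq_init_n; ++i)
		if (hash_rxq_init[i].dpdk_rss_hf & rss_types)
			++n;
	return n;
}
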
187 /** Structure for holding counter stats. */
188 struct mlx5_flow_counter_stats {
189 	uint64_t hits; /**< Number of packets matched by the rule. */
190 	uint64_t bytes; /**< Number of bytes matched by the rule. */
191 };
192 
193 /** Structure for Drop queue. */
194 struct mlx5_hrxq_drop {
195 	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
196 	struct ibv_qp *qp; /**< Verbs queue pair. */
197 	struct ibv_wq *wq; /**< Verbs work queue. */
198 	struct ibv_cq *cq; /**< Verbs completion queue. */
199 };
200 
201 /* Flow structures. */
202 struct mlx5_flow {
203 	uint64_t hash_fields; /**< Fields that participate in the hash. */
204 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
205 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
206 	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
207 };
208 
209 /* Drop flow structures. */
210 struct mlx5_flow_drop {
211 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
212 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
213 };
214 
215 struct rte_flow {
216 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
217 	uint32_t mark:1; /**< Set if the flow is marked. */
218 	uint32_t drop:1; /**< Drop queue. */
219 	struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
220 	uint16_t (*queues)[]; /**< Queue indexes to use. */
221 	uint8_t rss_key[40]; /**< Copy of the RSS key. */
222 	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
223 	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
224 	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
225 	/**< Flow with Rx queue. */
226 };
227 
228 /** Static initializer for items. */
229 #define ITEMS(...) \
230 	(const enum rte_flow_item_type []){ \
231 		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
232 	}
233 
234 /** Structure to generate a simple graph of layers supported by the NIC. */
235 struct mlx5_flow_items {
236 	/** List of possible actions for these items. */
237 	const enum rte_flow_action_type *const actions;
238 	/** Bit-masks corresponding to the possibilities for the item. */
239 	const void *mask;
240 	/**
241 	 * Default bit-masks to use when item->mask is not provided. When
242 	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
243 	 * used instead.
244 	 */
245 	const void *default_mask;
246 	/** Bit-mask size in bytes. */
247 	const unsigned int mask_sz;
248 	/**
249 	 * Conversion function from rte_flow to NIC specific flow.
250 	 *
251 	 * @param item
252 	 *   rte_flow item to convert.
253 	 * @param default_mask
254 	 *   Default bit-masks to use when item->mask is not provided.
255 	 * @param data
256 	 *   Internal structure to store the conversion.
257 	 *
258 	 * @return
259 	 *   0 on success, a negative errno value otherwise and rte_errno is
260 	 *   set.
261 	 */
262 	int (*convert)(const struct rte_flow_item *item,
263 		       const void *default_mask,
264 		       struct mlx5_flow_data *data);
265 	/** Size in bytes of the destination structure. */
266 	const unsigned int dst_sz;
267 	/** List of possible following items.  */
268 	const enum rte_flow_item_type *const items;
269 };
270 
271 /** Valid actions for this PMD. */
272 static const enum rte_flow_action_type valid_actions[] = {
273 	RTE_FLOW_ACTION_TYPE_DROP,
274 	RTE_FLOW_ACTION_TYPE_QUEUE,
275 	RTE_FLOW_ACTION_TYPE_MARK,
276 	RTE_FLOW_ACTION_TYPE_FLAG,
277 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
278 	RTE_FLOW_ACTION_TYPE_COUNT,
279 #endif
280 	RTE_FLOW_ACTION_TYPE_END,
281 };
282 
283 /** Graph of supported items and associated actions. */
284 static const struct mlx5_flow_items mlx5_flow_items[] = {
285 	[RTE_FLOW_ITEM_TYPE_END] = {
286 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
287 			       RTE_FLOW_ITEM_TYPE_VXLAN),
288 	},
289 	[RTE_FLOW_ITEM_TYPE_ETH] = {
290 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
291 			       RTE_FLOW_ITEM_TYPE_IPV4,
292 			       RTE_FLOW_ITEM_TYPE_IPV6),
293 		.actions = valid_actions,
294 		.mask = &(const struct rte_flow_item_eth){
295 			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
296 			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
297 			.type = -1,
298 		},
299 		.default_mask = &rte_flow_item_eth_mask,
300 		.mask_sz = sizeof(struct rte_flow_item_eth),
301 		.convert = mlx5_flow_create_eth,
302 		.dst_sz = sizeof(struct ibv_flow_spec_eth),
303 	},
304 	[RTE_FLOW_ITEM_TYPE_VLAN] = {
305 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
306 			       RTE_FLOW_ITEM_TYPE_IPV6),
307 		.actions = valid_actions,
308 		.mask = &(const struct rte_flow_item_vlan){
309 			.tci = -1,
310 			.inner_type = -1,
311 		},
312 		.default_mask = &rte_flow_item_vlan_mask,
313 		.mask_sz = sizeof(struct rte_flow_item_vlan),
314 		.convert = mlx5_flow_create_vlan,
315 		.dst_sz = 0,
316 	},
317 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
318 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
319 			       RTE_FLOW_ITEM_TYPE_TCP),
320 		.actions = valid_actions,
321 		.mask = &(const struct rte_flow_item_ipv4){
322 			.hdr = {
323 				.src_addr = -1,
324 				.dst_addr = -1,
325 				.type_of_service = -1,
326 				.next_proto_id = -1,
327 			},
328 		},
329 		.default_mask = &rte_flow_item_ipv4_mask,
330 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
331 		.convert = mlx5_flow_create_ipv4,
332 		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
333 	},
334 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
335 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
336 			       RTE_FLOW_ITEM_TYPE_TCP),
337 		.actions = valid_actions,
338 		.mask = &(const struct rte_flow_item_ipv6){
339 			.hdr = {
340 				.src_addr = {
341 					0xff, 0xff, 0xff, 0xff,
342 					0xff, 0xff, 0xff, 0xff,
343 					0xff, 0xff, 0xff, 0xff,
344 					0xff, 0xff, 0xff, 0xff,
345 				},
346 				.dst_addr = {
347 					0xff, 0xff, 0xff, 0xff,
348 					0xff, 0xff, 0xff, 0xff,
349 					0xff, 0xff, 0xff, 0xff,
350 					0xff, 0xff, 0xff, 0xff,
351 				},
352 				.vtc_flow = -1,
353 				.proto = -1,
354 				.hop_limits = -1,
355 			},
356 		},
357 		.default_mask = &rte_flow_item_ipv6_mask,
358 		.mask_sz = sizeof(struct rte_flow_item_ipv6),
359 		.convert = mlx5_flow_create_ipv6,
360 		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
361 	},
362 	[RTE_FLOW_ITEM_TYPE_UDP] = {
363 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
364 		.actions = valid_actions,
365 		.mask = &(const struct rte_flow_item_udp){
366 			.hdr = {
367 				.src_port = -1,
368 				.dst_port = -1,
369 			},
370 		},
371 		.default_mask = &rte_flow_item_udp_mask,
372 		.mask_sz = sizeof(struct rte_flow_item_udp),
373 		.convert = mlx5_flow_create_udp,
374 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
375 	},
376 	[RTE_FLOW_ITEM_TYPE_TCP] = {
377 		.actions = valid_actions,
378 		.mask = &(const struct rte_flow_item_tcp){
379 			.hdr = {
380 				.src_port = -1,
381 				.dst_port = -1,
382 			},
383 		},
384 		.default_mask = &rte_flow_item_tcp_mask,
385 		.mask_sz = sizeof(struct rte_flow_item_tcp),
386 		.convert = mlx5_flow_create_tcp,
387 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
388 	},
389 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
390 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
391 		.actions = valid_actions,
392 		.mask = &(const struct rte_flow_item_vxlan){
393 			.vni = "\xff\xff\xff",
394 		},
395 		.default_mask = &rte_flow_item_vxlan_mask,
396 		.mask_sz = sizeof(struct rte_flow_item_vxlan),
397 		.convert = mlx5_flow_create_vxlan,
398 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
399 	},
400 };
401 
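/*
 * Illustrative sketch, not used by the driver: shows how the item graph
 * above can be walked to check whether item type "next" may directly
 * follow item type "prev" in a pattern. The helper name is hypothetical;
 * the real traversal is done inline in mlx5_flow_convert_items_validate().
 * "prev" is assumed to be one of the item types listed above.
 */
static int __rte_unused
example_item_may_follow(enum rte_flow_item_type prev,
			enum rte_flow_item_type next)
{
	const enum rte_flow_item_type *trans = mlx5_flow_items[prev].items;
	unsigned int i;

	if (!trans)
		return 0;
	for (i = 0; trans[i] != RTE_FLOW_ITEM_TYPE_END; ++i)
		if (trans[i] == next)
			return 1;
	return 0;
}
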
402 /** Structure to pass to the conversion function. */
403 struct mlx5_flow_parse {
404 	uint32_t inner; /**< Set once VXLAN is encountered. */
405 	uint32_t create:1;
406 	/**< Whether resources should remain after a validate. */
407 	uint32_t drop:1; /**< Target is a drop queue. */
408 	uint32_t mark:1; /**< Mark is present in the flow. */
409 	uint32_t count:1; /**< Count is present in the flow. */
410 	uint32_t mark_id; /**< Mark identifier. */
411 	struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
412 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
413 	uint8_t rss_key[40]; /**< Copy of the RSS key. */
414 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
415 	struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
416 	struct {
417 		struct ibv_flow_attr *ibv_attr;
418 		/**< Pointer to Verbs attributes. */
419 		unsigned int offset;
420 		/**< Current position or total size of the attribute. */
421 	} queue[RTE_DIM(hash_rxq_init)];
422 };
423 
424 static const struct rte_flow_ops mlx5_flow_ops = {
425 	.validate = mlx5_flow_validate,
426 	.create = mlx5_flow_create,
427 	.destroy = mlx5_flow_destroy,
428 	.flush = mlx5_flow_flush,
429 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
430 	.query = mlx5_flow_query,
431 #else
432 	.query = NULL,
433 #endif
434 	.isolate = mlx5_flow_isolate,
435 };
436 
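/*
 * Illustrative sketch, not used by the driver: a minimal rte_flow rule an
 * application could submit through the ops above, matching ingress
 * IPv4/UDP traffic and steering it to Rx queue 0. Port id 0 and the helper
 * name are assumptions made for the example.
 */
static int __rte_unused
example_validate_udp_to_queue0(void)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error error;

	return rte_flow_validate(0, &attr, pattern, actions, &error);
}
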
437 /* Convert FDIR request to a generic flow. */
438 struct mlx5_fdir {
439 	struct rte_flow_attr attr;
440 	struct rte_flow_action actions[2];
441 	struct rte_flow_item items[4];
442 	struct rte_flow_item_eth l2;
443 	struct rte_flow_item_eth l2_mask;
444 	union {
445 		struct rte_flow_item_ipv4 ipv4;
446 		struct rte_flow_item_ipv6 ipv6;
447 	} l3;
448 	union {
449 		struct rte_flow_item_ipv4 ipv4;
450 		struct rte_flow_item_ipv6 ipv6;
451 	} l3_mask;
452 	union {
453 		struct rte_flow_item_udp udp;
454 		struct rte_flow_item_tcp tcp;
455 	} l4;
456 	union {
457 		struct rte_flow_item_udp udp;
458 		struct rte_flow_item_tcp tcp;
459 	} l4_mask;
460 	struct rte_flow_action_queue queue;
461 };
462 
463 /* Verbs specification header. */
464 struct ibv_spec_header {
465 	enum ibv_flow_spec_type type;
466 	uint16_t size;
467 };
468 
469 /**
470  * Check support for a given item.
471  *
472  * @param item[in]
473  *   Item specification.
474  * @param mask[in]
475  *   Bit-masks covering supported fields to compare with spec, last and mask in
476  *   \item.
477  * @param size
478  *   Bit-mask size in bytes.
479  *
480  * @return
481  *   0 on success, a negative errno value otherwise and rte_errno is set.
482  */
483 static int
484 mlx5_flow_item_validate(const struct rte_flow_item *item,
485 			const uint8_t *mask, unsigned int size)
486 {
487 	if (!item->spec && (item->mask || item->last)) {
488 		rte_errno = EINVAL;
489 		return -rte_errno;
490 	}
491 	if (item->spec && !item->mask) {
492 		unsigned int i;
493 		const uint8_t *spec = item->spec;
494 
495 		for (i = 0; i < size; ++i)
496 			if ((spec[i] | mask[i]) != mask[i]) {
497 				rte_errno = EINVAL;
498 				return -rte_errno;
499 			}
500 	}
501 	if (item->last && !item->mask) {
502 		unsigned int i;
503 		const uint8_t *spec = item->last;
504 
505 		for (i = 0; i < size; ++i)
506 			if ((spec[i] | mask[i]) != mask[i]) {
507 				rte_errno = EINVAL;
508 				return -rte_errno;
509 			}
510 	}
511 	if (item->mask) {
512 		unsigned int i;
513 		const uint8_t *spec = item->mask;
514 
515 		for (i = 0; i < size; ++i)
516 			if ((spec[i] | mask[i]) != mask[i]) {
517 				rte_errno = EINVAL;
518 				return -rte_errno;
519 			}
520 	}
521 	if (item->spec && item->last) {
522 		uint8_t spec[size];
523 		uint8_t last[size];
524 		const uint8_t *apply = mask;
525 		unsigned int i;
526 		int ret;
527 
528 		if (item->mask)
529 			apply = item->mask;
530 		for (i = 0; i < size; ++i) {
531 			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
532 			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
533 		}
534 		ret = memcmp(spec, last, size);
535 		if (ret != 0) {
536 			rte_errno = EINVAL;
537 			return -rte_errno;
538 		}
539 	}
540 	return 0;
541 }
542 
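/*
 * Illustrative sketch, not used by the driver: an Ethernet item that passes
 * the validation above, matching a single destination MAC address (spec and
 * mask stay within the supported bit-mask). The address is an arbitrary
 * example value.
 */
static const struct rte_flow_item example_eth_item __rte_unused = {
	.type = RTE_FLOW_ITEM_TYPE_ETH,
	.spec = &(const struct rte_flow_item_eth){
		.dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
	},
	.mask = &(const struct rte_flow_item_eth){
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	},
};
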
543 /**
544  * Validate flow rule attributes.
545  *
546  * @param[in] attr
547  *   Flow rule attributes.
548  * @param[out] error
549  *   Perform verbose error reporting if not NULL.
550  *
551  * @return
552  *   0 on success, a negative errno value otherwise and rte_errno is set.
553  */
554 static int
555 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
556 			     struct rte_flow_error *error)
557 {
558 	if (attr->group) {
559 		rte_flow_error_set(error, ENOTSUP,
560 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
561 				   NULL,
562 				   "groups are not supported");
563 		return -rte_errno;
564 	}
565 	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
566 		rte_flow_error_set(error, ENOTSUP,
567 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
568 				   NULL,
569 				   "priorities are not supported");
570 		return -rte_errno;
571 	}
572 	if (attr->egress) {
573 		rte_flow_error_set(error, ENOTSUP,
574 				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
575 				   NULL,
576 				   "egress is not supported");
577 		return -rte_errno;
578 	}
579 	if (!attr->ingress) {
580 		rte_flow_error_set(error, ENOTSUP,
581 				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
582 				   NULL,
583 				   "only ingress is supported");
584 		return -rte_errno;
585 	}
586 	return 0;
587 }
588 
589 /**
590  * Extract the requested actions into the parser.
591  *
592  * @param dev
593  *   Pointer to Ethernet device.
594  * @param[in] actions
595  *   Associated actions (list terminated by the END action).
596  * @param[out] error
597  *   Perform verbose error reporting if not NULL.
598  * @param[in, out] parser
599  *   Internal parser structure.
600  *
601  * @return
602  *   0 on success, a negative errno value otherwise and rte_errno is set.
603  */
604 static int
605 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
606 			  const struct rte_flow_action actions[],
607 			  struct rte_flow_error *error,
608 			  struct mlx5_flow_parse *parser)
609 {
610 	enum { FATE = 1, MARK = 2, COUNT = 4, };
611 	uint32_t overlap = 0;
612 	struct priv *priv = dev->data->dev_private;
613 
614 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
615 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
616 			continue;
617 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
618 			if (overlap & FATE)
619 				goto exit_action_overlap;
620 			overlap |= FATE;
621 			parser->drop = 1;
622 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
623 			const struct rte_flow_action_queue *queue =
624 				(const struct rte_flow_action_queue *)
625 				actions->conf;
626 
627 			if (overlap & FATE)
628 				goto exit_action_overlap;
629 			overlap |= FATE;
630 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
631 				goto exit_action_not_supported;
632 			parser->queues[0] = queue->index;
633 			parser->rss_conf = (struct rte_flow_action_rss){
634 				.queue_num = 1,
635 				.queue = parser->queues,
636 			};
637 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
638 			const struct rte_flow_action_rss *rss =
639 				(const struct rte_flow_action_rss *)
640 				actions->conf;
641 			const uint8_t *rss_key;
642 			uint32_t rss_key_len;
643 			uint16_t n;
644 
645 			if (overlap & FATE)
646 				goto exit_action_overlap;
647 			overlap |= FATE;
648 			if (rss->func &&
649 			    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
650 				rte_flow_error_set(error, EINVAL,
651 						   RTE_FLOW_ERROR_TYPE_ACTION,
652 						   actions,
653 						   "the only supported RSS hash"
654 						   " function is Toeplitz");
655 				return -rte_errno;
656 			}
657 			if (rss->level) {
658 				rte_flow_error_set(error, EINVAL,
659 						   RTE_FLOW_ERROR_TYPE_ACTION,
660 						   actions,
661 						   "a nonzero RSS encapsulation"
662 						   " level is not supported");
663 				return -rte_errno;
664 			}
665 			if (rss->types & MLX5_RSS_HF_MASK) {
666 				rte_flow_error_set(error, EINVAL,
667 						   RTE_FLOW_ERROR_TYPE_ACTION,
668 						   actions,
669 						   "unsupported RSS type"
670 						   " requested");
671 				return -rte_errno;
672 			}
673 			if (rss->key_len) {
674 				rss_key_len = rss->key_len;
675 				rss_key = rss->key;
676 			} else {
677 				rss_key_len = rss_hash_default_key_len;
678 				rss_key = rss_hash_default_key;
679 			}
680 			if (rss_key_len != RTE_DIM(parser->rss_key)) {
681 				rte_flow_error_set(error, EINVAL,
682 						   RTE_FLOW_ERROR_TYPE_ACTION,
683 						   actions,
684 						   "RSS hash key must be"
685 						   " exactly 40 bytes long");
686 				return -rte_errno;
687 			}
688 			if (!rss->queue_num) {
689 				rte_flow_error_set(error, EINVAL,
690 						   RTE_FLOW_ERROR_TYPE_ACTION,
691 						   actions,
692 						   "no valid queues");
693 				return -rte_errno;
694 			}
695 			if (rss->queue_num > RTE_DIM(parser->queues)) {
696 				rte_flow_error_set(error, EINVAL,
697 						   RTE_FLOW_ERROR_TYPE_ACTION,
698 						   actions,
699 						   "too many queues for RSS"
700 						   " context");
701 				return -rte_errno;
702 			}
703 			for (n = 0; n < rss->queue_num; ++n) {
704 				if (rss->queue[n] >= priv->rxqs_n) {
705 					rte_flow_error_set(error, EINVAL,
706 						   RTE_FLOW_ERROR_TYPE_ACTION,
707 						   actions,
708 						   "queue id > number of"
709 						   " queues");
710 					return -rte_errno;
711 				}
712 			}
713 			parser->rss_conf = (struct rte_flow_action_rss){
714 				.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
715 				.level = 0,
716 				.types = rss->types,
717 				.key_len = rss_key_len,
718 				.queue_num = rss->queue_num,
719 				.key = memcpy(parser->rss_key, rss_key,
720 					      sizeof(*rss_key) * rss_key_len),
721 				.queue = memcpy(parser->queues, rss->queue,
722 						sizeof(*rss->queue) *
723 						rss->queue_num),
724 			};
725 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
726 			const struct rte_flow_action_mark *mark =
727 				(const struct rte_flow_action_mark *)
728 				actions->conf;
729 
730 			if (overlap & MARK)
731 				goto exit_action_overlap;
732 			overlap |= MARK;
733 			if (!mark) {
734 				rte_flow_error_set(error, EINVAL,
735 						   RTE_FLOW_ERROR_TYPE_ACTION,
736 						   actions,
737 						   "mark must be defined");
738 				return -rte_errno;
739 			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
740 				rte_flow_error_set(error, ENOTSUP,
741 						   RTE_FLOW_ERROR_TYPE_ACTION,
742 						   actions,
743 						   "mark must be between 0"
744 						   " and 16777199");
745 				return -rte_errno;
746 			}
747 			parser->mark = 1;
748 			parser->mark_id = mark->id;
749 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
750 			if (overlap & MARK)
751 				goto exit_action_overlap;
752 			overlap |= MARK;
753 			parser->mark = 1;
754 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
755 			   priv->config.flow_counter_en) {
756 			if (overlap & COUNT)
757 				goto exit_action_overlap;
758 			overlap |= COUNT;
759 			parser->count = 1;
760 		} else {
761 			goto exit_action_not_supported;
762 		}
763 	}
764 	/* When fate is unknown, drop traffic. */
765 	if (!(overlap & FATE))
766 		parser->drop = 1;
767 	if (parser->drop && parser->mark)
768 		parser->mark = 0;
769 	if (!parser->rss_conf.queue_num && !parser->drop) {
770 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
771 				   NULL, "no valid action");
772 		return -rte_errno;
773 	}
774 	return 0;
775 exit_action_not_supported:
776 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
777 			   actions, "action not supported");
778 	return -rte_errno;
779 exit_action_overlap:
780 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
781 			   actions, "overlapping actions are not supported");
782 	return -rte_errno;
783 }
784 
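/*
 * Illustrative sketch, not used by the driver: fills an RSS action that
 * satisfies the checks above (Toeplitz hash, level 0, a 40-byte key and a
 * caller-provided queue list). The helper name and the choice of
 * ETH_RSS_NONFRAG_IPV4_UDP are assumptions made for the example.
 */
static void __rte_unused
example_fill_rss_action(struct rte_flow_action_rss *rss,
			const uint8_t key[40],
			const uint16_t *queues, uint32_t queue_num)
{
	*rss = (struct rte_flow_action_rss){
		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
		.level = 0,
		.types = ETH_RSS_NONFRAG_IPV4_UDP,
		.key_len = 40,
		.queue_num = queue_num,
		.key = key,
		.queue = queues,
	};
}
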
785 /**
786  * Validate items.
787  *
788  * @param[in] items
789  *   Pattern specification (list terminated by the END pattern item).
790  * @param[out] error
791  *   Perform verbose error reporting if not NULL.
792  * @param[in, out] parser
793  *   Internal parser structure.
794  *
795  * @return
796  *   0 on success, a negative errno value otherwise and rte_errno is set.
797  */
798 static int
799 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
800 				 struct rte_flow_error *error,
801 				 struct mlx5_flow_parse *parser)
802 {
803 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
804 	unsigned int i;
805 	int ret = 0;
806 
807 	/* Initialise the offsets to start after verbs attribute. */
808 	for (i = 0; i != hash_rxq_init_n; ++i)
809 		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
810 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
811 		const struct mlx5_flow_items *token = NULL;
812 		unsigned int n;
813 
814 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
815 			continue;
816 		for (i = 0;
817 		     cur_item->items &&
818 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
819 		     ++i) {
820 			if (cur_item->items[i] == items->type) {
821 				token = &mlx5_flow_items[items->type];
822 				break;
823 			}
824 		}
825 		if (!token) {
826 			ret = -ENOTSUP;
827 			goto exit_item_not_supported;
828 		}
829 		cur_item = token;
830 		ret = mlx5_flow_item_validate(items,
831 					      (const uint8_t *)cur_item->mask,
832 					      cur_item->mask_sz);
833 		if (ret)
834 			goto exit_item_not_supported;
835 		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
836 			if (parser->inner) {
837 				rte_flow_error_set(error, ENOTSUP,
838 						   RTE_FLOW_ERROR_TYPE_ITEM,
839 						   items,
840 						   "cannot recognize multiple"
841 						   " VXLAN encapsulations");
842 				return -rte_errno;
843 			}
844 			parser->inner = IBV_FLOW_SPEC_INNER;
845 		}
846 		if (parser->drop) {
847 			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
848 		} else {
849 			for (n = 0; n != hash_rxq_init_n; ++n)
850 				parser->queue[n].offset += cur_item->dst_sz;
851 		}
852 	}
853 	if (parser->drop) {
854 		parser->queue[HASH_RXQ_ETH].offset +=
855 			sizeof(struct ibv_flow_spec_action_drop);
856 	}
857 	if (parser->mark) {
858 		for (i = 0; i != hash_rxq_init_n; ++i)
859 			parser->queue[i].offset +=
860 				sizeof(struct ibv_flow_spec_action_tag);
861 	}
862 	if (parser->count) {
863 		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
864 
865 		for (i = 0; i != hash_rxq_init_n; ++i)
866 			parser->queue[i].offset += size;
867 	}
868 	return 0;
869 exit_item_not_supported:
870 	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
871 				  items, "item not supported");
872 }
873 
874 /**
875  * Allocate memory space to store verbs flow attributes.
876  *
877  * @param[in] size
878  *   Number of bytes to allocate.
879  * @param[out] error
880  *   Perform verbose error reporting if not NULL.
881  *
882  * @return
883  *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
884  */
885 static struct ibv_flow_attr *
886 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
887 {
888 	struct ibv_flow_attr *ibv_attr;
889 
890 	ibv_attr = rte_calloc(__func__, 1, size, 0);
891 	if (!ibv_attr) {
892 		rte_flow_error_set(error, ENOMEM,
893 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
894 				   NULL,
895 				   "cannot allocate verbs spec attributes");
896 		return NULL;
897 	}
898 	return ibv_attr;
899 }
900 
901 /**
902  * Give inner packet matching a higher priority than non-inner
903  * matching.
904  *
905  * @param[in, out] parser
906  *   Internal parser structure.
907  * @param attr
908  *   User flow attribute.
909  */
910 static void
911 mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
912 			  const struct rte_flow_attr *attr)
913 {
914 	unsigned int i;
915 
916 	if (parser->drop) {
917 		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
918 			attr->priority +
919 			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
920 		return;
921 	}
922 	for (i = 0; i != hash_rxq_init_n; ++i) {
923 		if (parser->queue[i].ibv_attr) {
924 			parser->queue[i].ibv_attr->priority =
925 				attr->priority +
926 				hash_rxq_init[i].flow_priority -
927 				(parser->inner ? 1 : 0);
928 		}
929 	}
930 }
931 
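/*
 * Illustrative sketch, not used by the driver: the effective Verbs priority
 * computed above for a non-drop flow of a given hash Rx queue type. The
 * helper name is hypothetical.
 */
static unsigned int __rte_unused
example_effective_priority(const struct rte_flow_attr *attr,
			   enum hash_rxq_type type, int inner)
{
	return attr->priority + hash_rxq_init[type].flow_priority -
	       (inner ? 1 : 0);
}
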
932 /**
933  * Finalise verbs flow attributes.
934  *
935  * @param[in, out] parser
936  *   Internal parser structure.
937  */
938 static void
939 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
940 {
941 	const unsigned int ipv4 =
942 		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
943 	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
944 	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
945 	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
946 	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
947 	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
948 	unsigned int i;
949 
950 	/* Remove any other flow not matching the pattern. */
951 	if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {
952 		for (i = 0; i != hash_rxq_init_n; ++i) {
953 			if (i == HASH_RXQ_ETH)
954 				continue;
955 			rte_free(parser->queue[i].ibv_attr);
956 			parser->queue[i].ibv_attr = NULL;
957 		}
958 		return;
959 	}
960 	if (parser->layer == HASH_RXQ_ETH) {
961 		goto fill;
962 	} else {
963 		/*
964 		 * This layer becomes useless as the pattern define under
965 		 * layers.
966 		 */
967 		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
968 		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
969 	}
970 	/* Remove the opposite layer, e.g. IPv6 if the pattern is IPv4. */
971 	for (i = ohmin; i != (ohmax + 1); ++i) {
972 		if (!parser->queue[i].ibv_attr)
973 			continue;
974 		rte_free(parser->queue[i].ibv_attr);
975 		parser->queue[i].ibv_attr = NULL;
976 	}
977 	/* Remove impossible flow according to the RSS configuration. */
978 	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
979 	    parser->rss_conf.types) {
980 		/* Remove any other flow. */
981 		for (i = hmin; i != (hmax + 1); ++i) {
982 			if ((i == parser->layer) ||
983 			     (!parser->queue[i].ibv_attr))
984 				continue;
985 			rte_free(parser->queue[i].ibv_attr);
986 			parser->queue[i].ibv_attr = NULL;
987 		}
988 	} else if (!parser->queue[ip].ibv_attr) {
989 		/* No RSS possible with the current configuration. */
990 		parser->rss_conf.queue_num = 1;
991 		return;
992 	}
993 fill:
994 	/*
995 	 * Fill missing layers in verbs specifications, or compute the correct
996 	 * offset to allocate the memory space for the attributes and
997 	 * specifications.
998 	 */
999 	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1000 		union {
1001 			struct ibv_flow_spec_ipv4_ext ipv4;
1002 			struct ibv_flow_spec_ipv6 ipv6;
1003 			struct ibv_flow_spec_tcp_udp udp_tcp;
1004 		} specs;
1005 		void *dst;
1006 		uint16_t size;
1007 
1008 		if (i == parser->layer)
1009 			continue;
1010 		if (parser->layer == HASH_RXQ_ETH) {
1011 			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1012 				size = sizeof(struct ibv_flow_spec_ipv4_ext);
1013 				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1014 					.type = IBV_FLOW_SPEC_IPV4_EXT,
1015 					.size = size,
1016 				};
1017 			} else {
1018 				size = sizeof(struct ibv_flow_spec_ipv6);
1019 				specs.ipv6 = (struct ibv_flow_spec_ipv6){
1020 					.type = IBV_FLOW_SPEC_IPV6,
1021 					.size = size,
1022 				};
1023 			}
1024 			if (parser->queue[i].ibv_attr) {
1025 				dst = (void *)((uintptr_t)
1026 					       parser->queue[i].ibv_attr +
1027 					       parser->queue[i].offset);
1028 				memcpy(dst, &specs, size);
1029 				++parser->queue[i].ibv_attr->num_of_specs;
1030 			}
1031 			parser->queue[i].offset += size;
1032 		}
1033 		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1034 		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1035 			size = sizeof(struct ibv_flow_spec_tcp_udp);
1036 			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1037 				.type = ((i == HASH_RXQ_UDPV4 ||
1038 					  i == HASH_RXQ_UDPV6) ?
1039 					 IBV_FLOW_SPEC_UDP :
1040 					 IBV_FLOW_SPEC_TCP),
1041 				.size = size,
1042 			};
1043 			if (parser->queue[i].ibv_attr) {
1044 				dst = (void *)((uintptr_t)
1045 					       parser->queue[i].ibv_attr +
1046 					       parser->queue[i].offset);
1047 				memcpy(dst, &specs, size);
1048 				++parser->queue[i].ibv_attr->num_of_specs;
1049 			}
1050 			parser->queue[i].offset += size;
1051 		}
1052 	}
1053 }
1054 
1055 /**
1056  * Validate and convert a flow supported by the NIC.
1057  *
1058  * @param dev
1059  *   Pointer to Ethernet device.
1060  * @param[in] attr
1061  *   Flow rule attributes.
1062  * @param[in] pattern
1063  *   Pattern specification (list terminated by the END pattern item).
1064  * @param[in] actions
1065  *   Associated actions (list terminated by the END action).
1066  * @param[out] error
1067  *   Perform verbose error reporting if not NULL.
1068  * @param[in, out] parser
1069  *   Internal parser structure.
1070  *
1071  * @return
1072  *   0 on success, a negative errno value otherwise and rte_errno is set.
1073  */
1074 static int
1075 mlx5_flow_convert(struct rte_eth_dev *dev,
1076 		  const struct rte_flow_attr *attr,
1077 		  const struct rte_flow_item items[],
1078 		  const struct rte_flow_action actions[],
1079 		  struct rte_flow_error *error,
1080 		  struct mlx5_flow_parse *parser)
1081 {
1082 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1083 	unsigned int i;
1084 	int ret;
1085 
1086 	/* First step. Validate the attributes, items and actions. */
1087 	*parser = (struct mlx5_flow_parse){
1088 		.create = parser->create,
1089 		.layer = HASH_RXQ_ETH,
1090 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
1091 	};
1092 	ret = mlx5_flow_convert_attributes(attr, error);
1093 	if (ret)
1094 		return ret;
1095 	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1096 	if (ret)
1097 		return ret;
1098 	ret = mlx5_flow_convert_items_validate(items, error, parser);
1099 	if (ret)
1100 		return ret;
1101 	mlx5_flow_convert_finalise(parser);
1102 	/*
1103 	 * Second step.
1104 	 * Allocate the memory space to store verbs specifications.
1105 	 */
1106 	if (parser->drop) {
1107 		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1108 
1109 		parser->queue[HASH_RXQ_ETH].ibv_attr =
1110 			mlx5_flow_convert_allocate(offset, error);
1111 		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1112 			goto exit_enomem;
1113 		parser->queue[HASH_RXQ_ETH].offset =
1114 			sizeof(struct ibv_flow_attr);
1115 	} else {
1116 		for (i = 0; i != hash_rxq_init_n; ++i) {
1117 			unsigned int offset;
1118 
1119 			if (!(parser->rss_conf.types &
1120 			      hash_rxq_init[i].dpdk_rss_hf) &&
1121 			    (i != HASH_RXQ_ETH))
1122 				continue;
1123 			offset = parser->queue[i].offset;
1124 			parser->queue[i].ibv_attr =
1125 				mlx5_flow_convert_allocate(offset, error);
1126 			if (!parser->queue[i].ibv_attr)
1127 				goto exit_enomem;
1128 			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1129 		}
1130 	}
1131 	/* Third step. Parse the pattern items and fill the specifications. */
1132 	parser->inner = 0;
1133 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1134 		struct mlx5_flow_data data = {
1135 			.parser = parser,
1136 			.error = error,
1137 		};
1138 
1139 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1140 			continue;
1141 		cur_item = &mlx5_flow_items[items->type];
1142 		ret = cur_item->convert(items,
1143 					(cur_item->default_mask ?
1144 					 cur_item->default_mask :
1145 					 cur_item->mask),
1146 					 &data);
1147 		if (ret)
1148 			goto exit_free;
1149 	}
1150 	if (parser->mark)
1151 		mlx5_flow_create_flag_mark(parser, parser->mark_id);
1152 	if (parser->count && parser->create) {
1153 		mlx5_flow_create_count(dev, parser);
1154 		if (!parser->cs)
1155 			goto exit_count_error;
1156 	}
1157 	/*
1158 	 * Last step. Complete missing specifications to reach the RSS
1159 	 * configuration.
1160 	 */
1161 	if (!parser->drop)
1162 		mlx5_flow_convert_finalise(parser);
1163 	mlx5_flow_update_priority(parser, attr);
1164 exit_free:
1165 	/* Only verification is expected, all resources should be released. */
1166 	if (!parser->create) {
1167 		for (i = 0; i != hash_rxq_init_n; ++i) {
1168 			if (parser->queue[i].ibv_attr) {
1169 				rte_free(parser->queue[i].ibv_attr);
1170 				parser->queue[i].ibv_attr = NULL;
1171 			}
1172 		}
1173 	}
1174 	return ret;
1175 exit_enomem:
1176 	for (i = 0; i != hash_rxq_init_n; ++i) {
1177 		if (parser->queue[i].ibv_attr) {
1178 			rte_free(parser->queue[i].ibv_attr);
1179 			parser->queue[i].ibv_attr = NULL;
1180 		}
1181 	}
1182 	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1183 			   NULL, "cannot allocate verbs spec attributes");
1184 	return -rte_errno;
1185 exit_count_error:
1186 	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1187 			   NULL, "cannot create counter");
1188 	return -rte_errno;
1189 }
1190 
1191 /**
1192  * Copy the specification created into the flow.
1193  *
1194  * @param parser
1195  *   Internal parser structure.
1196  * @param src
1197  *   Create specification.
1198  * @param size
1199  *   Size in bytes of the specification to copy.
1200  */
1201 static void
1202 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1203 		      unsigned int size)
1204 {
1205 	unsigned int i;
1206 	void *dst;
1207 
1208 	for (i = 0; i != hash_rxq_init_n; ++i) {
1209 		if (!parser->queue[i].ibv_attr)
1210 			continue;
1211 		/* Specification must be the same L3 type or none. */
1212 		if (parser->layer == HASH_RXQ_ETH ||
1213 		    (hash_rxq_init[parser->layer].ip_version ==
1214 		     hash_rxq_init[i].ip_version) ||
1215 		    (hash_rxq_init[i].ip_version == 0)) {
1216 			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1217 					parser->queue[i].offset);
1218 			memcpy(dst, src, size);
1219 			++parser->queue[i].ibv_attr->num_of_specs;
1220 			parser->queue[i].offset += size;
1221 		}
1222 	}
1223 }
1224 
1225 /**
1226  * Convert Ethernet item to Verbs specification.
1227  *
1228  * @param item[in]
1229  *   Item specification.
1230  * @param default_mask[in]
1231  *   Default bit-masks to use when item->mask is not provided.
1232  * @param data[in, out]
1233  *   User structure.
1234  *
1235  * @return
1236  *   0 on success, a negative errno value otherwise and rte_errno is set.
1237  */
1238 static int
1239 mlx5_flow_create_eth(const struct rte_flow_item *item,
1240 		     const void *default_mask,
1241 		     struct mlx5_flow_data *data)
1242 {
1243 	const struct rte_flow_item_eth *spec = item->spec;
1244 	const struct rte_flow_item_eth *mask = item->mask;
1245 	struct mlx5_flow_parse *parser = data->parser;
1246 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1247 	struct ibv_flow_spec_eth eth = {
1248 		.type = parser->inner | IBV_FLOW_SPEC_ETH,
1249 		.size = eth_size,
1250 	};
1251 
1252 	/* Don't update layer for the inner pattern. */
1253 	if (!parser->inner)
1254 		parser->layer = HASH_RXQ_ETH;
1255 	if (spec) {
1256 		unsigned int i;
1257 
1258 		if (!mask)
1259 			mask = default_mask;
1260 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1261 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1262 		eth.val.ether_type = spec->type;
1263 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1264 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1265 		eth.mask.ether_type = mask->type;
1266 		/* Remove unwanted bits from values. */
1267 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1268 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1269 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
1270 		}
1271 		eth.val.ether_type &= eth.mask.ether_type;
1272 	}
1273 	mlx5_flow_create_copy(parser, &eth, eth_size);
1274 	return 0;
1275 }
1276 
1277 /**
1278  * Convert VLAN item to Verbs specification.
1279  *
1280  * @param item[in]
1281  *   Item specification.
1282  * @param default_mask[in]
1283  *   Default bit-masks to use when item->mask is not provided.
1284  * @param data[in, out]
1285  *   User structure.
1286  *
1287  * @return
1288  *   0 on success, a negative errno value otherwise and rte_errno is set.
1289  */
1290 static int
1291 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1292 		      const void *default_mask,
1293 		      struct mlx5_flow_data *data)
1294 {
1295 	const struct rte_flow_item_vlan *spec = item->spec;
1296 	const struct rte_flow_item_vlan *mask = item->mask;
1297 	struct mlx5_flow_parse *parser = data->parser;
1298 	struct ibv_flow_spec_eth *eth;
1299 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1300 	const char *msg = "VLAN cannot be empty";
1301 
1302 	if (spec) {
1303 		unsigned int i;
1304 		if (!mask)
1305 			mask = default_mask;
1306 
1307 		for (i = 0; i != hash_rxq_init_n; ++i) {
1308 			if (!parser->queue[i].ibv_attr)
1309 				continue;
1310 
1311 			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1312 				       parser->queue[i].offset - eth_size);
1313 			eth->val.vlan_tag = spec->tci;
1314 			eth->mask.vlan_tag = mask->tci;
1315 			eth->val.vlan_tag &= eth->mask.vlan_tag;
1316 			/*
1317 			 * From a verbs perspective, an empty VLAN is
1318 			 * equivalent to a packet without a VLAN layer.
1319 			 */
1320 			if (!eth->mask.vlan_tag)
1321 				goto error;
1322 			/* Outer TPID cannot be matched. */
1323 			if (eth->mask.ether_type) {
1324 				msg = "VLAN TPID matching is not supported";
1325 				goto error;
1326 			}
1327 			eth->val.ether_type = spec->inner_type;
1328 			eth->mask.ether_type = mask->inner_type;
1329 			eth->val.ether_type &= eth->mask.ether_type;
1330 		}
1331 		return 0;
1332 	}
1333 error:
1334 	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1335 				  item, msg);
1336 }
1337 
1338 /**
1339  * Convert IPv4 item to Verbs specification.
1340  *
1341  * @param item[in]
1342  *   Item specification.
1343  * @param default_mask[in]
1344  *   Default bit-masks to use when item->mask is not provided.
1345  * @param data[in, out]
1346  *   User structure.
1347  *
1348  * @return
1349  *   0 on success, a negative errno value otherwise and rte_errno is set.
1350  */
1351 static int
1352 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1353 		      const void *default_mask,
1354 		      struct mlx5_flow_data *data)
1355 {
1356 	const struct rte_flow_item_ipv4 *spec = item->spec;
1357 	const struct rte_flow_item_ipv4 *mask = item->mask;
1358 	struct mlx5_flow_parse *parser = data->parser;
1359 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1360 	struct ibv_flow_spec_ipv4_ext ipv4 = {
1361 		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1362 		.size = ipv4_size,
1363 	};
1364 
1365 	/* Don't update layer for the inner pattern. */
1366 	if (!parser->inner)
1367 		parser->layer = HASH_RXQ_IPV4;
1368 	if (spec) {
1369 		if (!mask)
1370 			mask = default_mask;
1371 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1372 			.src_ip = spec->hdr.src_addr,
1373 			.dst_ip = spec->hdr.dst_addr,
1374 			.proto = spec->hdr.next_proto_id,
1375 			.tos = spec->hdr.type_of_service,
1376 		};
1377 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1378 			.src_ip = mask->hdr.src_addr,
1379 			.dst_ip = mask->hdr.dst_addr,
1380 			.proto = mask->hdr.next_proto_id,
1381 			.tos = mask->hdr.type_of_service,
1382 		};
1383 		/* Remove unwanted bits from values. */
1384 		ipv4.val.src_ip &= ipv4.mask.src_ip;
1385 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1386 		ipv4.val.proto &= ipv4.mask.proto;
1387 		ipv4.val.tos &= ipv4.mask.tos;
1388 	}
1389 	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1390 	return 0;
1391 }
1392 
1393 /**
1394  * Convert IPv6 item to Verbs specification.
1395  *
1396  * @param item[in]
1397  *   Item specification.
1398  * @param default_mask[in]
1399  *   Default bit-masks to use when item->mask is not provided.
1400  * @param data[in, out]
1401  *   User structure.
1402  *
1403  * @return
1404  *   0 on success, a negative errno value otherwise and rte_errno is set.
1405  */
1406 static int
1407 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1408 		      const void *default_mask,
1409 		      struct mlx5_flow_data *data)
1410 {
1411 	const struct rte_flow_item_ipv6 *spec = item->spec;
1412 	const struct rte_flow_item_ipv6 *mask = item->mask;
1413 	struct mlx5_flow_parse *parser = data->parser;
1414 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1415 	struct ibv_flow_spec_ipv6 ipv6 = {
1416 		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
1417 		.size = ipv6_size,
1418 	};
1419 
1420 	/* Don't update layer for the inner pattern. */
1421 	if (!parser->inner)
1422 		parser->layer = HASH_RXQ_IPV6;
1423 	if (spec) {
1424 		unsigned int i;
1425 		uint32_t vtc_flow_val;
1426 		uint32_t vtc_flow_mask;
1427 
1428 		if (!mask)
1429 			mask = default_mask;
1430 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1431 		       RTE_DIM(ipv6.val.src_ip));
1432 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1433 		       RTE_DIM(ipv6.val.dst_ip));
1434 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1435 		       RTE_DIM(ipv6.mask.src_ip));
1436 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1437 		       RTE_DIM(ipv6.mask.dst_ip));
1438 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1439 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1440 		ipv6.val.flow_label =
1441 			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1442 					 IPV6_HDR_FL_SHIFT);
1443 		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1444 					 IPV6_HDR_TC_SHIFT;
1445 		ipv6.val.next_hdr = spec->hdr.proto;
1446 		ipv6.val.hop_limit = spec->hdr.hop_limits;
1447 		ipv6.mask.flow_label =
1448 			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1449 					 IPV6_HDR_FL_SHIFT);
1450 		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1451 					  IPV6_HDR_TC_SHIFT;
1452 		ipv6.mask.next_hdr = mask->hdr.proto;
1453 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
1454 		/* Remove unwanted bits from values. */
1455 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1456 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1457 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1458 		}
1459 		ipv6.val.flow_label &= ipv6.mask.flow_label;
1460 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1461 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1462 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1463 	}
1464 	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1465 	return 0;
1466 }
1467 
1468 /**
1469  * Convert UDP item to Verbs specification.
1470  *
1471  * @param item[in]
1472  *   Item specification.
1473  * @param default_mask[in]
1474  *   Default bit-masks to use when item->mask is not provided.
1475  * @param data[in, out]
1476  *   User structure.
1477  *
1478  * @return
1479  *   0 on success, a negative errno value otherwise and rte_errno is set.
1480  */
1481 static int
1482 mlx5_flow_create_udp(const struct rte_flow_item *item,
1483 		     const void *default_mask,
1484 		     struct mlx5_flow_data *data)
1485 {
1486 	const struct rte_flow_item_udp *spec = item->spec;
1487 	const struct rte_flow_item_udp *mask = item->mask;
1488 	struct mlx5_flow_parse *parser = data->parser;
1489 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1490 	struct ibv_flow_spec_tcp_udp udp = {
1491 		.type = parser->inner | IBV_FLOW_SPEC_UDP,
1492 		.size = udp_size,
1493 	};
1494 
1495 	/* Don't update layer for the inner pattern. */
1496 	if (!parser->inner) {
1497 		if (parser->layer == HASH_RXQ_IPV4)
1498 			parser->layer = HASH_RXQ_UDPV4;
1499 		else
1500 			parser->layer = HASH_RXQ_UDPV6;
1501 	}
1502 	if (spec) {
1503 		if (!mask)
1504 			mask = default_mask;
1505 		udp.val.dst_port = spec->hdr.dst_port;
1506 		udp.val.src_port = spec->hdr.src_port;
1507 		udp.mask.dst_port = mask->hdr.dst_port;
1508 		udp.mask.src_port = mask->hdr.src_port;
1509 		/* Remove unwanted bits from values. */
1510 		udp.val.src_port &= udp.mask.src_port;
1511 		udp.val.dst_port &= udp.mask.dst_port;
1512 	}
1513 	mlx5_flow_create_copy(parser, &udp, udp_size);
1514 	return 0;
1515 }
1516 
1517 /**
1518  * Convert TCP item to Verbs specification.
1519  *
1520  * @param item[in]
1521  *   Item specification.
1522  * @param default_mask[in]
1523  *   Default bit-masks to use when item->mask is not provided.
1524  * @param data[in, out]
1525  *   User structure.
1526  *
1527  * @return
1528  *   0 on success, a negative errno value otherwise and rte_errno is set.
1529  */
1530 static int
1531 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1532 		     const void *default_mask,
1533 		     struct mlx5_flow_data *data)
1534 {
1535 	const struct rte_flow_item_tcp *spec = item->spec;
1536 	const struct rte_flow_item_tcp *mask = item->mask;
1537 	struct mlx5_flow_parse *parser = data->parser;
1538 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1539 	struct ibv_flow_spec_tcp_udp tcp = {
1540 		.type = parser->inner | IBV_FLOW_SPEC_TCP,
1541 		.size = tcp_size,
1542 	};
1543 
1544 	/* Don't update layer for the inner pattern. */
1545 	if (!parser->inner) {
1546 		if (parser->layer == HASH_RXQ_IPV4)
1547 			parser->layer = HASH_RXQ_TCPV4;
1548 		else
1549 			parser->layer = HASH_RXQ_TCPV6;
1550 	}
1551 	if (spec) {
1552 		if (!mask)
1553 			mask = default_mask;
1554 		tcp.val.dst_port = spec->hdr.dst_port;
1555 		tcp.val.src_port = spec->hdr.src_port;
1556 		tcp.mask.dst_port = mask->hdr.dst_port;
1557 		tcp.mask.src_port = mask->hdr.src_port;
1558 		/* Remove unwanted bits from values. */
1559 		tcp.val.src_port &= tcp.mask.src_port;
1560 		tcp.val.dst_port &= tcp.mask.dst_port;
1561 	}
1562 	mlx5_flow_create_copy(parser, &tcp, tcp_size);
1563 	return 0;
1564 }
1565 
1566 /**
1567  * Convert VXLAN item to Verbs specification.
1568  *
1569  * @param item[in]
1570  *   Item specification.
1571  * @param default_mask[in]
1572  *   Default bit-masks to use when item->mask is not provided.
1573  * @param data[in, out]
1574  *   User structure.
1575  *
1576  * @return
1577  *   0 on success, a negative errno value otherwise and rte_errno is set.
1578  */
1579 static int
1580 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1581 		       const void *default_mask,
1582 		       struct mlx5_flow_data *data)
1583 {
1584 	const struct rte_flow_item_vxlan *spec = item->spec;
1585 	const struct rte_flow_item_vxlan *mask = item->mask;
1586 	struct mlx5_flow_parse *parser = data->parser;
1587 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1588 	struct ibv_flow_spec_tunnel vxlan = {
1589 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1590 		.size = size,
1591 	};
1592 	union vni {
1593 		uint32_t vlan_id;
1594 		uint8_t vni[4];
1595 	} id;
1596 
1597 	id.vni[0] = 0;
1598 	parser->inner = IBV_FLOW_SPEC_INNER;
1599 	if (spec) {
1600 		if (!mask)
1601 			mask = default_mask;
1602 		memcpy(&id.vni[1], spec->vni, 3);
1603 		vxlan.val.tunnel_id = id.vlan_id;
1604 		memcpy(&id.vni[1], mask->vni, 3);
1605 		vxlan.mask.tunnel_id = id.vlan_id;
1606 		/* Remove unwanted bits from values. */
1607 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1608 	}
1609 	/*
1610 	 * Tunnel id 0 is equivalent to not adding a VXLAN layer. If only this
1611 	 * layer is defined in the Verbs specification, it is interpreted as a
1612 	 * wildcard and all packets will match this rule. If it follows a full
1613 	 * stack of layers (e.g. eth / ipv4 / udp), all packets matching those
1614 	 * layers will also match this rule.
1615 	 * To avoid such a situation, VNI 0 is currently refused.
1616 	 */
1617 	if (!vxlan.val.tunnel_id)
1618 		return rte_flow_error_set(data->error, EINVAL,
1619 					  RTE_FLOW_ERROR_TYPE_ITEM,
1620 					  item,
1621 					  "VxLAN vni cannot be 0");
1622 	mlx5_flow_create_copy(parser, &vxlan, size);
1623 	return 0;
1624 }
1625 
1626 /**
1627  * Convert mark/flag action to Verbs specification.
1628  *
1629  * @param parser
1630  *   Internal parser structure.
1631  * @param mark_id
1632  *   Mark identifier.
1633  *
1634  * @return
1635  *   0 on success, a negative errno value otherwise and rte_errno is set.
1636  */
1637 static int
1638 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1639 {
1640 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1641 	struct ibv_flow_spec_action_tag tag = {
1642 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1643 		.size = size,
1644 		.tag_id = mlx5_flow_mark_set(mark_id),
1645 	};
1646 
1647 	assert(parser->mark);
1648 	mlx5_flow_create_copy(parser, &tag, size);
1649 	return 0;
1650 }
1651 
1652 /**
1653  * Convert count action to Verbs specification.
1654  *
1655  * @param dev
1656  *   Pointer to Ethernet device.
1657  * @param parser
1658  *   Pointer to MLX5 flow parser structure.
1659  *
1660  * @return
1661  *   0 on success, a negative errno value otherwise and rte_errno is set.
1662  */
1663 static int
1664 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1665 		       struct mlx5_flow_parse *parser __rte_unused)
1666 {
1667 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1668 	struct priv *priv = dev->data->dev_private;
1669 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1670 	struct ibv_counter_set_init_attr init_attr = {0};
1671 	struct ibv_flow_spec_counter_action counter = {
1672 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
1673 		.size = size,
1674 		.counter_set_handle = 0,
1675 	};
1676 
1677 	init_attr.counter_set_id = 0;
1678 	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1679 	if (!parser->cs) {
1680 		rte_errno = EINVAL;
1681 		return -rte_errno;
1682 	}
1683 	counter.counter_set_handle = parser->cs->handle;
1684 	mlx5_flow_create_copy(parser, &counter, size);
1685 #endif
1686 	return 0;
1687 }
1688 
1689 /**
1690  * Complete flow rule creation with a drop queue.
1691  *
1692  * @param dev
1693  *   Pointer to Ethernet device.
1694  * @param parser
1695  *   Internal parser structure.
1696  * @param flow
1697  *   Pointer to the rte_flow.
1698  * @param[out] error
1699  *   Perform verbose error reporting if not NULL.
1700  *
1701  * @return
1702  *   0 on success, a negative errno value otherwise and rte_errno is set.
1703  */
1704 static int
1705 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1706 				   struct mlx5_flow_parse *parser,
1707 				   struct rte_flow *flow,
1708 				   struct rte_flow_error *error)
1709 {
1710 	struct priv *priv = dev->data->dev_private;
1711 	struct ibv_flow_spec_action_drop *drop;
1712 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1713 
1714 	assert(priv->pd);
1715 	assert(priv->ctx);
1716 	flow->drop = 1;
1717 	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1718 			parser->queue[HASH_RXQ_ETH].offset);
1719 	*drop = (struct ibv_flow_spec_action_drop){
1720 			.type = IBV_FLOW_SPEC_ACTION_DROP,
1721 			.size = size,
1722 	};
1723 	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1724 	parser->queue[HASH_RXQ_ETH].offset += size;
1725 	flow->frxq[HASH_RXQ_ETH].ibv_attr =
1726 		parser->queue[HASH_RXQ_ETH].ibv_attr;
1727 	if (parser->count)
1728 		flow->cs = parser->cs;
1729 	if (!priv->dev->data->dev_started)
1730 		return 0;
1731 	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1732 	flow->frxq[HASH_RXQ_ETH].ibv_flow =
1733 		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1734 				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
1735 	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1736 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1737 				   NULL, "flow rule creation failure");
1738 		goto error;
1739 	}
1740 	return 0;
1741 error:
1742 	assert(flow);
1743 	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1744 		claim_zero(mlx5_glue->destroy_flow
1745 			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1746 		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1747 	}
1748 	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1749 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1750 		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1751 	}
1752 	if (flow->cs) {
1753 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1754 		flow->cs = NULL;
1755 		parser->cs = NULL;
1756 	}
1757 	return -rte_errno;
1758 }
1759 
1760 /**
1761  * Create hash Rx queues when RSS is enabled.
1762  *
1763  * @param dev
1764  *   Pointer to Ethernet device.
1765  * @param parser
1766  *   Internal parser structure.
1767  * @param flow
1768  *   Pointer to the rte_flow.
1769  * @param[out] error
1770  *   Perform verbose error reporting if not NULL.
1771  *
1772  * @return
1773  *   0 on success, a negative errno value otherwise and rte_errno is set.
1774  */
1775 static int
1776 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1777 				  struct mlx5_flow_parse *parser,
1778 				  struct rte_flow *flow,
1779 				  struct rte_flow_error *error)
1780 {
1781 	struct priv *priv = dev->data->dev_private;
1782 	unsigned int i;
1783 
1784 	for (i = 0; i != hash_rxq_init_n; ++i) {
1785 		uint64_t hash_fields;
1786 
1787 		if (!parser->queue[i].ibv_attr)
1788 			continue;
1789 		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1790 		parser->queue[i].ibv_attr = NULL;
1791 		hash_fields = hash_rxq_init[i].hash_fields;
1792 		if (!priv->dev->data->dev_started)
1793 			continue;
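		/* Reuse a matching hash Rx queue or create a new one. */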
1794 		flow->frxq[i].hrxq =
1795 			mlx5_hrxq_get(dev,
1796 				      parser->rss_conf.key,
1797 				      parser->rss_conf.key_len,
1798 				      hash_fields,
1799 				      parser->rss_conf.queue,
1800 				      parser->rss_conf.queue_num);
1801 		if (flow->frxq[i].hrxq)
1802 			continue;
1803 		flow->frxq[i].hrxq =
1804 			mlx5_hrxq_new(dev,
1805 				      parser->rss_conf.key,
1806 				      parser->rss_conf.key_len,
1807 				      hash_fields,
1808 				      parser->rss_conf.queue,
1809 				      parser->rss_conf.queue_num);
1810 		if (!flow->frxq[i].hrxq) {
1811 			return rte_flow_error_set(error, ENOMEM,
1812 						  RTE_FLOW_ERROR_TYPE_HANDLE,
1813 						  NULL,
1814 						  "cannot create hash rxq");
1815 		}
1816 	}
1817 	return 0;
1818 }
1819 
1820 /**
1821  * Complete flow rule creation.
1822  *
1823  * @param dev
1824  *   Pointer to Ethernet device.
1825  * @param parser
1826  *   Internal parser structure.
1827  * @param flow
1828  *   Pointer to the rte_flow.
1829  * @param[out] error
1830  *   Perform verbose error reporting if not NULL.
1831  *
1832  * @return
1833  *   0 on success, a negative errno value otherwise and rte_errno is set.
1834  */
1835 static int
1836 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1837 			      struct mlx5_flow_parse *parser,
1838 			      struct rte_flow *flow,
1839 			      struct rte_flow_error *error)
1840 {
1841 	struct priv *priv = dev->data->dev_private;
1842 	int ret;
1843 	unsigned int i;
1844 	unsigned int flows_n = 0;
1845 
1846 	assert(priv->pd);
1847 	assert(priv->ctx);
1848 	assert(!parser->drop);
1849 	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1850 	if (ret)
1851 		goto error;
1852 	if (parser->count)
1853 		flow->cs = parser->cs;
1854 	if (!priv->dev->data->dev_started)
1855 		return 0;
1856 	for (i = 0; i != hash_rxq_init_n; ++i) {
1857 		if (!flow->frxq[i].hrxq)
1858 			continue;
1859 		flow->frxq[i].ibv_flow =
1860 			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1861 					       flow->frxq[i].ibv_attr);
1862 		if (!flow->frxq[i].ibv_flow) {
1863 			rte_flow_error_set(error, ENOMEM,
1864 					   RTE_FLOW_ERROR_TYPE_HANDLE,
1865 					   NULL, "flow rule creation failure");
1866 			goto error;
1867 		}
1868 		++flows_n;
1869 		DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
1870 			dev->data->port_id,
1871 			(void *)flow, i,
1872 			(void *)flow->frxq[i].hrxq,
1873 			(void *)flow->frxq[i].ibv_flow);
1874 	}
1875 	if (!flows_n) {
1876 		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1877 				   NULL, "internal error in flow creation");
1878 		goto error;
1879 	}
1880 	for (i = 0; i != parser->rss_conf.queue_num; ++i) {
1881 		struct mlx5_rxq_data *q =
1882 			(*priv->rxqs)[parser->rss_conf.queue[i]];
1883 
1884 		q->mark |= parser->mark;
1885 	}
1886 	return 0;
1887 error:
1888 	ret = rte_errno; /* Save rte_errno before cleanup. */
1889 	assert(flow);
1890 	for (i = 0; i != hash_rxq_init_n; ++i) {
1891 		if (flow->frxq[i].ibv_flow) {
1892 			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1893 
1894 			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1895 		}
1896 		if (flow->frxq[i].hrxq)
1897 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1898 		if (flow->frxq[i].ibv_attr)
1899 			rte_free(flow->frxq[i].ibv_attr);
1900 	}
1901 	if (flow->cs) {
1902 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1903 		flow->cs = NULL;
1904 		parser->cs = NULL;
1905 	}
1906 	rte_errno = ret; /* Restore rte_errno. */
1907 	return -rte_errno;
1908 }
1909 
1910 /**
1911  * Convert a flow.
1912  *
1913  * @param dev
1914  *   Pointer to Ethernet device.
1915  * @param list
1916  *   Pointer to a TAILQ flow list.
1917  * @param[in] attr
1918  *   Flow rule attributes.
1919  * @param[in] pattern
1920  *   Pattern specification (list terminated by the END pattern item).
1921  * @param[in] actions
1922  *   Associated actions (list terminated by the END action).
1923  * @param[out] error
1924  *   Perform verbose error reporting if not NULL.
1925  *
1926  * @return
1927  *   A flow on success, NULL otherwise and rte_errno is set.
1928  */
1929 static struct rte_flow *
1930 mlx5_flow_list_create(struct rte_eth_dev *dev,
1931 		      struct mlx5_flows *list,
1932 		      const struct rte_flow_attr *attr,
1933 		      const struct rte_flow_item items[],
1934 		      const struct rte_flow_action actions[],
1935 		      struct rte_flow_error *error)
1936 {
1937 	struct mlx5_flow_parse parser = { .create = 1, };
1938 	struct rte_flow *flow = NULL;
1939 	unsigned int i;
1940 	int ret;
1941 
1942 	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1943 	if (ret)
1944 		goto exit;
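	/* Allocate the flow with its queue array stored right after it. */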
1945 	flow = rte_calloc(__func__, 1,
1946 			  sizeof(*flow) +
1947 			  parser.rss_conf.queue_num * sizeof(uint16_t),
1948 			  0);
1949 	if (!flow) {
1950 		rte_flow_error_set(error, ENOMEM,
1951 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1952 				   NULL,
1953 				   "cannot allocate flow memory");
1954 		return NULL;
1955 	}
1956 	/* Copy configuration. */
1957 	flow->queues = (uint16_t (*)[])(flow + 1);
1958 	flow->rss_conf = (struct rte_flow_action_rss){
1959 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
1960 		.level = 0,
1961 		.types = parser.rss_conf.types,
1962 		.key_len = parser.rss_conf.key_len,
1963 		.queue_num = parser.rss_conf.queue_num,
1964 		.key = memcpy(flow->rss_key, parser.rss_conf.key,
1965 			      sizeof(*parser.rss_conf.key) *
1966 			      parser.rss_conf.key_len),
1967 		.queue = memcpy(flow->queues, parser.rss_conf.queue,
1968 				sizeof(*parser.rss_conf.queue) *
1969 				parser.rss_conf.queue_num),
1970 	};
1971 	flow->mark = parser.mark;
	/* Finalize the flow. */
1973 	if (parser.drop)
1974 		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1975 							 error);
1976 	else
1977 		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1978 	if (ret)
1979 		goto exit;
1980 	TAILQ_INSERT_TAIL(list, flow, next);
1981 	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
1982 		(void *)flow);
1983 	return flow;
1984 exit:
1985 	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
1986 		error->message);
1987 	for (i = 0; i != hash_rxq_init_n; ++i) {
1988 		if (parser.queue[i].ibv_attr)
1989 			rte_free(parser.queue[i].ibv_attr);
1990 	}
1991 	rte_free(flow);
1992 	return NULL;
1993 }
1994 
1995 /**
1996  * Validate a flow supported by the NIC.
1997  *
1998  * @see rte_flow_validate()
1999  * @see rte_flow_ops
2000  */
2001 int
2002 mlx5_flow_validate(struct rte_eth_dev *dev,
2003 		   const struct rte_flow_attr *attr,
2004 		   const struct rte_flow_item items[],
2005 		   const struct rte_flow_action actions[],
2006 		   struct rte_flow_error *error)
2007 {
2008 	struct mlx5_flow_parse parser = { .create = 0, };
2009 
2010 	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2011 }
2012 
2013 /**
2014  * Create a flow.
2015  *
2016  * @see rte_flow_create()
2017  * @see rte_flow_ops
2018  */
2019 struct rte_flow *
2020 mlx5_flow_create(struct rte_eth_dev *dev,
2021 		 const struct rte_flow_attr *attr,
2022 		 const struct rte_flow_item items[],
2023 		 const struct rte_flow_action actions[],
2024 		 struct rte_flow_error *error)
2025 {
2026 	struct priv *priv = dev->data->dev_private;
2027 
2028 	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2029 				     error);
2030 }
2031 
2032 /**
2033  * Destroy a flow in a list.
2034  *
2035  * @param dev
2036  *   Pointer to Ethernet device.
2037  * @param list
2038  *   Pointer to a TAILQ flow list.
2039  * @param[in] flow
2040  *   Flow to destroy.
2041  */
2042 static void
2043 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2044 		       struct rte_flow *flow)
2045 {
2046 	struct priv *priv = dev->data->dev_private;
2047 	unsigned int i;
2048 
2049 	if (flow->drop || !flow->mark)
2050 		goto free;
2051 	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2052 		struct rte_flow *tmp;
2053 		int mark = 0;
2054 
2055 		/*
2056 		 * To remove the mark from the queue, the queue must not be
2057 		 * present in any other marked flow (RSS or not).
2058 		 */
2059 		TAILQ_FOREACH(tmp, list, next) {
2060 			unsigned int j;
2061 			uint16_t *tqs = NULL;
2062 			uint16_t tq_n = 0;
2063 
2064 			if (!tmp->mark)
2065 				continue;
2066 			for (j = 0; j != hash_rxq_init_n; ++j) {
2067 				if (!tmp->frxq[j].hrxq)
2068 					continue;
2069 				tqs = tmp->frxq[j].hrxq->ind_table->queues;
2070 				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2071 			}
2072 			if (!tq_n)
2073 				continue;
2074 			for (j = 0; (j != tq_n) && !mark; j++)
2075 				if (tqs[j] == (*flow->queues)[i])
2076 					mark = 1;
2077 		}
2078 		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2079 	}
2080 free:
2081 	if (flow->drop) {
2082 		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2083 			claim_zero(mlx5_glue->destroy_flow
2084 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2085 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2086 	} else {
2087 		for (i = 0; i != hash_rxq_init_n; ++i) {
2088 			struct mlx5_flow *frxq = &flow->frxq[i];
2089 
2090 			if (frxq->ibv_flow)
2091 				claim_zero(mlx5_glue->destroy_flow
2092 					   (frxq->ibv_flow));
2093 			if (frxq->hrxq)
2094 				mlx5_hrxq_release(dev, frxq->hrxq);
2095 			if (frxq->ibv_attr)
2096 				rte_free(frxq->ibv_attr);
2097 		}
2098 	}
2099 	if (flow->cs) {
2100 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2101 		flow->cs = NULL;
2102 	}
2103 	TAILQ_REMOVE(list, flow, next);
2104 	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2105 		(void *)flow);
2106 	rte_free(flow);
2107 }
2108 
2109 /**
2110  * Destroy all flows.
2111  *
2112  * @param dev
2113  *   Pointer to Ethernet device.
2114  * @param list
2115  *   Pointer to a TAILQ flow list.
2116  */
2117 void
2118 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2119 {
2120 	while (!TAILQ_EMPTY(list)) {
2121 		struct rte_flow *flow;
2122 
2123 		flow = TAILQ_FIRST(list);
2124 		mlx5_flow_list_destroy(dev, list, flow);
2125 	}
2126 }
2127 
2128 /**
2129  * Create drop queue.
2130  *
2131  * @param dev
2132  *   Pointer to Ethernet device.
2133  *
2134  * @return
2135  *   0 on success, a negative errno value otherwise and rte_errno is set.
2136  */
2137 int
2138 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2139 {
2140 	struct priv *priv = dev->data->dev_private;
2141 	struct mlx5_hrxq_drop *fdq = NULL;
2142 
2143 	assert(priv->pd);
2144 	assert(priv->ctx);
2145 	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2146 	if (!fdq) {
2147 		DRV_LOG(WARNING,
2148 			"port %u cannot allocate memory for drop queue",
2149 			dev->data->port_id);
2150 		rte_errno = ENOMEM;
2151 		return -rte_errno;
2152 	}
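	/*
	 * Build the minimal Verbs objects backing the drop queue:
	 * CQ -> WQ -> indirection table -> hash QP.
	 */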
2153 	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2154 	if (!fdq->cq) {
2155 		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2156 			dev->data->port_id);
2157 		rte_errno = errno;
2158 		goto error;
2159 	}
2160 	fdq->wq = mlx5_glue->create_wq
2161 		(priv->ctx,
2162 		 &(struct ibv_wq_init_attr){
2163 			.wq_type = IBV_WQT_RQ,
2164 			.max_wr = 1,
2165 			.max_sge = 1,
2166 			.pd = priv->pd,
2167 			.cq = fdq->cq,
2168 		 });
2169 	if (!fdq->wq) {
2170 		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2171 			dev->data->port_id);
2172 		rte_errno = errno;
2173 		goto error;
2174 	}
2175 	fdq->ind_table = mlx5_glue->create_rwq_ind_table
2176 		(priv->ctx,
2177 		 &(struct ibv_rwq_ind_table_init_attr){
2178 			.log_ind_tbl_size = 0,
2179 			.ind_tbl = &fdq->wq,
2180 			.comp_mask = 0,
2181 		 });
2182 	if (!fdq->ind_table) {
2183 		DRV_LOG(WARNING,
2184 			"port %u cannot allocate indirection table for drop"
2185 			" queue",
2186 			dev->data->port_id);
2187 		rte_errno = errno;
2188 		goto error;
2189 	}
2190 	fdq->qp = mlx5_glue->create_qp_ex
2191 		(priv->ctx,
2192 		 &(struct ibv_qp_init_attr_ex){
2193 			.qp_type = IBV_QPT_RAW_PACKET,
2194 			.comp_mask =
2195 				IBV_QP_INIT_ATTR_PD |
2196 				IBV_QP_INIT_ATTR_IND_TABLE |
2197 				IBV_QP_INIT_ATTR_RX_HASH,
2198 			.rx_hash_conf = (struct ibv_rx_hash_conf){
2199 				.rx_hash_function =
2200 					IBV_RX_HASH_FUNC_TOEPLITZ,
2201 				.rx_hash_key_len = rss_hash_default_key_len,
2202 				.rx_hash_key = rss_hash_default_key,
2203 				.rx_hash_fields_mask = 0,
2204 				},
2205 			.rwq_ind_tbl = fdq->ind_table,
2206 			.pd = priv->pd
2207 		 });
2208 	if (!fdq->qp) {
2209 		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2210 			dev->data->port_id);
2211 		rte_errno = errno;
2212 		goto error;
2213 	}
2214 	priv->flow_drop_queue = fdq;
2215 	return 0;
2216 error:
2217 	if (fdq->qp)
2218 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2219 	if (fdq->ind_table)
2220 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2221 	if (fdq->wq)
2222 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2223 	if (fdq->cq)
2224 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2225 	if (fdq)
2226 		rte_free(fdq);
2227 	priv->flow_drop_queue = NULL;
2228 	return -rte_errno;
2229 }
2230 
2231 /**
2232  * Delete drop queue.
2233  *
2234  * @param dev
2235  *   Pointer to Ethernet device.
2236  */
2237 void
2238 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2239 {
2240 	struct priv *priv = dev->data->dev_private;
2241 	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2242 
2243 	if (!fdq)
2244 		return;
2245 	if (fdq->qp)
2246 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2247 	if (fdq->ind_table)
2248 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2249 	if (fdq->wq)
2250 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2251 	if (fdq->cq)
2252 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2253 	rte_free(fdq);
2254 	priv->flow_drop_queue = NULL;
2255 }
2256 
2257 /**
 * Remove all flows from the NIC, keeping them in the flow list (device stop).
2259  *
2260  * @param dev
2261  *   Pointer to Ethernet device.
2262  * @param list
2263  *   Pointer to a TAILQ flow list.
2264  */
2265 void
2266 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2267 {
2268 	struct priv *priv = dev->data->dev_private;
2269 	struct rte_flow *flow;
2270 
2271 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2272 		unsigned int i;
2273 		struct mlx5_ind_table_ibv *ind_tbl = NULL;
2274 
2275 		if (flow->drop) {
2276 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2277 				continue;
2278 			claim_zero(mlx5_glue->destroy_flow
2279 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2280 			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2281 			DRV_LOG(DEBUG, "port %u flow %p removed",
2282 				dev->data->port_id, (void *)flow);
2283 			/* Next flow. */
2284 			continue;
2285 		}
2286 		/* Verify the flow has not already been cleaned. */
2287 		for (i = 0; i != hash_rxq_init_n; ++i) {
2288 			if (!flow->frxq[i].ibv_flow)
2289 				continue;
2290 			/*
			 * Keep a reference to the indirection table: it is
			 * needed to clear the mark flag on the Rx queues and
			 * saves an extra lookup loop below.
2295 			 */
2296 			ind_tbl = flow->frxq[i].hrxq->ind_table;
2297 			break;
2298 		}
2299 		if (i == hash_rxq_init_n)
2300 			return;
2301 		if (flow->mark) {
2302 			assert(ind_tbl);
2303 			for (i = 0; i != ind_tbl->queues_n; ++i)
2304 				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2305 		}
2306 		for (i = 0; i != hash_rxq_init_n; ++i) {
2307 			if (!flow->frxq[i].ibv_flow)
2308 				continue;
2309 			claim_zero(mlx5_glue->destroy_flow
2310 				   (flow->frxq[i].ibv_flow));
2311 			flow->frxq[i].ibv_flow = NULL;
2312 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2313 			flow->frxq[i].hrxq = NULL;
2314 		}
2315 		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2316 			(void *)flow);
2317 	}
2318 }
2319 
2320 /**
 * Re-apply all flows to the NIC (device start).
2322  *
2323  * @param dev
2324  *   Pointer to Ethernet device.
2325  * @param list
2326  *   Pointer to a TAILQ flow list.
2327  *
2328  * @return
2329  *   0 on success, a negative errno value otherwise and rte_errno is set.
2330  */
2331 int
2332 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2333 {
2334 	struct priv *priv = dev->data->dev_private;
2335 	struct rte_flow *flow;
2336 
2337 	TAILQ_FOREACH(flow, list, next) {
2338 		unsigned int i;
2339 
2340 		if (flow->drop) {
2341 			flow->frxq[HASH_RXQ_ETH].ibv_flow =
2342 				mlx5_glue->create_flow
2343 				(priv->flow_drop_queue->qp,
2344 				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2345 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2346 				DRV_LOG(DEBUG,
2347 					"port %u flow %p cannot be applied",
2348 					dev->data->port_id, (void *)flow);
2349 				rte_errno = EINVAL;
2350 				return -rte_errno;
2351 			}
2352 			DRV_LOG(DEBUG, "port %u flow %p applied",
2353 				dev->data->port_id, (void *)flow);
2354 			/* Next flow. */
2355 			continue;
2356 		}
2357 		for (i = 0; i != hash_rxq_init_n; ++i) {
2358 			if (!flow->frxq[i].ibv_attr)
2359 				continue;
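			/*
			 * Get or create the hash Rx queue, then re-create
			 * the Verbs flow on it.
			 */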
2360 			flow->frxq[i].hrxq =
2361 				mlx5_hrxq_get(dev, flow->rss_conf.key,
2362 					      flow->rss_conf.key_len,
2363 					      hash_rxq_init[i].hash_fields,
2364 					      flow->rss_conf.queue,
2365 					      flow->rss_conf.queue_num);
2366 			if (flow->frxq[i].hrxq)
2367 				goto flow_create;
2368 			flow->frxq[i].hrxq =
2369 				mlx5_hrxq_new(dev, flow->rss_conf.key,
2370 					      flow->rss_conf.key_len,
2371 					      hash_rxq_init[i].hash_fields,
2372 					      flow->rss_conf.queue,
2373 					      flow->rss_conf.queue_num);
2374 			if (!flow->frxq[i].hrxq) {
2375 				DRV_LOG(DEBUG,
2376 					"port %u flow %p cannot be applied",
2377 					dev->data->port_id, (void *)flow);
2378 				rte_errno = EINVAL;
2379 				return -rte_errno;
2380 			}
2381 flow_create:
2382 			flow->frxq[i].ibv_flow =
2383 				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2384 						       flow->frxq[i].ibv_attr);
2385 			if (!flow->frxq[i].ibv_flow) {
2386 				DRV_LOG(DEBUG,
2387 					"port %u flow %p cannot be applied",
2388 					dev->data->port_id, (void *)flow);
2389 				rte_errno = EINVAL;
2390 				return -rte_errno;
2391 			}
2392 			DRV_LOG(DEBUG, "port %u flow %p applied",
2393 				dev->data->port_id, (void *)flow);
2394 		}
2395 		if (!flow->mark)
2396 			continue;
2397 		for (i = 0; i != flow->rss_conf.queue_num; ++i)
2398 			(*priv->rxqs)[flow->rss_conf.queue[i]]->mark = 1;
2399 	}
2400 	return 0;
2401 }
2402 
2403 /**
 * Verify the flow list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of flows not released.
2410  */
2411 int
2412 mlx5_flow_verify(struct rte_eth_dev *dev)
2413 {
2414 	struct priv *priv = dev->data->dev_private;
2415 	struct rte_flow *flow;
2416 	int ret = 0;
2417 
2418 	TAILQ_FOREACH(flow, &priv->flows, next) {
2419 		DRV_LOG(DEBUG, "port %u flow %p still referenced",
2420 			dev->data->port_id, (void *)flow);
2421 		++ret;
2422 	}
2423 	return ret;
2424 }
2425 
2426 /**
2427  * Enable a control flow configured from the control plane.
2428  *
2429  * @param dev
2430  *   Pointer to Ethernet device.
2431  * @param eth_spec
2432  *   An Ethernet flow spec to apply.
2433  * @param eth_mask
2434  *   An Ethernet flow mask to apply.
2435  * @param vlan_spec
2436  *   A VLAN flow spec to apply.
2437  * @param vlan_mask
2438  *   A VLAN flow mask to apply.
2439  *
2440  * @return
2441  *   0 on success, a negative errno value otherwise and rte_errno is set.
2442  */
2443 int
2444 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2445 		    struct rte_flow_item_eth *eth_spec,
2446 		    struct rte_flow_item_eth *eth_mask,
2447 		    struct rte_flow_item_vlan *vlan_spec,
2448 		    struct rte_flow_item_vlan *vlan_mask)
2449 {
2450 	struct priv *priv = dev->data->dev_private;
2451 	const struct rte_flow_attr attr = {
2452 		.ingress = 1,
2453 		.priority = MLX5_CTRL_FLOW_PRIORITY,
2454 	};
2455 	struct rte_flow_item items[] = {
2456 		{
2457 			.type = RTE_FLOW_ITEM_TYPE_ETH,
2458 			.spec = eth_spec,
2459 			.last = NULL,
2460 			.mask = eth_mask,
2461 		},
2462 		{
2463 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2464 				RTE_FLOW_ITEM_TYPE_END,
2465 			.spec = vlan_spec,
2466 			.last = NULL,
2467 			.mask = vlan_mask,
2468 		},
2469 		{
2470 			.type = RTE_FLOW_ITEM_TYPE_END,
2471 		},
2472 	};
2473 	uint16_t queue[priv->reta_idx_n];
2474 	struct rte_flow_action_rss action_rss = {
2475 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2476 		.level = 0,
2477 		.types = priv->rss_conf.rss_hf,
2478 		.key_len = priv->rss_conf.rss_key_len,
2479 		.queue_num = priv->reta_idx_n,
2480 		.key = priv->rss_conf.rss_key,
2481 		.queue = queue,
2482 	};
2483 	struct rte_flow_action actions[] = {
2484 		{
2485 			.type = RTE_FLOW_ACTION_TYPE_RSS,
2486 			.conf = &action_rss,
2487 		},
2488 		{
2489 			.type = RTE_FLOW_ACTION_TYPE_END,
2490 		},
2491 	};
2492 	struct rte_flow *flow;
2493 	struct rte_flow_error error;
2494 	unsigned int i;
2495 
2496 	if (!priv->reta_idx_n) {
2497 		rte_errno = EINVAL;
2498 		return -rte_errno;
2499 	}
2500 	for (i = 0; i != priv->reta_idx_n; ++i)
2501 		queue[i] = (*priv->reta_idx)[i];
2502 	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2503 				     actions, &error);
2504 	if (!flow)
2505 		return -rte_errno;
2506 	return 0;
2507 }
2508 
2509 /**
 * Enable a control flow configured from the control plane.
2511  *
2512  * @param dev
2513  *   Pointer to Ethernet device.
2514  * @param eth_spec
2515  *   An Ethernet flow spec to apply.
2516  * @param eth_mask
2517  *   An Ethernet flow mask to apply.
2518  *
2519  * @return
2520  *   0 on success, a negative errno value otherwise and rte_errno is set.
2521  */
2522 int
2523 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2524 	       struct rte_flow_item_eth *eth_spec,
2525 	       struct rte_flow_item_eth *eth_mask)
2526 {
2527 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2528 }
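
/*
 * Illustrative usage only (not part of the driver): the control plane
 * typically enables a broadcast control flow along these lines, assuming
 * "dev" is a configured mlx5 port:
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *		DRV_LOG(WARNING, "port %u cannot enable broadcast flow",
 *			dev->data->port_id);
 */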
2529 
2530 /**
2531  * Destroy a flow.
2532  *
2533  * @see rte_flow_destroy()
2534  * @see rte_flow_ops
2535  */
2536 int
2537 mlx5_flow_destroy(struct rte_eth_dev *dev,
2538 		  struct rte_flow *flow,
2539 		  struct rte_flow_error *error __rte_unused)
2540 {
2541 	struct priv *priv = dev->data->dev_private;
2542 
2543 	mlx5_flow_list_destroy(dev, &priv->flows, flow);
2544 	return 0;
2545 }
2546 
2547 /**
2548  * Destroy all flows.
2549  *
2550  * @see rte_flow_flush()
2551  * @see rte_flow_ops
2552  */
2553 int
2554 mlx5_flow_flush(struct rte_eth_dev *dev,
2555 		struct rte_flow_error *error __rte_unused)
2556 {
2557 	struct priv *priv = dev->data->dev_private;
2558 
2559 	mlx5_flow_list_flush(dev, &priv->flows);
2560 	return 0;
2561 }
2562 
2563 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2564 /**
2565  * Query flow counter.
2566  *
 * @param cs
 *   The counter set to query.
 * @param counter_stats
 *   Last counter values, used to compute the delta and updated on reset.
 * @param query_count
 *   Resulting counter data (hits and bytes) returned to the application.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
2571  *
2572  * @return
2573  *   0 on success, a negative errno value otherwise and rte_errno is set.
2574  */
2575 static int
2576 mlx5_flow_query_count(struct ibv_counter_set *cs,
2577 		      struct mlx5_flow_counter_stats *counter_stats,
2578 		      struct rte_flow_query_count *query_count,
2579 		      struct rte_flow_error *error)
2580 {
2581 	uint64_t counters[2];
2582 	struct ibv_query_counter_set_attr query_cs_attr = {
2583 		.cs = cs,
2584 		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2585 	};
2586 	struct ibv_counter_set_data query_out = {
2587 		.out = counters,
2588 		.outlen = 2 * sizeof(uint64_t),
2589 	};
2590 	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2591 
2592 	if (err)
2593 		return rte_flow_error_set(error, err,
2594 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2595 					  NULL,
2596 					  "cannot read counter");
2597 	query_count->hits_set = 1;
2598 	query_count->bytes_set = 1;
2599 	query_count->hits = counters[0] - counter_stats->hits;
2600 	query_count->bytes = counters[1] - counter_stats->bytes;
2601 	if (query_count->reset) {
2602 		counter_stats->hits = counters[0];
2603 		counter_stats->bytes = counters[1];
2604 	}
2605 	return 0;
2606 }
2607 
2608 /**
 * Query a flow.
2610  *
2611  * @see rte_flow_query()
2612  * @see rte_flow_ops
2613  */
2614 int
2615 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
2616 		struct rte_flow *flow,
2617 		enum rte_flow_action_type action __rte_unused,
2618 		void *data,
2619 		struct rte_flow_error *error)
2620 {
2621 	if (flow->cs) {
2622 		int ret;
2623 
2624 		ret = mlx5_flow_query_count(flow->cs,
2625 					    &flow->counter_stats,
2626 					    (struct rte_flow_query_count *)data,
2627 					    error);
2628 		if (ret)
2629 			return ret;
2630 	} else {
2631 		return rte_flow_error_set(error, EINVAL,
2632 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2633 					  NULL,
2634 					  "no counter found for flow");
2635 	}
2636 	return 0;
2637 }
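
/*
 * Illustrative usage only (not part of the driver): an application reads
 * this counter through the generic rte_flow API, for instance:
 *
 *	struct rte_flow_query_count qc = { .reset = 1, };
 *	struct rte_flow_error err;
 *
 *	if (!rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *			    &qc, &err) && qc.hits_set)
 *		printf("hits: %" PRIu64 ", bytes: %" PRIu64 "\n",
 *		       qc.hits, qc.bytes);
 *
 * "port_id" and "flow" here are assumed to come from the application.
 */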
2638 #endif
2639 
2640 /**
2641  * Isolated mode.
2642  *
2643  * @see rte_flow_isolate()
2644  * @see rte_flow_ops
2645  */
2646 int
2647 mlx5_flow_isolate(struct rte_eth_dev *dev,
2648 		  int enable,
2649 		  struct rte_flow_error *error)
2650 {
2651 	struct priv *priv = dev->data->dev_private;
2652 
2653 	if (dev->data->dev_started) {
2654 		rte_flow_error_set(error, EBUSY,
2655 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2656 				   NULL,
2657 				   "port must be stopped first");
2658 		return -rte_errno;
2659 	}
2660 	priv->isolated = !!enable;
2661 	if (enable)
2662 		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2663 	else
2664 		priv->dev->dev_ops = &mlx5_dev_ops;
2665 	return 0;
2666 }
2667 
2668 /**
2669  * Convert a flow director filter to a generic flow.
2670  *
2671  * @param dev
2672  *   Pointer to Ethernet device.
2673  * @param fdir_filter
2674  *   Flow director filter to add.
2675  * @param attributes
2676  *   Generic flow parameters structure.
2677  *
2678  * @return
2679  *   0 on success, a negative errno value otherwise and rte_errno is set.
2680  */
2681 static int
2682 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2683 			 const struct rte_eth_fdir_filter *fdir_filter,
2684 			 struct mlx5_fdir *attributes)
2685 {
2686 	struct priv *priv = dev->data->dev_private;
2687 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
2688 	const struct rte_eth_fdir_masks *mask =
2689 		&dev->data->dev_conf.fdir_conf.mask;
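	/* Item masks come from the port-wide flow director configuration. */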
2690 
2691 	/* Validate queue number. */
2692 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2693 		DRV_LOG(ERR, "port %u invalid queue number %d",
2694 			dev->data->port_id, fdir_filter->action.rx_queue);
2695 		rte_errno = EINVAL;
2696 		return -rte_errno;
2697 	}
2698 	attributes->attr.ingress = 1;
2699 	attributes->items[0] = (struct rte_flow_item) {
2700 		.type = RTE_FLOW_ITEM_TYPE_ETH,
2701 		.spec = &attributes->l2,
2702 		.mask = &attributes->l2_mask,
2703 	};
2704 	switch (fdir_filter->action.behavior) {
2705 	case RTE_ETH_FDIR_ACCEPT:
2706 		attributes->actions[0] = (struct rte_flow_action){
2707 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
2708 			.conf = &attributes->queue,
2709 		};
2710 		break;
2711 	case RTE_ETH_FDIR_REJECT:
2712 		attributes->actions[0] = (struct rte_flow_action){
2713 			.type = RTE_FLOW_ACTION_TYPE_DROP,
2714 		};
2715 		break;
2716 	default:
2717 		DRV_LOG(ERR, "port %u invalid behavior %d",
2718 			dev->data->port_id,
2719 			fdir_filter->action.behavior);
2720 		rte_errno = ENOTSUP;
2721 		return -rte_errno;
2722 	}
2723 	attributes->queue.index = fdir_filter->action.rx_queue;
2724 	/* Handle L3. */
2725 	switch (fdir_filter->input.flow_type) {
2726 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2727 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2728 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2729 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2730 			.src_addr = input->flow.ip4_flow.src_ip,
2731 			.dst_addr = input->flow.ip4_flow.dst_ip,
2732 			.time_to_live = input->flow.ip4_flow.ttl,
2733 			.type_of_service = input->flow.ip4_flow.tos,
2734 			.next_proto_id = input->flow.ip4_flow.proto,
2735 		};
2736 		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
2737 			.src_addr = mask->ipv4_mask.src_ip,
2738 			.dst_addr = mask->ipv4_mask.dst_ip,
2739 			.time_to_live = mask->ipv4_mask.ttl,
2740 			.type_of_service = mask->ipv4_mask.tos,
2741 			.next_proto_id = mask->ipv4_mask.proto,
2742 		};
2743 		attributes->items[1] = (struct rte_flow_item){
2744 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2745 			.spec = &attributes->l3,
2746 			.mask = &attributes->l3_mask,
2747 		};
2748 		break;
2749 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2750 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2751 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2752 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2753 			.hop_limits = input->flow.ipv6_flow.hop_limits,
2754 			.proto = input->flow.ipv6_flow.proto,
2755 		};
2756 
2757 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2758 		       input->flow.ipv6_flow.src_ip,
2759 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2760 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2761 		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2763 		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
2764 		       mask->ipv6_mask.src_ip,
2765 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
2766 		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
2767 		       mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
2769 		attributes->items[1] = (struct rte_flow_item){
2770 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2771 			.spec = &attributes->l3,
2772 			.mask = &attributes->l3_mask,
2773 		};
2774 		break;
2775 	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
2777 			dev->data->port_id, fdir_filter->input.flow_type);
2778 		rte_errno = ENOTSUP;
2779 		return -rte_errno;
2780 	}
2781 	/* Handle L4. */
2782 	switch (fdir_filter->input.flow_type) {
2783 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2784 		attributes->l4.udp.hdr = (struct udp_hdr){
2785 			.src_port = input->flow.udp4_flow.src_port,
2786 			.dst_port = input->flow.udp4_flow.dst_port,
2787 		};
2788 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
2789 			.src_port = mask->src_port_mask,
2790 			.dst_port = mask->dst_port_mask,
2791 		};
2792 		attributes->items[2] = (struct rte_flow_item){
2793 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2794 			.spec = &attributes->l4,
2795 			.mask = &attributes->l4_mask,
2796 		};
2797 		break;
2798 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2799 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2800 			.src_port = input->flow.tcp4_flow.src_port,
2801 			.dst_port = input->flow.tcp4_flow.dst_port,
2802 		};
2803 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2804 			.src_port = mask->src_port_mask,
2805 			.dst_port = mask->dst_port_mask,
2806 		};
2807 		attributes->items[2] = (struct rte_flow_item){
2808 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2809 			.spec = &attributes->l4,
2810 			.mask = &attributes->l4_mask,
2811 		};
2812 		break;
2813 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2814 		attributes->l4.udp.hdr = (struct udp_hdr){
2815 			.src_port = input->flow.udp6_flow.src_port,
2816 			.dst_port = input->flow.udp6_flow.dst_port,
2817 		};
2818 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
2819 			.src_port = mask->src_port_mask,
2820 			.dst_port = mask->dst_port_mask,
2821 		};
2822 		attributes->items[2] = (struct rte_flow_item){
2823 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2824 			.spec = &attributes->l4,
2825 			.mask = &attributes->l4_mask,
2826 		};
2827 		break;
2828 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2829 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2830 			.src_port = input->flow.tcp6_flow.src_port,
2831 			.dst_port = input->flow.tcp6_flow.dst_port,
2832 		};
2833 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2834 			.src_port = mask->src_port_mask,
2835 			.dst_port = mask->dst_port_mask,
2836 		};
2837 		attributes->items[2] = (struct rte_flow_item){
2838 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2839 			.spec = &attributes->l4,
2840 			.mask = &attributes->l4_mask,
2841 		};
2842 		break;
2843 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2844 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2845 		break;
2846 	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
2848 			dev->data->port_id, fdir_filter->input.flow_type);
2849 		rte_errno = ENOTSUP;
2850 		return -rte_errno;
2851 	}
2852 	return 0;
2853 }
2854 
2855 /**
2856  * Add new flow director filter and store it in list.
2857  *
2858  * @param dev
2859  *   Pointer to Ethernet device.
2860  * @param fdir_filter
2861  *   Flow director filter to add.
2862  *
2863  * @return
2864  *   0 on success, a negative errno value otherwise and rte_errno is set.
2865  */
2866 static int
2867 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
2868 		     const struct rte_eth_fdir_filter *fdir_filter)
2869 {
2870 	struct priv *priv = dev->data->dev_private;
2871 	struct mlx5_fdir attributes = {
2872 		.attr.group = 0,
2873 		.l2_mask = {
2874 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2875 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2876 			.type = 0,
2877 		},
2878 	};
2879 	struct mlx5_flow_parse parser = {
2880 		.layer = HASH_RXQ_ETH,
2881 	};
2882 	struct rte_flow_error error;
2883 	struct rte_flow *flow;
2884 	int ret;
2885 
2886 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2887 	if (ret)
2888 		return ret;
2889 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2890 				attributes.actions, &error, &parser);
2891 	if (ret)
2892 		return ret;
2893 	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
2894 				     attributes.items, attributes.actions,
2895 				     &error);
2896 	if (flow) {
2897 		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
2898 			(void *)flow);
2899 		return 0;
2900 	}
2901 	return -rte_errno;
2902 }
2903 
2904 /**
2905  * Delete specific filter.
2906  *
2907  * @param dev
2908  *   Pointer to Ethernet device.
2909  * @param fdir_filter
2910  *   Filter to be deleted.
2911  *
2912  * @return
2913  *   0 on success, a negative errno value otherwise and rte_errno is set.
2914  */
2915 static int
2916 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
2917 			const struct rte_eth_fdir_filter *fdir_filter)
2918 {
2919 	struct priv *priv = dev->data->dev_private;
2920 	struct mlx5_fdir attributes = {
2921 		.attr.group = 0,
2922 	};
2923 	struct mlx5_flow_parse parser = {
2924 		.create = 1,
2925 		.layer = HASH_RXQ_ETH,
2926 	};
2927 	struct rte_flow_error error;
2928 	struct rte_flow *flow;
2929 	unsigned int i;
2930 	int ret;
2931 
2932 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2933 	if (ret)
2934 		return ret;
2935 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2936 				attributes.actions, &error, &parser);
2937 	if (ret)
2938 		goto exit;
2939 	/*
	 * Special case for the drop action: its specification is only
	 * appended when a flow is actually created, so it is missing here
	 * and must be re-added for the comparison below to match.
2943 	 */
2944 	if (parser.drop) {
2945 		struct ibv_flow_spec_action_drop *drop;
2946 
2947 		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2948 				parser.queue[HASH_RXQ_ETH].offset);
2949 		*drop = (struct ibv_flow_spec_action_drop){
2950 			.type = IBV_FLOW_SPEC_ACTION_DROP,
2951 			.size = sizeof(struct ibv_flow_spec_action_drop),
2952 		};
2953 		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2954 	}
2955 	TAILQ_FOREACH(flow, &priv->flows, next) {
2956 		struct ibv_flow_attr *attr;
2957 		struct ibv_spec_header *attr_h;
2958 		void *spec;
2959 		struct ibv_flow_attr *flow_attr;
2960 		struct ibv_spec_header *flow_h;
2961 		void *flow_spec;
2962 		unsigned int specs_n;
2963 
2964 		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2965 		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2966 		/* Compare first the attributes. */
2967 		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2968 			continue;
2969 		if (attr->num_of_specs == 0)
2970 			continue;
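		/* Then compare the specifications one by one. */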
2971 		spec = (void *)((uintptr_t)attr +
2972 				sizeof(struct ibv_flow_attr));
2973 		flow_spec = (void *)((uintptr_t)flow_attr +
2974 				     sizeof(struct ibv_flow_attr));
2975 		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2976 		for (i = 0; i != specs_n; ++i) {
2977 			attr_h = spec;
2978 			flow_h = flow_spec;
2979 			if (memcmp(spec, flow_spec,
2980 				   RTE_MIN(attr_h->size, flow_h->size)))
2981 				goto wrong_flow;
2982 			spec = (void *)((uintptr_t)spec + attr_h->size);
2983 			flow_spec = (void *)((uintptr_t)flow_spec +
2984 					     flow_h->size);
2985 		}
		/* At this point, the flow matches. */
2987 		break;
2988 wrong_flow:
2989 		/* The flow does not match. */
2990 		continue;
2991 	}
2992 	ret = rte_errno; /* Save rte_errno before cleanup. */
2993 	if (flow)
2994 		mlx5_flow_list_destroy(dev, &priv->flows, flow);
2995 exit:
2996 	for (i = 0; i != hash_rxq_init_n; ++i) {
2997 		if (parser.queue[i].ibv_attr)
2998 			rte_free(parser.queue[i].ibv_attr);
2999 	}
3000 	rte_errno = ret; /* Restore rte_errno. */
3001 	return -rte_errno;
3002 }
3003 
3004 /**
3005  * Update queue for specific filter.
3006  *
3007  * @param dev
3008  *   Pointer to Ethernet device.
3009  * @param fdir_filter
3010  *   Filter to be updated.
3011  *
3012  * @return
3013  *   0 on success, a negative errno value otherwise and rte_errno is set.
3014  */
3015 static int
3016 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3017 			const struct rte_eth_fdir_filter *fdir_filter)
3018 {
3019 	int ret;
3020 
3021 	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3022 	if (ret)
3023 		return ret;
3024 	return mlx5_fdir_filter_add(dev, fdir_filter);
3025 }
3026 
3027 /**
3028  * Flush all filters.
3029  *
3030  * @param dev
3031  *   Pointer to Ethernet device.
3032  */
3033 static void
3034 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3035 {
3036 	struct priv *priv = dev->data->dev_private;
3037 
3038 	mlx5_flow_list_flush(dev, &priv->flows);
3039 }
3040 
3041 /**
3042  * Get flow director information.
3043  *
3044  * @param dev
3045  *   Pointer to Ethernet device.
3046  * @param[out] fdir_info
3047  *   Resulting flow director information.
3048  */
3049 static void
3050 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3051 {
3052 	struct priv *priv = dev->data->dev_private;
3053 	struct rte_eth_fdir_masks *mask =
3054 		&priv->dev->data->dev_conf.fdir_conf.mask;
3055 
3056 	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3057 	fdir_info->guarant_spc = 0;
3058 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3059 	fdir_info->max_flexpayload = 0;
3060 	fdir_info->flow_types_mask[0] = 0;
3061 	fdir_info->flex_payload_unit = 0;
3062 	fdir_info->max_flex_payload_segment_num = 0;
3063 	fdir_info->flex_payload_limit = 0;
3064 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3065 }
3066 
3067 /**
3068  * Deal with flow director operations.
3069  *
3070  * @param dev
3071  *   Pointer to Ethernet device.
3072  * @param filter_op
3073  *   Operation to perform.
3074  * @param arg
3075  *   Pointer to operation-specific structure.
3076  *
3077  * @return
3078  *   0 on success, a negative errno value otherwise and rte_errno is set.
3079  */
3080 static int
3081 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3082 		    void *arg)
3083 {
3084 	struct priv *priv = dev->data->dev_private;
3085 	enum rte_fdir_mode fdir_mode =
3086 		priv->dev->data->dev_conf.fdir_conf.mode;
3087 
3088 	if (filter_op == RTE_ETH_FILTER_NOP)
3089 		return 0;
3090 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3091 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3092 		DRV_LOG(ERR, "port %u flow director mode %d not supported",
3093 			dev->data->port_id, fdir_mode);
3094 		rte_errno = EINVAL;
3095 		return -rte_errno;
3096 	}
3097 	switch (filter_op) {
3098 	case RTE_ETH_FILTER_ADD:
3099 		return mlx5_fdir_filter_add(dev, arg);
3100 	case RTE_ETH_FILTER_UPDATE:
3101 		return mlx5_fdir_filter_update(dev, arg);
3102 	case RTE_ETH_FILTER_DELETE:
3103 		return mlx5_fdir_filter_delete(dev, arg);
3104 	case RTE_ETH_FILTER_FLUSH:
3105 		mlx5_fdir_filter_flush(dev);
3106 		break;
3107 	case RTE_ETH_FILTER_INFO:
3108 		mlx5_fdir_info_get(dev, arg);
3109 		break;
3110 	default:
3111 		DRV_LOG(DEBUG, "port %u unknown operation %u",
3112 			dev->data->port_id, filter_op);
3113 		rte_errno = EINVAL;
3114 		return -rte_errno;
3115 	}
3116 	return 0;
3117 }
3118 
3119 /**
3120  * Manage filter operations.
3121  *
3122  * @param dev
3123  *   Pointer to Ethernet device structure.
3124  * @param filter_type
3125  *   Filter type.
3126  * @param filter_op
3127  *   Operation to perform.
3128  * @param arg
3129  *   Pointer to operation-specific structure.
3130  *
3131  * @return
3132  *   0 on success, a negative errno value otherwise and rte_errno is set.
3133  */
3134 int
3135 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3136 		     enum rte_filter_type filter_type,
3137 		     enum rte_filter_op filter_op,
3138 		     void *arg)
3139 {
3140 	switch (filter_type) {
3141 	case RTE_ETH_FILTER_GENERIC:
3142 		if (filter_op != RTE_ETH_FILTER_GET) {
3143 			rte_errno = EINVAL;
3144 			return -rte_errno;
3145 		}
3146 		*(const void **)arg = &mlx5_flow_ops;
3147 		return 0;
3148 	case RTE_ETH_FILTER_FDIR:
3149 		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3150 	default:
3151 		DRV_LOG(ERR, "port %u filter type (%d) not supported",
3152 			dev->data->port_id, filter_type);
3153 		rte_errno = ENOTSUP;
3154 		return -rte_errno;
3155 	}
3156 	return 0;
3157 }
3158