xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision ac8d22de2394e03ba4a77d8fd24381147aafb1d3)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9 
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19 
20 #include <rte_common.h>
21 #include <rte_ethdev_driver.h>
22 #include <rte_flow.h>
23 #include <rte_flow_driver.h>
24 #include <rte_malloc.h>
25 #include <rte_ip.h>
26 
27 #include "mlx5.h"
28 #include "mlx5_defs.h"
29 #include "mlx5_prm.h"
30 #include "mlx5_glue.h"
31 
32 /* Define minimal priority for control plane flows. */
33 #define MLX5_CTRL_FLOW_PRIORITY 4
34 
35 /* Internet Protocol versions. */
36 #define MLX5_IPV4 4
37 #define MLX5_IPV6 6
38 
39 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
40 struct ibv_flow_spec_counter_action {
41 	int dummy;
42 };
43 #endif
44 
45 /* Dev ops structure defined in mlx5.c */
46 extern const struct eth_dev_ops mlx5_dev_ops;
47 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
48 
49 /** Structure given to the conversion functions. */
50 struct mlx5_flow_data {
51 	struct mlx5_flow_parse *parser; /**< Parser context. */
52 	struct rte_flow_error *error; /**< Error context. */
53 };
54 
55 static int
56 mlx5_flow_create_eth(const struct rte_flow_item *item,
57 		     const void *default_mask,
58 		     struct mlx5_flow_data *data);
59 
60 static int
61 mlx5_flow_create_vlan(const struct rte_flow_item *item,
62 		      const void *default_mask,
63 		      struct mlx5_flow_data *data);
64 
65 static int
66 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
67 		      const void *default_mask,
68 		      struct mlx5_flow_data *data);
69 
70 static int
71 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
72 		      const void *default_mask,
73 		      struct mlx5_flow_data *data);
74 
75 static int
76 mlx5_flow_create_udp(const struct rte_flow_item *item,
77 		     const void *default_mask,
78 		     struct mlx5_flow_data *data);
79 
80 static int
81 mlx5_flow_create_tcp(const struct rte_flow_item *item,
82 		     const void *default_mask,
83 		     struct mlx5_flow_data *data);
84 
85 static int
86 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
87 		       const void *default_mask,
88 		       struct mlx5_flow_data *data);
89 
90 struct mlx5_flow_parse;
91 
92 static void
93 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
94 		      unsigned int size);
95 
96 static int
97 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
98 
99 static int
100 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
101 
102 /* Hash RX queue types. */
103 enum hash_rxq_type {
104 	HASH_RXQ_TCPV4,
105 	HASH_RXQ_UDPV4,
106 	HASH_RXQ_IPV4,
107 	HASH_RXQ_TCPV6,
108 	HASH_RXQ_UDPV6,
109 	HASH_RXQ_IPV6,
110 	HASH_RXQ_ETH,
111 };
112 
113 /* Initialization data for hash RX queue. */
114 struct hash_rxq_init {
115 	uint64_t hash_fields; /* Fields that participate in the hash. */
116 	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
117 	unsigned int flow_priority; /* Flow priority to use. */
118 	unsigned int ip_version; /* Internet protocol. */
119 };
120 
121 /* Initialization data for hash RX queues. */
122 const struct hash_rxq_init hash_rxq_init[] = {
123 	[HASH_RXQ_TCPV4] = {
124 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
125 				IBV_RX_HASH_DST_IPV4 |
126 				IBV_RX_HASH_SRC_PORT_TCP |
127 				IBV_RX_HASH_DST_PORT_TCP),
128 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
129 		.flow_priority = 1,
130 		.ip_version = MLX5_IPV4,
131 	},
132 	[HASH_RXQ_UDPV4] = {
133 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
134 				IBV_RX_HASH_DST_IPV4 |
135 				IBV_RX_HASH_SRC_PORT_UDP |
136 				IBV_RX_HASH_DST_PORT_UDP),
137 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
138 		.flow_priority = 1,
139 		.ip_version = MLX5_IPV4,
140 	},
141 	[HASH_RXQ_IPV4] = {
142 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
143 				IBV_RX_HASH_DST_IPV4),
144 		.dpdk_rss_hf = (ETH_RSS_IPV4 |
145 				ETH_RSS_FRAG_IPV4),
146 		.flow_priority = 2,
147 		.ip_version = MLX5_IPV4,
148 	},
149 	[HASH_RXQ_TCPV6] = {
150 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
151 				IBV_RX_HASH_DST_IPV6 |
152 				IBV_RX_HASH_SRC_PORT_TCP |
153 				IBV_RX_HASH_DST_PORT_TCP),
154 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
155 		.flow_priority = 1,
156 		.ip_version = MLX5_IPV6,
157 	},
158 	[HASH_RXQ_UDPV6] = {
159 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
160 				IBV_RX_HASH_DST_IPV6 |
161 				IBV_RX_HASH_SRC_PORT_UDP |
162 				IBV_RX_HASH_DST_PORT_UDP),
163 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
164 		.flow_priority = 1,
165 		.ip_version = MLX5_IPV6,
166 	},
167 	[HASH_RXQ_IPV6] = {
168 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
169 				IBV_RX_HASH_DST_IPV6),
170 		.dpdk_rss_hf = (ETH_RSS_IPV6 |
171 				ETH_RSS_FRAG_IPV6),
172 		.flow_priority = 2,
173 		.ip_version = MLX5_IPV6,
174 	},
175 	[HASH_RXQ_ETH] = {
176 		.hash_fields = 0,
177 		.dpdk_rss_hf = 0,
178 		.flow_priority = 3,
179 	},
180 };
181 
182 /* Number of entries in hash_rxq_init[]. */
183 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
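/*
 * Illustrative sketch only (not used by the driver): pick the Verbs hash
 * fields of the first hash Rx queue type whose DPDK RSS flags intersect the
 * requested ones, e.g. ETH_RSS_NONFRAG_IPV4_UDP selects HASH_RXQ_UDPV4.
 * The function name is made up for this example.
 */
static uint64_t __rte_unused
mlx5_flow_doc_rss_to_hash_fields(uint64_t dpdk_rss_hf)
{
	unsigned int i;

	for (i = 0; i != hash_rxq_init_n; ++i)
		if (hash_rxq_init[i].dpdk_rss_hf & dpdk_rss_hf)
			return hash_rxq_init[i].hash_fields;
	return 0; /* HASH_RXQ_ETH: no L3/L4 hashing. */
}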
184 
185 /** Structure for holding counter stats. */
186 struct mlx5_flow_counter_stats {
187 	uint64_t hits; /**< Number of packets matched by the rule. */
188 	uint64_t bytes; /**< Number of bytes matched by the rule. */
189 };
190 
191 /** Structure for Drop queue. */
192 struct mlx5_hrxq_drop {
193 	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
194 	struct ibv_qp *qp; /**< Verbs queue pair. */
195 	struct ibv_wq *wq; /**< Verbs work queue. */
196 	struct ibv_cq *cq; /**< Verbs completion queue. */
197 };
198 
199 /* Flow structures. */
200 struct mlx5_flow {
201 	uint64_t hash_fields; /**< Fields that participate in the hash. */
202 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
203 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
204 	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
205 };
206 
207 /* Drop flow structures. */
208 struct mlx5_flow_drop {
209 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
210 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
211 };
212 
213 struct rte_flow {
214 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
215 	uint32_t mark:1; /**< Set if the flow is marked. */
216 	uint32_t drop:1; /**< Drop queue. */
217 	struct rte_flow_action_rss rss_conf; /**< RSS configuration */
218 	uint16_t (*queues)[]; /**< Queues indexes to use. */
219 	uint8_t rss_key[40]; /**< copy of the RSS key. */
220 	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
221 	struct mlx5_flow_counter_stats counter_stats;/**<The counter stats. */
222 	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
223 	/**< Flow with Rx queue. */
224 };
225 
226 /** Static initializer for items. */
227 #define ITEMS(...) \
228 	(const enum rte_flow_item_type []){ \
229 		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
230 	}
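/*
 * For reference, ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6)
 * expands to an anonymous array equivalent to the one below. The array name
 * is made up for this example and is not used by the driver.
 */
static const enum rte_flow_item_type doc_items_expansion[] __rte_unused = {
	RTE_FLOW_ITEM_TYPE_IPV4,
	RTE_FLOW_ITEM_TYPE_IPV6,
	RTE_FLOW_ITEM_TYPE_END,
};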
231 
232 /** Structure to generate a simple graph of layers supported by the NIC. */
233 struct mlx5_flow_items {
234 	/** List of possible actions for these items. */
235 	const enum rte_flow_action_type *const actions;
236 	/** Bit-masks corresponding to the possibilities for the item. */
237 	const void *mask;
238 	/**
239 	 * Default bit-masks to use when item->mask is not provided. When
240 	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
241 	 * used instead.
242 	 */
243 	const void *default_mask;
244 	/** Bit-masks size in bytes. */
245 	const unsigned int mask_sz;
246 	/**
247 	 * Conversion function from rte_flow to NIC specific flow.
248 	 *
249 	 * @param item
250 	 *   rte_flow item to convert.
251 	 * @param default_mask
252 	 *   Default bit-masks to use when item->mask is not provided.
253 	 * @param data
254 	 *   Internal structure to store the conversion.
255 	 *
256 	 * @return
257 	 *   0 on success, a negative errno value otherwise and rte_errno is
258 	 *   set.
259 	 */
260 	int (*convert)(const struct rte_flow_item *item,
261 		       const void *default_mask,
262 		       struct mlx5_flow_data *data);
263 	/** Size in bytes of the destination structure. */
264 	const unsigned int dst_sz;
265 	/** List of possible following items.  */
266 	const enum rte_flow_item_type *const items;
267 };
268 
269 /** Valid actions for this PMD. */
270 static const enum rte_flow_action_type valid_actions[] = {
271 	RTE_FLOW_ACTION_TYPE_DROP,
272 	RTE_FLOW_ACTION_TYPE_QUEUE,
273 	RTE_FLOW_ACTION_TYPE_MARK,
274 	RTE_FLOW_ACTION_TYPE_FLAG,
275 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
276 	RTE_FLOW_ACTION_TYPE_COUNT,
277 #endif
278 	RTE_FLOW_ACTION_TYPE_END,
279 };
280 
281 /** Graph of supported items and associated actions. */
282 static const struct mlx5_flow_items mlx5_flow_items[] = {
283 	[RTE_FLOW_ITEM_TYPE_END] = {
284 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
285 			       RTE_FLOW_ITEM_TYPE_VXLAN),
286 	},
287 	[RTE_FLOW_ITEM_TYPE_ETH] = {
288 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
289 			       RTE_FLOW_ITEM_TYPE_IPV4,
290 			       RTE_FLOW_ITEM_TYPE_IPV6),
291 		.actions = valid_actions,
292 		.mask = &(const struct rte_flow_item_eth){
293 			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
294 			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
295 			.type = -1,
296 		},
297 		.default_mask = &rte_flow_item_eth_mask,
298 		.mask_sz = sizeof(struct rte_flow_item_eth),
299 		.convert = mlx5_flow_create_eth,
300 		.dst_sz = sizeof(struct ibv_flow_spec_eth),
301 	},
302 	[RTE_FLOW_ITEM_TYPE_VLAN] = {
303 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
304 			       RTE_FLOW_ITEM_TYPE_IPV6),
305 		.actions = valid_actions,
306 		.mask = &(const struct rte_flow_item_vlan){
307 			.tci = -1,
308 		},
309 		.default_mask = &rte_flow_item_vlan_mask,
310 		.mask_sz = sizeof(struct rte_flow_item_vlan),
311 		.convert = mlx5_flow_create_vlan,
312 		.dst_sz = 0,
313 	},
314 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
315 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
316 			       RTE_FLOW_ITEM_TYPE_TCP),
317 		.actions = valid_actions,
318 		.mask = &(const struct rte_flow_item_ipv4){
319 			.hdr = {
320 				.src_addr = -1,
321 				.dst_addr = -1,
322 				.type_of_service = -1,
323 				.next_proto_id = -1,
324 			},
325 		},
326 		.default_mask = &rte_flow_item_ipv4_mask,
327 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
328 		.convert = mlx5_flow_create_ipv4,
329 		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
330 	},
331 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
332 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
333 			       RTE_FLOW_ITEM_TYPE_TCP),
334 		.actions = valid_actions,
335 		.mask = &(const struct rte_flow_item_ipv6){
336 			.hdr = {
337 				.src_addr = {
338 					0xff, 0xff, 0xff, 0xff,
339 					0xff, 0xff, 0xff, 0xff,
340 					0xff, 0xff, 0xff, 0xff,
341 					0xff, 0xff, 0xff, 0xff,
342 				},
343 				.dst_addr = {
344 					0xff, 0xff, 0xff, 0xff,
345 					0xff, 0xff, 0xff, 0xff,
346 					0xff, 0xff, 0xff, 0xff,
347 					0xff, 0xff, 0xff, 0xff,
348 				},
349 				.vtc_flow = -1,
350 				.proto = -1,
351 				.hop_limits = -1,
352 			},
353 		},
354 		.default_mask = &rte_flow_item_ipv6_mask,
355 		.mask_sz = sizeof(struct rte_flow_item_ipv6),
356 		.convert = mlx5_flow_create_ipv6,
357 		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
358 	},
359 	[RTE_FLOW_ITEM_TYPE_UDP] = {
360 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
361 		.actions = valid_actions,
362 		.mask = &(const struct rte_flow_item_udp){
363 			.hdr = {
364 				.src_port = -1,
365 				.dst_port = -1,
366 			},
367 		},
368 		.default_mask = &rte_flow_item_udp_mask,
369 		.mask_sz = sizeof(struct rte_flow_item_udp),
370 		.convert = mlx5_flow_create_udp,
371 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
372 	},
373 	[RTE_FLOW_ITEM_TYPE_TCP] = {
374 		.actions = valid_actions,
375 		.mask = &(const struct rte_flow_item_tcp){
376 			.hdr = {
377 				.src_port = -1,
378 				.dst_port = -1,
379 			},
380 		},
381 		.default_mask = &rte_flow_item_tcp_mask,
382 		.mask_sz = sizeof(struct rte_flow_item_tcp),
383 		.convert = mlx5_flow_create_tcp,
384 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
385 	},
386 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
387 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
388 		.actions = valid_actions,
389 		.mask = &(const struct rte_flow_item_vxlan){
390 			.vni = "\xff\xff\xff",
391 		},
392 		.default_mask = &rte_flow_item_vxlan_mask,
393 		.mask_sz = sizeof(struct rte_flow_item_vxlan),
394 		.convert = mlx5_flow_create_vxlan,
395 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
396 	},
397 };
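/*
 * Illustrative sketch (not used by the driver): check that a sequence of
 * item types can be chained according to the graph above, i.e. that each
 * item appears in the .items list of its predecessor, starting from the
 * implicit RTE_FLOW_ITEM_TYPE_END root. The function name is made up for
 * this example; the real walk is done by the validation code below.
 */
static int __rte_unused
mlx5_flow_doc_pattern_in_graph(const enum rte_flow_item_type *types,
			       unsigned int n)
{
	const struct mlx5_flow_items *cur =
		&mlx5_flow_items[RTE_FLOW_ITEM_TYPE_END];
	unsigned int t;

	for (t = 0; t != n; ++t) {
		unsigned int i;

		for (i = 0;
		     cur->items && cur->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i)
			if (cur->items[i] == types[t])
				break;
		if (!cur->items || cur->items[i] == RTE_FLOW_ITEM_TYPE_END)
			return 0; /* Transition not in the graph. */
		cur = &mlx5_flow_items[types[t]];
	}
	return 1; /* E.g. { ETH, IPV4, UDP, VXLAN, ETH } is accepted. */
}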
398 
399 /** Structure to pass to the conversion function. */
400 struct mlx5_flow_parse {
401 	uint32_t inner; /**< Set once VXLAN is encountered. */
402 	uint32_t create:1;
403 	/**< Whether resources should remain after a validate. */
404 	uint32_t drop:1; /**< Target is a drop queue. */
405 	uint32_t mark:1; /**< Mark is present in the flow. */
406 	uint32_t count:1; /**< Count is present in the flow. */
407 	uint32_t mark_id; /**< Mark identifier. */
408 	struct rte_flow_action_rss rss_conf; /**< RSS configuration */
409 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
410 	uint8_t rss_key[40]; /**< copy of the RSS key. */
411 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
412 	struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
413 	struct {
414 		struct ibv_flow_attr *ibv_attr;
415 		/**< Pointer to Verbs attributes. */
416 		unsigned int offset;
417 		/**< Current position or total size of the attribute. */
418 	} queue[RTE_DIM(hash_rxq_init)];
419 };
420 
421 static const struct rte_flow_ops mlx5_flow_ops = {
422 	.validate = mlx5_flow_validate,
423 	.create = mlx5_flow_create,
424 	.destroy = mlx5_flow_destroy,
425 	.flush = mlx5_flow_flush,
426 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
427 	.query = mlx5_flow_query,
428 #else
429 	.query = NULL,
430 #endif
431 	.isolate = mlx5_flow_isolate,
432 };
433 
434 /* Convert FDIR request to Generic flow. */
435 struct mlx5_fdir {
436 	struct rte_flow_attr attr;
437 	struct rte_flow_action actions[2];
438 	struct rte_flow_item items[4];
439 	struct rte_flow_item_eth l2;
440 	struct rte_flow_item_eth l2_mask;
441 	union {
442 		struct rte_flow_item_ipv4 ipv4;
443 		struct rte_flow_item_ipv6 ipv6;
444 	} l3;
445 	union {
446 		struct rte_flow_item_ipv4 ipv4;
447 		struct rte_flow_item_ipv6 ipv6;
448 	} l3_mask;
449 	union {
450 		struct rte_flow_item_udp udp;
451 		struct rte_flow_item_tcp tcp;
452 	} l4;
453 	union {
454 		struct rte_flow_item_udp udp;
455 		struct rte_flow_item_tcp tcp;
456 	} l4_mask;
457 	struct rte_flow_action_queue queue;
458 };
459 
460 /* Verbs specification header. */
461 struct ibv_spec_header {
462 	enum ibv_flow_spec_type type;
463 	uint16_t size;
464 };
465 
466 /**
467  * Check support for a given item.
468  *
469  * @param item[in]
470  *   Item specification.
471  * @param mask[in]
472  *   Bit-masks covering supported fields to compare with spec, last and mask in
473  *   \item.
474  * @param size
475  *   Bit-Mask size in bytes.
476  *
477  * @return
478  *   0 on success, a negative errno value otherwise and rte_errno is set.
479  */
480 static int
481 mlx5_flow_item_validate(const struct rte_flow_item *item,
482 			const uint8_t *mask, unsigned int size)
483 {
484 	if (!item->spec && (item->mask || item->last)) {
485 		rte_errno = EINVAL;
486 		return -rte_errno;
487 	}
488 	if (item->spec && !item->mask) {
489 		unsigned int i;
490 		const uint8_t *spec = item->spec;
491 
492 		for (i = 0; i < size; ++i)
493 			if ((spec[i] | mask[i]) != mask[i]) {
494 				rte_errno = EINVAL;
495 				return -rte_errno;
496 			}
497 	}
498 	if (item->last && !item->mask) {
499 		unsigned int i;
500 		const uint8_t *spec = item->last;
501 
502 		for (i = 0; i < size; ++i)
503 			if ((spec[i] | mask[i]) != mask[i]) {
504 				rte_errno = EINVAL;
505 				return -rte_errno;
506 			}
507 	}
508 	if (item->mask) {
509 		unsigned int i;
510 		const uint8_t *spec = item->mask;
511 
512 		for (i = 0; i < size; ++i)
513 			if ((spec[i] | mask[i]) != mask[i]) {
514 				rte_errno = EINVAL;
515 				return -rte_errno;
516 			}
517 	}
518 	if (item->spec && item->last) {
519 		uint8_t spec[size];
520 		uint8_t last[size];
521 		const uint8_t *apply = mask;
522 		unsigned int i;
523 		int ret;
524 
525 		if (item->mask)
526 			apply = item->mask;
527 		for (i = 0; i < size; ++i) {
528 			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
529 			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
530 		}
531 		ret = memcmp(spec, last, size);
532 		if (ret != 0) {
533 			rte_errno = EINVAL;
534 			return -rte_errno;
535 		}
536 	}
537 	return 0;
538 }
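/*
 * Illustrative sketch (not used by the driver): validate a user IPv4 item
 * against the bit-mask this PMD supports, taken from mlx5_flow_items[]
 * above. The function name is made up for this example.
 */
static int __rte_unused
mlx5_flow_doc_validate_ipv4_item(void)
{
	const struct rte_flow_item_ipv4 spec = {
		.hdr = { .src_addr = rte_cpu_to_be_32(0x0a000001) },
	};
	const struct rte_flow_item_ipv4 mask = {
		.hdr = { .src_addr = rte_cpu_to_be_32(0xffffffff) },
	};
	const struct rte_flow_item item = {
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.spec = &spec,
		.mask = &mask,
	};
	const struct mlx5_flow_items *proto =
		&mlx5_flow_items[RTE_FLOW_ITEM_TYPE_IPV4];

	/* Returns 0: masking only the source address is supported. */
	return mlx5_flow_item_validate(&item,
				       (const uint8_t *)proto->mask,
				       proto->mask_sz);
}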
539 
540 /**
541  * Validate flow rule attributes.
542  *
543  * @param[in] attr
544  *   Flow rule attributes.
545  * @param[out] error
546  *   Perform verbose error reporting if not NULL.
547  *
548  * @return
549  *   0 on success, a negative errno value otherwise and rte_errno is set.
550  */
551 static int
552 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
553 			     struct rte_flow_error *error)
554 {
555 	if (attr->group) {
556 		rte_flow_error_set(error, ENOTSUP,
557 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
558 				   NULL,
559 				   "groups are not supported");
560 		return -rte_errno;
561 	}
562 	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
563 		rte_flow_error_set(error, ENOTSUP,
564 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
565 				   NULL,
566 				   "priorities are not supported");
567 		return -rte_errno;
568 	}
569 	if (attr->egress) {
570 		rte_flow_error_set(error, ENOTSUP,
571 				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
572 				   NULL,
573 				   "egress is not supported");
574 		return -rte_errno;
575 	}
576 	if (!attr->ingress) {
577 		rte_flow_error_set(error, ENOTSUP,
578 				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
579 				   NULL,
580 				   "only ingress is supported");
581 		return -rte_errno;
582 	}
583 	return 0;
584 }
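/*
 * Illustrative sketch (not used by the driver): the only attributes
 * accepted above are ingress, group 0 and priority 0 (or
 * MLX5_CTRL_FLOW_PRIORITY). The function name is made up for this example.
 */
static int __rte_unused
mlx5_flow_doc_check_default_attr(struct rte_flow_error *error)
{
	const struct rte_flow_attr attr = {
		.ingress = 1,
	};

	/* Returns 0; setting .egress or .group would fail with ENOTSUP. */
	return mlx5_flow_convert_attributes(&attr, error);
}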
585 
586 /**
587  * Extract actions request to the parser.
588  *
589  * @param dev
590  *   Pointer to Ethernet device.
591  * @param[in] actions
592  *   Associated actions (list terminated by the END action).
593  * @param[out] error
594  *   Perform verbose error reporting if not NULL.
595  * @param[in, out] parser
596  *   Internal parser structure.
597  *
598  * @return
599  *   0 on success, a negative errno value otherwise and rte_errno is set.
600  */
601 static int
602 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
603 			  const struct rte_flow_action actions[],
604 			  struct rte_flow_error *error,
605 			  struct mlx5_flow_parse *parser)
606 {
607 	enum { FATE = 1, MARK = 2, COUNT = 4, };
608 	uint32_t overlap = 0;
609 	struct priv *priv = dev->data->dev_private;
610 
611 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
612 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
613 			continue;
614 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
615 			if (overlap & FATE)
616 				goto exit_action_overlap;
617 			overlap |= FATE;
618 			parser->drop = 1;
619 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
620 			const struct rte_flow_action_queue *queue =
621 				(const struct rte_flow_action_queue *)
622 				actions->conf;
623 
624 			if (overlap & FATE)
625 				goto exit_action_overlap;
626 			overlap |= FATE;
627 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
628 				goto exit_action_not_supported;
629 			parser->queues[0] = queue->index;
630 			parser->rss_conf = (struct rte_flow_action_rss){
631 				.queue_num = 1,
632 				.queue = parser->queues,
633 			};
634 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
635 			const struct rte_flow_action_rss *rss =
636 				(const struct rte_flow_action_rss *)
637 				actions->conf;
638 			const uint8_t *rss_key;
639 			uint32_t rss_key_len;
640 			uint16_t n;
641 
642 			if (overlap & FATE)
643 				goto exit_action_overlap;
644 			overlap |= FATE;
645 			if (rss->types & MLX5_RSS_HF_MASK) {
646 				rte_flow_error_set(error, EINVAL,
647 						   RTE_FLOW_ERROR_TYPE_ACTION,
648 						   actions,
649 						   "unsupported RSS type"
650 						   " requested");
651 				return -rte_errno;
652 			}
653 			if (rss->key_len) {
654 				rss_key_len = rss->key_len;
655 				rss_key = rss->key;
656 			} else {
657 				rss_key_len = rss_hash_default_key_len;
658 				rss_key = rss_hash_default_key;
659 			}
660 			if (rss_key_len != RTE_DIM(parser->rss_key)) {
661 				rte_flow_error_set(error, EINVAL,
662 						   RTE_FLOW_ERROR_TYPE_ACTION,
663 						   actions,
664 						   "RSS hash key must be"
665 						   " exactly 40 bytes long");
666 				return -rte_errno;
667 			}
668 			if (!rss->queue_num) {
669 				rte_flow_error_set(error, EINVAL,
670 						   RTE_FLOW_ERROR_TYPE_ACTION,
671 						   actions,
672 						   "no valid queues");
673 				return -rte_errno;
674 			}
675 			if (rss->queue_num > RTE_DIM(parser->queues)) {
676 				rte_flow_error_set(error, EINVAL,
677 						   RTE_FLOW_ERROR_TYPE_ACTION,
678 						   actions,
679 						   "too many queues for RSS"
680 						   " context");
681 				return -rte_errno;
682 			}
683 			for (n = 0; n < rss->queue_num; ++n) {
684 				if (rss->queue[n] >= priv->rxqs_n) {
685 					rte_flow_error_set(error, EINVAL,
686 						   RTE_FLOW_ERROR_TYPE_ACTION,
687 						   actions,
688 						   "queue id > number of"
689 						   " queues");
690 					return -rte_errno;
691 				}
692 			}
693 			parser->rss_conf = (struct rte_flow_action_rss){
694 				.types = rss->types,
695 				.key_len = rss_key_len,
696 				.queue_num = rss->queue_num,
697 				.key = memcpy(parser->rss_key, rss_key,
698 					      sizeof(*rss_key) * rss_key_len),
699 				.queue = memcpy(parser->queues, rss->queue,
700 						sizeof(*rss->queue) *
701 						rss->queue_num),
702 			};
703 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
704 			const struct rte_flow_action_mark *mark =
705 				(const struct rte_flow_action_mark *)
706 				actions->conf;
707 
708 			if (overlap & MARK)
709 				goto exit_action_overlap;
710 			overlap |= MARK;
711 			if (!mark) {
712 				rte_flow_error_set(error, EINVAL,
713 						   RTE_FLOW_ERROR_TYPE_ACTION,
714 						   actions,
715 						   "mark must be defined");
716 				return -rte_errno;
717 			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
718 				rte_flow_error_set(error, ENOTSUP,
719 						   RTE_FLOW_ERROR_TYPE_ACTION,
720 						   actions,
721 						   "mark must be between 0"
722 						   " and 16777199");
723 				return -rte_errno;
724 			}
725 			parser->mark = 1;
726 			parser->mark_id = mark->id;
727 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
728 			if (overlap & MARK)
729 				goto exit_action_overlap;
730 			overlap |= MARK;
731 			parser->mark = 1;
732 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
733 			   priv->config.flow_counter_en) {
734 			if (overlap & COUNT)
735 				goto exit_action_overlap;
736 			overlap |= COUNT;
737 			parser->count = 1;
738 		} else {
739 			goto exit_action_not_supported;
740 		}
741 	}
742 	/* When fate is unknown, drop traffic. */
743 	if (!(overlap & FATE))
744 		parser->drop = 1;
745 	if (parser->drop && parser->mark)
746 		parser->mark = 0;
747 	if (!parser->rss_conf.queue_num && !parser->drop) {
748 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
749 				   NULL, "no valid action");
750 		return -rte_errno;
751 	}
752 	return 0;
753 exit_action_not_supported:
754 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
755 			   actions, "action not supported");
756 	return -rte_errno;
757 exit_action_overlap:
758 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
759 			   actions, "overlapping actions are not supported");
760 	return -rte_errno;
761 }
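/*
 * Illustrative sketch (not used by the driver): parse an RSS action that
 * spreads matching traffic over the first two Rx queues with the default
 * 40-byte hash key. It assumes "dev" is an mlx5 port configured with at
 * least two Rx queues; the function name is made up for this example.
 */
static int __rte_unused
mlx5_flow_doc_parse_rss_action(struct rte_eth_dev *dev,
			       struct rte_flow_error *error)
{
	const uint16_t queues[] = { 0, 1 };
	const struct rte_flow_action_rss rss = {
		.types = ETH_RSS_NONFRAG_IPV4_TCP,
		.key_len = rss_hash_default_key_len,
		.key = rss_hash_default_key,
		.queue_num = RTE_DIM(queues),
		.queue = queues,
	};
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct mlx5_flow_parse parser = { .create = 0 };

	return mlx5_flow_convert_actions(dev, actions, error, &parser);
}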
762 
763 /**
764  * Validate items.
765  *
766  * @param[in] items
767  *   Pattern specification (list terminated by the END pattern item).
768  * @param[out] error
769  *   Perform verbose error reporting if not NULL.
770  * @param[in, out] parser
771  *   Internal parser structure.
772  *
773  * @return
774  *   0 on success, a negative errno value otherwise and rte_errno is set.
775  */
776 static int
777 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
778 				 struct rte_flow_error *error,
779 				 struct mlx5_flow_parse *parser)
780 {
781 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
782 	unsigned int i;
783 	int ret = 0;
784 
785 	/* Initialise the offsets to start after verbs attribute. */
786 	for (i = 0; i != hash_rxq_init_n; ++i)
787 		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
788 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
789 		const struct mlx5_flow_items *token = NULL;
790 		unsigned int n;
791 
792 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
793 			continue;
794 		for (i = 0;
795 		     cur_item->items &&
796 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
797 		     ++i) {
798 			if (cur_item->items[i] == items->type) {
799 				token = &mlx5_flow_items[items->type];
800 				break;
801 			}
802 		}
803 		if (!token) {
804 			ret = -ENOTSUP;
805 			goto exit_item_not_supported;
806 		}
807 		cur_item = token;
808 		ret = mlx5_flow_item_validate(items,
809 					      (const uint8_t *)cur_item->mask,
810 					      cur_item->mask_sz);
811 		if (ret)
812 			goto exit_item_not_supported;
813 		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
814 			if (parser->inner) {
815 				rte_flow_error_set(error, ENOTSUP,
816 						   RTE_FLOW_ERROR_TYPE_ITEM,
817 						   items,
818 						   "cannot recognize multiple"
819 						   " VXLAN encapsulations");
820 				return -rte_errno;
821 			}
822 			parser->inner = IBV_FLOW_SPEC_INNER;
823 		}
824 		if (parser->drop) {
825 			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
826 		} else {
827 			for (n = 0; n != hash_rxq_init_n; ++n)
828 				parser->queue[n].offset += cur_item->dst_sz;
829 		}
830 	}
831 	if (parser->drop) {
832 		parser->queue[HASH_RXQ_ETH].offset +=
833 			sizeof(struct ibv_flow_spec_action_drop);
834 	}
835 	if (parser->mark) {
836 		for (i = 0; i != hash_rxq_init_n; ++i)
837 			parser->queue[i].offset +=
838 				sizeof(struct ibv_flow_spec_action_tag);
839 	}
840 	if (parser->count) {
841 		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
842 
843 		for (i = 0; i != hash_rxq_init_n; ++i)
844 			parser->queue[i].offset += size;
845 	}
846 	return 0;
847 exit_item_not_supported:
848 	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
849 				  items, "item not supported");
850 }
851 
852 /**
853  * Allocate memory space to store verbs flow attributes.
854  *
855  * @param[in] size
856  *   Number of bytes to allocate.
857  * @param[out] error
858  *   Perform verbose error reporting if not NULL.
859  *
860  * @return
861  *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
862  */
863 static struct ibv_flow_attr *
864 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
865 {
866 	struct ibv_flow_attr *ibv_attr;
867 
868 	ibv_attr = rte_calloc(__func__, 1, size, 0);
869 	if (!ibv_attr) {
870 		rte_flow_error_set(error, ENOMEM,
871 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
872 				   NULL,
873 				   "cannot allocate verbs spec attributes");
874 		return NULL;
875 	}
876 	return ibv_attr;
877 }
878 
879 /**
880  * Give inner packet matching a higher priority than non-inner (outer)
881  * matching.
882  *
883  * @param[in, out] parser
884  *   Internal parser structure.
885  * @param attr
886  *   User flow attribute.
887  */
888 static void
889 mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
890 			  const struct rte_flow_attr *attr)
891 {
892 	unsigned int i;
893 
894 	if (parser->drop) {
895 		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
896 			attr->priority +
897 			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
898 		return;
899 	}
900 	for (i = 0; i != hash_rxq_init_n; ++i) {
901 		if (parser->queue[i].ibv_attr) {
902 			parser->queue[i].ibv_attr->priority =
903 				attr->priority +
904 				hash_rxq_init[i].flow_priority -
905 				(parser->inner ? 1 : 0);
906 		}
907 	}
908 }
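/*
 * Illustrative sketch of the arithmetic above (not used by the driver):
 * with attr->priority == 0, an outer TCPv4 match lands at Verbs priority 1
 * (hash_rxq_init[HASH_RXQ_TCPV4].flow_priority), while the same match
 * inside a VXLAN tunnel gets the numerically lower, i.e. higher, priority
 * 0. The function name is made up for this example.
 */
static unsigned int __rte_unused
mlx5_flow_doc_verbs_priority(enum hash_rxq_type type,
			     unsigned int attr_priority, int inner)
{
	return attr_priority + hash_rxq_init[type].flow_priority -
	       (inner ? 1 : 0);
}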
909 
910 /**
911  * Finalise verbs flow attributes.
912  *
913  * @param[in, out] parser
914  *   Internal parser structure.
915  */
916 static void
917 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
918 {
919 	const unsigned int ipv4 =
920 		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
921 	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
922 	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
923 	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
924 	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
925 	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
926 	unsigned int i;
927 
928 	/* Remove any other flow not matching the pattern. */
929 	if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {
930 		for (i = 0; i != hash_rxq_init_n; ++i) {
931 			if (i == HASH_RXQ_ETH)
932 				continue;
933 			rte_free(parser->queue[i].ibv_attr);
934 			parser->queue[i].ibv_attr = NULL;
935 		}
936 		return;
937 	}
938 	if (parser->layer == HASH_RXQ_ETH) {
939 		goto fill;
940 	} else {
941 		/*
942 		 * The L2 (ETH) attribute becomes useless as the pattern
943 		 * defines upper layers.
944 		 */
945 		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
946 		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
947 	}
948 	/* Remove the opposite layer kind, e.g. IPv6 if the pattern is IPv4. */
949 	for (i = ohmin; i != (ohmax + 1); ++i) {
950 		if (!parser->queue[i].ibv_attr)
951 			continue;
952 		rte_free(parser->queue[i].ibv_attr);
953 		parser->queue[i].ibv_attr = NULL;
954 	}
955 	/* Remove impossible flow according to the RSS configuration. */
956 	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
957 	    parser->rss_conf.types) {
958 		/* Remove any other flow. */
959 		for (i = hmin; i != (hmax + 1); ++i) {
960 			if ((i == parser->layer) ||
961 			     (!parser->queue[i].ibv_attr))
962 				continue;
963 			rte_free(parser->queue[i].ibv_attr);
964 			parser->queue[i].ibv_attr = NULL;
965 		}
966 	} else if (!parser->queue[ip].ibv_attr) {
967 		/* No RSS possible with the current configuration. */
968 		parser->rss_conf.queue_num = 1;
969 		return;
970 	}
971 fill:
972 	/*
973 	 * Fill missing layers in verbs specifications, or compute the correct
974 	 * offset to allocate the memory space for the attributes and
975 	 * specifications.
976 	 */
977 	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
978 		union {
979 			struct ibv_flow_spec_ipv4_ext ipv4;
980 			struct ibv_flow_spec_ipv6 ipv6;
981 			struct ibv_flow_spec_tcp_udp udp_tcp;
982 		} specs;
983 		void *dst;
984 		uint16_t size;
985 
986 		if (i == parser->layer)
987 			continue;
988 		if (parser->layer == HASH_RXQ_ETH) {
989 			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
990 				size = sizeof(struct ibv_flow_spec_ipv4_ext);
991 				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
992 					.type = IBV_FLOW_SPEC_IPV4_EXT,
993 					.size = size,
994 				};
995 			} else {
996 				size = sizeof(struct ibv_flow_spec_ipv6);
997 				specs.ipv6 = (struct ibv_flow_spec_ipv6){
998 					.type = IBV_FLOW_SPEC_IPV6,
999 					.size = size,
1000 				};
1001 			}
1002 			if (parser->queue[i].ibv_attr) {
1003 				dst = (void *)((uintptr_t)
1004 					       parser->queue[i].ibv_attr +
1005 					       parser->queue[i].offset);
1006 				memcpy(dst, &specs, size);
1007 				++parser->queue[i].ibv_attr->num_of_specs;
1008 			}
1009 			parser->queue[i].offset += size;
1010 		}
1011 		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1012 		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1013 			size = sizeof(struct ibv_flow_spec_tcp_udp);
1014 			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1015 				.type = ((i == HASH_RXQ_UDPV4 ||
1016 					  i == HASH_RXQ_UDPV6) ?
1017 					 IBV_FLOW_SPEC_UDP :
1018 					 IBV_FLOW_SPEC_TCP),
1019 				.size = size,
1020 			};
1021 			if (parser->queue[i].ibv_attr) {
1022 				dst = (void *)((uintptr_t)
1023 					       parser->queue[i].ibv_attr +
1024 					       parser->queue[i].offset);
1025 				memcpy(dst, &specs, size);
1026 				++parser->queue[i].ibv_attr->num_of_specs;
1027 			}
1028 			parser->queue[i].offset += size;
1029 		}
1030 	}
1031 }
1032 
1033 /**
1034  * Validate and convert a flow supported by the NIC.
1035  *
1036  * @param dev
1037  *   Pointer to Ethernet device.
1038  * @param[in] attr
1039  *   Flow rule attributes.
1040  * @param[in] pattern
1041  *   Pattern specification (list terminated by the END pattern item).
1042  * @param[in] actions
1043  *   Associated actions (list terminated by the END action).
1044  * @param[out] error
1045  *   Perform verbose error reporting if not NULL.
1046  * @param[in, out] parser
1047  *   Internal parser structure.
1048  *
1049  * @return
1050  *   0 on success, a negative errno value otherwise and rte_errno is set.
1051  */
1052 static int
1053 mlx5_flow_convert(struct rte_eth_dev *dev,
1054 		  const struct rte_flow_attr *attr,
1055 		  const struct rte_flow_item items[],
1056 		  const struct rte_flow_action actions[],
1057 		  struct rte_flow_error *error,
1058 		  struct mlx5_flow_parse *parser)
1059 {
1060 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1061 	unsigned int i;
1062 	int ret;
1063 
1064 	/* First step. Validate the attributes, items and actions. */
1065 	*parser = (struct mlx5_flow_parse){
1066 		.create = parser->create,
1067 		.layer = HASH_RXQ_ETH,
1068 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
1069 	};
1070 	ret = mlx5_flow_convert_attributes(attr, error);
1071 	if (ret)
1072 		return ret;
1073 	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1074 	if (ret)
1075 		return ret;
1076 	ret = mlx5_flow_convert_items_validate(items, error, parser);
1077 	if (ret)
1078 		return ret;
1079 	mlx5_flow_convert_finalise(parser);
1080 	/*
1081 	 * Second step.
1082 	 * Allocate the memory space to store verbs specifications.
1083 	 */
1084 	if (parser->drop) {
1085 		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1086 
1087 		parser->queue[HASH_RXQ_ETH].ibv_attr =
1088 			mlx5_flow_convert_allocate(offset, error);
1089 		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1090 			goto exit_enomem;
1091 		parser->queue[HASH_RXQ_ETH].offset =
1092 			sizeof(struct ibv_flow_attr);
1093 	} else {
1094 		for (i = 0; i != hash_rxq_init_n; ++i) {
1095 			unsigned int offset;
1096 
1097 			if (!(parser->rss_conf.types &
1098 			      hash_rxq_init[i].dpdk_rss_hf) &&
1099 			    (i != HASH_RXQ_ETH))
1100 				continue;
1101 			offset = parser->queue[i].offset;
1102 			parser->queue[i].ibv_attr =
1103 				mlx5_flow_convert_allocate(offset, error);
1104 			if (!parser->queue[i].ibv_attr)
1105 				goto exit_enomem;
1106 			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1107 		}
1108 	}
1109 	/* Third step. Parse the items and fill the specifications. */
1110 	parser->inner = 0;
1111 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1112 		struct mlx5_flow_data data = {
1113 			.parser = parser,
1114 			.error = error,
1115 		};
1116 
1117 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1118 			continue;
1119 		cur_item = &mlx5_flow_items[items->type];
1120 		ret = cur_item->convert(items,
1121 					(cur_item->default_mask ?
1122 					 cur_item->default_mask :
1123 					 cur_item->mask),
1124 					 &data);
1125 		if (ret)
1126 			goto exit_free;
1127 	}
1128 	if (parser->mark)
1129 		mlx5_flow_create_flag_mark(parser, parser->mark_id);
1130 	if (parser->count && parser->create) {
1131 		mlx5_flow_create_count(dev, parser);
1132 		if (!parser->cs)
1133 			goto exit_count_error;
1134 	}
1135 	/*
1136 	 * Last step. Complete the missing specifications to match the RSS
1137 	 * configuration.
1138 	 */
1139 	if (!parser->drop)
1140 		mlx5_flow_convert_finalise(parser);
1141 	mlx5_flow_update_priority(parser, attr);
1142 exit_free:
1143 	/* Only verification is expected; all resources should be released. */
1144 	if (!parser->create) {
1145 		for (i = 0; i != hash_rxq_init_n; ++i) {
1146 			if (parser->queue[i].ibv_attr) {
1147 				rte_free(parser->queue[i].ibv_attr);
1148 				parser->queue[i].ibv_attr = NULL;
1149 			}
1150 		}
1151 	}
1152 	return ret;
1153 exit_enomem:
1154 	for (i = 0; i != hash_rxq_init_n; ++i) {
1155 		if (parser->queue[i].ibv_attr) {
1156 			rte_free(parser->queue[i].ibv_attr);
1157 			parser->queue[i].ibv_attr = NULL;
1158 		}
1159 	}
1160 	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1161 			   NULL, "cannot allocate verbs spec attributes");
1162 	return -rte_errno;
1163 exit_count_error:
1164 	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1165 			   NULL, "cannot create counter");
1166 	return -rte_errno;
1167 }
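/*
 * Illustrative sketch (not used by the driver): run the whole conversion
 * in validation mode (parser.create left at 0) for an
 * "eth / ipv4 / udp -> queue 0" rule, so every Verbs attribute allocated
 * on the way is released before returning. It assumes "dev" is an mlx5
 * port with at least one Rx queue configured; the function name is made
 * up for this example.
 */
static int __rte_unused
mlx5_flow_doc_validate_udp_rule(struct rte_eth_dev *dev,
				struct rte_flow_error *error)
{
	const struct rte_flow_attr attr = { .ingress = 1 };
	const struct rte_flow_item items[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	const struct rte_flow_action_queue queue = { .index = 0 };
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct mlx5_flow_parse parser = { .create = 0 };

	return mlx5_flow_convert(dev, &attr, items, actions, error, &parser);
}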
1168 
1169 /**
1170  * Copy the created specification into every active Verbs flow attribute.
1171  *
1172  * @param parser
1173  *   Internal parser structure.
1174  * @param src
1175  *   Specification to copy.
1176  * @param size
1177  *   Size in bytes of the specification to copy.
1178  */
1179 static void
1180 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1181 		      unsigned int size)
1182 {
1183 	unsigned int i;
1184 	void *dst;
1185 
1186 	for (i = 0; i != hash_rxq_init_n; ++i) {
1187 		if (!parser->queue[i].ibv_attr)
1188 			continue;
1189 		/* Specification must be the same l3 type or none. */
1190 		if (parser->layer == HASH_RXQ_ETH ||
1191 		    (hash_rxq_init[parser->layer].ip_version ==
1192 		     hash_rxq_init[i].ip_version) ||
1193 		    (hash_rxq_init[i].ip_version == 0)) {
1194 			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1195 					parser->queue[i].offset);
1196 			memcpy(dst, src, size);
1197 			++parser->queue[i].ibv_attr->num_of_specs;
1198 			parser->queue[i].offset += size;
1199 		}
1200 	}
1201 }
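/*
 * Illustrative sketch (not used by the driver): append a single empty
 * Ethernet specification to a freshly allocated L2 attribute, the same way
 * the conversion callbacks below use mlx5_flow_create_copy(). All names
 * prefixed with "doc" are made up for this example.
 */
static void __rte_unused
mlx5_flow_doc_copy_example(void)
{
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = IBV_FLOW_SPEC_ETH,
		.size = eth_size,
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};

	parser.queue[HASH_RXQ_ETH].ibv_attr =
		mlx5_flow_convert_allocate(sizeof(struct ibv_flow_attr) +
					   eth_size, NULL);
	if (!parser.queue[HASH_RXQ_ETH].ibv_attr)
		return;
	parser.queue[HASH_RXQ_ETH].offset = sizeof(struct ibv_flow_attr);
	mlx5_flow_create_copy(&parser, &eth, eth_size);
	/* num_of_specs is now 1 and offset points past the copied spec. */
	rte_free(parser.queue[HASH_RXQ_ETH].ibv_attr);
}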
1202 
1203 /**
1204  * Convert Ethernet item to Verbs specification.
1205  *
1206  * @param item[in]
1207  *   Item specification.
1208  * @param default_mask[in]
1209  *   Default bit-masks to use when item->mask is not provided.
1210  * @param data[in, out]
1211  *   User structure.
1212  *
1213  * @return
1214  *   0 on success, a negative errno value otherwise and rte_errno is set.
1215  */
1216 static int
1217 mlx5_flow_create_eth(const struct rte_flow_item *item,
1218 		     const void *default_mask,
1219 		     struct mlx5_flow_data *data)
1220 {
1221 	const struct rte_flow_item_eth *spec = item->spec;
1222 	const struct rte_flow_item_eth *mask = item->mask;
1223 	struct mlx5_flow_parse *parser = data->parser;
1224 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1225 	struct ibv_flow_spec_eth eth = {
1226 		.type = parser->inner | IBV_FLOW_SPEC_ETH,
1227 		.size = eth_size,
1228 	};
1229 
1230 	/* Don't update layer for the inner pattern. */
1231 	if (!parser->inner)
1232 		parser->layer = HASH_RXQ_ETH;
1233 	if (spec) {
1234 		unsigned int i;
1235 
1236 		if (!mask)
1237 			mask = default_mask;
1238 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1239 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1240 		eth.val.ether_type = spec->type;
1241 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1242 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1243 		eth.mask.ether_type = mask->type;
1244 		/* Remove unwanted bits from values. */
1245 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1246 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1247 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
1248 		}
1249 		eth.val.ether_type &= eth.mask.ether_type;
1250 	}
1251 	mlx5_flow_create_copy(parser, &eth, eth_size);
1252 	return 0;
1253 }
1254 
1255 /**
1256  * Convert VLAN item to Verbs specification.
1257  *
1258  * @param item[in]
1259  *   Item specification.
1260  * @param default_mask[in]
1261  *   Default bit-masks to use when item->mask is not provided.
1262  * @param data[in, out]
1263  *   User structure.
1264  *
1265  * @return
1266  *   0 on success, a negative errno value otherwise and rte_errno is set.
1267  */
1268 static int
1269 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1270 		      const void *default_mask,
1271 		      struct mlx5_flow_data *data)
1272 {
1273 	const struct rte_flow_item_vlan *spec = item->spec;
1274 	const struct rte_flow_item_vlan *mask = item->mask;
1275 	struct mlx5_flow_parse *parser = data->parser;
1276 	struct ibv_flow_spec_eth *eth;
1277 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1278 
1279 	if (spec) {
1280 		unsigned int i;
1281 		if (!mask)
1282 			mask = default_mask;
1283 
1284 		for (i = 0; i != hash_rxq_init_n; ++i) {
1285 			if (!parser->queue[i].ibv_attr)
1286 				continue;
1287 
1288 			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1289 				       parser->queue[i].offset - eth_size);
1290 			eth->val.vlan_tag = spec->tci;
1291 			eth->mask.vlan_tag = mask->tci;
1292 			eth->val.vlan_tag &= eth->mask.vlan_tag;
1293 			/*
1294 			 * From the Verbs perspective an empty VLAN is
1295 			 * equivalent to a packet without a VLAN layer.
1296 			 */
1297 			if (!eth->mask.vlan_tag)
1298 				goto error;
1299 		}
1300 		return 0;
1301 	}
1302 error:
1303 	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1304 				  item, "VLAN cannot be empty");
1305 }
1306 
1307 /**
1308  * Convert IPv4 item to Verbs specification.
1309  *
1310  * @param item[in]
1311  *   Item specification.
1312  * @param default_mask[in]
1313  *   Default bit-masks to use when item->mask is not provided.
1314  * @param data[in, out]
1315  *   User structure.
1316  *
1317  * @return
1318  *   0 on success, a negative errno value otherwise and rte_errno is set.
1319  */
1320 static int
1321 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1322 		      const void *default_mask,
1323 		      struct mlx5_flow_data *data)
1324 {
1325 	const struct rte_flow_item_ipv4 *spec = item->spec;
1326 	const struct rte_flow_item_ipv4 *mask = item->mask;
1327 	struct mlx5_flow_parse *parser = data->parser;
1328 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1329 	struct ibv_flow_spec_ipv4_ext ipv4 = {
1330 		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1331 		.size = ipv4_size,
1332 	};
1333 
1334 	/* Don't update layer for the inner pattern. */
1335 	if (!parser->inner)
1336 		parser->layer = HASH_RXQ_IPV4;
1337 	if (spec) {
1338 		if (!mask)
1339 			mask = default_mask;
1340 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1341 			.src_ip = spec->hdr.src_addr,
1342 			.dst_ip = spec->hdr.dst_addr,
1343 			.proto = spec->hdr.next_proto_id,
1344 			.tos = spec->hdr.type_of_service,
1345 		};
1346 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1347 			.src_ip = mask->hdr.src_addr,
1348 			.dst_ip = mask->hdr.dst_addr,
1349 			.proto = mask->hdr.next_proto_id,
1350 			.tos = mask->hdr.type_of_service,
1351 		};
1352 		/* Remove unwanted bits from values. */
1353 		ipv4.val.src_ip &= ipv4.mask.src_ip;
1354 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1355 		ipv4.val.proto &= ipv4.mask.proto;
1356 		ipv4.val.tos &= ipv4.mask.tos;
1357 	}
1358 	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1359 	return 0;
1360 }
1361 
1362 /**
1363  * Convert IPv6 item to Verbs specification.
1364  *
1365  * @param item[in]
1366  *   Item specification.
1367  * @param default_mask[in]
1368  *   Default bit-masks to use when item->mask is not provided.
1369  * @param data[in, out]
1370  *   User structure.
1371  *
1372  * @return
1373  *   0 on success, a negative errno value otherwise and rte_errno is set.
1374  */
1375 static int
1376 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1377 		      const void *default_mask,
1378 		      struct mlx5_flow_data *data)
1379 {
1380 	const struct rte_flow_item_ipv6 *spec = item->spec;
1381 	const struct rte_flow_item_ipv6 *mask = item->mask;
1382 	struct mlx5_flow_parse *parser = data->parser;
1383 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1384 	struct ibv_flow_spec_ipv6 ipv6 = {
1385 		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
1386 		.size = ipv6_size,
1387 	};
1388 
1389 	/* Don't update layer for the inner pattern. */
1390 	if (!parser->inner)
1391 		parser->layer = HASH_RXQ_IPV6;
1392 	if (spec) {
1393 		unsigned int i;
1394 		uint32_t vtc_flow_val;
1395 		uint32_t vtc_flow_mask;
1396 
1397 		if (!mask)
1398 			mask = default_mask;
1399 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1400 		       RTE_DIM(ipv6.val.src_ip));
1401 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1402 		       RTE_DIM(ipv6.val.dst_ip));
1403 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1404 		       RTE_DIM(ipv6.mask.src_ip));
1405 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1406 		       RTE_DIM(ipv6.mask.dst_ip));
1407 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1408 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1409 		ipv6.val.flow_label =
1410 			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1411 					 IPV6_HDR_FL_SHIFT);
1412 		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1413 					 IPV6_HDR_TC_SHIFT;
1414 		ipv6.val.next_hdr = spec->hdr.proto;
1415 		ipv6.val.hop_limit = spec->hdr.hop_limits;
1416 		ipv6.mask.flow_label =
1417 			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1418 					 IPV6_HDR_FL_SHIFT);
1419 		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1420 					  IPV6_HDR_TC_SHIFT;
1421 		ipv6.mask.next_hdr = mask->hdr.proto;
1422 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
1423 		/* Remove unwanted bits from values. */
1424 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1425 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1426 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1427 		}
1428 		ipv6.val.flow_label &= ipv6.mask.flow_label;
1429 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1430 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1431 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1432 	}
1433 	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1434 	return 0;
1435 }
1436 
1437 /**
1438  * Convert UDP item to Verbs specification.
1439  *
1440  * @param item[in]
1441  *   Item specification.
1442  * @param default_mask[in]
1443  *   Default bit-masks to use when item->mask is not provided.
1444  * @param data[in, out]
1445  *   User structure.
1446  *
1447  * @return
1448  *   0 on success, a negative errno value otherwise and rte_errno is set.
1449  */
1450 static int
1451 mlx5_flow_create_udp(const struct rte_flow_item *item,
1452 		     const void *default_mask,
1453 		     struct mlx5_flow_data *data)
1454 {
1455 	const struct rte_flow_item_udp *spec = item->spec;
1456 	const struct rte_flow_item_udp *mask = item->mask;
1457 	struct mlx5_flow_parse *parser = data->parser;
1458 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1459 	struct ibv_flow_spec_tcp_udp udp = {
1460 		.type = parser->inner | IBV_FLOW_SPEC_UDP,
1461 		.size = udp_size,
1462 	};
1463 
1464 	/* Don't update layer for the inner pattern. */
1465 	if (!parser->inner) {
1466 		if (parser->layer == HASH_RXQ_IPV4)
1467 			parser->layer = HASH_RXQ_UDPV4;
1468 		else
1469 			parser->layer = HASH_RXQ_UDPV6;
1470 	}
1471 	if (spec) {
1472 		if (!mask)
1473 			mask = default_mask;
1474 		udp.val.dst_port = spec->hdr.dst_port;
1475 		udp.val.src_port = spec->hdr.src_port;
1476 		udp.mask.dst_port = mask->hdr.dst_port;
1477 		udp.mask.src_port = mask->hdr.src_port;
1478 		/* Remove unwanted bits from values. */
1479 		udp.val.src_port &= udp.mask.src_port;
1480 		udp.val.dst_port &= udp.mask.dst_port;
1481 	}
1482 	mlx5_flow_create_copy(parser, &udp, udp_size);
1483 	return 0;
1484 }
1485 
1486 /**
1487  * Convert TCP item to Verbs specification.
1488  *
1489  * @param item[in]
1490  *   Item specification.
1491  * @param default_mask[in]
1492  *   Default bit-masks to use when item->mask is not provided.
1493  * @param data[in, out]
1494  *   User structure.
1495  *
1496  * @return
1497  *   0 on success, a negative errno value otherwise and rte_errno is set.
1498  */
1499 static int
1500 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1501 		     const void *default_mask,
1502 		     struct mlx5_flow_data *data)
1503 {
1504 	const struct rte_flow_item_tcp *spec = item->spec;
1505 	const struct rte_flow_item_tcp *mask = item->mask;
1506 	struct mlx5_flow_parse *parser = data->parser;
1507 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1508 	struct ibv_flow_spec_tcp_udp tcp = {
1509 		.type = parser->inner | IBV_FLOW_SPEC_TCP,
1510 		.size = tcp_size,
1511 	};
1512 
1513 	/* Don't update layer for the inner pattern. */
1514 	if (!parser->inner) {
1515 		if (parser->layer == HASH_RXQ_IPV4)
1516 			parser->layer = HASH_RXQ_TCPV4;
1517 		else
1518 			parser->layer = HASH_RXQ_TCPV6;
1519 	}
1520 	if (spec) {
1521 		if (!mask)
1522 			mask = default_mask;
1523 		tcp.val.dst_port = spec->hdr.dst_port;
1524 		tcp.val.src_port = spec->hdr.src_port;
1525 		tcp.mask.dst_port = mask->hdr.dst_port;
1526 		tcp.mask.src_port = mask->hdr.src_port;
1527 		/* Remove unwanted bits from values. */
1528 		tcp.val.src_port &= tcp.mask.src_port;
1529 		tcp.val.dst_port &= tcp.mask.dst_port;
1530 	}
1531 	mlx5_flow_create_copy(parser, &tcp, tcp_size);
1532 	return 0;
1533 }
1534 
1535 /**
1536  * Convert VXLAN item to Verbs specification.
1537  *
1538  * @param item[in]
1539  *   Item specification.
1540  * @param default_mask[in]
1541  *   Default bit-masks to use when item->mask is not provided.
1542  * @param data[in, out]
1543  *   User structure.
1544  *
1545  * @return
1546  *   0 on success, a negative errno value otherwise and rte_errno is set.
1547  */
1548 static int
1549 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1550 		       const void *default_mask,
1551 		       struct mlx5_flow_data *data)
1552 {
1553 	const struct rte_flow_item_vxlan *spec = item->spec;
1554 	const struct rte_flow_item_vxlan *mask = item->mask;
1555 	struct mlx5_flow_parse *parser = data->parser;
1556 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1557 	struct ibv_flow_spec_tunnel vxlan = {
1558 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1559 		.size = size,
1560 	};
1561 	union vni {
1562 		uint32_t vlan_id;
1563 		uint8_t vni[4];
1564 	} id;
1565 
1566 	id.vni[0] = 0;
1567 	parser->inner = IBV_FLOW_SPEC_INNER;
1568 	if (spec) {
1569 		if (!mask)
1570 			mask = default_mask;
1571 		memcpy(&id.vni[1], spec->vni, 3);
1572 		vxlan.val.tunnel_id = id.vlan_id;
1573 		memcpy(&id.vni[1], mask->vni, 3);
1574 		vxlan.mask.tunnel_id = id.vlan_id;
1575 		/* Remove unwanted bits from values. */
1576 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1577 	}
1578 	/*
1579 	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1580 	 * layer is defined in the Verbs specification, it is interpreted as a
1581 	 * wildcard and all packets will match this rule. If it follows a full
1582 	 * stack of layers (e.g. eth / ipv4 / udp), all packets matching those
1583 	 * layers will also match this rule.
1584 	 * To avoid such a situation, VNI 0 is currently refused.
1585 	 */
1586 	if (!vxlan.val.tunnel_id)
1587 		return rte_flow_error_set(data->error, EINVAL,
1588 					  RTE_FLOW_ERROR_TYPE_ITEM,
1589 					  item,
1590 					  "VxLAN vni cannot be 0");
1591 	mlx5_flow_create_copy(parser, &vxlan, size);
1592 	return 0;
1593 }
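/*
 * Illustrative sketch (not used by the driver): the union trick above packs
 * the 24-bit VNI into the 32-bit Verbs tunnel id with the first byte
 * cleared, keeping the bytes in the order they appear on the wire. The
 * function name is made up for this example.
 */
static uint32_t __rte_unused
mlx5_flow_doc_vni_to_tunnel_id(const uint8_t vni[3])
{
	union {
		uint32_t tunnel_id;
		uint8_t vni[4];
	} id = { .vni = { 0 } };

	memcpy(&id.vni[1], vni, 3);
	/* E.g. VNI { 0x12, 0x34, 0x56 } gives bytes 00 12 34 56 in memory. */
	return id.tunnel_id;
}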
1594 
1595 /**
1596  * Convert mark/flag action to Verbs specification.
1597  *
1598  * @param parser
1599  *   Internal parser structure.
1600  * @param mark_id
1601  *   Mark identifier.
1602  *
1603  * @return
1604  *   0 on success, a negative errno value otherwise and rte_errno is set.
1605  */
1606 static int
1607 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1608 {
1609 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1610 	struct ibv_flow_spec_action_tag tag = {
1611 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1612 		.size = size,
1613 		.tag_id = mlx5_flow_mark_set(mark_id),
1614 	};
1615 
1616 	assert(parser->mark);
1617 	mlx5_flow_create_copy(parser, &tag, size);
1618 	return 0;
1619 }
1620 
1621 /**
1622  * Convert count action to Verbs specification.
1623  *
1624  * @param dev
1625  *   Pointer to Ethernet device.
1626  * @param parser
1627  *   Pointer to MLX5 flow parser structure.
1628  *
1629  * @return
1630  *   0 on success, a negative errno value otherwise and rte_errno is set.
1631  */
1632 static int
1633 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1634 		       struct mlx5_flow_parse *parser __rte_unused)
1635 {
1636 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1637 	struct priv *priv = dev->data->dev_private;
1638 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1639 	struct ibv_counter_set_init_attr init_attr = {0};
1640 	struct ibv_flow_spec_counter_action counter = {
1641 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
1642 		.size = size,
1643 		.counter_set_handle = 0,
1644 	};
1645 
1646 	init_attr.counter_set_id = 0;
1647 	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1648 	if (!parser->cs) {
1649 		rte_errno = EINVAL;
1650 		return -rte_errno;
1651 	}
1652 	counter.counter_set_handle = parser->cs->handle;
1653 	mlx5_flow_create_copy(parser, &counter, size);
1654 #endif
1655 	return 0;
1656 }
1657 
1658 /**
1659  * Complete flow rule creation with a drop queue.
1660  *
1661  * @param dev
1662  *   Pointer to Ethernet device.
1663  * @param parser
1664  *   Internal parser structure.
1665  * @param flow
1666  *   Pointer to the rte_flow.
1667  * @param[out] error
1668  *   Perform verbose error reporting if not NULL.
1669  *
1670  * @return
1671  *   0 on success, a negative errno value otherwise and rte_errno is set.
1672  */
1673 static int
1674 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1675 				   struct mlx5_flow_parse *parser,
1676 				   struct rte_flow *flow,
1677 				   struct rte_flow_error *error)
1678 {
1679 	struct priv *priv = dev->data->dev_private;
1680 	struct ibv_flow_spec_action_drop *drop;
1681 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1682 
1683 	assert(priv->pd);
1684 	assert(priv->ctx);
1685 	flow->drop = 1;
1686 	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1687 			parser->queue[HASH_RXQ_ETH].offset);
1688 	*drop = (struct ibv_flow_spec_action_drop){
1689 			.type = IBV_FLOW_SPEC_ACTION_DROP,
1690 			.size = size,
1691 	};
1692 	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1693 	parser->queue[HASH_RXQ_ETH].offset += size;
1694 	flow->frxq[HASH_RXQ_ETH].ibv_attr =
1695 		parser->queue[HASH_RXQ_ETH].ibv_attr;
1696 	if (parser->count)
1697 		flow->cs = parser->cs;
1698 	if (!priv->dev->data->dev_started)
1699 		return 0;
1700 	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1701 	flow->frxq[HASH_RXQ_ETH].ibv_flow =
1702 		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1703 				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
1704 	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1705 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1706 				   NULL, "flow rule creation failure");
1707 		goto error;
1708 	}
1709 	return 0;
1710 error:
1711 	assert(flow);
1712 	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1713 		claim_zero(mlx5_glue->destroy_flow
1714 			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1715 		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1716 	}
1717 	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1718 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1719 		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1720 	}
1721 	if (flow->cs) {
1722 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1723 		flow->cs = NULL;
1724 		parser->cs = NULL;
1725 	}
1726 	return -rte_errno;
1727 }
1728 
1729 /**
1730  * Create hash Rx queues when RSS is enabled.
1731  *
1732  * @param dev
1733  *   Pointer to Ethernet device.
1734  * @param parser
1735  *   Internal parser structure.
1736  * @param flow
1737  *   Pointer to the rte_flow.
1738  * @param[out] error
1739  *   Perform verbose error reporting if not NULL.
1740  *
1741  * @return
1742  *   0 on success, a negative errno value otherwise and rte_errno is set.
1743  */
1744 static int
1745 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1746 				  struct mlx5_flow_parse *parser,
1747 				  struct rte_flow *flow,
1748 				  struct rte_flow_error *error)
1749 {
1750 	struct priv *priv = dev->data->dev_private;
1751 	unsigned int i;
1752 
1753 	for (i = 0; i != hash_rxq_init_n; ++i) {
1754 		uint64_t hash_fields;
1755 
1756 		if (!parser->queue[i].ibv_attr)
1757 			continue;
1758 		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1759 		parser->queue[i].ibv_attr = NULL;
1760 		hash_fields = hash_rxq_init[i].hash_fields;
1761 		if (!priv->dev->data->dev_started)
1762 			continue;
1763 		flow->frxq[i].hrxq =
1764 			mlx5_hrxq_get(dev,
1765 				      parser->rss_conf.key,
1766 				      parser->rss_conf.key_len,
1767 				      hash_fields,
1768 				      parser->rss_conf.queue,
1769 				      parser->rss_conf.queue_num);
1770 		if (flow->frxq[i].hrxq)
1771 			continue;
1772 		flow->frxq[i].hrxq =
1773 			mlx5_hrxq_new(dev,
1774 				      parser->rss_conf.key,
1775 				      parser->rss_conf.key_len,
1776 				      hash_fields,
1777 				      parser->rss_conf.queue,
1778 				      parser->rss_conf.queue_num);
1779 		if (!flow->frxq[i].hrxq) {
1780 			return rte_flow_error_set(error, ENOMEM,
1781 						  RTE_FLOW_ERROR_TYPE_HANDLE,
1782 						  NULL,
1783 						  "cannot create hash rxq");
1784 		}
1785 	}
1786 	return 0;
1787 }
1788 
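/*
 * Usage sketch (illustrative only, not part of the driver): the RSS
 * configuration consumed above is supplied by the application as a
 * struct rte_flow_action_rss; the queue indices and the all-zero hash key
 * below are placeholders.
 *
 *	static const uint8_t rss_key[40] = { 0 };
 *	uint16_t queues[2] = { 0, 1 };
 *	struct rte_flow_action_rss rss = {
 *		.types = ETH_RSS_IP | ETH_RSS_UDP,
 *		.key_len = sizeof(rss_key),
 *		.key = rss_key,
 *		.queue_num = 2,
 *		.queue = queues,
 *	};
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 */
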
1789 /**
1790  * Complete flow rule creation.
1791  *
1792  * @param dev
1793  *   Pointer to Ethernet device.
1794  * @param parser
1795  *   Internal parser structure.
1796  * @param flow
1797  *   Pointer to the rte_flow.
1798  * @param[out] error
1799  *   Perform verbose error reporting if not NULL.
1800  *
1801  * @return
1802  *   0 on success, a negative errno value otherwise and rte_errno is set.
1803  */
1804 static int
1805 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1806 			      struct mlx5_flow_parse *parser,
1807 			      struct rte_flow *flow,
1808 			      struct rte_flow_error *error)
1809 {
1810 	struct priv *priv = dev->data->dev_private;
1811 	int ret;
1812 	unsigned int i;
1813 	unsigned int flows_n = 0;
1814 
1815 	assert(priv->pd);
1816 	assert(priv->ctx);
1817 	assert(!parser->drop);
1818 	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1819 	if (ret)
1820 		goto error;
1821 	if (parser->count)
1822 		flow->cs = parser->cs;
1823 	if (!priv->dev->data->dev_started)
1824 		return 0;
1825 	for (i = 0; i != hash_rxq_init_n; ++i) {
1826 		if (!flow->frxq[i].hrxq)
1827 			continue;
1828 		flow->frxq[i].ibv_flow =
1829 			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1830 					       flow->frxq[i].ibv_attr);
1831 		if (!flow->frxq[i].ibv_flow) {
1832 			rte_flow_error_set(error, ENOMEM,
1833 					   RTE_FLOW_ERROR_TYPE_HANDLE,
1834 					   NULL, "flow rule creation failure");
1835 			goto error;
1836 		}
1837 		++flows_n;
1838 		DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
1839 			dev->data->port_id,
1840 			(void *)flow, i,
1841 			(void *)flow->frxq[i].hrxq,
1842 			(void *)flow->frxq[i].ibv_flow);
1843 	}
1844 	if (!flows_n) {
1845 		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1846 				   NULL, "internal error in flow creation");
1847 		goto error;
1848 	}
1849 	for (i = 0; i != parser->rss_conf.queue_num; ++i) {
1850 		struct mlx5_rxq_data *q =
1851 			(*priv->rxqs)[parser->rss_conf.queue[i]];
1852 
1853 		q->mark |= parser->mark;
1854 	}
1855 	return 0;
1856 error:
1857 	ret = rte_errno; /* Save rte_errno before cleanup. */
1858 	assert(flow);
1859 	for (i = 0; i != hash_rxq_init_n; ++i) {
1860 		if (flow->frxq[i].ibv_flow) {
1861 			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1862 
1863 			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1864 		}
1865 		if (flow->frxq[i].hrxq)
1866 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1867 		if (flow->frxq[i].ibv_attr)
1868 			rte_free(flow->frxq[i].ibv_attr);
1869 	}
1870 	if (flow->cs) {
1871 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1872 		flow->cs = NULL;
1873 		parser->cs = NULL;
1874 	}
1875 	rte_errno = ret; /* Restore rte_errno. */
1876 	return -rte_errno;
1877 }
1878 
1879 /**
1880  * Convert a flow.
1881  *
1882  * @param dev
1883  *   Pointer to Ethernet device.
1884  * @param list
1885  *   Pointer to a TAILQ flow list.
1886  * @param[in] attr
1887  *   Flow rule attributes.
1888  * @param[in] pattern
1889  *   Pattern specification (list terminated by the END pattern item).
1890  * @param[in] actions
1891  *   Associated actions (list terminated by the END action).
1892  * @param[out] error
1893  *   Perform verbose error reporting if not NULL.
1894  *
1895  * @return
1896  *   A flow on success, NULL otherwise and rte_errno is set.
1897  */
1898 static struct rte_flow *
1899 mlx5_flow_list_create(struct rte_eth_dev *dev,
1900 		      struct mlx5_flows *list,
1901 		      const struct rte_flow_attr *attr,
1902 		      const struct rte_flow_item items[],
1903 		      const struct rte_flow_action actions[],
1904 		      struct rte_flow_error *error)
1905 {
1906 	struct mlx5_flow_parse parser = { .create = 1, };
1907 	struct rte_flow *flow = NULL;
1908 	unsigned int i;
1909 	int ret;
1910 
1911 	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1912 	if (ret)
1913 		goto exit;
1914 	flow = rte_calloc(__func__, 1,
1915 			  sizeof(*flow) +
1916 			  parser.rss_conf.queue_num * sizeof(uint16_t),
1917 			  0);
1918 	if (!flow) {
1919 		rte_flow_error_set(error, ENOMEM,
1920 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1921 				   NULL,
1922 				   "cannot allocate flow memory");
1923 		return NULL;
1924 	}
1925 	/* Copy configuration. */
1926 	flow->queues = (uint16_t (*)[])(flow + 1);
1927 	flow->rss_conf = (struct rte_flow_action_rss){
1928 		.types = parser.rss_conf.types,
1929 		.key_len = parser.rss_conf.key_len,
1930 		.queue_num = parser.rss_conf.queue_num,
1931 		.key = memcpy(flow->rss_key, parser.rss_conf.key,
1932 			      sizeof(*parser.rss_conf.key) *
1933 			      parser.rss_conf.key_len),
1934 		.queue = memcpy(flow->queues, parser.rss_conf.queue,
1935 				sizeof(*parser.rss_conf.queue) *
1936 				parser.rss_conf.queue_num),
1937 	};
1938 	flow->mark = parser.mark;
1939 	/* Finalize the flow. */
1940 	if (parser.drop)
1941 		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1942 							 error);
1943 	else
1944 		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1945 	if (ret)
1946 		goto exit;
1947 	TAILQ_INSERT_TAIL(list, flow, next);
1948 	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
1949 		(void *)flow);
1950 	return flow;
1951 exit:
1952 	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
1953 		error->message);
1954 	for (i = 0; i != hash_rxq_init_n; ++i) {
1955 		if (parser.queue[i].ibv_attr)
1956 			rte_free(parser.queue[i].ibv_attr);
1957 	}
1958 	rte_free(flow);
1959 	return NULL;
1960 }
1961 
1962 /**
1963  * Validate a flow supported by the NIC.
1964  *
1965  * @see rte_flow_validate()
1966  * @see rte_flow_ops
1967  */
1968 int
1969 mlx5_flow_validate(struct rte_eth_dev *dev,
1970 		   const struct rte_flow_attr *attr,
1971 		   const struct rte_flow_item items[],
1972 		   const struct rte_flow_action actions[],
1973 		   struct rte_flow_error *error)
1974 {
1975 	struct mlx5_flow_parse parser = { .create = 0, };
1976 
1977 	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1978 }
1979 
1980 /**
1981  * Create a flow.
1982  *
1983  * @see rte_flow_create()
1984  * @see rte_flow_ops
1985  */
1986 struct rte_flow *
1987 mlx5_flow_create(struct rte_eth_dev *dev,
1988 		 const struct rte_flow_attr *attr,
1989 		 const struct rte_flow_item items[],
1990 		 const struct rte_flow_action actions[],
1991 		 struct rte_flow_error *error)
1992 {
1993 	struct priv *priv = dev->data->dev_private;
1994 
1995 	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
1996 				     error);
1997 }
1998 
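/*
 * Usage sketch (illustrative only, not part of the driver): validating and
 * then creating a rule that steers UDP traffic to Rx queue 0; "port_id" is
 * a placeholder for a configured mlx5 port.
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error err;
 *	struct rte_flow *f;
 *
 *	if (!rte_flow_validate(port_id, &attr, pattern, actions, &err))
 *		f = rte_flow_create(port_id, &attr, pattern, actions, &err);
 */
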
1999 /**
2000  * Destroy a flow in a list.
2001  *
2002  * @param dev
2003  *   Pointer to Ethernet device.
2004  * @param list
2005  *   Pointer to a TAILQ flow list.
2006  * @param[in] flow
2007  *   Flow to destroy.
2008  */
2009 static void
2010 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2011 		       struct rte_flow *flow)
2012 {
2013 	struct priv *priv = dev->data->dev_private;
2014 	unsigned int i;
2015 
2016 	if (flow->drop || !flow->mark)
2017 		goto free;
2018 	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2019 		struct rte_flow *tmp;
2020 		int mark = 0;
2021 
2022 		/*
2023 		 * To remove the mark from the queue, the queue must not be
2024 		 * present in any other marked flow (RSS or not).
2025 		 */
2026 		TAILQ_FOREACH(tmp, list, next) {
2027 			unsigned int j;
2028 			uint16_t *tqs = NULL;
2029 			uint16_t tq_n = 0;
2030 
2031 			if (!tmp->mark)
2032 				continue;
2033 			for (j = 0; j != hash_rxq_init_n; ++j) {
2034 				if (!tmp->frxq[j].hrxq)
2035 					continue;
2036 				tqs = tmp->frxq[j].hrxq->ind_table->queues;
2037 				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2038 			}
2039 			if (!tq_n)
2040 				continue;
2041 			for (j = 0; (j != tq_n) && !mark; j++)
2042 				if (tqs[j] == (*flow->queues)[i])
2043 					mark = 1;
2044 		}
2045 		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2046 	}
2047 free:
2048 	if (flow->drop) {
2049 		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2050 			claim_zero(mlx5_glue->destroy_flow
2051 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2052 		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2053 	} else {
2054 		for (i = 0; i != hash_rxq_init_n; ++i) {
2055 			struct mlx5_flow *frxq = &flow->frxq[i];
2056 
2057 			if (frxq->ibv_flow)
2058 				claim_zero(mlx5_glue->destroy_flow
2059 					   (frxq->ibv_flow));
2060 			if (frxq->hrxq)
2061 				mlx5_hrxq_release(dev, frxq->hrxq);
2062 			if (frxq->ibv_attr)
2063 				rte_free(frxq->ibv_attr);
2064 		}
2065 	}
2066 	if (flow->cs) {
2067 		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2068 		flow->cs = NULL;
2069 	}
2070 	TAILQ_REMOVE(list, flow, next);
2071 	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2072 		(void *)flow);
2073 	rte_free(flow);
2074 }
2075 
2076 /**
2077  * Destroy all flows.
2078  *
2079  * @param dev
2080  *   Pointer to Ethernet device.
2081  * @param list
2082  *   Pointer to a TAILQ flow list.
2083  */
2084 void
2085 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2086 {
2087 	while (!TAILQ_EMPTY(list)) {
2088 		struct rte_flow *flow;
2089 
2090 		flow = TAILQ_FIRST(list);
2091 		mlx5_flow_list_destroy(dev, list, flow);
2092 	}
2093 }
2094 
2095 /**
2096  * Create drop queue.
2097  *
2098  * @param dev
2099  *   Pointer to Ethernet device.
2100  *
2101  * @return
2102  *   0 on success, a negative errno value otherwise and rte_errno is set.
2103  */
2104 int
2105 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2106 {
2107 	struct priv *priv = dev->data->dev_private;
2108 	struct mlx5_hrxq_drop *fdq = NULL;
2109 
2110 	assert(priv->pd);
2111 	assert(priv->ctx);
2112 	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2113 	if (!fdq) {
2114 		DRV_LOG(WARNING,
2115 			"port %u cannot allocate memory for drop queue",
2116 			dev->data->port_id);
2117 		rte_errno = ENOMEM;
2118 		return -rte_errno;
2119 	}
2120 	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2121 	if (!fdq->cq) {
2122 		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2123 			dev->data->port_id);
2124 		rte_errno = errno;
2125 		goto error;
2126 	}
2127 	fdq->wq = mlx5_glue->create_wq
2128 		(priv->ctx,
2129 		 &(struct ibv_wq_init_attr){
2130 			.wq_type = IBV_WQT_RQ,
2131 			.max_wr = 1,
2132 			.max_sge = 1,
2133 			.pd = priv->pd,
2134 			.cq = fdq->cq,
2135 		 });
2136 	if (!fdq->wq) {
2137 		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2138 			dev->data->port_id);
2139 		rte_errno = errno;
2140 		goto error;
2141 	}
2142 	fdq->ind_table = mlx5_glue->create_rwq_ind_table
2143 		(priv->ctx,
2144 		 &(struct ibv_rwq_ind_table_init_attr){
2145 			.log_ind_tbl_size = 0,
2146 			.ind_tbl = &fdq->wq,
2147 			.comp_mask = 0,
2148 		 });
2149 	if (!fdq->ind_table) {
2150 		DRV_LOG(WARNING,
2151 			"port %u cannot allocate indirection table for drop"
2152 			" queue",
2153 			dev->data->port_id);
2154 		rte_errno = errno;
2155 		goto error;
2156 	}
2157 	fdq->qp = mlx5_glue->create_qp_ex
2158 		(priv->ctx,
2159 		 &(struct ibv_qp_init_attr_ex){
2160 			.qp_type = IBV_QPT_RAW_PACKET,
2161 			.comp_mask =
2162 				IBV_QP_INIT_ATTR_PD |
2163 				IBV_QP_INIT_ATTR_IND_TABLE |
2164 				IBV_QP_INIT_ATTR_RX_HASH,
2165 			.rx_hash_conf = (struct ibv_rx_hash_conf){
2166 				.rx_hash_function =
2167 					IBV_RX_HASH_FUNC_TOEPLITZ,
2168 				.rx_hash_key_len = rss_hash_default_key_len,
2169 				.rx_hash_key = rss_hash_default_key,
2170 				.rx_hash_fields_mask = 0,
2171 				},
2172 			.rwq_ind_tbl = fdq->ind_table,
2173 			.pd = priv->pd
2174 		 });
2175 	if (!fdq->qp) {
2176 		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2177 			dev->data->port_id);
2178 		rte_errno = errno;
2179 		goto error;
2180 	}
2181 	priv->flow_drop_queue = fdq;
2182 	return 0;
2183 error:
2184 	if (fdq->qp)
2185 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2186 	if (fdq->ind_table)
2187 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2188 	if (fdq->wq)
2189 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2190 	if (fdq->cq)
2191 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2192 	if (fdq)
2193 		rte_free(fdq);
2194 	priv->flow_drop_queue = NULL;
2195 	return -rte_errno;
2196 }
2197 
2198 /**
2199  * Delete drop queue.
2200  *
2201  * @param dev
2202  *   Pointer to Ethernet device.
2203  */
2204 void
2205 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2206 {
2207 	struct priv *priv = dev->data->dev_private;
2208 	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2209 
2210 	if (!fdq)
2211 		return;
2212 	if (fdq->qp)
2213 		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2214 	if (fdq->ind_table)
2215 		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2216 	if (fdq->wq)
2217 		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2218 	if (fdq->cq)
2219 		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2220 	rte_free(fdq);
2221 	priv->flow_drop_queue = NULL;
2222 }
2223 
2224 /**
2225  * Remove all flows.
2226  *
2227  * @param dev
2228  *   Pointer to Ethernet device.
2229  * @param list
2230  *   Pointer to a TAILQ flow list.
2231  */
2232 void
2233 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2234 {
2235 	struct priv *priv = dev->data->dev_private;
2236 	struct rte_flow *flow;
2237 
2238 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2239 		unsigned int i;
2240 		struct mlx5_ind_table_ibv *ind_tbl = NULL;
2241 
2242 		if (flow->drop) {
2243 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2244 				continue;
2245 			claim_zero(mlx5_glue->destroy_flow
2246 				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2247 			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2248 			DRV_LOG(DEBUG, "port %u flow %p removed",
2249 				dev->data->port_id, (void *)flow);
2250 			/* Next flow. */
2251 			continue;
2252 		}
2253 		/* Verify the flow has not already been cleaned. */
2254 		for (i = 0; i != hash_rxq_init_n; ++i) {
2255 			if (!flow->frxq[i].ibv_flow)
2256 				continue;
2257 			/*
2258 			 * The indirection table may be needed to clear the
2259 			 * mark flag on the Rx queues.
2260 			 * Saving it here avoids walking the queues in
2261 			 * another loop.
2262 			 */
2263 			ind_tbl = flow->frxq[i].hrxq->ind_table;
2264 			break;
2265 		}
2266 		if (i == hash_rxq_init_n)
2267 			return;
2268 		if (flow->mark) {
2269 			assert(ind_tbl);
2270 			for (i = 0; i != ind_tbl->queues_n; ++i)
2271 				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2272 		}
2273 		for (i = 0; i != hash_rxq_init_n; ++i) {
2274 			if (!flow->frxq[i].ibv_flow)
2275 				continue;
2276 			claim_zero(mlx5_glue->destroy_flow
2277 				   (flow->frxq[i].ibv_flow));
2278 			flow->frxq[i].ibv_flow = NULL;
2279 			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2280 			flow->frxq[i].hrxq = NULL;
2281 		}
2282 		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2283 			(void *)flow);
2284 	}
2285 }
2286 
2287 /**
2288  * Add all flows.
2289  *
2290  * @param dev
2291  *   Pointer to Ethernet device.
2292  * @param list
2293  *   Pointer to a TAILQ flow list.
2294  *
2295  * @return
2296  *   0 on success, a negative errno value otherwise and rte_errno is set.
2297  */
2298 int
2299 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2300 {
2301 	struct priv *priv = dev->data->dev_private;
2302 	struct rte_flow *flow;
2303 
2304 	TAILQ_FOREACH(flow, list, next) {
2305 		unsigned int i;
2306 
2307 		if (flow->drop) {
2308 			flow->frxq[HASH_RXQ_ETH].ibv_flow =
2309 				mlx5_glue->create_flow
2310 				(priv->flow_drop_queue->qp,
2311 				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2312 			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2313 				DRV_LOG(DEBUG,
2314 					"port %u flow %p cannot be applied",
2315 					dev->data->port_id, (void *)flow);
2316 				rte_errno = EINVAL;
2317 				return -rte_errno;
2318 			}
2319 			DRV_LOG(DEBUG, "port %u flow %p applied",
2320 				dev->data->port_id, (void *)flow);
2321 			/* Next flow. */
2322 			continue;
2323 		}
2324 		for (i = 0; i != hash_rxq_init_n; ++i) {
2325 			if (!flow->frxq[i].ibv_attr)
2326 				continue;
2327 			flow->frxq[i].hrxq =
2328 				mlx5_hrxq_get(dev, flow->rss_conf.key,
2329 					      flow->rss_conf.key_len,
2330 					      hash_rxq_init[i].hash_fields,
2331 					      flow->rss_conf.queue,
2332 					      flow->rss_conf.queue_num);
2333 			if (flow->frxq[i].hrxq)
2334 				goto flow_create;
2335 			flow->frxq[i].hrxq =
2336 				mlx5_hrxq_new(dev, flow->rss_conf.key,
2337 					      flow->rss_conf.key_len,
2338 					      hash_rxq_init[i].hash_fields,
2339 					      flow->rss_conf.queue,
2340 					      flow->rss_conf.queue_num);
2341 			if (!flow->frxq[i].hrxq) {
2342 				DRV_LOG(DEBUG,
2343 					"port %u flow %p cannot be applied",
2344 					dev->data->port_id, (void *)flow);
2345 				rte_errno = EINVAL;
2346 				return -rte_errno;
2347 			}
2348 flow_create:
2349 			flow->frxq[i].ibv_flow =
2350 				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2351 						       flow->frxq[i].ibv_attr);
2352 			if (!flow->frxq[i].ibv_flow) {
2353 				DRV_LOG(DEBUG,
2354 					"port %u flow %p cannot be applied",
2355 					dev->data->port_id, (void *)flow);
2356 				rte_errno = EINVAL;
2357 				return -rte_errno;
2358 			}
2359 			DRV_LOG(DEBUG, "port %u flow %p applied",
2360 				dev->data->port_id, (void *)flow);
2361 		}
2362 		if (!flow->mark)
2363 			continue;
2364 		for (i = 0; i != flow->rss_conf.queue_num; ++i)
2365 			(*priv->rxqs)[flow->rss_conf.queue[i]]->mark = 1;
2366 	}
2367 	return 0;
2368 }
2369 
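/*
 * Usage sketch (illustrative only, not part of the driver): mlx5_flow_stop()
 * and mlx5_flow_start() are meant to be paired around a port restart,
 * roughly as the trigger code does (error handling trimmed):
 *
 *	mlx5_flow_stop(dev, &priv->flows);
 *	(Rx queues are reconfigured and restarted here.)
 *	if (mlx5_flow_start(dev, &priv->flows))
 *		DRV_LOG(ERR, "port %u cannot re-apply flow rules",
 *			dev->data->port_id);
 */
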
2370 /**
2371  * Verify the flow list is empty.
2372  *
2373  * @param dev
2374  *   Pointer to Ethernet device.
2375  *
2376  * @return The number of flows not released.
2377  */
2378 int
2379 mlx5_flow_verify(struct rte_eth_dev *dev)
2380 {
2381 	struct priv *priv = dev->data->dev_private;
2382 	struct rte_flow *flow;
2383 	int ret = 0;
2384 
2385 	TAILQ_FOREACH(flow, &priv->flows, next) {
2386 		DRV_LOG(DEBUG, "port %u flow %p still referenced",
2387 			dev->data->port_id, (void *)flow);
2388 		++ret;
2389 	}
2390 	return ret;
2391 }
2392 
2393 /**
2394  * Enable a control flow configured from the control plane.
2395  *
2396  * @param dev
2397  *   Pointer to Ethernet device.
2398  * @param eth_spec
2399  *   An Ethernet flow spec to apply.
2400  * @param eth_mask
2401  *   An Ethernet flow mask to apply.
2402  * @param vlan_spec
2403  *   A VLAN flow spec to apply.
2404  * @param vlan_mask
2405  *   A VLAN flow mask to apply.
2406  *
2407  * @return
2408  *   0 on success, a negative errno value otherwise and rte_errno is set.
2409  */
2410 int
2411 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2412 		    struct rte_flow_item_eth *eth_spec,
2413 		    struct rte_flow_item_eth *eth_mask,
2414 		    struct rte_flow_item_vlan *vlan_spec,
2415 		    struct rte_flow_item_vlan *vlan_mask)
2416 {
2417 	struct priv *priv = dev->data->dev_private;
2418 	const struct rte_flow_attr attr = {
2419 		.ingress = 1,
2420 		.priority = MLX5_CTRL_FLOW_PRIORITY,
2421 	};
2422 	struct rte_flow_item items[] = {
2423 		{
2424 			.type = RTE_FLOW_ITEM_TYPE_ETH,
2425 			.spec = eth_spec,
2426 			.last = NULL,
2427 			.mask = eth_mask,
2428 		},
2429 		{
2430 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2431 				RTE_FLOW_ITEM_TYPE_END,
2432 			.spec = vlan_spec,
2433 			.last = NULL,
2434 			.mask = vlan_mask,
2435 		},
2436 		{
2437 			.type = RTE_FLOW_ITEM_TYPE_END,
2438 		},
2439 	};
2440 	uint16_t queue[priv->reta_idx_n];
2441 	struct rte_flow_action_rss action_rss = {
2442 		.types = priv->rss_conf.rss_hf,
2443 		.key_len = priv->rss_conf.rss_key_len,
2444 		.queue_num = priv->reta_idx_n,
2445 		.key = priv->rss_conf.rss_key,
2446 		.queue = queue,
2447 	};
2448 	struct rte_flow_action actions[] = {
2449 		{
2450 			.type = RTE_FLOW_ACTION_TYPE_RSS,
2451 			.conf = &action_rss,
2452 		},
2453 		{
2454 			.type = RTE_FLOW_ACTION_TYPE_END,
2455 		},
2456 	};
2457 	struct rte_flow *flow;
2458 	struct rte_flow_error error;
2459 	unsigned int i;
2460 
2461 	if (!priv->reta_idx_n) {
2462 		rte_errno = EINVAL;
2463 		return -rte_errno;
2464 	}
2465 	for (i = 0; i != priv->reta_idx_n; ++i)
2466 		queue[i] = (*priv->reta_idx)[i];
2467 	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2468 				     actions, &error);
2469 	if (!flow)
2470 		return -rte_errno;
2471 	return 0;
2472 }
2473 
2474 /**
2475  * Enable a control flow configured from the control plane.
2476  *
2477  * @param dev
2478  *   Pointer to Ethernet device.
2479  * @param eth_spec
2480  *   An Ethernet flow spec to apply.
2481  * @param eth_mask
2482  *   An Ethernet flow mask to apply.
2483  *
2484  * @return
2485  *   0 on success, a negative errno value otherwise and rte_errno is set.
2486  */
2487 int
2488 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2489 	       struct rte_flow_item_eth *eth_spec,
2490 	       struct rte_flow_item_eth *eth_mask)
2491 {
2492 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2493 }
2494 
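/*
 * Usage sketch (illustrative only, not part of the driver): how the control
 * path can enable broadcast reception through this helper, roughly as the
 * trigger code does:
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *		DRV_LOG(ERR, "port %u cannot enable broadcast traffic",
 *			dev->data->port_id);
 */
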
2495 /**
2496  * Destroy a flow.
2497  *
2498  * @see rte_flow_destroy()
2499  * @see rte_flow_ops
2500  */
2501 int
2502 mlx5_flow_destroy(struct rte_eth_dev *dev,
2503 		  struct rte_flow *flow,
2504 		  struct rte_flow_error *error __rte_unused)
2505 {
2506 	struct priv *priv = dev->data->dev_private;
2507 
2508 	mlx5_flow_list_destroy(dev, &priv->flows, flow);
2509 	return 0;
2510 }
2511 
2512 /**
2513  * Destroy all flows.
2514  *
2515  * @see rte_flow_flush()
2516  * @see rte_flow_ops
2517  */
2518 int
2519 mlx5_flow_flush(struct rte_eth_dev *dev,
2520 		struct rte_flow_error *error __rte_unused)
2521 {
2522 	struct priv *priv = dev->data->dev_private;
2523 
2524 	mlx5_flow_list_flush(dev, &priv->flows);
2525 	return 0;
2526 }
2527 
2528 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2529 /**
2530  * Query flow counter.
2531  *
2532  * @param cs
2533  *   The counter set to query.
2534  * @param[in,out] counter_stats
2535  *   Previously read counter values used to compute deltas; updated on reset.
2536  *
2537  * @return
2538  *   0 on success, a negative errno value otherwise and rte_errno is set.
2539  */
2540 static int
2541 mlx5_flow_query_count(struct ibv_counter_set *cs,
2542 		      struct mlx5_flow_counter_stats *counter_stats,
2543 		      struct rte_flow_query_count *query_count,
2544 		      struct rte_flow_error *error)
2545 {
2546 	uint64_t counters[2];
2547 	struct ibv_query_counter_set_attr query_cs_attr = {
2548 		.cs = cs,
2549 		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2550 	};
2551 	struct ibv_counter_set_data query_out = {
2552 		.out = counters,
2553 		.outlen = 2 * sizeof(uint64_t),
2554 	};
2555 	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2556 
2557 	if (err)
2558 		return rte_flow_error_set(error, err,
2559 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2560 					  NULL,
2561 					  "cannot read counter");
2562 	query_count->hits_set = 1;
2563 	query_count->bytes_set = 1;
2564 	query_count->hits = counters[0] - counter_stats->hits;
2565 	query_count->bytes = counters[1] - counter_stats->bytes;
2566 	if (query_count->reset) {
2567 		counter_stats->hits = counters[0];
2568 		counter_stats->bytes = counters[1];
2569 	}
2570 	return 0;
2571 }
2572 
2573 /**
2574  * Query a flow.
2575  *
2576  * @see rte_flow_query()
2577  * @see rte_flow_ops
2578  */
2579 int
2580 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
2581 		struct rte_flow *flow,
2582 		enum rte_flow_action_type action __rte_unused,
2583 		void *data,
2584 		struct rte_flow_error *error)
2585 {
2586 	if (flow->cs) {
2587 		int ret;
2588 
2589 		ret = mlx5_flow_query_count(flow->cs,
2590 					    &flow->counter_stats,
2591 					    (struct rte_flow_query_count *)data,
2592 					    error);
2593 		if (ret)
2594 			return ret;
2595 	} else {
2596 		return rte_flow_error_set(error, EINVAL,
2597 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2598 					  NULL,
2599 					  "no counter found for flow");
2600 	}
2601 	return 0;
2602 }
2603 #endif
2604 
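/*
 * Usage sketch (illustrative only, not part of the driver): reading the
 * counter of a rule created with a COUNT action. The exact rte_flow_query()
 * prototype depends on the DPDK revision; it is assumed here to take the
 * action type, matching the callback above.
 *
 *	struct rte_flow_query_count count = { .reset = 1 };
 *	struct rte_flow_error err;
 *
 *	if (!rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *			    &count, &err) &&
 *	    count.hits_set)
 *		printf("hits: %" PRIu64 " bytes: %" PRIu64 "\n",
 *		       count.hits, count.bytes);
 */
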
2605 /**
2606  * Isolated mode.
2607  *
2608  * @see rte_flow_isolate()
2609  * @see rte_flow_ops
2610  */
2611 int
2612 mlx5_flow_isolate(struct rte_eth_dev *dev,
2613 		  int enable,
2614 		  struct rte_flow_error *error)
2615 {
2616 	struct priv *priv = dev->data->dev_private;
2617 
2618 	if (dev->data->dev_started) {
2619 		rte_flow_error_set(error, EBUSY,
2620 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2621 				   NULL,
2622 				   "port must be stopped first");
2623 		return -rte_errno;
2624 	}
2625 	priv->isolated = !!enable;
2626 	if (enable)
2627 		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2628 	else
2629 		priv->dev->dev_ops = &mlx5_dev_ops;
2630 	return 0;
2631 }
2632 
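/*
 * Usage sketch (illustrative only, not part of the driver): isolated mode
 * has to be selected while the port is stopped, typically right after
 * probing and before configuration.
 *
 *	struct rte_flow_error err;
 *
 *	if (rte_flow_isolate(port_id, 1, &err))
 *		printf("cannot enter isolated mode: %s\n",
 *		       err.message ? err.message : "unspecified");
 *	(rte_eth_dev_configure() and rte_eth_dev_start() follow.)
 */
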
2633 /**
2634  * Convert a flow director filter to a generic flow.
2635  *
2636  * @param dev
2637  *   Pointer to Ethernet device.
2638  * @param fdir_filter
2639  *   Flow director filter to add.
2640  * @param attributes
2641  *   Generic flow parameters structure.
2642  *
2643  * @return
2644  *   0 on success, a negative errno value otherwise and rte_errno is set.
2645  */
2646 static int
2647 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2648 			 const struct rte_eth_fdir_filter *fdir_filter,
2649 			 struct mlx5_fdir *attributes)
2650 {
2651 	struct priv *priv = dev->data->dev_private;
2652 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
2653 	const struct rte_eth_fdir_masks *mask =
2654 		&dev->data->dev_conf.fdir_conf.mask;
2655 
2656 	/* Validate queue number. */
2657 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2658 		DRV_LOG(ERR, "port %u invalid queue number %d",
2659 			dev->data->port_id, fdir_filter->action.rx_queue);
2660 		rte_errno = EINVAL;
2661 		return -rte_errno;
2662 	}
2663 	attributes->attr.ingress = 1;
2664 	attributes->items[0] = (struct rte_flow_item) {
2665 		.type = RTE_FLOW_ITEM_TYPE_ETH,
2666 		.spec = &attributes->l2,
2667 		.mask = &attributes->l2_mask,
2668 	};
2669 	switch (fdir_filter->action.behavior) {
2670 	case RTE_ETH_FDIR_ACCEPT:
2671 		attributes->actions[0] = (struct rte_flow_action){
2672 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
2673 			.conf = &attributes->queue,
2674 		};
2675 		break;
2676 	case RTE_ETH_FDIR_REJECT:
2677 		attributes->actions[0] = (struct rte_flow_action){
2678 			.type = RTE_FLOW_ACTION_TYPE_DROP,
2679 		};
2680 		break;
2681 	default:
2682 		DRV_LOG(ERR, "port %u invalid behavior %d",
2683 			dev->data->port_id,
2684 			fdir_filter->action.behavior);
2685 		rte_errno = ENOTSUP;
2686 		return -rte_errno;
2687 	}
2688 	attributes->queue.index = fdir_filter->action.rx_queue;
2689 	/* Handle L3. */
2690 	switch (fdir_filter->input.flow_type) {
2691 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2692 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2693 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2694 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2695 			.src_addr = input->flow.ip4_flow.src_ip,
2696 			.dst_addr = input->flow.ip4_flow.dst_ip,
2697 			.time_to_live = input->flow.ip4_flow.ttl,
2698 			.type_of_service = input->flow.ip4_flow.tos,
2699 			.next_proto_id = input->flow.ip4_flow.proto,
2700 		};
2701 		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
2702 			.src_addr = mask->ipv4_mask.src_ip,
2703 			.dst_addr = mask->ipv4_mask.dst_ip,
2704 			.time_to_live = mask->ipv4_mask.ttl,
2705 			.type_of_service = mask->ipv4_mask.tos,
2706 			.next_proto_id = mask->ipv4_mask.proto,
2707 		};
2708 		attributes->items[1] = (struct rte_flow_item){
2709 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2710 			.spec = &attributes->l3,
2711 			.mask = &attributes->l3_mask,
2712 		};
2713 		break;
2714 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2715 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2716 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2717 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2718 			.hop_limits = input->flow.ipv6_flow.hop_limits,
2719 			.proto = input->flow.ipv6_flow.proto,
2720 		};
2721 
2722 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2723 		       input->flow.ipv6_flow.src_ip,
2724 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2725 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2726 		       input->flow.ipv6_flow.dst_ip,
2727 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2728 		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
2729 		       mask->ipv6_mask.src_ip,
2730 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
2731 		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
2732 		       mask->ipv6_mask.dst_ip,
2733 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
2734 		attributes->items[1] = (struct rte_flow_item){
2735 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2736 			.spec = &attributes->l3,
2737 			.mask = &attributes->l3_mask,
2738 		};
2739 		break;
2740 	default:
2741 		DRV_LOG(ERR, "port %u invalid flow type %d",
2742 			dev->data->port_id, fdir_filter->input.flow_type);
2743 		rte_errno = ENOTSUP;
2744 		return -rte_errno;
2745 	}
2746 	/* Handle L4. */
2747 	switch (fdir_filter->input.flow_type) {
2748 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2749 		attributes->l4.udp.hdr = (struct udp_hdr){
2750 			.src_port = input->flow.udp4_flow.src_port,
2751 			.dst_port = input->flow.udp4_flow.dst_port,
2752 		};
2753 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
2754 			.src_port = mask->src_port_mask,
2755 			.dst_port = mask->dst_port_mask,
2756 		};
2757 		attributes->items[2] = (struct rte_flow_item){
2758 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2759 			.spec = &attributes->l4,
2760 			.mask = &attributes->l4_mask,
2761 		};
2762 		break;
2763 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2764 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2765 			.src_port = input->flow.tcp4_flow.src_port,
2766 			.dst_port = input->flow.tcp4_flow.dst_port,
2767 		};
2768 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2769 			.src_port = mask->src_port_mask,
2770 			.dst_port = mask->dst_port_mask,
2771 		};
2772 		attributes->items[2] = (struct rte_flow_item){
2773 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2774 			.spec = &attributes->l4,
2775 			.mask = &attributes->l4_mask,
2776 		};
2777 		break;
2778 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2779 		attributes->l4.udp.hdr = (struct udp_hdr){
2780 			.src_port = input->flow.udp6_flow.src_port,
2781 			.dst_port = input->flow.udp6_flow.dst_port,
2782 		};
2783 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
2784 			.src_port = mask->src_port_mask,
2785 			.dst_port = mask->dst_port_mask,
2786 		};
2787 		attributes->items[2] = (struct rte_flow_item){
2788 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2789 			.spec = &attributes->l4,
2790 			.mask = &attributes->l4_mask,
2791 		};
2792 		break;
2793 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2794 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2795 			.src_port = input->flow.tcp6_flow.src_port,
2796 			.dst_port = input->flow.tcp6_flow.dst_port,
2797 		};
2798 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2799 			.src_port = mask->src_port_mask,
2800 			.dst_port = mask->dst_port_mask,
2801 		};
2802 		attributes->items[2] = (struct rte_flow_item){
2803 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2804 			.spec = &attributes->l4,
2805 			.mask = &attributes->l4_mask,
2806 		};
2807 		break;
2808 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2809 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2810 		break;
2811 	default:
2812 		DRV_LOG(ERR, "port %u invalid flow type %d",
2813 			dev->data->port_id, fdir_filter->input.flow_type);
2814 		rte_errno = ENOTSUP;
2815 		return -rte_errno;
2816 	}
2817 	return 0;
2818 }
2819 
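/*
 * Usage sketch (illustrative only, not part of the driver): a flow director
 * filter that the conversion above understands, as an application could
 * submit through the legacy filter API; the UDP port, queue index and
 * "port_id" are placeholders, and the port is in network byte order since
 * it is copied verbatim into the UDP header spec.
 *
 *	struct rte_eth_fdir_filter fdir = {
 *		.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *		.input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(4789),
 *		.action.rx_queue = 1,
 *		.action.behavior = RTE_ETH_FDIR_ACCEPT,
 *	};
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_ADD, &fdir);
 */
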
2820 /**
2821  * Add new flow director filter and store it in list.
2822  *
2823  * @param dev
2824  *   Pointer to Ethernet device.
2825  * @param fdir_filter
2826  *   Flow director filter to add.
2827  *
2828  * @return
2829  *   0 on success, a negative errno value otherwise and rte_errno is set.
2830  */
2831 static int
2832 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
2833 		     const struct rte_eth_fdir_filter *fdir_filter)
2834 {
2835 	struct priv *priv = dev->data->dev_private;
2836 	struct mlx5_fdir attributes = {
2837 		.attr.group = 0,
2838 		.l2_mask = {
2839 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2840 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2841 			.type = 0,
2842 		},
2843 	};
2844 	struct mlx5_flow_parse parser = {
2845 		.layer = HASH_RXQ_ETH,
2846 	};
2847 	struct rte_flow_error error;
2848 	struct rte_flow *flow;
2849 	int ret;
2850 
2851 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2852 	if (ret)
2853 		return ret;
2854 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2855 				attributes.actions, &error, &parser);
2856 	if (ret)
2857 		return ret;
2858 	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
2859 				     attributes.items, attributes.actions,
2860 				     &error);
2861 	if (flow) {
2862 		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
2863 			(void *)flow);
2864 		return 0;
2865 	}
2866 	return -rte_errno;
2867 }
2868 
2869 /**
2870  * Delete specific filter.
2871  *
2872  * @param dev
2873  *   Pointer to Ethernet device.
2874  * @param fdir_filter
2875  *   Filter to be deleted.
2876  *
2877  * @return
2878  *   0 on success, a negative errno value otherwise and rte_errno is set.
2879  */
2880 static int
2881 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
2882 			const struct rte_eth_fdir_filter *fdir_filter)
2883 {
2884 	struct priv *priv = dev->data->dev_private;
2885 	struct mlx5_fdir attributes = {
2886 		.attr.group = 0,
2887 	};
2888 	struct mlx5_flow_parse parser = {
2889 		.create = 1,
2890 		.layer = HASH_RXQ_ETH,
2891 	};
2892 	struct rte_flow_error error;
2893 	struct rte_flow *flow;
2894 	unsigned int i;
2895 	int ret;
2896 
2897 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2898 	if (ret)
2899 		return ret;
2900 	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2901 				attributes.actions, &error, &parser);
2902 	if (ret)
2903 		goto exit;
2904 	/*
2905 	 * Special case for the drop action: its specification is only
2906 	 * added when a flow is created, so it is missing from the parser
2907 	 * output here and must be appended before comparing.
2908 	 */
2909 	if (parser.drop) {
2910 		struct ibv_flow_spec_action_drop *drop;
2911 
2912 		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2913 				parser.queue[HASH_RXQ_ETH].offset);
2914 		*drop = (struct ibv_flow_spec_action_drop){
2915 			.type = IBV_FLOW_SPEC_ACTION_DROP,
2916 			.size = sizeof(struct ibv_flow_spec_action_drop),
2917 		};
2918 		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2919 	}
2920 	TAILQ_FOREACH(flow, &priv->flows, next) {
2921 		struct ibv_flow_attr *attr;
2922 		struct ibv_spec_header *attr_h;
2923 		void *spec;
2924 		struct ibv_flow_attr *flow_attr;
2925 		struct ibv_spec_header *flow_h;
2926 		void *flow_spec;
2927 		unsigned int specs_n;
2928 
2929 		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2930 		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2931 		/* Compare first the attributes. */
2932 		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2933 			continue;
2934 		if (attr->num_of_specs == 0)
2935 			continue;
2936 		spec = (void *)((uintptr_t)attr +
2937 				sizeof(struct ibv_flow_attr));
2938 		flow_spec = (void *)((uintptr_t)flow_attr +
2939 				     sizeof(struct ibv_flow_attr));
2940 		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2941 		for (i = 0; i != specs_n; ++i) {
2942 			attr_h = spec;
2943 			flow_h = flow_spec;
2944 			if (memcmp(spec, flow_spec,
2945 				   RTE_MIN(attr_h->size, flow_h->size)))
2946 				goto wrong_flow;
2947 			spec = (void *)((uintptr_t)spec + attr_h->size);
2948 			flow_spec = (void *)((uintptr_t)flow_spec +
2949 					     flow_h->size);
2950 		}
2951 		/* At this point, the flow matches. */
2952 		break;
2953 wrong_flow:
2954 		/* The flow does not match. */
2955 		continue;
2956 	}
2957 	ret = flow ? 0 : ENOENT; /* Report a missing filter as ENOENT. */
2958 	if (flow)
2959 		mlx5_flow_list_destroy(dev, &priv->flows, flow);
2960 exit:
2961 	for (i = 0; i != hash_rxq_init_n; ++i) {
2962 		if (parser.queue[i].ibv_attr)
2963 			rte_free(parser.queue[i].ibv_attr);
2964 	}
2965 	rte_errno = ret < 0 ? -ret : ret; /* ret may hold a negative errno. */
2966 	return -rte_errno;
2967 }
2968 
2969 /**
2970  * Update queue for specific filter.
2971  *
2972  * @param dev
2973  *   Pointer to Ethernet device.
2974  * @param fdir_filter
2975  *   Filter to be updated.
2976  *
2977  * @return
2978  *   0 on success, a negative errno value otherwise and rte_errno is set.
2979  */
2980 static int
2981 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
2982 			const struct rte_eth_fdir_filter *fdir_filter)
2983 {
2984 	int ret;
2985 
2986 	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
2987 	if (ret)
2988 		return ret;
2989 	return mlx5_fdir_filter_add(dev, fdir_filter);
2990 }
2991 
2992 /**
2993  * Flush all filters.
2994  *
2995  * @param dev
2996  *   Pointer to Ethernet device.
2997  */
2998 static void
2999 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3000 {
3001 	struct priv *priv = dev->data->dev_private;
3002 
3003 	mlx5_flow_list_flush(dev, &priv->flows);
3004 }
3005 
3006 /**
3007  * Get flow director information.
3008  *
3009  * @param dev
3010  *   Pointer to Ethernet device.
3011  * @param[out] fdir_info
3012  *   Resulting flow director information.
3013  */
3014 static void
3015 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3016 {
3017 	struct priv *priv = dev->data->dev_private;
3018 	struct rte_eth_fdir_masks *mask =
3019 		&priv->dev->data->dev_conf.fdir_conf.mask;
3020 
3021 	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3022 	fdir_info->guarant_spc = 0;
3023 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3024 	fdir_info->max_flexpayload = 0;
3025 	fdir_info->flow_types_mask[0] = 0;
3026 	fdir_info->flex_payload_unit = 0;
3027 	fdir_info->max_flex_payload_segment_num = 0;
3028 	fdir_info->flex_payload_limit = 0;
3029 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3030 }
3031 
3032 /**
3033  * Deal with flow director operations.
3034  *
3035  * @param dev
3036  *   Pointer to Ethernet device.
3037  * @param filter_op
3038  *   Operation to perform.
3039  * @param arg
3040  *   Pointer to operation-specific structure.
3041  *
3042  * @return
3043  *   0 on success, a negative errno value otherwise and rte_errno is set.
3044  */
3045 static int
3046 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3047 		    void *arg)
3048 {
3049 	struct priv *priv = dev->data->dev_private;
3050 	enum rte_fdir_mode fdir_mode =
3051 		priv->dev->data->dev_conf.fdir_conf.mode;
3052 
3053 	if (filter_op == RTE_ETH_FILTER_NOP)
3054 		return 0;
3055 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3056 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3057 		DRV_LOG(ERR, "port %u flow director mode %d not supported",
3058 			dev->data->port_id, fdir_mode);
3059 		rte_errno = EINVAL;
3060 		return -rte_errno;
3061 	}
3062 	switch (filter_op) {
3063 	case RTE_ETH_FILTER_ADD:
3064 		return mlx5_fdir_filter_add(dev, arg);
3065 	case RTE_ETH_FILTER_UPDATE:
3066 		return mlx5_fdir_filter_update(dev, arg);
3067 	case RTE_ETH_FILTER_DELETE:
3068 		return mlx5_fdir_filter_delete(dev, arg);
3069 	case RTE_ETH_FILTER_FLUSH:
3070 		mlx5_fdir_filter_flush(dev);
3071 		break;
3072 	case RTE_ETH_FILTER_INFO:
3073 		mlx5_fdir_info_get(dev, arg);
3074 		break;
3075 	default:
3076 		DRV_LOG(DEBUG, "port %u unknown operation %u",
3077 			dev->data->port_id, filter_op);
3078 		rte_errno = EINVAL;
3079 		return -rte_errno;
3080 	}
3081 	return 0;
3082 }
3083 
3084 /**
3085  * Manage filter operations.
3086  *
3087  * @param dev
3088  *   Pointer to Ethernet device structure.
3089  * @param filter_type
3090  *   Filter type.
3091  * @param filter_op
3092  *   Operation to perform.
3093  * @param arg
3094  *   Pointer to operation-specific structure.
3095  *
3096  * @return
3097  *   0 on success, a negative errno value otherwise and rte_errno is set.
3098  */
3099 int
3100 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3101 		     enum rte_filter_type filter_type,
3102 		     enum rte_filter_op filter_op,
3103 		     void *arg)
3104 {
3105 	switch (filter_type) {
3106 	case RTE_ETH_FILTER_GENERIC:
3107 		if (filter_op != RTE_ETH_FILTER_GET) {
3108 			rte_errno = EINVAL;
3109 			return -rte_errno;
3110 		}
3111 		*(const void **)arg = &mlx5_flow_ops;
3112 		return 0;
3113 	case RTE_ETH_FILTER_FDIR:
3114 		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3115 	default:
3116 		DRV_LOG(ERR, "port %u filter type (%d) not supported",
3117 			dev->data->port_id, filter_type);
3118 		rte_errno = ENOTSUP;
3119 		return -rte_errno;
3120 	}
3121 	return 0;
3122 }
3123 }
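
/*
 * Usage sketch (illustrative only, not part of the driver): this is roughly
 * how the generic flow API library is expected to retrieve the mlx5 flow
 * operations through the callback above.
 *
 *	const struct rte_flow_ops *ops = NULL;
 *
 *	if (!mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
 *				  RTE_ETH_FILTER_GET, &ops))
 *		assert(ops == &mlx5_flow_ops);
 */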