1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9 
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19 
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28 
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33 
34 /* Dev ops structure defined in mlx5.c */
35 extern const struct eth_dev_ops mlx5_dev_ops;
36 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
37 
38 /* Pattern Layer bits. */
39 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
40 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
41 #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
42 #define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
43 #define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
44 #define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
45 /* Masks. */
46 #define MLX5_FLOW_LAYER_OUTER_L3 \
47 	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
48 #define MLX5_FLOW_LAYER_OUTER_L4 \
49 	(MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
50 
51 /* Actions that modify the fate of matching traffic. */
52 #define MLX5_FLOW_FATE_DROP (1u << 0)
53 #define MLX5_FLOW_FATE_QUEUE (1u << 1)
54 
55 /** Verbs flow structure, specification buffer and related resources. */
56 struct mlx5_flow_verbs {
57 	unsigned int size; /**< Total size of the specifications. */
58 	struct {
59 		struct ibv_flow_attr *attr;
60 		/**< Pointer to the flow attributes. */
61 		uint8_t *specs; /**< Pointer to the specifications. */
62 	};
63 	struct ibv_flow *flow; /**< Verbs flow pointer. */
64 	struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
65 };
66 
67 /* Flow structure. */
68 struct rte_flow {
69 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
70 	struct rte_flow_attr attributes; /**< User flow attribute. */
71 	uint32_t layers;
72 	/**< Bit-field of present layers, see MLX5_FLOW_LAYER_*. */
73 	uint32_t fate;
74 	/**< Bit-field of the flow fate, see MLX5_FLOW_FATE_*. */
75 	struct mlx5_flow_verbs verbs; /**< Verbs flow. */
76 	uint16_t queue; /**< Destination queue to redirect traffic to. */
77 };
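
/*
 * Note: mlx5_flow_list_create() allocates the Verbs flow attributes and the
 * specification buffer in the same memory block, right after the rte_flow
 * structure itself:
 *
 *   [struct rte_flow][struct ibv_flow_attr][specifications ...]
 *                     ^ verbs.attr          ^ verbs.specs
 */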
78 
79 static const struct rte_flow_ops mlx5_flow_ops = {
80 	.validate = mlx5_flow_validate,
81 	.create = mlx5_flow_create,
82 	.destroy = mlx5_flow_destroy,
83 	.flush = mlx5_flow_flush,
84 	.isolate = mlx5_flow_isolate,
85 };
86 
87 /* Convert FDIR request to Generic flow. */
88 struct mlx5_fdir {
89 	struct rte_flow_attr attr;
90 	struct rte_flow_action actions[2];
91 	struct rte_flow_item items[4];
92 	struct rte_flow_item_eth l2;
93 	struct rte_flow_item_eth l2_mask;
94 	union {
95 		struct rte_flow_item_ipv4 ipv4;
96 		struct rte_flow_item_ipv6 ipv6;
97 	} l3;
98 	union {
99 		struct rte_flow_item_ipv4 ipv4;
100 		struct rte_flow_item_ipv6 ipv6;
101 	} l3_mask;
102 	union {
103 		struct rte_flow_item_udp udp;
104 		struct rte_flow_item_tcp tcp;
105 	} l4;
106 	union {
107 		struct rte_flow_item_udp udp;
108 		struct rte_flow_item_tcp tcp;
109 	} l4_mask;
110 	struct rte_flow_action_queue queue;
111 };
112 
113 /* Verbs specification header. */
114 struct ibv_spec_header {
115 	enum ibv_flow_spec_type type;
116 	uint16_t size;
117 };
118 
119 /**
120  * Discover the maximum number of flow priorities available.
121  *
122  * @param[in] dev
123  *   Pointer to Ethernet device.
124  *
125  * @return
126  *   Number of supported flow priorities on success, a negative errno value
127  *   otherwise and rte_errno is set.
128  */
129 int
130 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
131 {
132 	struct {
133 		struct ibv_flow_attr attr;
134 		struct ibv_flow_spec_eth eth;
135 		struct ibv_flow_spec_action_drop drop;
136 	} flow_attr = {
137 		.attr = {
138 			.num_of_specs = 2,
139 		},
140 		.eth = {
141 			.type = IBV_FLOW_SPEC_ETH,
142 			.size = sizeof(struct ibv_flow_spec_eth),
143 		},
144 		.drop = {
145 			.size = sizeof(struct ibv_flow_spec_action_drop),
146 			.type = IBV_FLOW_SPEC_ACTION_DROP,
147 		},
148 	};
149 	struct ibv_flow *flow;
150 	struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
151 	uint16_t vprio[] = { 8, 16 };
152 	int i;
153 
154 	if (!drop) {
155 		rte_errno = ENOTSUP;
156 		return -rte_errno;
157 	}
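	/*
	 * Probe by creating a drop flow at the last priority of each
	 * candidate range (8, then 16); the highest successful probe tells
	 * how many flow priorities the device supports.
	 */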
158 	for (i = 0; i != RTE_DIM(vprio); i++) {
159 		flow_attr.attr.priority = vprio[i] - 1;
160 		flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
161 		if (!flow)
162 			break;
163 		claim_zero(mlx5_glue->destroy_flow(flow));
164 	}
165 	mlx5_hrxq_drop_release(dev);
166 	DRV_LOG(INFO, "port %u flow maximum priority: %d",
167 		dev->data->port_id, vprio[i - 1]);
168 	return vprio[i - 1];
169 }
170 
171 /**
172  * Verify the @p attributes will be correctly understood by the NIC and store
173  * them in the @p flow if everything is correct.
174  *
175  * @param[in] dev
176  *   Pointer to Ethernet device.
177  * @param[in] attributes
178  *   Pointer to flow attributes.
179  * @param[in, out] flow
180  *   Pointer to the rte_flow structure.
181  * @param[out] error
182  *   Pointer to error structure.
183  *
184  * @return
185  *   0 on success, a negative errno value otherwise and rte_errno is set.
186  */
187 static int
188 mlx5_flow_attributes(struct rte_eth_dev *dev,
189 		     const struct rte_flow_attr *attributes,
190 		     struct rte_flow *flow,
191 		     struct rte_flow_error *error)
192 {
193 	uint32_t priority_max =
194 		((struct priv *)dev->data->dev_private)->config.flow_prio;
195 
196 	if (attributes->group)
197 		return rte_flow_error_set(error, ENOTSUP,
198 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
199 					  NULL,
200 					  "groups are not supported");
201 	if (attributes->priority >= priority_max)
202 		return rte_flow_error_set(error, ENOTSUP,
203 					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
204 					  NULL,
205 					  "priority out of range");
206 	if (attributes->egress)
207 		return rte_flow_error_set(error, ENOTSUP,
208 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
209 					  NULL,
210 					  "egress is not supported");
211 	if (attributes->transfer)
212 		return rte_flow_error_set(error, ENOTSUP,
213 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
214 					  NULL,
215 					  "transfer is not supported");
216 	if (!attributes->ingress)
217 		return rte_flow_error_set(error, ENOTSUP,
218 					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
219 					  NULL,
220 					  "ingress attribute is mandatory");
221 	flow->attributes = *attributes;
222 	return 0;
223 }
224 
225 /**
226  * Verify the @p item specifications (spec, last, mask) are compatible with the
227  * NIC capabilities.
228  *
229  * @param[in] item
230  *   Item specification.
231  * @param[in] mask
232  *   @p item->mask or flow default bit-masks.
233  * @param[in] nic_mask
234  *   Bit-mask of fields supported by the NIC to compare with the user mask.
235  * @param[in] size
236  *   Size of the bit-mask in bytes.
237  * @param[out] error
238  *   Pointer to error structure.
239  *
240  * @return
241  *   0 on success, a negative errno value otherwise and rte_errno is set.
242  */
243 static int
244 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
245 			  const uint8_t *mask,
246 			  const uint8_t *nic_mask,
247 			  unsigned int size,
248 			  struct rte_flow_error *error)
249 {
250 	unsigned int i;
251 
252 	assert(nic_mask);
253 	for (i = 0; i < size; ++i)
254 		if ((nic_mask[i] | mask[i]) != nic_mask[i])
255 			return rte_flow_error_set(error, ENOTSUP,
256 						  RTE_FLOW_ERROR_TYPE_ITEM,
257 						  item,
258 						  "mask enables non-supported"
259 						  " bits");
260 	if (!item->spec && (item->mask || item->last))
261 		return rte_flow_error_set(error, EINVAL,
262 					  RTE_FLOW_ERROR_TYPE_ITEM,
263 					  item,
264 					  "mask/last without a spec is not"
265 					  " supported");
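	/*
	 * A spec/last range is only accepted when, once masked, both ends
	 * are identical, i.e. when it does not describe an actual range.
	 */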
266 	if (item->spec && item->last) {
267 		uint8_t spec[size];
268 		uint8_t last[size];
269 		unsigned int i;
270 		int ret;
271 
272 		for (i = 0; i < size; ++i) {
273 			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
274 			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
275 		}
276 		ret = memcmp(spec, last, size);
277 		if (ret != 0)
278 			return rte_flow_error_set(error, ENOTSUP,
279 						  RTE_FLOW_ERROR_TYPE_ITEM,
280 						  item,
281 						  "range is not supported");
282 	}
283 	return 0;
284 }
285 
286 /**
287  * Add a Verbs specification into @p flow.
288  *
289  * @param[in, out] flow
290  *   Pointer to flow structure.
291  * @param[in] src
292  *   Verbs specification to add.
293  * @param[in] size
294  *   Size in bytes of the specification to copy.
295  */
296 static void
297 mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
298 {
299 	if (flow->verbs.specs) {
300 		void *dst;
301 
302 		dst = (void *)(flow->verbs.specs + flow->verbs.size);
303 		memcpy(dst, src, size);
304 		++flow->verbs.attr->num_of_specs;
305 	}
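	/* Always account for the size so that callers can size the buffer. */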
306 	flow->verbs.size += size;
307 }
308 
309 /**
310  * Convert the @p item into a Verbs specification after ensuring the NIC
311  * will understand and process it correctly.
312  * If the necessary size for the conversion is greater than @p flow_size,
313  * nothing is written in @p flow, but the validation is still performed.
314  *
315  * @param[in] item
316  *   Item specification.
317  * @param[in, out] flow
318  *   Pointer to flow structure.
319  * @param[in] flow_size
320  *   Size in bytes of the available space in @p flow; if too small, nothing is
321  *   written.
322  * @param[out] error
323  *   Pointer to error structure.
324  *
325  * @return
326  *   On success, the number of bytes consumed/necessary; if the returned value
327  *   is less than or equal to @p flow_size, the @p item has been fully converted,
328  *   otherwise another call with the returned memory size should be done.
329  *   On error, a negative errno value is returned and rte_errno is set.
330  */
331 static int
332 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
333 		   const size_t flow_size, struct rte_flow_error *error)
334 {
335 	const struct rte_flow_item_eth *spec = item->spec;
336 	const struct rte_flow_item_eth *mask = item->mask;
337 	const struct rte_flow_item_eth nic_mask = {
338 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
339 		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
340 		.type = RTE_BE16(0xffff),
341 	};
342 	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
343 	struct ibv_flow_spec_eth eth = {
344 		.type = IBV_FLOW_SPEC_ETH,
345 		.size = size,
346 	};
347 	int ret;
348 
349 	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L2)
350 		return rte_flow_error_set(error, ENOTSUP,
351 					  RTE_FLOW_ERROR_TYPE_ITEM,
352 					  item,
353 					  "L2 layers already configured");
354 	if (!mask)
355 		mask = &rte_flow_item_eth_mask;
356 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
357 					(const uint8_t *)&nic_mask,
358 					sizeof(struct rte_flow_item_eth),
359 					error);
360 	if (ret)
361 		return ret;
362 	flow->layers |= MLX5_FLOW_LAYER_OUTER_L2;
363 	if (size > flow_size)
364 		return size;
365 	if (spec) {
366 		unsigned int i;
367 
368 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
369 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
370 		eth.val.ether_type = spec->type;
371 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
372 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
373 		eth.mask.ether_type = mask->type;
374 		/* Remove unwanted bits from values. */
375 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
376 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
377 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
378 		}
379 		eth.val.ether_type &= eth.mask.ether_type;
380 	}
381 	mlx5_flow_spec_verbs_add(flow, &eth, size);
382 	return size;
383 }
384 
385 /**
386  * Convert the @p pattern into Verbs specifications after ensuring the NIC
387  * will understand and process it correctly.
388  * The conversion is performed item per item; each item is written into
389  * the @p flow if its size is less than or equal to @p flow_size.
390  * Validation and memory consumption computation are still performed until the
391  * end of @p pattern, unless an error is encountered.
392  *
393  * @param[in] pattern
394  *   Flow pattern.
395  * @param[in, out] flow
396  *   Pointer to the rte_flow structure.
397  * @param[in] flow_size
398  *   Size in bytes of the available space in @p flow; if too small, some
399  *   garbage may be present.
400  * @param[out] error
401  *   Pointer to error structure.
402  *
403  * @return
404  *   On success, the number of bytes consumed/necessary; if the returned value
405  *   is less than or equal to @p flow_size, the @p pattern has been fully
406  *   converted, otherwise another call with the returned memory size should
407  *   be done.
408  *   On error, a negative errno value is returned and rte_errno is set.
409  */
410 static int
411 mlx5_flow_items(const struct rte_flow_item pattern[],
412 		struct rte_flow *flow, const size_t flow_size,
413 		struct rte_flow_error *error)
414 {
415 	int remain = flow_size;
416 	size_t size = 0;
417 
418 	for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
419 		int ret = 0;
420 
421 		switch (pattern->type) {
422 		case RTE_FLOW_ITEM_TYPE_VOID:
423 			break;
424 		case RTE_FLOW_ITEM_TYPE_ETH:
425 			ret = mlx5_flow_item_eth(pattern, flow, remain, error);
426 			break;
427 		default:
428 			return rte_flow_error_set(error, ENOTSUP,
429 						  RTE_FLOW_ERROR_TYPE_ITEM,
430 						  pattern,
431 						  "item not supported");
432 		}
433 		if (ret < 0)
434 			return ret;
435 		if (remain > ret)
436 			remain -= ret;
437 		else
438 			remain = 0;
439 		size += ret;
440 	}
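	/*
	 * A pattern that sets no layer (e.g. only VOID items) matches all
	 * traffic: add a default, empty Ethernet item so a valid Verbs
	 * specification is still generated.
	 */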
441 	if (!flow->layers) {
442 		const struct rte_flow_item item = {
443 			.type = RTE_FLOW_ITEM_TYPE_ETH,
444 		};
445 
446 		return mlx5_flow_item_eth(&item, flow, flow_size, error);
447 	}
448 	return size;
449 }
450 
451 /**
452  * Convert the @p action into a Verbs specification after ensuring the NIC
453  * will understand and process it correctly.
454  * If the necessary size for the conversion is greater than @p flow_size,
455  * nothing is written in @p flow, but the validation is still performed.
456  *
457  * @param[in] action
458  *   Action configuration.
459  * @param[in, out] flow
460  *   Pointer to flow structure.
461  * @param[in] flow_size
462  *   Size in bytes of the available space in @p flow; if too small, nothing is
463  *   written.
464  * @param[out] error
465  *   Pointer to error structure.
466  *
467  * @return
468  *   On success, the number of bytes consumed/necessary; if the returned value
469  *   is less than or equal to @p flow_size, the @p action has been fully
470  *   converted, otherwise another call with the returned memory size should
471  *   be done.
472  *   On error, a negative errno value is returned and rte_errno is set.
473  */
474 static int
475 mlx5_flow_action_drop(const struct rte_flow_action *action,
476 		      struct rte_flow *flow, const size_t flow_size,
477 		      struct rte_flow_error *error)
478 {
479 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
480 	struct ibv_flow_spec_action_drop drop = {
481 			.type = IBV_FLOW_SPEC_ACTION_DROP,
482 			.size = size,
483 	};
484 
485 	if (flow->fate)
486 		return rte_flow_error_set(error, ENOTSUP,
487 					  RTE_FLOW_ERROR_TYPE_ACTION,
488 					  action,
489 					  "multiple fate actions are not"
490 					  " supported");
491 	if (size <= flow_size)
492 		mlx5_flow_spec_verbs_add(flow, &drop, size);
493 	flow->fate |= MLX5_FLOW_FATE_DROP;
494 	return size;
495 }
496 
497 /**
498  * Convert the @p action into @p flow after ensuring the NIC will understand
499  * and process it correctly.
500  *
501  * @param[in] dev
502  *   Pointer to Ethernet device structure.
503  * @param[in] action
504  *   Action configuration.
505  * @param[in, out] flow
506  *   Pointer to flow structure.
507  * @param[out] error
508  *   Pointer to error structure.
509  *
510  * @return
511  *   0 on success, a negative errno value otherwise and rte_errno is set.
512  */
513 static int
514 mlx5_flow_action_queue(struct rte_eth_dev *dev,
515 		       const struct rte_flow_action *action,
516 		       struct rte_flow *flow,
517 		       struct rte_flow_error *error)
518 {
519 	struct priv *priv = dev->data->dev_private;
520 	const struct rte_flow_action_queue *queue = action->conf;
521 
522 	if (flow->fate)
523 		return rte_flow_error_set(error, ENOTSUP,
524 					  RTE_FLOW_ERROR_TYPE_ACTION,
525 					  action,
526 					  "multiple fate actions are not"
527 					  " supported");
528 	if (queue->index >= priv->rxqs_n)
529 		return rte_flow_error_set(error, EINVAL,
530 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
531 					  &queue->index,
532 					  "queue index out of range");
533 	if (!(*priv->rxqs)[queue->index])
534 		return rte_flow_error_set(error, EINVAL,
535 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
536 					  &queue->index,
537 					  "queue is not configured");
538 	flow->queue = queue->index;
539 	flow->fate |= MLX5_FLOW_FATE_QUEUE;
540 	return 0;
541 }
542 
543 /**
544  * Convert the @p actions into @p flow after ensuring the NIC will understand
545  * and process them correctly.
546  * The conversion is performed action per action; each action is written into
547  * the @p flow if its size is less than or equal to @p flow_size.
548  * Validation and memory consumption computation are still performed until the
549  * end of @p actions, unless an error is encountered.
550  *
551  * @param[in] dev
552  *   Pointer to Ethernet device structure.
553  * @param[in] actions
554  *   Pointer to flow actions array.
555  * @param[in, out] flow
556  *   Pointer to the rte_flow structure.
557  * @param[in] flow_size
558  *   Size in bytes of the available space in @p flow; if too small, some
559  *   garbage may be present.
560  * @param[out] error
561  *   Pointer to error structure.
562  *
563  * @return
564  *   On success, the number of bytes consumed/necessary; if the returned value
565  *   is less than or equal to @p flow_size, the @p actions have been fully
566  *   converted, otherwise another call with the returned memory size should
567  *   be done.
568  *   On error, a negative errno value is returned and rte_errno is set.
569  */
570 static int
571 mlx5_flow_actions(struct rte_eth_dev *dev,
572 		  const struct rte_flow_action actions[],
573 		  struct rte_flow *flow, const size_t flow_size,
574 		  struct rte_flow_error *error)
575 {
576 	size_t size = 0;
577 	int remain = flow_size;
578 	int ret = 0;
579 
580 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
581 		switch (actions->type) {
582 		case RTE_FLOW_ACTION_TYPE_VOID:
583 			break;
584 		case RTE_FLOW_ACTION_TYPE_DROP:
585 			ret = mlx5_flow_action_drop(actions, flow, remain,
586 						    error);
587 			break;
588 		case RTE_FLOW_ACTION_TYPE_QUEUE:
589 			ret = mlx5_flow_action_queue(dev, actions, flow, error);
590 			break;
591 		default:
592 			return rte_flow_error_set(error, ENOTSUP,
593 						  RTE_FLOW_ERROR_TYPE_ACTION,
594 						  actions,
595 						  "action not supported");
596 		}
597 		if (ret < 0)
598 			return ret;
599 		if (remain > ret)
600 			remain -= ret;
601 		else
602 			remain = 0;
603 		size += ret;
604 	}
605 	if (!flow->fate)
606 		return rte_flow_error_set(error, ENOTSUP,
607 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
608 					  NULL,
609 					  "no fate action found");
610 	return size;
611 }
612 
613 /**
614  * Convert the @p attributes, @p pattern and @p actions into a flow for the
615  * NIC after ensuring the NIC will understand and process it correctly.
616  * The conversion is performed item per item and action per action; each of
617  * them is written into the @p flow if its size is less than or equal to @p
618  * flow_size.
619  * Validation and memory consumption computation are still performed until the
620  * end, unless an error is encountered.
621  *
622  * @param[in] dev
623  *   Pointer to Ethernet device.
624  * @param[in, out] flow
625  *   Pointer to flow structure.
626  * @param[in] flow_size
627  *   Size in bytes of the available space in @p flow; if too small, some
628  *   garbage may be present.
629  * @param[in] attributes
630  *   Flow rule attributes.
631  * @param[in] pattern
632  *   Pattern specification (list terminated by the END pattern item).
633  * @param[in] actions
634  *   Associated actions (list terminated by the END action).
635  * @param[out] error
636  *   Perform verbose error reporting if not NULL.
637  *
638  * @return
639  *   On success, the number of bytes consumed/necessary; if the returned value
640  *   is less than or equal to @p flow_size, the flow has been fully converted
641  *   and can be applied, otherwise another call with the returned memory size
642  *   should be done.
643  *   On error, a negative errno value is returned and rte_errno is set.
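 *
 * A typical two-pass usage, mirroring mlx5_flow_list_create() below
 * (illustrative sketch only):
 *
 * @code
 * int size = mlx5_flow_merge(dev, NULL, 0, attr, pattern, actions, error);
 *
 * if (size < 0)
 *         return size;
 * flow = rte_zmalloc(__func__, size, 0);
 * flow->verbs.attr = (struct ibv_flow_attr *)(flow + 1);
 * flow->verbs.specs = (uint8_t *)(flow->verbs.attr + 1);
 * size = mlx5_flow_merge(dev, flow, size, attr, pattern, actions, error);
 * @endcode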
644  */
645 static int
646 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
647 		const size_t flow_size,
648 		const struct rte_flow_attr *attributes,
649 		const struct rte_flow_item pattern[],
650 		const struct rte_flow_action actions[],
651 		struct rte_flow_error *error)
652 {
653 	struct rte_flow local_flow = { .layers = 0, };
654 	size_t size = sizeof(*flow) + sizeof(struct ibv_flow_attr);
655 	int remain = (flow_size > size) ? flow_size - size : 0;
656 	int ret;
657 
658 	if (!remain)
659 		flow = &local_flow;
660 	ret = mlx5_flow_attributes(dev, attributes, flow, error);
661 	if (ret < 0)
662 		return ret;
663 	ret = mlx5_flow_items(pattern, flow, remain, error);
664 	if (ret < 0)
665 		return ret;
666 	size += ret;
667 	remain = (flow_size > size) ? flow_size - size : 0;
668 	ret = mlx5_flow_actions(dev, actions, flow, remain, error);
669 	if (ret < 0)
670 		return ret;
671 	size += ret;
672 	if (size <= flow_size)
673 		flow->verbs.attr->priority = flow->attributes.priority;
674 	return size;
675 }
676 
677 /**
678  * Validate a flow supported by the NIC.
679  *
680  * @see rte_flow_validate()
681  * @see rte_flow_ops
682  */
683 int
684 mlx5_flow_validate(struct rte_eth_dev *dev,
685 		   const struct rte_flow_attr *attr,
686 		   const struct rte_flow_item items[],
687 		   const struct rte_flow_action actions[],
688 		   struct rte_flow_error *error)
689 {
690 	int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
691 
692 	if (ret < 0)
693 		return ret;
694 	return 0;
695 }
696 
697 /**
698  * Remove the flow.
699  *
700  * @param[in] dev
701  *   Pointer to Ethernet device.
702  * @param[in, out] flow
703  *   Pointer to flow structure.
704  */
705 static void
706 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
707 {
708 	if (flow->fate & MLX5_FLOW_FATE_DROP) {
709 		if (flow->verbs.flow) {
710 			claim_zero(mlx5_glue->destroy_flow(flow->verbs.flow));
711 			flow->verbs.flow = NULL;
712 		}
713 	}
714 	if (flow->verbs.hrxq) {
715 		if (flow->fate & MLX5_FLOW_FATE_DROP)
716 			mlx5_hrxq_drop_release(dev);
717 		else if (flow->fate & MLX5_FLOW_FATE_QUEUE)
718 			mlx5_hrxq_release(dev, flow->verbs.hrxq);
719 		flow->verbs.hrxq = NULL;
720 	}
721 }
722 
723 /**
724  * Apply the flow.
725  *
726  * @param[in] dev
727  *   Pointer to Ethernet device structure.
728  * @param[in, out] flow
729  *   Pointer to flow structure.
730  * @param[out] error
731  *   Pointer to error structure.
732  *
733  * @return
734  *   0 on success, a negative errno value otherwise and rte_errno is set.
735  */
736 static int
737 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
738 		struct rte_flow_error *error)
739 {
740 	if (flow->fate & MLX5_FLOW_FATE_DROP) {
741 		flow->verbs.hrxq = mlx5_hrxq_drop_new(dev);
742 		if (!flow->verbs.hrxq)
743 			return rte_flow_error_set
744 				(error, errno,
745 				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
746 				 NULL,
747 				 "cannot allocate Drop queue");
748 	} else if (flow->fate & MLX5_FLOW_FATE_QUEUE) {
749 		struct mlx5_hrxq *hrxq;
750 
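		/* Reuse a matching hash Rx queue if one exists, create it otherwise. */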
751 		hrxq = mlx5_hrxq_get(dev, rss_hash_default_key,
752 				     rss_hash_default_key_len, 0,
753 				     &flow->queue, 1, 0, 0);
754 		if (!hrxq)
755 			hrxq = mlx5_hrxq_new(dev, rss_hash_default_key,
756 					     rss_hash_default_key_len, 0,
757 					     &flow->queue, 1, 0, 0);
758 		if (!hrxq)
759 			return rte_flow_error_set(error, rte_errno,
760 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
761 					NULL,
762 					"cannot create flow");
763 		flow->verbs.hrxq = hrxq;
764 	}
765 	flow->verbs.flow =
766 		mlx5_glue->create_flow(flow->verbs.hrxq->qp, flow->verbs.attr);
767 	if (!flow->verbs.flow) {
768 		if (flow->fate & MLX5_FLOW_FATE_DROP)
769 			mlx5_hrxq_drop_release(dev);
770 		else
771 			mlx5_hrxq_release(dev, flow->verbs.hrxq);
772 		flow->verbs.hrxq = NULL;
773 		return rte_flow_error_set(error, errno,
774 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
775 					  NULL,
776 					  "kernel module refuses to create"
777 					  " flow");
778 	}
779 	return 0;
780 }
781 
782 /**
783  * Create a flow and add it to @p list.
784  *
785  * @param dev
786  *   Pointer to Ethernet device.
787  * @param list
788  *   Pointer to a TAILQ flow list.
789  * @param[in] attr
790  *   Flow rule attributes.
791  * @param[in] items
792  *   Pattern specification (list terminated by the END pattern item).
793  * @param[in] actions
794  *   Associated actions (list terminated by the END action).
795  * @param[out] error
796  *   Perform verbose error reporting if not NULL.
797  *
798  * @return
799  *   A flow on success, NULL otherwise and rte_errno is set.
800  */
801 static struct rte_flow *
802 mlx5_flow_list_create(struct rte_eth_dev *dev,
803 		      struct mlx5_flows *list,
804 		      const struct rte_flow_attr *attr,
805 		      const struct rte_flow_item items[],
806 		      const struct rte_flow_action actions[],
807 		      struct rte_flow_error *error)
808 {
809 	struct rte_flow *flow;
810 	size_t size;
811 	int ret;
812 
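
	/* First pass: validate the flow and compute the amount of memory needed. */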
813 	ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
814 	if (ret < 0)
815 		return NULL;
816 	size = ret;
817 	flow = rte_zmalloc(__func__, size, 0);
818 	if (!flow) {
819 		rte_flow_error_set(error, ENOMEM,
820 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
821 				   NULL,
822 				   "cannot allocate memory");
823 		return NULL;
824 	}
825 	flow->verbs.attr = (struct ibv_flow_attr *)(flow + 1);
826 	flow->verbs.specs = (uint8_t *)(flow->verbs.attr + 1);
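	/* Second pass: fill the allocated flow with the actual specifications. */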
827 	ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
828 	if (ret < 0)
829 		goto error;
830 	assert((size_t)ret == size);
831 	if (dev->data->dev_started) {
832 		ret = mlx5_flow_apply(dev, flow, error);
833 		if (ret < 0)
834 			goto error;
835 	}
836 	TAILQ_INSERT_TAIL(list, flow, next);
837 	return flow;
838 error:
839 	ret = rte_errno; /* Save rte_errno before cleanup. */
840 	mlx5_flow_remove(dev, flow);
841 	rte_free(flow);
842 	rte_errno = ret; /* Restore rte_errno. */
843 	return NULL;
844 }
845 
846 /**
847  * Create a flow.
848  *
849  * @see rte_flow_create()
850  * @see rte_flow_ops
851  */
852 struct rte_flow *
853 mlx5_flow_create(struct rte_eth_dev *dev,
854 		 const struct rte_flow_attr *attr,
855 		 const struct rte_flow_item items[],
856 		 const struct rte_flow_action actions[],
857 		 struct rte_flow_error *error)
858 {
859 	return mlx5_flow_list_create
860 		(dev, &((struct priv *)dev->data->dev_private)->flows,
861 		 attr, items, actions, error);
862 }
863 
864 /**
865  * Destroy a flow in a list.
866  *
867  * @param dev
868  *   Pointer to Ethernet device.
869  * @param list
870  *   Pointer to a TAILQ flow list.
871  * @param[in] flow
872  *   Flow to destroy.
873  */
874 static void
875 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
876 		       struct rte_flow *flow)
877 {
878 	mlx5_flow_remove(dev, flow);
879 	TAILQ_REMOVE(list, flow, next);
880 	rte_free(flow);
881 }
882 
883 /**
884  * Destroy all flows.
885  *
886  * @param dev
887  *   Pointer to Ethernet device.
888  * @param list
889  *   Pointer to a TAILQ flow list.
890  */
891 void
892 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
893 {
894 	while (!TAILQ_EMPTY(list)) {
895 		struct rte_flow *flow;
896 
897 		flow = TAILQ_FIRST(list);
898 		mlx5_flow_list_destroy(dev, list, flow);
899 	}
900 }
901 
902 /**
903  * Remove all flows.
904  *
905  * @param dev
906  *   Pointer to Ethernet device.
907  * @param list
908  *   Pointer to a TAILQ flow list.
909  */
910 void
911 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
912 {
913 	struct rte_flow *flow;
914 
915 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
916 		mlx5_flow_remove(dev, flow);
917 }
918 
919 /**
920  * Add all flows.
921  *
922  * @param dev
923  *   Pointer to Ethernet device.
924  * @param list
925  *   Pointer to a TAILQ flow list.
926  *
927  * @return
928  *   0 on success, a negative errno value otherwise and rte_errno is set.
929  */
930 int
931 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
932 {
933 	struct rte_flow *flow;
934 	struct rte_flow_error error;
935 	int ret = 0;
936 
937 	TAILQ_FOREACH(flow, list, next) {
938 		ret = mlx5_flow_apply(dev, flow, &error);
939 		if (ret < 0)
940 			goto error;
941 	}
942 	return 0;
943 error:
944 	ret = rte_errno; /* Save rte_errno before cleanup. */
945 	mlx5_flow_stop(dev, list);
946 	rte_errno = ret; /* Restore rte_errno. */
947 	return -rte_errno;
948 }
949 
950 /**
951  * Verify the flow list is empty.
952  *
953  * @param dev
954  *   Pointer to Ethernet device.
955  *
956  * @return the number of flows not released.
957  */
958 int
959 mlx5_flow_verify(struct rte_eth_dev *dev)
960 {
961 	struct priv *priv = dev->data->dev_private;
962 	struct rte_flow *flow;
963 	int ret = 0;
964 
965 	TAILQ_FOREACH(flow, &priv->flows, next) {
966 		DRV_LOG(DEBUG, "port %u flow %p still referenced",
967 			dev->data->port_id, (void *)flow);
968 		++ret;
969 	}
970 	return ret;
971 }
972 
973 /**
974  * Enable a control flow configured from the control plane.
975  *
976  * @param dev
977  *   Pointer to Ethernet device.
978  * @param eth_spec
979  *   An Ethernet flow spec to apply.
980  * @param eth_mask
981  *   An Ethernet flow mask to apply.
982  * @param vlan_spec
983  *   A VLAN flow spec to apply.
984  * @param vlan_mask
985  *   A VLAN flow mask to apply.
986  *
987  * @return
988  *   0 on success, a negative errno value otherwise and rte_errno is set.
989  */
990 int
991 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
992 		    struct rte_flow_item_eth *eth_spec,
993 		    struct rte_flow_item_eth *eth_mask,
994 		    struct rte_flow_item_vlan *vlan_spec,
995 		    struct rte_flow_item_vlan *vlan_mask)
996 {
997 	struct priv *priv = dev->data->dev_private;
998 	const struct rte_flow_attr attr = {
999 		.ingress = 1,
1000 		.priority = priv->config.flow_prio - 1,
1001 	};
1002 	struct rte_flow_item items[] = {
1003 		{
1004 			.type = RTE_FLOW_ITEM_TYPE_ETH,
1005 			.spec = eth_spec,
1006 			.last = NULL,
1007 			.mask = eth_mask,
1008 		},
1009 		{
1010 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
1011 				RTE_FLOW_ITEM_TYPE_END,
1012 			.spec = vlan_spec,
1013 			.last = NULL,
1014 			.mask = vlan_mask,
1015 		},
1016 		{
1017 			.type = RTE_FLOW_ITEM_TYPE_END,
1018 		},
1019 	};
1020 	uint16_t queue[priv->reta_idx_n];
1021 	struct rte_flow_action_rss action_rss = {
1022 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
1023 		.level = 0,
1024 		.types = priv->rss_conf.rss_hf,
1025 		.key_len = priv->rss_conf.rss_key_len,
1026 		.queue_num = priv->reta_idx_n,
1027 		.key = priv->rss_conf.rss_key,
1028 		.queue = queue,
1029 	};
1030 	struct rte_flow_action actions[] = {
1031 		{
1032 			.type = RTE_FLOW_ACTION_TYPE_RSS,
1033 			.conf = &action_rss,
1034 		},
1035 		{
1036 			.type = RTE_FLOW_ACTION_TYPE_END,
1037 		},
1038 	};
1039 	struct rte_flow *flow;
1040 	struct rte_flow_error error;
1041 	unsigned int i;
1042 
1043 	if (!priv->reta_idx_n) {
1044 		rte_errno = EINVAL;
1045 		return -rte_errno;
1046 	}
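	/* Spread the control flow over all queues of the RSS redirection table. */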
1047 	for (i = 0; i != priv->reta_idx_n; ++i)
1048 		queue[i] = (*priv->reta_idx)[i];
1049 	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
1050 				     actions, &error);
1051 	if (!flow)
1052 		return -rte_errno;
1053 	return 0;
1054 }
1055 
1056 /**
1057  * Enable a control flow configured from the control plane.
1058  *
1059  * @param dev
1060  *   Pointer to Ethernet device.
1061  * @param eth_spec
1062  *   An Ethernet flow spec to apply.
1063  * @param eth_mask
1064  *   An Ethernet flow mask to apply.
1065  *
1066  * @return
1067  *   0 on success, a negative errno value otherwise and rte_errno is set.
1068  */
1069 int
1070 mlx5_ctrl_flow(struct rte_eth_dev *dev,
1071 	       struct rte_flow_item_eth *eth_spec,
1072 	       struct rte_flow_item_eth *eth_mask)
1073 {
1074 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
1075 }
1076 
1077 /**
1078  * Destroy a flow.
1079  *
1080  * @see rte_flow_destroy()
1081  * @see rte_flow_ops
1082  */
1083 int
1084 mlx5_flow_destroy(struct rte_eth_dev *dev,
1085 		  struct rte_flow *flow,
1086 		  struct rte_flow_error *error __rte_unused)
1087 {
1088 	struct priv *priv = dev->data->dev_private;
1089 
1090 	mlx5_flow_list_destroy(dev, &priv->flows, flow);
1091 	return 0;
1092 }
1093 
1094 /**
1095  * Destroy all flows.
1096  *
1097  * @see rte_flow_flush()
1098  * @see rte_flow_ops
1099  */
1100 int
1101 mlx5_flow_flush(struct rte_eth_dev *dev,
1102 		struct rte_flow_error *error __rte_unused)
1103 {
1104 	struct priv *priv = dev->data->dev_private;
1105 
1106 	mlx5_flow_list_flush(dev, &priv->flows);
1107 	return 0;
1108 }
1109 
1110 /**
1111  * Isolated mode.
1112  *
1113  * @see rte_flow_isolate()
1114  * @see rte_flow_ops
1115  */
1116 int
1117 mlx5_flow_isolate(struct rte_eth_dev *dev,
1118 		  int enable,
1119 		  struct rte_flow_error *error)
1120 {
1121 	struct priv *priv = dev->data->dev_private;
1122 
1123 	if (dev->data->dev_started) {
1124 		rte_flow_error_set(error, EBUSY,
1125 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1126 				   NULL,
1127 				   "port must be stopped first");
1128 		return -rte_errno;
1129 	}
1130 	priv->isolated = !!enable;
1131 	if (enable)
1132 		dev->dev_ops = &mlx5_dev_ops_isolate;
1133 	else
1134 		dev->dev_ops = &mlx5_dev_ops;
1135 	return 0;
1136 }
1137 
1138 /**
1139  * Convert a flow director filter to a generic flow.
1140  *
1141  * @param dev
1142  *   Pointer to Ethernet device.
1143  * @param fdir_filter
1144  *   Flow director filter to add.
1145  * @param attributes
1146  *   Generic flow parameters structure.
1147  *
1148  * @return
1149  *   0 on success, a negative errno value otherwise and rte_errno is set.
1150  */
1151 static int
1152 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
1153 			 const struct rte_eth_fdir_filter *fdir_filter,
1154 			 struct mlx5_fdir *attributes)
1155 {
1156 	struct priv *priv = dev->data->dev_private;
1157 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
1158 	const struct rte_eth_fdir_masks *mask =
1159 		&dev->data->dev_conf.fdir_conf.mask;
1160 
1161 	/* Validate queue number. */
1162 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
1163 		DRV_LOG(ERR, "port %u invalid queue number %d",
1164 			dev->data->port_id, fdir_filter->action.rx_queue);
1165 		rte_errno = EINVAL;
1166 		return -rte_errno;
1167 	}
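	/*
	 * Express the filter as a generic ingress flow: an ETH / L3 / L4
	 * pattern with a QUEUE or DROP action.
	 */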
1168 	attributes->attr.ingress = 1;
1169 	attributes->items[0] = (struct rte_flow_item) {
1170 		.type = RTE_FLOW_ITEM_TYPE_ETH,
1171 		.spec = &attributes->l2,
1172 		.mask = &attributes->l2_mask,
1173 	};
1174 	switch (fdir_filter->action.behavior) {
1175 	case RTE_ETH_FDIR_ACCEPT:
1176 		attributes->actions[0] = (struct rte_flow_action){
1177 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
1178 			.conf = &attributes->queue,
1179 		};
1180 		break;
1181 	case RTE_ETH_FDIR_REJECT:
1182 		attributes->actions[0] = (struct rte_flow_action){
1183 			.type = RTE_FLOW_ACTION_TYPE_DROP,
1184 		};
1185 		break;
1186 	default:
1187 		DRV_LOG(ERR, "port %u invalid behavior %d",
1188 			dev->data->port_id,
1189 			fdir_filter->action.behavior);
1190 		rte_errno = ENOTSUP;
1191 		return -rte_errno;
1192 	}
1193 	attributes->queue.index = fdir_filter->action.rx_queue;
1194 	/* Handle L3. */
1195 	switch (fdir_filter->input.flow_type) {
1196 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
1197 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
1198 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
1199 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
1200 			.src_addr = input->flow.ip4_flow.src_ip,
1201 			.dst_addr = input->flow.ip4_flow.dst_ip,
1202 			.time_to_live = input->flow.ip4_flow.ttl,
1203 			.type_of_service = input->flow.ip4_flow.tos,
1204 			.next_proto_id = input->flow.ip4_flow.proto,
1205 		};
1206 		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
1207 			.src_addr = mask->ipv4_mask.src_ip,
1208 			.dst_addr = mask->ipv4_mask.dst_ip,
1209 			.time_to_live = mask->ipv4_mask.ttl,
1210 			.type_of_service = mask->ipv4_mask.tos,
1211 			.next_proto_id = mask->ipv4_mask.proto,
1212 		};
1213 		attributes->items[1] = (struct rte_flow_item){
1214 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
1215 			.spec = &attributes->l3,
1216 			.mask = &attributes->l3_mask,
1217 		};
1218 		break;
1219 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
1220 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
1221 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
1222 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
1223 			.hop_limits = input->flow.ipv6_flow.hop_limits,
1224 			.proto = input->flow.ipv6_flow.proto,
1225 		};
1226 
1227 		memcpy(attributes->l3.ipv6.hdr.src_addr,
1228 		       input->flow.ipv6_flow.src_ip,
1229 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
1230 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
1231 		       input->flow.ipv6_flow.dst_ip,
1232 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
1233 		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
1234 		       mask->ipv6_mask.src_ip,
1235 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
1236 		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
1237 		       mask->ipv6_mask.dst_ip,
1238 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
1239 		attributes->items[1] = (struct rte_flow_item){
1240 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
1241 			.spec = &attributes->l3,
1242 			.mask = &attributes->l3_mask,
1243 		};
1244 		break;
1245 	default:
1246 		DRV_LOG(ERR, "port %u invalid flow type %d",
1247 			dev->data->port_id, fdir_filter->input.flow_type);
1248 		rte_errno = ENOTSUP;
1249 		return -rte_errno;
1250 	}
1251 	/* Handle L4. */
1252 	switch (fdir_filter->input.flow_type) {
1253 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
1254 		attributes->l4.udp.hdr = (struct udp_hdr){
1255 			.src_port = input->flow.udp4_flow.src_port,
1256 			.dst_port = input->flow.udp4_flow.dst_port,
1257 		};
1258 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
1259 			.src_port = mask->src_port_mask,
1260 			.dst_port = mask->dst_port_mask,
1261 		};
1262 		attributes->items[2] = (struct rte_flow_item){
1263 			.type = RTE_FLOW_ITEM_TYPE_UDP,
1264 			.spec = &attributes->l4,
1265 			.mask = &attributes->l4_mask,
1266 		};
1267 		break;
1268 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
1269 		attributes->l4.tcp.hdr = (struct tcp_hdr){
1270 			.src_port = input->flow.tcp4_flow.src_port,
1271 			.dst_port = input->flow.tcp4_flow.dst_port,
1272 		};
1273 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
1274 			.src_port = mask->src_port_mask,
1275 			.dst_port = mask->dst_port_mask,
1276 		};
1277 		attributes->items[2] = (struct rte_flow_item){
1278 			.type = RTE_FLOW_ITEM_TYPE_TCP,
1279 			.spec = &attributes->l4,
1280 			.mask = &attributes->l4_mask,
1281 		};
1282 		break;
1283 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
1284 		attributes->l4.udp.hdr = (struct udp_hdr){
1285 			.src_port = input->flow.udp6_flow.src_port,
1286 			.dst_port = input->flow.udp6_flow.dst_port,
1287 		};
1288 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
1289 			.src_port = mask->src_port_mask,
1290 			.dst_port = mask->dst_port_mask,
1291 		};
1292 		attributes->items[2] = (struct rte_flow_item){
1293 			.type = RTE_FLOW_ITEM_TYPE_UDP,
1294 			.spec = &attributes->l4,
1295 			.mask = &attributes->l4_mask,
1296 		};
1297 		break;
1298 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
1299 		attributes->l4.tcp.hdr = (struct tcp_hdr){
1300 			.src_port = input->flow.tcp6_flow.src_port,
1301 			.dst_port = input->flow.tcp6_flow.dst_port,
1302 		};
1303 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
1304 			.src_port = mask->src_port_mask,
1305 			.dst_port = mask->dst_port_mask,
1306 		};
1307 		attributes->items[2] = (struct rte_flow_item){
1308 			.type = RTE_FLOW_ITEM_TYPE_TCP,
1309 			.spec = &attributes->l4,
1310 			.mask = &attributes->l4_mask,
1311 		};
1312 		break;
1313 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
1314 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
1315 		break;
1316 	default:
1317 		DRV_LOG(ERR, "port %u invalid flow type %d",
1318 			dev->data->port_id, fdir_filter->input.flow_type);
1319 		rte_errno = ENOTSUP;
1320 		return -rte_errno;
1321 	}
1322 	return 0;
1323 }
1324 
1325 /**
1326  * Add new flow director filter and store it in list.
1327  *
1328  * @param dev
1329  *   Pointer to Ethernet device.
1330  * @param fdir_filter
1331  *   Flow director filter to add.
1332  *
1333  * @return
1334  *   0 on success, a negative errno value otherwise and rte_errno is set.
1335  */
1336 static int
1337 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
1338 		     const struct rte_eth_fdir_filter *fdir_filter)
1339 {
1340 	struct priv *priv = dev->data->dev_private;
1341 	struct mlx5_fdir attributes = {
1342 		.attr.group = 0,
1343 		.l2_mask = {
1344 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1345 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1346 			.type = 0,
1347 		},
1348 	};
1349 	struct rte_flow_error error;
1350 	struct rte_flow *flow;
1351 	int ret;
1352 
1353 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
1354 	if (ret)
1355 		return ret;
1356 	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
1357 				     attributes.items, attributes.actions,
1358 				     &error);
1359 	if (flow) {
1360 		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
1361 			(void *)flow);
1362 		return 0;
1363 	}
1364 	return -rte_errno;
1365 }
1366 
1367 /**
1368  * Delete specific filter.
1369  *
1370  * @param dev
1371  *   Pointer to Ethernet device.
1372  * @param fdir_filter
1373  *   Filter to be deleted.
1374  *
1375  * @return
1376  *   0 on success, a negative errno value otherwise and rte_errno is set.
1377  */
1378 static int
1379 mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
1380 			const struct rte_eth_fdir_filter *fdir_filter
1381 			__rte_unused)
1382 {
1383 	rte_errno = ENOTSUP;
1384 	return -rte_errno;
1385 }
1386 
1387 /**
1388  * Update queue for specific filter.
1389  *
1390  * @param dev
1391  *   Pointer to Ethernet device.
1392  * @param fdir_filter
1393  *   Filter to be updated.
1394  *
1395  * @return
1396  *   0 on success, a negative errno value otherwise and rte_errno is set.
1397  */
1398 static int
1399 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
1400 			const struct rte_eth_fdir_filter *fdir_filter)
1401 {
1402 	int ret;
1403 
1404 	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
1405 	if (ret)
1406 		return ret;
1407 	return mlx5_fdir_filter_add(dev, fdir_filter);
1408 }
1409 
1410 /**
1411  * Flush all filters.
1412  *
1413  * @param dev
1414  *   Pointer to Ethernet device.
1415  */
1416 static void
1417 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
1418 {
1419 	struct priv *priv = dev->data->dev_private;
1420 
1421 	mlx5_flow_list_flush(dev, &priv->flows);
1422 }
1423 
1424 /**
1425  * Get flow director information.
1426  *
1427  * @param dev
1428  *   Pointer to Ethernet device.
1429  * @param[out] fdir_info
1430  *   Resulting flow director information.
1431  */
1432 static void
1433 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
1434 {
1435 	struct rte_eth_fdir_masks *mask =
1436 		&dev->data->dev_conf.fdir_conf.mask;
1437 
1438 	fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
1439 	fdir_info->guarant_spc = 0;
1440 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
1441 	fdir_info->max_flexpayload = 0;
1442 	fdir_info->flow_types_mask[0] = 0;
1443 	fdir_info->flex_payload_unit = 0;
1444 	fdir_info->max_flex_payload_segment_num = 0;
1445 	fdir_info->flex_payload_limit = 0;
1446 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
1447 }
1448 
1449 /**
1450  * Deal with flow director operations.
1451  *
1452  * @param dev
1453  *   Pointer to Ethernet device.
1454  * @param filter_op
1455  *   Operation to perform.
1456  * @param arg
1457  *   Pointer to operation-specific structure.
1458  *
1459  * @return
1460  *   0 on success, a negative errno value otherwise and rte_errno is set.
1461  */
1462 static int
1463 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
1464 		    void *arg)
1465 {
1466 	enum rte_fdir_mode fdir_mode =
1467 		dev->data->dev_conf.fdir_conf.mode;
1468 
1469 	if (filter_op == RTE_ETH_FILTER_NOP)
1470 		return 0;
1471 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
1472 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
1473 		DRV_LOG(ERR, "port %u flow director mode %d not supported",
1474 			dev->data->port_id, fdir_mode);
1475 		rte_errno = EINVAL;
1476 		return -rte_errno;
1477 	}
1478 	switch (filter_op) {
1479 	case RTE_ETH_FILTER_ADD:
1480 		return mlx5_fdir_filter_add(dev, arg);
1481 	case RTE_ETH_FILTER_UPDATE:
1482 		return mlx5_fdir_filter_update(dev, arg);
1483 	case RTE_ETH_FILTER_DELETE:
1484 		return mlx5_fdir_filter_delete(dev, arg);
1485 	case RTE_ETH_FILTER_FLUSH:
1486 		mlx5_fdir_filter_flush(dev);
1487 		break;
1488 	case RTE_ETH_FILTER_INFO:
1489 		mlx5_fdir_info_get(dev, arg);
1490 		break;
1491 	default:
1492 		DRV_LOG(DEBUG, "port %u unknown operation %u",
1493 			dev->data->port_id, filter_op);
1494 		rte_errno = EINVAL;
1495 		return -rte_errno;
1496 	}
1497 	return 0;
1498 }
1499 
1500 /**
1501  * Manage filter operations.
1502  *
1503  * @param dev
1504  *   Pointer to Ethernet device structure.
1505  * @param filter_type
1506  *   Filter type.
1507  * @param filter_op
1508  *   Operation to perform.
1509  * @param arg
1510  *   Pointer to operation-specific structure.
1511  *
1512  * @return
1513  *   0 on success, a negative errno value otherwise and rte_errno is set.
1514  */
1515 int
1516 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
1517 		     enum rte_filter_type filter_type,
1518 		     enum rte_filter_op filter_op,
1519 		     void *arg)
1520 {
1521 	switch (filter_type) {
1522 	case RTE_ETH_FILTER_GENERIC:
1523 		if (filter_op != RTE_ETH_FILTER_GET) {
1524 			rte_errno = EINVAL;
1525 			return -rte_errno;
1526 		}
1527 		*(const void **)arg = &mlx5_flow_ops;
1528 		return 0;
1529 	case RTE_ETH_FILTER_FDIR:
1530 		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
1531 	default:
1532 		DRV_LOG(ERR, "port %u filter type (%d) not supported",
1533 			dev->data->port_id, filter_type);
1534 		rte_errno = ENOTSUP;
1535 		return -rte_errno;
1536 	}
1537 	return 0;
1538 }
1539