1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9 
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19 
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28 
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33 
34 /* Dev ops structures defined in mlx5.c. */
35 extern const struct eth_dev_ops mlx5_dev_ops;
36 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
37 
38 /* Pattern Layer bits. */
39 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
40 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
41 #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
42 #define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
43 #define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
44 #define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
45 /* Masks. */
46 #define MLX5_FLOW_LAYER_OUTER_L3 \
47 	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
48 #define MLX5_FLOW_LAYER_OUTER_L4 \
49 	(MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
50 
51 /* Actions that modify the fate of matching traffic. */
52 #define MLX5_FLOW_FATE_DROP (1u << 0)
53 #define MLX5_FLOW_FATE_QUEUE (1u << 1)
54 
55 /* Modify a packet. */
56 #define MLX5_FLOW_MOD_FLAG (1u << 0)
57 #define MLX5_FLOW_MOD_MARK (1u << 1)
58 
59 /* Possible L3 layer protocols for filtering. */
60 #define MLX5_IP_PROTOCOL_TCP 6
61 #define MLX5_IP_PROTOCOL_UDP 17
62 
63 /** Verbs flow data: attributes, specifications and object handles. */
64 struct mlx5_flow_verbs {
65 	unsigned int size; /**< Size of the attribute. */
66 	struct {
67 		struct ibv_flow_attr *attr;
68 		/**< Pointer to the Verbs flow attributes. */
69 		uint8_t *specs; /**< Pointer to the specifications. */
70 	};
71 	struct ibv_flow *flow; /**< Verbs flow pointer. */
72 	struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
73 };
74 
75 /* Flow structure. */
76 struct rte_flow {
77 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
78 	struct rte_flow_attr attributes; /**< User flow attribute. */
79 	uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
80 	uint32_t layers;
81 	/**< Bit-field of present layers, see MLX5_FLOW_LAYER_*. */
82 	uint32_t modifier;
83 	/**< Bit-field of present modifiers, see MLX5_FLOW_MOD_*. */
84 	uint32_t fate;
85 	/**< Bit-field of present fate actions, see MLX5_FLOW_FATE_*. */
86 	uint8_t l3_protocol; /**< Valid when l3_protocol_en is set. */
87 	struct mlx5_flow_verbs verbs; /**< Verbs flow. */
88 	uint16_t queue; /**< Destination queue to redirect traffic to. */
89 };
90 
91 static const struct rte_flow_ops mlx5_flow_ops = {
92 	.validate = mlx5_flow_validate,
93 	.create = mlx5_flow_create,
94 	.destroy = mlx5_flow_destroy,
95 	.flush = mlx5_flow_flush,
96 	.isolate = mlx5_flow_isolate,
97 };
98 
99 /* Convert FDIR request to Generic flow. */
100 struct mlx5_fdir {
101 	struct rte_flow_attr attr;
102 	struct rte_flow_action actions[2];
103 	struct rte_flow_item items[4];
104 	struct rte_flow_item_eth l2;
105 	struct rte_flow_item_eth l2_mask;
106 	union {
107 		struct rte_flow_item_ipv4 ipv4;
108 		struct rte_flow_item_ipv6 ipv6;
109 	} l3;
110 	union {
111 		struct rte_flow_item_ipv4 ipv4;
112 		struct rte_flow_item_ipv6 ipv6;
113 	} l3_mask;
114 	union {
115 		struct rte_flow_item_udp udp;
116 		struct rte_flow_item_tcp tcp;
117 	} l4;
118 	union {
119 		struct rte_flow_item_udp udp;
120 		struct rte_flow_item_tcp tcp;
121 	} l4_mask;
122 	struct rte_flow_action_queue queue;
123 };
124 
125 /* Verbs specification header. */
126 struct ibv_spec_header {
127 	enum ibv_flow_spec_type type;
128 	uint16_t size;
129 };
130 
131 /**
132  * Discover the maximum number of flow priorities available.
133  *
134  * @param[in] dev
135  *   Pointer to Ethernet device.
136  *
137  * @return
138  *   Number of supported flow priorities on success, a negative errno value
139  *   otherwise and rte_errno is set.
140  */
141 int
142 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
143 {
144 	struct {
145 		struct ibv_flow_attr attr;
146 		struct ibv_flow_spec_eth eth;
147 		struct ibv_flow_spec_action_drop drop;
148 	} flow_attr = {
149 		.attr = {
150 			.num_of_specs = 2,
151 		},
152 		.eth = {
153 			.type = IBV_FLOW_SPEC_ETH,
154 			.size = sizeof(struct ibv_flow_spec_eth),
155 		},
156 		.drop = {
157 			.size = sizeof(struct ibv_flow_spec_action_drop),
158 			.type = IBV_FLOW_SPEC_ACTION_DROP,
159 		},
160 	};
161 	struct ibv_flow *flow;
162 	struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
163 	uint16_t vprio[] = { 8, 16 };
164 	int i;
165 
166 	if (!drop) {
167 		rte_errno = ENOTSUP;
168 		return -rte_errno;
169 	}
170 	for (i = 0; i != RTE_DIM(vprio); i++) {
171 		flow_attr.attr.priority = vprio[i] - 1;
172 		flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
173 		if (!flow)
174 			break;
175 		claim_zero(mlx5_glue->destroy_flow(flow));
176 	}
177 	mlx5_hrxq_drop_release(dev);
	if (!i) {
		/* No priority level was accepted, report the failure. */
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
178 	DRV_LOG(INFO, "port %u flow maximum priority: %d",
179 		dev->data->port_id, vprio[i - 1]);
180 	return vprio[i - 1];
181 }
182 
183 /**
184  * Verify the @p attributes will be correctly understood by the NIC and store
185  * them in the @p flow if everything is correct.
186  *
187  * @param[in] dev
188  *   Pointer to Ethernet device.
189  * @param[in] attributes
190  *   Pointer to flow attributes
191  * @param[in, out] flow
192  *   Pointer to the rte_flow structure.
193  * @param[out] error
194  *   Pointer to error structure.
195  *
196  * @return
197  *   0 on success, a negative errno value otherwise and rte_errno is set.
198  */
199 static int
200 mlx5_flow_attributes(struct rte_eth_dev *dev,
201 		     const struct rte_flow_attr *attributes,
202 		     struct rte_flow *flow,
203 		     struct rte_flow_error *error)
204 {
205 	uint32_t priority_max =
206 		((struct priv *)dev->data->dev_private)->config.flow_prio;
207 
208 	if (attributes->group)
209 		return rte_flow_error_set(error, ENOTSUP,
210 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
211 					  NULL,
212 					  "groups are not supported");
213 	if (attributes->priority >= priority_max)
214 		return rte_flow_error_set(error, ENOTSUP,
215 					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
216 					  NULL,
217 					  "priority out of range");
218 	if (attributes->egress)
219 		return rte_flow_error_set(error, ENOTSUP,
220 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
221 					  NULL,
222 					  "egress is not supported");
223 	if (attributes->transfer)
224 		return rte_flow_error_set(error, ENOTSUP,
225 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
226 					  NULL,
227 					  "transfer is not supported");
228 	if (!attributes->ingress)
229 		return rte_flow_error_set(error, ENOTSUP,
230 					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
231 					  NULL,
232 					  "ingress attribute is mandatory");
233 	flow->attributes = *attributes;
234 	return 0;
235 }
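
/*
 * Illustrative sketch (not part of the driver): an attribute set accepted by
 * mlx5_flow_attributes(), assuming the value returned by
 * mlx5_flow_discover_priorities() (8 or 16 with the probing above) has been
 * stored in priv->config.flow_prio. Egress, transfer, a non-zero group or a
 * priority >= flow_prio are all refused.
 *
 *	struct rte_flow_attr attr = {
 *		.ingress = 1,
 *		.priority = 0,
 *	};
 */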
236 
237 /**
238  * Verify the @p item specifications (spec, last, mask) are compatible with the
239  * NIC capabilities.
240  *
241  * @param[in] item
242  *   Item specification.
243  * @param[in] mask
244  *   @p item->mask or flow default bit-masks.
245  * @param[in] nic_mask
246  *   Bit-masks covering supported fields by the NIC to compare with user mask.
247  * @param[in] size
248  *   Bit-masks size in bytes.
249  * @param[out] error
250  *   Pointer to error structure.
251  *
252  * @return
253  *   0 on success, a negative errno value otherwise and rte_errno is set.
254  */
255 static int
256 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
257 			  const uint8_t *mask,
258 			  const uint8_t *nic_mask,
259 			  unsigned int size,
260 			  struct rte_flow_error *error)
261 {
262 	unsigned int i;
263 
264 	assert(nic_mask);
265 	for (i = 0; i < size; ++i)
266 		if ((nic_mask[i] | mask[i]) != nic_mask[i])
267 			return rte_flow_error_set(error, ENOTSUP,
268 						  RTE_FLOW_ERROR_TYPE_ITEM,
269 						  item,
270 						  "mask enables unsupported"
271 						  " bits");
272 	if (!item->spec && (item->mask || item->last))
273 		return rte_flow_error_set(error, EINVAL,
274 					  RTE_FLOW_ERROR_TYPE_ITEM,
275 					  item,
276 					  "mask/last without a spec is not"
277 					  " supported");
278 	if (item->spec && item->last) {
279 		uint8_t spec[size];
280 		uint8_t last[size];
281 		unsigned int i;
282 		int ret;
283 
284 		for (i = 0; i < size; ++i) {
285 			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
286 			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
287 		}
288 		ret = memcmp(spec, last, size);
289 		if (ret != 0)
290 			return rte_flow_error_set(error, ENOTSUP,
291 						  RTE_FLOW_ERROR_TYPE_ITEM,
292 						  item,
293 						  "range is not supported");
294 	}
295 	return 0;
296 }
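
/*
 * Illustrative sketch (not part of the driver): a user mask enabling a field
 * outside the supported nic_mask is refused by mlx5_flow_item_acceptable().
 * With the IPv4 nic_mask used below (addresses, TOS and protocol only),
 * matching on the IP identification field, for instance, is rejected:
 *
 *	struct rte_flow_item_ipv4 bad_mask = {
 *		.hdr = { .packet_id = RTE_BE16(0xffff) },
 *	};
 */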
297 
298 /**
299  * Add a verbs specification into @p flow.
300  *
301  * @param[in, out] flow
302  *   Pointer to flow structure.
303  * @param[in] src
304  *   Pointer to the Verbs specification to copy into @p flow.
305  * @param[in] size
306  *   Size in bytes of the specification to copy.
307  */
308 static void
309 mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
310 {
311 	if (flow->verbs.specs) {
312 		void *dst;
313 
314 		dst = (void *)(flow->verbs.specs + flow->verbs.size);
315 		memcpy(dst, src, size);
316 		++flow->verbs.attr->num_of_specs;
317 	}
318 	flow->verbs.size += size;
319 }
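
/*
 * Illustrative note: during the first sizing pass (see mlx5_flow_merge()
 * below) flow->verbs.specs is still NULL, so only flow->verbs.size is
 * accumulated. The specification is copied on the second pass, once the
 * caller has laid out the buffers, e.g.:
 *
 *	flow->verbs.attr = (struct ibv_flow_attr *)(flow + 1);
 *	flow->verbs.specs = (uint8_t *)(flow->verbs.attr + 1);
 */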
320 
321 /**
322  * Convert the @p item into a Verbs specification after ensuring the NIC
323  * will understand and process it correctly.
324  * If the necessary size for the conversion is greater than the @p flow_size,
325  * nothing is written in @p flow; the validation is still performed.
326  *
327  * @param[in] item
328  *   Item specification.
329  * @param[in, out] flow
330  *   Pointer to flow structure.
331  * @param[in] flow_size
332  *   Size in bytes of the available space in @p flow, if too small, nothing is
333  *   written.
334  * @param[out] error
335  *   Pointer to error structure.
336  *
337  * @return
338  *   On success the number of bytes consumed/necessary, if the returned value
339  *   is less than or equal to @p flow_size, the @p item has fully been converted,
340  *   otherwise another call with this returned memory size should be done.
341  *   On error, a negative errno value is returned and rte_errno is set.
342  */
343 static int
344 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
345 		   const size_t flow_size, struct rte_flow_error *error)
346 {
347 	const struct rte_flow_item_eth *spec = item->spec;
348 	const struct rte_flow_item_eth *mask = item->mask;
349 	const struct rte_flow_item_eth nic_mask = {
350 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
351 		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
352 		.type = RTE_BE16(0xffff),
353 	};
354 	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
355 	struct ibv_flow_spec_eth eth = {
356 		.type = IBV_FLOW_SPEC_ETH,
357 		.size = size,
358 	};
359 	int ret;
360 
361 	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L2)
362 		return rte_flow_error_set(error, ENOTSUP,
363 					  RTE_FLOW_ERROR_TYPE_ITEM,
364 					  item,
365 					  "L2 layers already configured");
366 	if (!mask)
367 		mask = &rte_flow_item_eth_mask;
368 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
369 					(const uint8_t *)&nic_mask,
370 					sizeof(struct rte_flow_item_eth),
371 					error);
372 	if (ret)
373 		return ret;
374 	flow->layers |= MLX5_FLOW_LAYER_OUTER_L2;
375 	if (size > flow_size)
376 		return size;
377 	if (spec) {
378 		unsigned int i;
379 
380 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
381 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
382 		eth.val.ether_type = spec->type;
383 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
384 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
385 		eth.mask.ether_type = mask->type;
386 		/* Remove unwanted bits from values. */
387 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
388 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
389 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
390 		}
391 		eth.val.ether_type &= eth.mask.ether_type;
392 	}
393 	mlx5_flow_spec_verbs_add(flow, &eth, size);
394 	return size;
395 }
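
/*
 * Illustrative sketch (values arbitrary): an ETH item matching a single
 * destination MAC address, as it could be handed to mlx5_flow_item_eth().
 * Only the masked destination MAC ends up in the ibv_flow_spec_eth; the
 * source MAC and ether_type stay zero once the unwanted bits are removed.
 *
 *	struct rte_flow_item_eth eth_spec = {
 *		.dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *	};
 *	struct rte_flow_item_eth eth_mask = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *	struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_ETH,
 *		.spec = &eth_spec,
 *		.mask = &eth_mask,
 *	};
 */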
396 
397 /**
398  * Update the VLAN tag in the Verbs Ethernet specification.
399  *
400  * @param[in, out] attr
401  *   Pointer to Verbs attributes structure.
402  * @param[in] eth
403  *   Verbs structure containing the VLAN information to copy.
404  */
405 static void
406 mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
407 			   struct ibv_flow_spec_eth *eth)
408 {
409 	unsigned int i;
410 	enum ibv_flow_spec_type search = IBV_FLOW_SPEC_ETH;
411 	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
412 		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
413 
414 	for (i = 0; i != attr->num_of_specs; ++i) {
415 		if (hdr->type == search) {
416 			struct ibv_flow_spec_eth *e =
417 				(struct ibv_flow_spec_eth *)hdr;
418 
419 			e->val.vlan_tag = eth->val.vlan_tag;
420 			e->mask.vlan_tag = eth->mask.vlan_tag;
421 			e->val.ether_type = eth->val.ether_type;
422 			e->mask.ether_type = eth->mask.ether_type;
423 			break;
424 		}
425 		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
426 	}
427 }
428 
429 /**
430  * Convert the @p item into @p flow (or update the already present
431  * Ethernet Verbs specification) after ensuring the NIC will understand and
432  * process it correctly.
433  * If the necessary size for the conversion is greater than the @p flow_size,
434  * nothing is written in @p flow; the validation is still performed.
435  *
436  * @param[in] item
437  *   Item specification.
438  * @param[in, out] flow
439  *   Pointer to flow structure.
440  * @param[in] flow_size
441  *   Size in bytes of the available space in @p flow, if too small, nothing is
442  *   written.
443  * @param[out] error
444  *   Pointer to error structure.
445  *
446  * @return
447  *   On success the number of bytes consumed/necessary, if the returned value
448  *   is less than or equal to @p flow_size, the @p item has fully been converted,
449  *   otherwise another call with this returned memory size should be done.
450  *   On error, a negative errno value is returned and rte_errno is set.
451  */
452 static int
453 mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
454 		    const size_t flow_size, struct rte_flow_error *error)
455 {
456 	const struct rte_flow_item_vlan *spec = item->spec;
457 	const struct rte_flow_item_vlan *mask = item->mask;
458 	const struct rte_flow_item_vlan nic_mask = {
459 		.tci = RTE_BE16(0x0fff),
460 		.inner_type = RTE_BE16(0xffff),
461 	};
462 	unsigned int size = sizeof(struct ibv_flow_spec_eth);
463 	struct ibv_flow_spec_eth eth = {
464 		.type = IBV_FLOW_SPEC_ETH,
465 		.size = size,
466 	};
467 	int ret;
468 	const uint32_t l34m = MLX5_FLOW_LAYER_OUTER_L3 |
469 			MLX5_FLOW_LAYER_OUTER_L4;
470 	const uint32_t vlanm = MLX5_FLOW_LAYER_OUTER_VLAN;
471 	const uint32_t l2m = MLX5_FLOW_LAYER_OUTER_L2;
472 
473 	if (flow->layers & vlanm)
474 		return rte_flow_error_set(error, ENOTSUP,
475 					  RTE_FLOW_ERROR_TYPE_ITEM,
476 					  item,
477 					  "VLAN layer already configured");
478 	else if ((flow->layers & l34m) != 0)
479 		return rte_flow_error_set(error, ENOTSUP,
480 					  RTE_FLOW_ERROR_TYPE_ITEM,
481 					  item,
482 					  "L2 layer cannot follow L3/L4 layer");
483 	if (!mask)
484 		mask = &rte_flow_item_vlan_mask;
485 	ret = mlx5_flow_item_acceptable
486 		(item, (const uint8_t *)mask,
487 		 (const uint8_t *)&nic_mask,
488 		 sizeof(struct rte_flow_item_vlan), error);
489 	if (ret)
490 		return ret;
491 	if (spec) {
492 		eth.val.vlan_tag = spec->tci;
493 		eth.mask.vlan_tag = mask->tci;
494 		eth.val.vlan_tag &= eth.mask.vlan_tag;
495 		eth.val.ether_type = spec->inner_type;
496 		eth.mask.ether_type = mask->inner_type;
497 		eth.val.ether_type &= eth.mask.ether_type;
498 	}
499 	/*
500 	 * From verbs perspective an empty VLAN is equivalent
501 	 * to a packet without VLAN layer.
502 	 */
503 	if (!eth.mask.vlan_tag)
504 		return rte_flow_error_set(error, EINVAL,
505 					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
506 					  item->spec,
507 					  "VLAN cannot be empty");
508 	if (!(flow->layers & l2m)) {
509 		if (size <= flow_size)
510 			mlx5_flow_spec_verbs_add(flow, &eth, size);
511 	} else {
512 		if (flow->verbs.attr)
513 			mlx5_flow_item_vlan_update(flow->verbs.attr, &eth);
514 		size = 0; /* Only an update is done in eth specification. */
515 	}
516 	flow->layers |= MLX5_FLOW_LAYER_OUTER_L2 |
517 		MLX5_FLOW_LAYER_OUTER_VLAN;
518 	return size;
519 }
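
/*
 * Illustrative sketch (values arbitrary): when an ETH item was already
 * translated, a following VLAN item does not emit a second Verbs
 * specification. mlx5_flow_item_vlan() returns 0 and
 * mlx5_flow_item_vlan_update() patches vlan_tag and ether_type into the
 * ibv_flow_spec_eth written for the ETH item.
 *
 *	struct rte_flow_item_vlan vlan_spec = { .tci = RTE_BE16(0x0123) };
 *	struct rte_flow_item_vlan vlan_mask = { .tci = RTE_BE16(0x0fff) };
 */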
520 
521 /**
522  * Convert the @p item into a Verbs specification after ensuring the NIC
523  * will understand and process it correctly.
524  * If the necessary size for the conversion is greater than the @p flow_size,
525  * nothing is written in @p flow; the validation is still performed.
526  *
527  * @param[in] item
528  *   Item specification.
529  * @param[in, out] flow
530  *   Pointer to flow structure.
531  * @param[in] flow_size
532  *   Size in bytes of the available space in @p flow, if too small, nothing is
533  *   written.
534  * @param[out] error
535  *   Pointer to error structure.
536  *
537  * @return
538  *   On success the number of bytes consumed/necessary, if the returned value
539  *   is less than or equal to @p flow_size, the @p item has fully been converted,
540  *   otherwise another call with this returned memory size should be done.
541  *   On error, a negative errno value is returned and rte_errno is set.
542  */
543 static int
544 mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
545 		    const size_t flow_size, struct rte_flow_error *error)
546 {
547 	const struct rte_flow_item_ipv4 *spec = item->spec;
548 	const struct rte_flow_item_ipv4 *mask = item->mask;
549 	const struct rte_flow_item_ipv4 nic_mask = {
550 		.hdr = {
551 			.src_addr = RTE_BE32(0xffffffff),
552 			.dst_addr = RTE_BE32(0xffffffff),
553 			.type_of_service = 0xff,
554 			.next_proto_id = 0xff,
555 		},
556 	};
557 	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
558 	struct ibv_flow_spec_ipv4_ext ipv4 = {
559 		.type = IBV_FLOW_SPEC_IPV4_EXT,
560 		.size = size,
561 	};
562 	int ret;
563 
564 	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
565 		return rte_flow_error_set(error, ENOTSUP,
566 					  RTE_FLOW_ERROR_TYPE_ITEM,
567 					  item,
568 					  "multiple L3 layers not supported");
569 	else if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
570 		return rte_flow_error_set(error, ENOTSUP,
571 					  RTE_FLOW_ERROR_TYPE_ITEM,
572 					  item,
573 					  "L3 cannot follow an L4 layer.");
574 	if (!mask)
575 		mask = &rte_flow_item_ipv4_mask;
576 	ret = mlx5_flow_item_acceptable
577 		(item, (const uint8_t *)mask,
578 		 (const uint8_t *)&nic_mask,
579 		 sizeof(struct rte_flow_item_ipv4), error);
580 	if (ret < 0)
581 		return ret;
582 	flow->layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
583 	if (spec) {
584 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
585 			.src_ip = spec->hdr.src_addr,
586 			.dst_ip = spec->hdr.dst_addr,
587 			.proto = spec->hdr.next_proto_id,
588 			.tos = spec->hdr.type_of_service,
589 		};
590 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
591 			.src_ip = mask->hdr.src_addr,
592 			.dst_ip = mask->hdr.dst_addr,
593 			.proto = mask->hdr.next_proto_id,
594 			.tos = mask->hdr.type_of_service,
595 		};
596 		/* Remove unwanted bits from values. */
597 		ipv4.val.src_ip &= ipv4.mask.src_ip;
598 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
599 		ipv4.val.proto &= ipv4.mask.proto;
600 		ipv4.val.tos &= ipv4.mask.tos;
601 	}
602 	flow->l3_protocol_en = !!ipv4.mask.proto;
603 	flow->l3_protocol = ipv4.val.proto;
604 	if (size <= flow_size)
605 		mlx5_flow_spec_verbs_add(flow, &ipv4, size);
606 	return size;
607 }
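
/*
 * Illustrative sketch (values arbitrary): masking next_proto_id here sets
 * flow->l3_protocol_en and flow->l3_protocol, which mlx5_flow_item_udp()
 * and mlx5_flow_item_tcp() check later, so only a UDP item may follow the
 * IPv4 item below.
 *
 *	struct rte_flow_item_ipv4 ipv4_spec = {
 *		.hdr = { .next_proto_id = MLX5_IP_PROTOCOL_UDP },
 *	};
 *	struct rte_flow_item_ipv4 ipv4_mask = {
 *		.hdr = { .next_proto_id = 0xff },
 *	};
 */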
608 
609 /**
610  * Convert the @p item into a Verbs specification after ensuring the NIC
611  * will understand and process it correctly.
612  * If the necessary size for the conversion is greater than the @p flow_size,
613  * nothing is written in @p flow; the validation is still performed.
614  *
615  * @param[in] item
616  *   Item specification.
617  * @param[in, out] flow
618  *   Pointer to flow structure.
619  * @param[in] flow_size
620  *   Size in bytes of the available space in @p flow, if too small, nothing is
621  *   written.
622  * @param[out] error
623  *   Pointer to error structure.
624  *
625  * @return
626  *   On success the number of bytes consumed/necessary, if the returned value
627  *   is less than or equal to @p flow_size, the @p item has fully been converted,
628  *   otherwise another call with this returned memory size should be done.
629  *   On error, a negative errno value is returned and rte_errno is set.
630  */
631 static int
632 mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
633 		    const size_t flow_size, struct rte_flow_error *error)
634 {
635 	const struct rte_flow_item_ipv6 *spec = item->spec;
636 	const struct rte_flow_item_ipv6 *mask = item->mask;
637 	const struct rte_flow_item_ipv6 nic_mask = {
638 		.hdr = {
639 			.src_addr =
640 				"\xff\xff\xff\xff\xff\xff\xff\xff"
641 				"\xff\xff\xff\xff\xff\xff\xff\xff",
642 			.dst_addr =
643 				"\xff\xff\xff\xff\xff\xff\xff\xff"
644 				"\xff\xff\xff\xff\xff\xff\xff\xff",
645 			.vtc_flow = RTE_BE32(0xffffffff),
646 			.proto = 0xff,
647 			.hop_limits = 0xff,
648 		},
649 	};
650 	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
651 	struct ibv_flow_spec_ipv6 ipv6 = {
652 		.type = IBV_FLOW_SPEC_IPV6,
653 		.size = size,
654 	};
655 	int ret;
656 
657 	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
658 		return rte_flow_error_set(error, ENOTSUP,
659 					  RTE_FLOW_ERROR_TYPE_ITEM,
660 					  item,
661 					  "multiple L3 layers not supported");
662 	else if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
663 		return rte_flow_error_set(error, ENOTSUP,
664 					  RTE_FLOW_ERROR_TYPE_ITEM,
665 					  item,
666 					  "L3 cannot follow an L4 layer.");
667 	if (!mask)
668 		mask = &rte_flow_item_ipv6_mask;
669 	ret = mlx5_flow_item_acceptable
670 		(item, (const uint8_t *)mask,
671 		 (const uint8_t *)&nic_mask,
672 		 sizeof(struct rte_flow_item_ipv6), error);
673 	if (ret < 0)
674 		return ret;
675 	flow->layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
676 	if (spec) {
677 		unsigned int i;
678 		uint32_t vtc_flow_val;
679 		uint32_t vtc_flow_mask;
680 
681 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
682 		       RTE_DIM(ipv6.val.src_ip));
683 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
684 		       RTE_DIM(ipv6.val.dst_ip));
685 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
686 		       RTE_DIM(ipv6.mask.src_ip));
687 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
688 		       RTE_DIM(ipv6.mask.dst_ip));
689 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
690 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
691 		ipv6.val.flow_label =
692 			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
693 					 IPV6_HDR_FL_SHIFT);
694 		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
695 					 IPV6_HDR_TC_SHIFT;
696 		ipv6.val.next_hdr = spec->hdr.proto;
697 		ipv6.val.hop_limit = spec->hdr.hop_limits;
698 		ipv6.mask.flow_label =
699 			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
700 					 IPV6_HDR_FL_SHIFT);
701 		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
702 					  IPV6_HDR_TC_SHIFT;
703 		ipv6.mask.next_hdr = mask->hdr.proto;
704 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
705 		/* Remove unwanted bits from values. */
706 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
707 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
708 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
709 		}
710 		ipv6.val.flow_label &= ipv6.mask.flow_label;
711 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
712 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
713 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
714 	}
715 	flow->l3_protocol_en = !!ipv6.mask.next_hdr;
716 	flow->l3_protocol = ipv6.val.next_hdr;
717 	if (size <= flow_size)
718 		mlx5_flow_spec_verbs_add(flow, &ipv6, size);
719 	return size;
720 }
721 
722 /**
723  * Convert the @p item into a Verbs specification after ensuring the NIC
724  * will understand and process it correctly.
725  * If the necessary size for the conversion is greater than the @p flow_size,
726  * nothing is written in @p flow; the validation is still performed.
727  *
728  * @param[in] item
729  *   Item specification.
730  * @param[in, out] flow
731  *   Pointer to flow structure.
732  * @param[in] flow_size
733  *   Size in bytes of the available space in @p flow, if too small, nothing is
734  *   written.
735  * @param[out] error
736  *   Pointer to error structure.
737  *
738  * @return
739  *   On success the number of bytes consumed/necessary, if the returned value
740  *   is less than or equal to @p flow_size, the @p item has fully been converted,
741  *   otherwise another call with this returned memory size should be done.
742  *   On error, a negative errno value is returned and rte_errno is set.
743  */
744 static int
745 mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
746 		   const size_t flow_size, struct rte_flow_error *error)
747 {
748 	const struct rte_flow_item_udp *spec = item->spec;
749 	const struct rte_flow_item_udp *mask = item->mask;
750 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
751 	struct ibv_flow_spec_tcp_udp udp = {
752 		.type = IBV_FLOW_SPEC_UDP,
753 		.size = size,
754 	};
755 	int ret;
756 
757 	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
758 		return rte_flow_error_set(error, ENOTSUP,
759 					  RTE_FLOW_ERROR_TYPE_ITEM,
760 					  item,
761 					  "L3 is mandatory to filter on L4");
762 	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
763 		return rte_flow_error_set(error, ENOTSUP,
764 					  RTE_FLOW_ERROR_TYPE_ITEM,
765 					  item,
766 					  "L4 layer is already present");
767 	if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
768 		return rte_flow_error_set(error, ENOTSUP,
769 					  RTE_FLOW_ERROR_TYPE_ITEM,
770 					  item,
771 					  "protocol filtering not compatible"
772 					  " with UDP layer");
773 	if (!mask)
774 		mask = &rte_flow_item_udp_mask;
775 	ret = mlx5_flow_item_acceptable
776 		(item, (const uint8_t *)mask,
777 		 (const uint8_t *)&rte_flow_item_udp_mask,
778 		 sizeof(struct rte_flow_item_udp), error);
779 	if (ret < 0)
780 		return ret;
781 	flow->layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
782 	if (size > flow_size)
783 		return size;
784 	if (spec) {
785 		udp.val.dst_port = spec->hdr.dst_port;
786 		udp.val.src_port = spec->hdr.src_port;
787 		udp.mask.dst_port = mask->hdr.dst_port;
788 		udp.mask.src_port = mask->hdr.src_port;
789 		/* Remove unwanted bits from values. */
790 		udp.val.src_port &= udp.mask.src_port;
791 		udp.val.dst_port &= udp.mask.dst_port;
792 	}
793 	mlx5_flow_spec_verbs_add(flow, &udp, size);
794 	return size;
795 }
796 
797 /**
798  * Convert the @p item into a Verbs specification after ensuring the NIC
799  * will understand and process it correctly.
800  * If the necessary size for the conversion is greater than the @p flow_size,
801  * nothing is written in @p flow; the validation is still performed.
802  *
803  * @param[in] item
804  *   Item specification.
805  * @param[in, out] flow
806  *   Pointer to flow structure.
807  * @param[in] flow_size
808  *   Size in bytes of the available space in @p flow, if too small, nothing is
809  *   written.
810  * @param[out] error
811  *   Pointer to error structure.
812  *
813  * @return
814  *   On success the number of bytes consumed/necessary, if the returned value
815  *   is less than or equal to @p flow_size, the @p item has fully been converted,
816  *   otherwise another call with this returned memory size should be done.
817  *   On error, a negative errno value is returned and rte_errno is set.
818  */
819 static int
820 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
821 		   const size_t flow_size, struct rte_flow_error *error)
822 {
823 	const struct rte_flow_item_tcp *spec = item->spec;
824 	const struct rte_flow_item_tcp *mask = item->mask;
825 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
826 	struct ibv_flow_spec_tcp_udp tcp = {
827 		.type = IBV_FLOW_SPEC_TCP,
828 		.size = size,
829 	};
830 	int ret;
831 
832 	if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
833 		return rte_flow_error_set(error, ENOTSUP,
834 					  RTE_FLOW_ERROR_TYPE_ITEM,
835 					  item,
836 					  "protocol filtering not compatible"
837 					  " with TCP layer");
838 	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
839 		return rte_flow_error_set(error, ENOTSUP,
840 					  RTE_FLOW_ERROR_TYPE_ITEM,
841 					  item,
842 					  "L3 is mandatory to filter on L4");
843 	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
844 		return rte_flow_error_set(error, ENOTSUP,
845 					  RTE_FLOW_ERROR_TYPE_ITEM,
846 					  item,
847 					  "L4 layer is already present");
848 	if (!mask)
849 		mask = &rte_flow_item_tcp_mask;
850 	ret = mlx5_flow_item_acceptable
851 		(item, (const uint8_t *)mask,
852 		 (const uint8_t *)&rte_flow_item_tcp_mask,
853 		 sizeof(struct rte_flow_item_tcp), error);
854 	if (ret < 0)
855 		return ret;
856 	flow->layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
857 	if (size > flow_size)
858 		return size;
859 	if (spec) {
860 		tcp.val.dst_port = spec->hdr.dst_port;
861 		tcp.val.src_port = spec->hdr.src_port;
862 		tcp.mask.dst_port = mask->hdr.dst_port;
863 		tcp.mask.src_port = mask->hdr.src_port;
864 		/* Remove unwanted bits from values. */
865 		tcp.val.src_port &= tcp.mask.src_port;
866 		tcp.val.dst_port &= tcp.mask.dst_port;
867 	}
868 	mlx5_flow_spec_verbs_add(flow, &tcp, size);
869 	return size;
870 }
871 
872 /**
873  * Convert the @p pattern into Verbs specifications after ensuring the NIC
874  * will understand and process it correctly.
875  * The conversion is performed item per item, each of them is written into
876  * the @p flow if its size is less than or equal to @p flow_size.
877  * Validation and memory consumption computation are still performed until the
878  * end of @p pattern, unless an error is encountered.
879  *
880  * @param[in] pattern
881  *   Flow pattern.
882  * @param[in, out] flow
883  *   Pointer to the rte_flow structure.
884  * @param[in] flow_size
885  *   Size in bytes of the available space in @p flow, if too small some
886  *   garbage may be present.
887  * @param[out] error
888  *   Pointer to error structure.
889  *
890  * @return
891  *   On success the number of bytes consumed/necessary, if the returned value
892  *   is less than or equal to @p flow_size, the @p pattern has fully been
893  *   converted, otherwise another call with this returned memory size should
894  *   be done.
895  *   On error, a negative errno value is returned and rte_errno is set.
896  */
897 static int
898 mlx5_flow_items(const struct rte_flow_item pattern[],
899 		struct rte_flow *flow, const size_t flow_size,
900 		struct rte_flow_error *error)
901 {
902 	int remain = flow_size;
903 	size_t size = 0;
904 
905 	for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
906 		int ret = 0;
907 
908 		switch (pattern->type) {
909 		case RTE_FLOW_ITEM_TYPE_VOID:
910 			break;
911 		case RTE_FLOW_ITEM_TYPE_ETH:
912 			ret = mlx5_flow_item_eth(pattern, flow, remain, error);
913 			break;
914 		case RTE_FLOW_ITEM_TYPE_VLAN:
915 			ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
916 			break;
917 		case RTE_FLOW_ITEM_TYPE_IPV4:
918 			ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
919 			break;
920 		case RTE_FLOW_ITEM_TYPE_IPV6:
921 			ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
922 			break;
923 		case RTE_FLOW_ITEM_TYPE_UDP:
924 			ret = mlx5_flow_item_udp(pattern, flow, remain, error);
925 			break;
926 		case RTE_FLOW_ITEM_TYPE_TCP:
927 			ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
928 			break;
929 		default:
930 			return rte_flow_error_set(error, ENOTSUP,
931 						  RTE_FLOW_ERROR_TYPE_ITEM,
932 						  pattern,
933 						  "item not supported");
934 		}
935 		if (ret < 0)
936 			return ret;
937 		if (remain > ret)
938 			remain -= ret;
939 		else
940 			remain = 0;
941 		size += ret;
942 	}
943 	if (!flow->layers) {
944 		const struct rte_flow_item item = {
945 			.type = RTE_FLOW_ITEM_TYPE_ETH,
946 		};
947 
948 		return mlx5_flow_item_eth(&item, flow, flow_size, error);
949 	}
950 	return size;
951 }
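
/*
 * Illustrative sketch (not part of the driver): a pattern walked by
 * mlx5_flow_items() above. ETH, IPV4 and UDP each contribute the size of
 * their Verbs specification; a pattern containing only END falls back to
 * the catch-all ETH item handled at the end of the function.
 *
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */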
952 
953 /**
954  * Convert the @p action into a Verbs specification after ensuring the NIC
955  * will understand and process it correctly.
956  * If the necessary size for the conversion is greater than the @p flow_size,
957  * nothing is written in @p flow; the validation is still performed.
958  *
959  * @param[in] action
960  *   Action configuration.
961  * @param[in, out] flow
962  *   Pointer to flow structure.
963  * @param[in] flow_size
964  *   Size in bytes of the available space in @p flow, if too small, nothing is
965  *   written.
966  * @param[out] error
967  *   Pointer to error structure.
968  *
969  * @return
970  *   On success the number of bytes consumed/necessary, if the returned value
971  *   is less than or equal to @p flow_size, the @p action has fully been
972  *   converted, otherwise another call with this returned memory size should
973  *   be done.
974  *   On error, a negative errno value is returned and rte_errno is set.
975  */
976 static int
977 mlx5_flow_action_drop(const struct rte_flow_action *action,
978 		      struct rte_flow *flow, const size_t flow_size,
979 		      struct rte_flow_error *error)
980 {
981 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
982 	struct ibv_flow_spec_action_drop drop = {
983 			.type = IBV_FLOW_SPEC_ACTION_DROP,
984 			.size = size,
985 	};
986 
987 	if (flow->fate)
988 		return rte_flow_error_set(error, ENOTSUP,
989 					  RTE_FLOW_ERROR_TYPE_ACTION,
990 					  action,
991 					  "multiple fate actions are not"
992 					  " supported");
993 	if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
994 		return rte_flow_error_set(error, ENOTSUP,
995 					  RTE_FLOW_ERROR_TYPE_ACTION,
996 					  action,
997 					  "drop is not compatible with"
998 					  " flag/mark action");
999 	if (size <= flow_size)
1000 		mlx5_flow_spec_verbs_add(flow, &drop, size);
1001 	flow->fate |= MLX5_FLOW_FATE_DROP;
1002 	return size;
1003 }
1004 
1005 /**
1006  * Convert the @p action into @p flow after ensuring the NIC will understand
1007  * and process it correctly.
1008  *
1009  * @param[in] dev
1010  *   Pointer to Ethernet device structure.
1011  * @param[in] action
1012  *   Action configuration.
1013  * @param[in, out] flow
1014  *   Pointer to flow structure.
1015  * @param[out] error
1016  *   Pointer to error structure.
1017  *
1018  * @return
1019  *   0 on success, a negative errno value otherwise and rte_errno is set.
1020  */
1021 static int
1022 mlx5_flow_action_queue(struct rte_eth_dev *dev,
1023 		       const struct rte_flow_action *action,
1024 		       struct rte_flow *flow,
1025 		       struct rte_flow_error *error)
1026 {
1027 	struct priv *priv = dev->data->dev_private;
1028 	const struct rte_flow_action_queue *queue = action->conf;
1029 
1030 	if (flow->fate)
1031 		return rte_flow_error_set(error, ENOTSUP,
1032 					  RTE_FLOW_ERROR_TYPE_ACTION,
1033 					  action,
1034 					  "multiple fate actions are not"
1035 					  " supported");
1036 	if (queue->index >= priv->rxqs_n)
1037 		return rte_flow_error_set(error, EINVAL,
1038 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1039 					  &queue->index,
1040 					  "queue index out of range");
1041 	if (!(*priv->rxqs)[queue->index])
1042 		return rte_flow_error_set(error, EINVAL,
1043 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1044 					  &queue->index,
1045 					  "queue is not configured");
1046 	flow->queue = queue->index;
1047 	flow->fate |= MLX5_FLOW_FATE_QUEUE;
1048 	return 0;
1049 }
1050 
1051 /**
1052  * Convert the @p action into a Verbs specification after ensuring the NIC
1053  * will understand and process it correctly.
1054  * If the necessary size for the conversion is greater than the @p flow_size,
1055  * nothing is written in @p flow; the validation is still performed.
1056  *
1057  * @param[in] action
1058  *   Action configuration.
1059  * @param[in, out] flow
1060  *   Pointer to flow structure.
1061  * @param[in] flow_size
1062  *   Size in bytes of the available space in @p flow, if too small, nothing is
1063  *   written.
1064  * @param[out] error
1065  *   Pointer to error structure.
1066  *
1067  * @return
1068  *   On success the number of bytes consumed/necessary, if the returned value
1069  *   is less than or equal to @p flow_size, the @p action has fully been
1070  *   converted, otherwise another call with this returned memory size should
1071  *   be done.
1072  *   On error, a negative errno value is returned and rte_errno is set.
1073  */
1074 static int
1075 mlx5_flow_action_flag(const struct rte_flow_action *action,
1076 		      struct rte_flow *flow, const size_t flow_size,
1077 		      struct rte_flow_error *error)
1078 {
1079 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1080 	struct ibv_flow_spec_action_tag tag = {
1081 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1082 		.size = size,
1083 		.tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1084 	};
1085 
1086 	if (flow->modifier & MLX5_FLOW_MOD_FLAG)
1087 		return rte_flow_error_set(error, ENOTSUP,
1088 					  RTE_FLOW_ERROR_TYPE_ACTION,
1089 					  action,
1090 					  "flag action already present");
1091 	if (flow->fate & MLX5_FLOW_FATE_DROP)
1092 		return rte_flow_error_set(error, ENOTSUP,
1093 					  RTE_FLOW_ERROR_TYPE_ACTION,
1094 					  action,
1095 					  "flag is not compatible with drop"
1096 					  " action");
1097 	if (flow->modifier & MLX5_FLOW_MOD_MARK)
1098 		return 0;
1099 	flow->modifier |= MLX5_FLOW_MOD_FLAG;
1100 	if (size <= flow_size)
1101 		mlx5_flow_spec_verbs_add(flow, &tag, size);
1102 	return size;
1103 }
1104 
1105 /**
1106  * Update verbs specification to modify the flag to mark.
1107  *
1108  * @param[in, out] flow
1109  *   Pointer to the rte_flow structure.
1110  * @param[in] mark_id
1111  *   Mark identifier to replace the flag.
1112  */
1113 static void
1114 mlx5_flow_verbs_mark_update(struct rte_flow *flow, uint32_t mark_id)
1115 {
1116 	struct ibv_spec_header *hdr;
1117 	int i;
1118 
1119 	/* Update Verbs specification. */
1120 	hdr = (struct ibv_spec_header *)flow->verbs.specs;
1121 	if (!hdr)
1122 		return;
1123 	for (i = 0; i != flow->verbs.attr->num_of_specs; ++i) {
1124 		if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
1125 			struct ibv_flow_spec_action_tag *t =
1126 				(struct ibv_flow_spec_action_tag *)hdr;
1127 
1128 			t->tag_id = mlx5_flow_mark_set(mark_id);
1129 		}
1130 		hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
1131 	}
1132 }
1133 
1134 /**
1135  * Convert the @p action into @p flow (or update the already present
1136  * Flag Verbs specification) after ensuring the NIC will understand and
1137  * process it correctly.
1138  * If the necessary size for the conversion is greater than the @p flow_size,
1139  * nothing is written in @p flow; the validation is still performed.
1140  *
1141  * @param[in] action
1142  *   Action configuration.
1143  * @param[in, out] flow
1144  *   Pointer to flow structure.
1145  * @param[in] flow_size
1146  *   Size in bytes of the available space in @p flow, if too small, nothing is
1147  *   written.
1148  * @param[out] error
1149  *   Pointer to error structure.
1150  *
1151  * @return
1152  *   On success the number of bytes consumed/necessary, if the returned value
1153  *   is less than or equal to @p flow_size, the @p action has fully been
1154  *   converted, otherwise another call with this returned memory size should
1155  *   be done.
1156  *   On error, a negative errno value is returned and rte_errno is set.
1157  */
1158 static int
1159 mlx5_flow_action_mark(const struct rte_flow_action *action,
1160 		      struct rte_flow *flow, const size_t flow_size,
1161 		      struct rte_flow_error *error)
1162 {
1163 	const struct rte_flow_action_mark *mark = action->conf;
1164 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1165 	struct ibv_flow_spec_action_tag tag = {
1166 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1167 		.size = size,
1168 	};
1169 
1170 	if (!mark)
1171 		return rte_flow_error_set(error, EINVAL,
1172 					  RTE_FLOW_ERROR_TYPE_ACTION,
1173 					  action,
1174 					  "configuration cannot be null");
1175 	if (mark->id >= MLX5_FLOW_MARK_MAX)
1176 		return rte_flow_error_set(error, EINVAL,
1177 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1178 					  &mark->id,
1179 					  "mark id must be in 0 <= id < "
1180 					  RTE_STR(MLX5_FLOW_MARK_MAX));
1181 	if (flow->modifier & MLX5_FLOW_MOD_MARK)
1182 		return rte_flow_error_set(error, ENOTSUP,
1183 					  RTE_FLOW_ERROR_TYPE_ACTION,
1184 					  action,
1185 					  "mark action already present");
1186 	if (flow->fate & MLX5_FLOW_FATE_DROP)
1187 		return rte_flow_error_set(error, ENOTSUP,
1188 					  RTE_FLOW_ERROR_TYPE_ACTION,
1189 					  action,
1190 					  "mark is not compatible with drop"
1191 					  " action");
1192 	if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
1193 		mlx5_flow_verbs_mark_update(flow, mark->id);
1194 		size = 0; /* Only an update is done in the specification. */
1195 	} else {
1196 		tag.tag_id = mlx5_flow_mark_set(mark->id);
1197 		if (size <= flow_size)
1198 			mlx5_flow_spec_verbs_add(flow, &tag, size);
1201 	}
1202 	flow->modifier |= MLX5_FLOW_MOD_MARK;
1203 	return size;
1204 }
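
/*
 * Illustrative sketch (values arbitrary): when a FLAG action was already
 * translated, a following MARK does not add a second
 * ibv_flow_spec_action_tag; mlx5_flow_verbs_mark_update() rewrites the
 * existing tag with the new id and 0 bytes are consumed. A fate action
 * such as QUEUE is still required for the rule to be valid.
 *
 *	struct rte_flow_action_mark mark = { .id = 42 };
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_FLAG },
 *		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 */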
1205 
1206 /**
1207  * Convert the @p actions into @p flow after ensuring the NIC will understand
1208  * and process them correctly.
1209  * The conversion is performed action per action, each of them is written into
1210  * the @p flow if its size is less than or equal to @p flow_size.
1211  * Validation and memory consumption computation are still performed until the
1212  * end of @p actions, unless an error is encountered.
1213  *
1214  * @param[in] dev
1215  *   Pointer to Ethernet device structure.
1216  * @param[in] actions
1217  *   Pointer to flow actions array.
1218  * @param[in, out] flow
1219  *   Pointer to the rte_flow structure.
1220  * @param[in] flow_size
1221  *   Size in bytes of the available space in @p flow, if too small some
1222  *   garbage may be present.
1223  * @param[out] error
1224  *   Pointer to error structure.
1225  *
1226  * @return
1227  *   On success the number of bytes consumed/necessary, if the returned value
1228  *   is less than or equal to @p flow_size, the @p actions have fully been
1229  *   converted, otherwise another call with this returned memory size should
1230  *   be done.
1231  *   On error, a negative errno value is returned and rte_errno is set.
1232  */
1233 static int
1234 mlx5_flow_actions(struct rte_eth_dev *dev,
1235 		  const struct rte_flow_action actions[],
1236 		  struct rte_flow *flow, const size_t flow_size,
1237 		  struct rte_flow_error *error)
1238 {
1239 	size_t size = 0;
1240 	int remain = flow_size;
1241 	int ret = 0;
1242 
1243 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1244 		switch (actions->type) {
1245 		case RTE_FLOW_ACTION_TYPE_VOID:
1246 			break;
1247 		case RTE_FLOW_ACTION_TYPE_FLAG:
1248 			ret = mlx5_flow_action_flag(actions, flow, remain,
1249 						    error);
1250 			break;
1251 		case RTE_FLOW_ACTION_TYPE_MARK:
1252 			ret = mlx5_flow_action_mark(actions, flow, remain,
1253 						    error);
1254 			break;
1255 		case RTE_FLOW_ACTION_TYPE_DROP:
1256 			ret = mlx5_flow_action_drop(actions, flow, remain,
1257 						    error);
1258 			break;
1259 		case RTE_FLOW_ACTION_TYPE_QUEUE:
1260 			ret = mlx5_flow_action_queue(dev, actions, flow, error);
1261 			break;
1262 		default:
1263 			return rte_flow_error_set(error, ENOTSUP,
1264 						  RTE_FLOW_ERROR_TYPE_ACTION,
1265 						  actions,
1266 						  "action not supported");
1267 		}
1268 		if (ret < 0)
1269 			return ret;
1270 		if (remain > ret)
1271 			remain -= ret;
1272 		else
1273 			remain = 0;
1274 		size += ret;
1275 	}
1276 	if (!flow->fate)
1277 		return rte_flow_error_set(error, ENOTSUP,
1278 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1279 					  NULL,
1280 					  "no fate action found");
1281 	return size;
1282 }
1283 
1284 /**
1285  * Convert the @p attributes, @p pattern and @p actions into a flow for the
1286  * NIC after ensuring the NIC will understand and process it correctly.
1287  * The conversion is performed item/action per item/action, each of them
1288  * is written into the @p flow if its size is less than or equal to
1289  * @p flow_size.
1290  * Validation and memory consumption computation are still performed until the
1291  * end, unless an error is encountered.
1292  *
1293  * @param[in] dev
1294  *   Pointer to Ethernet device.
1295  * @param[in, out] flow
1296  *   Pointer to flow structure.
1297  * @param[in] flow_size
1298  *   Size in bytes of the available space in @p flow, if too small some
1299  *   garbage may be present.
1300  * @param[in] attributes
1301  *   Flow rule attributes.
1302  * @param[in] pattern
1303  *   Pattern specification (list terminated by the END pattern item).
1304  * @param[in] actions
1305  *   Associated actions (list terminated by the END action).
1306  * @param[out] error
1307  *   Perform verbose error reporting if not NULL.
1308  *
1309  * @return
1310  *   On success the number of bytes consumed/necessary, if the returned value
1311  *   is less than or equal to @p flow_size, the flow has fully been converted and
1312  *   can be applied, otherwise another call with this returned memory size
1313  *   should be done.
1314  *   On error, a negative errno value is returned and rte_errno is set.
1315  */
1316 static int
1317 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
1318 		const size_t flow_size,
1319 		const struct rte_flow_attr *attributes,
1320 		const struct rte_flow_item pattern[],
1321 		const struct rte_flow_action actions[],
1322 		struct rte_flow_error *error)
1323 {
1324 	struct rte_flow local_flow = { .layers = 0, };
1325 	size_t size = sizeof(*flow) + sizeof(struct ibv_flow_attr);
1326 	int remain = (flow_size > size) ? flow_size - size : 0;
1327 	int ret;
1328 
1329 	if (!remain)
1330 		flow = &local_flow;
1331 	ret = mlx5_flow_attributes(dev, attributes, flow, error);
1332 	if (ret < 0)
1333 		return ret;
1334 	ret = mlx5_flow_items(pattern, flow, remain, error);
1335 	if (ret < 0)
1336 		return ret;
1337 	size += ret;
1338 	remain = (flow_size > size) ? flow_size - size : 0;
1339 	ret = mlx5_flow_actions(dev, actions, flow, remain, error);
1340 	if (ret < 0)
1341 		return ret;
1342 	size += ret;
1343 	if (size <= flow_size)
1344 		flow->verbs.attr->priority = flow->attributes.priority;
1345 	return size;
1346 }
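
/*
 * Usage sketch (illustrative; mlx5_flow_list_create() below is the real
 * caller): the size returned by a first call without a buffer is used to
 * allocate the flow, then a second call fills it.
 *
 *	struct rte_flow *flow;
 *	int size;
 *
 *	size = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
 *	if (size < 0)
 *		return NULL;
 *	flow = rte_zmalloc(__func__, size, 0);
 *	flow->verbs.attr = (struct ibv_flow_attr *)(flow + 1);
 *	flow->verbs.specs = (uint8_t *)(flow->verbs.attr + 1);
 *	size = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
 */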
1347 
1348 /**
1349  * Mark the Rx queues mark flag if the flow has a mark or flag modifier.
1350  * Set the Rx queue mark flag if the flow has a mark or flag modifier.
1351  * @param[in] dev
1352  *   Pointer to Ethernet device.
1353  * @param[in] flow
1354  *   Pointer to flow structure.
1355  */
1356 static void
1357 mlx5_flow_rxq_mark_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1358 {
1359 	struct priv *priv = dev->data->dev_private;
1360 
1361 	if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) {
1362 		struct mlx5_rxq_ctrl *rxq_ctrl =
1363 			container_of((*priv->rxqs)[flow->queue],
1364 				     struct mlx5_rxq_ctrl, rxq);
1365 
1366 		rxq_ctrl->rxq.mark = 1;
1367 		rxq_ctrl->flow_mark_n++;
1368 	}
1369 }
1370 
1371 /**
1372  * Clear the Rx queue mark associated with the @p flow if no other flow uses
1373  * it with a mark request.
1374  *
1375  * @param dev
1376  *   Pointer to Ethernet device.
1377  * @param[in] flow
1378  *   Pointer to the flow.
1379  */
1380 static void
1381 mlx5_flow_rxq_mark_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1382 {
1383 	struct priv *priv = dev->data->dev_private;
1384 
1385 	if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) {
1386 		struct mlx5_rxq_ctrl *rxq_ctrl =
1387 			container_of((*priv->rxqs)[flow->queue],
1388 				     struct mlx5_rxq_ctrl, rxq);
1389 
1390 		rxq_ctrl->flow_mark_n--;
1391 		rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
1392 	}
1393 }
1394 
1395 /**
1396  * Clear the mark bit in all Rx queues.
1397  *
1398  * @param dev
1399  *   Pointer to Ethernet device.
1400  */
1401 static void
1402 mlx5_flow_rxq_mark_clear(struct rte_eth_dev *dev)
1403 {
1404 	struct priv *priv = dev->data->dev_private;
1405 	unsigned int i;
1406
1407 	for (i = 0; i != priv->rxqs_n; ++i) {
1408 		struct mlx5_rxq_ctrl *rxq_ctrl;
1409
1410 		if (!(*priv->rxqs)[i])
1411 			continue;
1412 		rxq_ctrl = container_of((*priv->rxqs)[i],
1413 					struct mlx5_rxq_ctrl, rxq);
1414 		rxq_ctrl->flow_mark_n = 0;
1415 		rxq_ctrl->rxq.mark = 0;
1416 	}
1419 }
1420 
1421 /**
1422  * Validate a flow supported by the NIC.
1423  *
1424  * @see rte_flow_validate()
1425  * @see rte_flow_ops
1426  */
1427 int
1428 mlx5_flow_validate(struct rte_eth_dev *dev,
1429 		   const struct rte_flow_attr *attr,
1430 		   const struct rte_flow_item items[],
1431 		   const struct rte_flow_action actions[],
1432 		   struct rte_flow_error *error)
1433 {
1434 	int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
1435 
1436 	if (ret < 0)
1437 		return ret;
1438 	return 0;
1439 }
1440 
1441 /**
1442  * Remove the flow.
1443  *
1444  * @param[in] dev
1445  *   Pointer to Ethernet device.
1446  * @param[in, out] flow
1447  *   Pointer to flow structure.
1448  */
1449 static void
1450 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1451 {
1452 	if (flow->verbs.flow) {
1453 		claim_zero(mlx5_glue->destroy_flow(flow->verbs.flow));
1454 		flow->verbs.flow = NULL;
1455 	}
1458 	if (flow->verbs.hrxq) {
1459 		if (flow->fate & MLX5_FLOW_FATE_DROP)
1460 			mlx5_hrxq_drop_release(dev);
1461 		else if (flow->fate & MLX5_FLOW_FATE_QUEUE)
1462 			mlx5_hrxq_release(dev, flow->verbs.hrxq);
1463 		flow->verbs.hrxq = NULL;
1464 	}
1465 }
1466 
1467 /**
1468  * Apply the flow.
1469  *
1470  * @param[in] dev
1471  *   Pointer to Ethernet device structure.
1472  * @param[in, out] flow
1473  *   Pointer to flow structure.
1474  * @param[out] error
1475  *   Pointer to error structure.
1476  *
1477  * @return
1478  *   0 on success, a negative errno value otherwise and rte_errno is set.
1479  */
1480 static int
1481 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1482 		struct rte_flow_error *error)
1483 {
1484 	if (flow->fate & MLX5_FLOW_FATE_DROP) {
1485 		flow->verbs.hrxq = mlx5_hrxq_drop_new(dev);
1486 		if (!flow->verbs.hrxq)
1487 			return rte_flow_error_set
1488 				(error, errno,
1489 				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1490 				 NULL,
1491 				 "cannot allocate Drop queue");
1492 	} else if (flow->fate & MLX5_FLOW_FATE_QUEUE) {
1493 		struct mlx5_hrxq *hrxq;
1494 
1495 		hrxq = mlx5_hrxq_get(dev, rss_hash_default_key,
1496 				     MLX5_RSS_HASH_KEY_LEN, 0,
1497 				     &flow->queue, 1, 0, 0);
1498 		if (!hrxq)
1499 			hrxq = mlx5_hrxq_new(dev, rss_hash_default_key,
1500 					     MLX5_RSS_HASH_KEY_LEN, 0,
1501 					     &flow->queue, 1, 0, 0);
1502 		if (!hrxq)
1503 			return rte_flow_error_set(error, rte_errno,
1504 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1505 					NULL,
1506 					"cannot get hash queue");
1507 		flow->verbs.hrxq = hrxq;
1508 	}
1509 	flow->verbs.flow =
1510 		mlx5_glue->create_flow(flow->verbs.hrxq->qp, flow->verbs.attr);
1511 	if (!flow->verbs.flow) {
1512 		if (flow->fate & MLX5_FLOW_FATE_DROP)
1513 			mlx5_hrxq_drop_release(dev);
1514 		else
1515 			mlx5_hrxq_release(dev, flow->verbs.hrxq);
1516 		flow->verbs.hrxq = NULL;
1517 		return rte_flow_error_set(error, errno,
1518 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1519 					  NULL,
1520 					  "kernel module refuses to create"
1521 					  " flow");
1522 	}
1523 	return 0;
1524 }
1525 
1526 /**
1527  * Create a flow and add it to @p list.
1528  *
1529  * @param dev
1530  *   Pointer to Ethernet device.
1531  * @param list
1532  *   Pointer to a TAILQ flow list.
1533  * @param[in] attr
1534  *   Flow rule attributes.
1535  * @param[in] items
1536  *   Pattern specification (list terminated by the END pattern item).
1537  * @param[in] actions
1538  *   Associated actions (list terminated by the END action).
1539  * @param[out] error
1540  *   Perform verbose error reporting if not NULL.
1541  *
1542  * @return
1543  *   A flow on success, NULL otherwise and rte_errno is set.
1544  */
1545 static struct rte_flow *
1546 mlx5_flow_list_create(struct rte_eth_dev *dev,
1547 		      struct mlx5_flows *list,
1548 		      const struct rte_flow_attr *attr,
1549 		      const struct rte_flow_item items[],
1550 		      const struct rte_flow_action actions[],
1551 		      struct rte_flow_error *error)
1552 {
1553 	struct rte_flow *flow;
1554 	size_t size;
1555 	int ret;
1556 
1557 	ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
1558 	if (ret < 0)
1559 		return NULL;
1560 	size = ret;
1561 	flow = rte_zmalloc(__func__, size, 0);
1562 	if (!flow) {
1563 		rte_flow_error_set(error, ENOMEM,
1564 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1565 				   NULL,
1566 				   "cannot allocate memory");
1567 		return NULL;
1568 	}
1569 	flow->verbs.attr = (struct ibv_flow_attr *)(flow + 1);
1570 	flow->verbs.specs = (uint8_t *)(flow->verbs.attr + 1);
1571 	ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
1572 	if (ret < 0)
1573 		goto error;
1574 	assert((size_t)ret == size);
1575 	if (dev->data->dev_started) {
1576 		ret = mlx5_flow_apply(dev, flow, error);
1577 		if (ret < 0)
1578 			goto error;
1579 	}
1580 	mlx5_flow_rxq_mark_set(dev, flow);
1581 	TAILQ_INSERT_TAIL(list, flow, next);
1582 	return flow;
1583 error:
1584 	ret = rte_errno; /* Save rte_errno before cleanup. */
1585 	mlx5_flow_remove(dev, flow);
1586 	rte_free(flow);
1587 	rte_errno = ret; /* Restore rte_errno. */
1588 	return NULL;
1589 }
1590 
1591 /**
1592  * Create a flow.
1593  *
1594  * @see rte_flow_create()
1595  * @see rte_flow_ops
1596  */
1597 struct rte_flow *
1598 mlx5_flow_create(struct rte_eth_dev *dev,
1599 		 const struct rte_flow_attr *attr,
1600 		 const struct rte_flow_item items[],
1601 		 const struct rte_flow_action actions[],
1602 		 struct rte_flow_error *error)
1603 {
1604 	return mlx5_flow_list_create
1605 		(dev, &((struct priv *)dev->data->dev_private)->flows,
1606 		 attr, items, actions, error);
1607 }
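
/*
 * Application-level usage sketch (illustrative, generic rte_flow API): a
 * rule steering UDP over IPv4 traffic to Rx queue 3 reaches
 * mlx5_flow_list_create() through this entry point.
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 3 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error error;
 *	struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
 *						actions, &error);
 */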
1608 
1609 /**
1610  * Destroy a flow in a list.
1611  *
1612  * @param dev
1613  *   Pointer to Ethernet device.
1614  * @param list
1615  *   Pointer to a TAILQ flow list.
1616  * @param[in] flow
1617  *   Flow to destroy.
1618  */
1619 static void
1620 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
1621 		       struct rte_flow *flow)
1622 {
1623 	mlx5_flow_remove(dev, flow);
1624 	TAILQ_REMOVE(list, flow, next);
1625 	mlx5_flow_rxq_mark_trim(dev, flow);
1626 	rte_free(flow);
1627 }
1628 
1629 /**
1630  * Destroy all flows.
1631  *
1632  * @param dev
1633  *   Pointer to Ethernet device.
1634  * @param list
1635  *   Pointer to a TAILQ flow list.
1636  */
1637 void
1638 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
1639 {
1640 	while (!TAILQ_EMPTY(list)) {
1641 		struct rte_flow *flow;
1642 
1643 		flow = TAILQ_FIRST(list);
1644 		mlx5_flow_list_destroy(dev, list, flow);
1645 	}
1646 }
1647 
1648 /**
1649  * Remove all flows.
1650  *
1651  * @param dev
1652  *   Pointer to Ethernet device.
1653  * @param list
1654  *   Pointer to a TAILQ flow list.
1655  */
1656 void
1657 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
1658 {
1659 	struct rte_flow *flow;
1660 
1661 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
1662 		mlx5_flow_remove(dev, flow);
1663 	mlx5_flow_rxq_mark_clear(dev);
1664 }
1665 
1666 /**
1667  * Add all flows.
1668  *
1669  * @param dev
1670  *   Pointer to Ethernet device.
1671  * @param list
1672  *   Pointer to a TAILQ flow list.
1673  *
1674  * @return
1675  *   0 on success, a negative errno value otherwise and rte_errno is set.
1676  */
1677 int
1678 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
1679 {
1680 	struct rte_flow *flow;
1681 	struct rte_flow_error error;
1682 	int ret = 0;
1683 
1684 	TAILQ_FOREACH(flow, list, next) {
1685 		ret = mlx5_flow_apply(dev, flow, &error);
1686 		if (ret < 0)
1687 			goto error;
1688 		mlx5_flow_rxq_mark_set(dev, flow);
1689 	}
1690 	return 0;
1691 error:
1692 	ret = rte_errno; /* Save rte_errno before cleanup. */
1693 	mlx5_flow_stop(dev, list);
1694 	rte_errno = ret; /* Restore rte_errno. */
1695 	return -rte_errno;
1696 }
1697 
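/*
 * Typical restart sequence using the two helpers above (a sketch of the
 * port stop/start path, with "dev" and "priv" as used throughout this
 * file):
 *
 *	mlx5_flow_stop(dev, &priv->flows);
 *	(Rx/Tx queues are reconfigured here)
 *	if (mlx5_flow_start(dev, &priv->flows))
 *		DRV_LOG(ERR, "port %u cannot re-apply flows",
 *			dev->data->port_id);
 */
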
1698 /**
1699  * Verify the flow list is empty.
1700  *
1701  * @param dev
1702  *   Pointer to Ethernet device.
1703  *
1704  * @return the number of flows not released.
1705  */
1706 int
1707 mlx5_flow_verify(struct rte_eth_dev *dev)
1708 {
1709 	struct priv *priv = dev->data->dev_private;
1710 	struct rte_flow *flow;
1711 	int ret = 0;
1712 
1713 	TAILQ_FOREACH(flow, &priv->flows, next) {
1714 		DRV_LOG(DEBUG, "port %u flow %p still referenced",
1715 			dev->data->port_id, (void *)flow);
1716 		++ret;
1717 	}
1718 	return ret;
1719 }
1720 
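/*
 * Illustrative use at device close time (a sketch): report rules the
 * application did not release before the port is destroyed.
 *
 *	ret = mlx5_flow_verify(dev);
 *	if (ret)
 *		DRV_LOG(WARNING, "port %u some flows still remain",
 *			dev->data->port_id);
 */
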
1721 /**
1722  * Enable a control flow configured from the control plane.
1723  *
1724  * @param dev
1725  *   Pointer to Ethernet device.
1726  * @param eth_spec
1727  *   An Ethernet flow spec to apply.
1728  * @param eth_mask
1729  *   An Ethernet flow mask to apply.
1730  * @param vlan_spec
1731  *   A VLAN flow spec to apply.
1732  * @param vlan_mask
1733  *   A VLAN flow mask to apply.
1734  *
1735  * @return
1736  *   0 on success, a negative errno value otherwise and rte_errno is set.
1737  */
1738 int
1739 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
1740 		    struct rte_flow_item_eth *eth_spec,
1741 		    struct rte_flow_item_eth *eth_mask,
1742 		    struct rte_flow_item_vlan *vlan_spec,
1743 		    struct rte_flow_item_vlan *vlan_mask)
1744 {
1745 	struct priv *priv = dev->data->dev_private;
1746 	const struct rte_flow_attr attr = {
1747 		.ingress = 1,
1748 		.priority = priv->config.flow_prio - 1,
1749 	};
1750 	struct rte_flow_item items[] = {
1751 		{
1752 			.type = RTE_FLOW_ITEM_TYPE_ETH,
1753 			.spec = eth_spec,
1754 			.last = NULL,
1755 			.mask = eth_mask,
1756 		},
1757 		{
1758 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
1759 				RTE_FLOW_ITEM_TYPE_END,
1760 			.spec = vlan_spec,
1761 			.last = NULL,
1762 			.mask = vlan_mask,
1763 		},
1764 		{
1765 			.type = RTE_FLOW_ITEM_TYPE_END,
1766 		},
1767 	};
1768 	uint16_t queue[priv->reta_idx_n];
1769 	struct rte_flow_action_rss action_rss = {
1770 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
1771 		.level = 0,
1772 		.types = priv->rss_conf.rss_hf,
1773 		.key_len = priv->rss_conf.rss_key_len,
1774 		.queue_num = priv->reta_idx_n,
1775 		.key = priv->rss_conf.rss_key,
1776 		.queue = queue,
1777 	};
1778 	struct rte_flow_action actions[] = {
1779 		{
1780 			.type = RTE_FLOW_ACTION_TYPE_RSS,
1781 			.conf = &action_rss,
1782 		},
1783 		{
1784 			.type = RTE_FLOW_ACTION_TYPE_END,
1785 		},
1786 	};
1787 	struct rte_flow *flow;
1788 	struct rte_flow_error error;
1789 	unsigned int i;
1790 
1791 	if (!priv->reta_idx_n) {
1792 		rte_errno = EINVAL;
1793 		return -rte_errno;
1794 	}
1795 	for (i = 0; i != priv->reta_idx_n; ++i)
1796 		queue[i] = (*priv->reta_idx)[i];
1797 	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
1798 				     actions, &error);
1799 	if (!flow)
1800 		return -rte_errno;
1801 	return 0;
1802 }
1803 
1804 /**
1805  * Enable a control flow configured from the control plane.
1806  *
1807  * @param dev
1808  *   Pointer to Ethernet device.
1809  * @param eth_spec
1810  *   An Ethernet flow spec to apply.
1811  * @param eth_mask
1812  *   An Ethernet flow mask to apply.
1813  *
1814  * @return
1815  *   0 on success, a negative errno value otherwise and rte_errno is set.
1816  */
1817 int
1818 mlx5_ctrl_flow(struct rte_eth_dev *dev,
1819 	       struct rte_flow_item_eth *eth_spec,
1820 	       struct rte_flow_item_eth *eth_mask)
1821 {
1822 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
1823 }
1824 
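/*
 * Illustrative use of the control flow helpers above (a sketch of how a
 * traffic control path can let broadcast frames through, using the same
 * Ethernet spec as its own mask):
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *		DRV_LOG(ERR, "port %u cannot enable broadcast traffic",
 *			dev->data->port_id);
 */
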
1825 /**
1826  * Destroy a flow.
1827  *
1828  * @see rte_flow_destroy()
1829  * @see rte_flow_ops
1830  */
1831 int
1832 mlx5_flow_destroy(struct rte_eth_dev *dev,
1833 		  struct rte_flow *flow,
1834 		  struct rte_flow_error *error __rte_unused)
1835 {
1836 	struct priv *priv = dev->data->dev_private;
1837 
1838 	mlx5_flow_list_destroy(dev, &priv->flows, flow);
1839 	return 0;
1840 }
1841 
1842 /**
1843  * Destroy all flows.
1844  *
1845  * @see rte_flow_flush()
1846  * @see rte_flow_ops
1847  */
1848 int
1849 mlx5_flow_flush(struct rte_eth_dev *dev,
1850 		struct rte_flow_error *error __rte_unused)
1851 {
1852 	struct priv *priv = dev->data->dev_private;
1853 
1854 	mlx5_flow_list_flush(dev, &priv->flows);
1855 	return 0;
1856 }
1857 
1858 /**
1859  * Toggle isolated mode.
1860  *
1861  * @see rte_flow_isolate()
1862  * @see rte_flow_ops
1863  */
1864 int
1865 mlx5_flow_isolate(struct rte_eth_dev *dev,
1866 		  int enable,
1867 		  struct rte_flow_error *error)
1868 {
1869 	struct priv *priv = dev->data->dev_private;
1870 
1871 	if (dev->data->dev_started) {
1872 		rte_flow_error_set(error, EBUSY,
1873 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1874 				   NULL,
1875 				   "port must be stopped first");
1876 		return -rte_errno;
1877 	}
1878 	priv->isolated = !!enable;
1879 	if (enable)
1880 		dev->dev_ops = &mlx5_dev_ops_isolate;
1881 	else
1882 		dev->dev_ops = &mlx5_dev_ops;
1883 	return 0;
1884 }
1885 
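/*
 * Illustrative application-side usage (a sketch): isolated mode can only be
 * toggled while the port is stopped. "port_id" is assumed to be a valid
 * mlx5 port.
 *
 *	struct rte_flow_error err;
 *
 *	rte_eth_dev_stop(port_id);
 *	if (rte_flow_isolate(port_id, 1, &err))
 *		printf("cannot enter isolated mode: %s\n",
 *		       err.message ? err.message : "(no message)");
 *	rte_eth_dev_start(port_id);
 */
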
1886 /**
1887  * Convert a flow director filter to a generic flow.
1888  *
1889  * @param dev
1890  *   Pointer to Ethernet device.
1891  * @param fdir_filter
1892  *   Flow director filter to add.
1893  * @param attributes
1894  *   Generic flow parameters structure.
1895  *
1896  * @return
1897  *   0 on success, a negative errno value otherwise and rte_errno is set.
1898  */
1899 static int
1900 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
1901 			 const struct rte_eth_fdir_filter *fdir_filter,
1902 			 struct mlx5_fdir *attributes)
1903 {
1904 	struct priv *priv = dev->data->dev_private;
1905 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
1906 	const struct rte_eth_fdir_masks *mask =
1907 		&dev->data->dev_conf.fdir_conf.mask;
1908 
1909 	/* Validate queue number. */
1910 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
1911 		DRV_LOG(ERR, "port %u invalid queue number %d",
1912 			dev->data->port_id, fdir_filter->action.rx_queue);
1913 		rte_errno = EINVAL;
1914 		return -rte_errno;
1915 	}
1916 	attributes->attr.ingress = 1;
1917 	attributes->items[0] = (struct rte_flow_item) {
1918 		.type = RTE_FLOW_ITEM_TYPE_ETH,
1919 		.spec = &attributes->l2,
1920 		.mask = &attributes->l2_mask,
1921 	};
1922 	switch (fdir_filter->action.behavior) {
1923 	case RTE_ETH_FDIR_ACCEPT:
1924 		attributes->actions[0] = (struct rte_flow_action){
1925 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
1926 			.conf = &attributes->queue,
1927 		};
1928 		break;
1929 	case RTE_ETH_FDIR_REJECT:
1930 		attributes->actions[0] = (struct rte_flow_action){
1931 			.type = RTE_FLOW_ACTION_TYPE_DROP,
1932 		};
1933 		break;
1934 	default:
1935 		DRV_LOG(ERR, "port %u invalid behavior %d",
1936 			dev->data->port_id,
1937 			fdir_filter->action.behavior);
1938 		rte_errno = ENOTSUP;
1939 		return -rte_errno;
1940 	}
1941 	attributes->queue.index = fdir_filter->action.rx_queue;
1942 	/* Handle L3. */
1943 	switch (fdir_filter->input.flow_type) {
1944 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
1945 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
1946 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
1947 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
1948 			.src_addr = input->flow.ip4_flow.src_ip,
1949 			.dst_addr = input->flow.ip4_flow.dst_ip,
1950 			.time_to_live = input->flow.ip4_flow.ttl,
1951 			.type_of_service = input->flow.ip4_flow.tos,
1952 			.next_proto_id = input->flow.ip4_flow.proto,
1953 		};
1954 		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
1955 			.src_addr = mask->ipv4_mask.src_ip,
1956 			.dst_addr = mask->ipv4_mask.dst_ip,
1957 			.time_to_live = mask->ipv4_mask.ttl,
1958 			.type_of_service = mask->ipv4_mask.tos,
1959 			.next_proto_id = mask->ipv4_mask.proto,
1960 		};
1961 		attributes->items[1] = (struct rte_flow_item){
1962 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
1963 			.spec = &attributes->l3,
1964 			.mask = &attributes->l3_mask,
1965 		};
1966 		break;
1967 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
1968 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
1969 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
1970 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
1971 			.hop_limits = input->flow.ipv6_flow.hop_limits,
1972 			.proto = input->flow.ipv6_flow.proto,
1973 		};
1974 
1975 		memcpy(attributes->l3.ipv6.hdr.src_addr,
1976 		       input->flow.ipv6_flow.src_ip,
1977 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
1978 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
1979 		       input->flow.ipv6_flow.dst_ip,
1980 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
1981 		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
1982 		       mask->ipv6_mask.src_ip,
1983 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
1984 		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
1985 		       mask->ipv6_mask.dst_ip,
1986 		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
1987 		attributes->items[1] = (struct rte_flow_item){
1988 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
1989 			.spec = &attributes->l3,
1990 			.mask = &attributes->l3_mask,
1991 		};
1992 		break;
1993 	default:
1994 		DRV_LOG(ERR, "port %u invalid flow type %d",
1995 			dev->data->port_id, fdir_filter->input.flow_type);
1996 		rte_errno = ENOTSUP;
1997 		return -rte_errno;
1998 	}
1999 	/* Handle L4. */
2000 	switch (fdir_filter->input.flow_type) {
2001 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2002 		attributes->l4.udp.hdr = (struct udp_hdr){
2003 			.src_port = input->flow.udp4_flow.src_port,
2004 			.dst_port = input->flow.udp4_flow.dst_port,
2005 		};
2006 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
2007 			.src_port = mask->src_port_mask,
2008 			.dst_port = mask->dst_port_mask,
2009 		};
2010 		attributes->items[2] = (struct rte_flow_item){
2011 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2012 			.spec = &attributes->l4,
2013 			.mask = &attributes->l4_mask,
2014 		};
2015 		break;
2016 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2017 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2018 			.src_port = input->flow.tcp4_flow.src_port,
2019 			.dst_port = input->flow.tcp4_flow.dst_port,
2020 		};
2021 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2022 			.src_port = mask->src_port_mask,
2023 			.dst_port = mask->dst_port_mask,
2024 		};
2025 		attributes->items[2] = (struct rte_flow_item){
2026 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2027 			.spec = &attributes->l4,
2028 			.mask = &attributes->l4_mask,
2029 		};
2030 		break;
2031 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2032 		attributes->l4.udp.hdr = (struct udp_hdr){
2033 			.src_port = input->flow.udp6_flow.src_port,
2034 			.dst_port = input->flow.udp6_flow.dst_port,
2035 		};
2036 		attributes->l4_mask.udp.hdr = (struct udp_hdr){
2037 			.src_port = mask->src_port_mask,
2038 			.dst_port = mask->dst_port_mask,
2039 		};
2040 		attributes->items[2] = (struct rte_flow_item){
2041 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2042 			.spec = &attributes->l4,
2043 			.mask = &attributes->l4_mask,
2044 		};
2045 		break;
2046 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2047 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2048 			.src_port = input->flow.tcp6_flow.src_port,
2049 			.dst_port = input->flow.tcp6_flow.dst_port,
2050 		};
2051 		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2052 			.src_port = mask->src_port_mask,
2053 			.dst_port = mask->dst_port_mask,
2054 		};
2055 		attributes->items[2] = (struct rte_flow_item){
2056 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2057 			.spec = &attributes->l4,
2058 			.mask = &attributes->l4_mask,
2059 		};
2060 		break;
2061 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2062 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2063 		break;
2064 	default:
2065 		DRV_LOG(ERR, "port %u invalid flow type %d",
2066 			dev->data->port_id, fdir_filter->input.flow_type);
2067 		rte_errno = ENOTSUP;
2068 		return -rte_errno;
2069 	}
2070 	return 0;
2071 }
2072 
2073 /**
2074  * Add a new flow director filter and store it in the list.
2075  *
2076  * @param dev
2077  *   Pointer to Ethernet device.
2078  * @param fdir_filter
2079  *   Flow director filter to add.
2080  *
2081  * @return
2082  *   0 on success, a negative errno value otherwise and rte_errno is set.
2083  */
2084 static int
2085 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
2086 		     const struct rte_eth_fdir_filter *fdir_filter)
2087 {
2088 	struct priv *priv = dev->data->dev_private;
2089 	struct mlx5_fdir attributes = {
2090 		.attr.group = 0,
2091 		.l2_mask = {
2092 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2093 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2094 			.type = 0,
2095 		},
2096 	};
2097 	struct rte_flow_error error;
2098 	struct rte_flow *flow;
2099 	int ret;
2100 
2101 	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2102 	if (ret)
2103 		return ret;
2104 	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
2105 				     attributes.items, attributes.actions,
2106 				     &error);
2107 	if (flow) {
2108 		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
2109 			(void *)flow);
2110 		return 0;
2111 	}
2112 	return -rte_errno;
2113 }
2114 
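/*
 * Illustrative application-side usage through the legacy filter API (a
 * sketch): a minimal IPv4/UDP filter steering destination port 4789 to Rx
 * queue 0. "port_id" is assumed to be a valid mlx5 port configured in
 * perfect mode; addresses and ports are in network byte order as expected
 * by the conversion above.
 *
 *	struct rte_eth_fdir_filter f = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip.dst_ip = rte_cpu_to_be_32(0xc0a80001),
 *				.dst_port = rte_cpu_to_be_16(4789),
 *			},
 *		},
 *		.action = {
 *			.rx_queue = 0,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *	};
 *
 *	if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				    RTE_ETH_FILTER_ADD, &f))
 *		printf("failed to add flow director filter\n");
 */
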
2115 /**
2116  * Delete a specific filter.
2117  *
2118  * @param dev
2119  *   Pointer to Ethernet device.
2120  * @param fdir_filter
2121  *   Filter to be deleted.
2122  *
2123  * @return
2124  *   0 on success, a negative errno value otherwise and rte_errno is set.
2125  */
2126 static int
2127 mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
2128 			const struct rte_eth_fdir_filter *fdir_filter
2129 			__rte_unused)
2130 {
2131 	rte_errno = ENOTSUP;
2132 	return -rte_errno;
2133 }
2134 
2135 /**
2136  * Update the queue for a specific filter.
2137  *
2138  * @param dev
2139  *   Pointer to Ethernet device.
2140  * @param fdir_filter
2141  *   Filter to be updated.
2142  *
2143  * @return
2144  *   0 on success, a negative errno value otherwise and rte_errno is set.
2145  */
2146 static int
2147 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
2148 			const struct rte_eth_fdir_filter *fdir_filter)
2149 {
2150 	int ret;
2151 
2152 	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
2153 	if (ret)
2154 		return ret;
2155 	return mlx5_fdir_filter_add(dev, fdir_filter);
2156 }
2157 
2158 /**
2159  * Flush all filters.
2160  *
2161  * @param dev
2162  *   Pointer to Ethernet device.
2163  */
2164 static void
2165 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
2166 {
2167 	struct priv *priv = dev->data->dev_private;
2168 
2169 	mlx5_flow_list_flush(dev, &priv->flows);
2170 }
2171 
2172 /**
2173  * Get flow director information.
2174  *
2175  * @param dev
2176  *   Pointer to Ethernet device.
2177  * @param[out] fdir_info
2178  *   Resulting flow director information.
2179  */
2180 static void
2181 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
2182 {
2183 	struct rte_eth_fdir_masks *mask =
2184 		&dev->data->dev_conf.fdir_conf.mask;
2185 
2186 	fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
2187 	fdir_info->guarant_spc = 0;
2188 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
2189 	fdir_info->max_flexpayload = 0;
2190 	fdir_info->flow_types_mask[0] = 0;
2191 	fdir_info->flex_payload_unit = 0;
2192 	fdir_info->max_flex_payload_segment_num = 0;
2193 	fdir_info->flex_payload_limit = 0;
2194 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2195 }
2196 
2197 /**
2198  * Deal with flow director operations.
2199  *
2200  * @param dev
2201  *   Pointer to Ethernet device.
2202  * @param filter_op
2203  *   Operation to perform.
2204  * @param arg
2205  *   Pointer to operation-specific structure.
2206  *
2207  * @return
2208  *   0 on success, a negative errno value otherwise and rte_errno is set.
2209  */
2210 static int
2211 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
2212 		    void *arg)
2213 {
2214 	enum rte_fdir_mode fdir_mode =
2215 		dev->data->dev_conf.fdir_conf.mode;
2216 
2217 	if (filter_op == RTE_ETH_FILTER_NOP)
2218 		return 0;
2219 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
2220 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
2221 		DRV_LOG(ERR, "port %u flow director mode %d not supported",
2222 			dev->data->port_id, fdir_mode);
2223 		rte_errno = EINVAL;
2224 		return -rte_errno;
2225 	}
2226 	switch (filter_op) {
2227 	case RTE_ETH_FILTER_ADD:
2228 		return mlx5_fdir_filter_add(dev, arg);
2229 	case RTE_ETH_FILTER_UPDATE:
2230 		return mlx5_fdir_filter_update(dev, arg);
2231 	case RTE_ETH_FILTER_DELETE:
2232 		return mlx5_fdir_filter_delete(dev, arg);
2233 	case RTE_ETH_FILTER_FLUSH:
2234 		mlx5_fdir_filter_flush(dev);
2235 		break;
2236 	case RTE_ETH_FILTER_INFO:
2237 		mlx5_fdir_info_get(dev, arg);
2238 		break;
2239 	default:
2240 		DRV_LOG(DEBUG, "port %u unknown operation %u",
2241 			dev->data->port_id, filter_op);
2242 		rte_errno = EINVAL;
2243 		return -rte_errno;
2244 	}
2245 	return 0;
2246 }
2247 
2248 /**
2249  * Manage filter operations.
2250  *
2251  * @param dev
2252  *   Pointer to Ethernet device structure.
2253  * @param filter_type
2254  *   Filter type.
2255  * @param filter_op
2256  *   Operation to perform.
2257  * @param arg
2258  *   Pointer to operation-specific structure.
2259  *
2260  * @return
2261  *   0 on success, a negative errno value otherwise and rte_errno is set.
2262  */
2263 int
2264 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
2265 		     enum rte_filter_type filter_type,
2266 		     enum rte_filter_op filter_op,
2267 		     void *arg)
2268 {
2269 	switch (filter_type) {
2270 	case RTE_ETH_FILTER_GENERIC:
2271 		if (filter_op != RTE_ETH_FILTER_GET) {
2272 			rte_errno = EINVAL;
2273 			return -rte_errno;
2274 		}
2275 		*(const void **)arg = &mlx5_flow_ops;
2276 		return 0;
2277 	case RTE_ETH_FILTER_FDIR:
2278 		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
2279 	default:
2280 		DRV_LOG(ERR, "port %u filter type (%d) not supported",
2281 			dev->data->port_id, filter_type);
2282 		rte_errno = ENOTSUP;
2283 		return -rte_errno;
2284 	}
2285 	return 0;
2286 }
2287
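/*
 * The generic filter path above is how the rte_flow API reaches this driver:
 * a sketch of the lookup performed before any of the mlx5_flow_ops callbacks
 * is invoked ("port_id" is assumed to be a valid mlx5 port).
 *
 *	const struct rte_flow_ops *ops = NULL;
 *
 *	if (!rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				     RTE_ETH_FILTER_GET, &ops) && ops)
 *		(ops->validate(), ops->create(), etc. may then be called
 *		 with this port's device)
 */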