xref: /dpdk/drivers/net/mlx5/mlx5_flow_verbs.c (revision 2d0c29a37a9c080c1cccb1ad7941aba2ccf5437e)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4 
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10 
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20 
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_eth_ctrl.h>
24 #include <rte_ethdev_driver.h>
25 #include <rte_flow.h>
26 #include <rte_flow_driver.h>
27 #include <rte_malloc.h>
28 #include <rte_ip.h>
29 
30 #include "mlx5.h"
31 #include "mlx5_defs.h"
32 #include "mlx5_prm.h"
33 #include "mlx5_glue.h"
34 #include "mlx5_flow.h"
35 
/* Select the IBV_FLOW_SPEC_INNER flag when the flow already matched a
 * tunnel layer, so that the spec being built applies to the inner headers;
 * otherwise evaluate to 0 (outer headers).
 */
#define VERBS_SPEC_INNER(item_flags) \
	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
38 
39 /**
40  * Create Verbs flow counter with Verbs library.
41  *
42  * @param[in] dev
43  *   Pointer to the Ethernet device structure.
44  * @param[in, out] counter
45  *   mlx5 flow counter object, contains the counter id,
46  *   handle of created Verbs flow counter is returned
47  *   in cs field (if counters are supported).
48  *
49  * @return
50  *   0 On success else a negative errno value is returned
51  *   and rte_errno is set.
52  */
static int
flow_verbs_counter_create(struct rte_eth_dev *dev,
			  struct mlx5_flow_counter *counter)
{
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
	/* rdma-core v4.2 API flavor: one counter set per counter id. */
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->ctx;
	struct ibv_counter_set_init_attr init = {
			 .counter_set_id = counter->id};

	counter->cs = mlx5_glue->create_counter_set(ctx, &init);
	if (!counter->cs) {
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	return 0;
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	/* rdma-core v4.5 API flavor: create one counters object and attach
	 * two counters to it — index 0 for packets, index 1 for bytes
	 * (the same order flow_verbs_counter_query() reads them in).
	 */
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->ctx;
	struct ibv_counters_init_attr init = {0};
	struct ibv_counter_attach_attr attach;
	int ret;

	memset(&attach, 0, sizeof(attach));
	counter->cs = mlx5_glue->create_counters(ctx, &init);
	if (!counter->cs) {
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	attach.counter_desc = IBV_COUNTER_PACKETS;
	attach.index = 0;
	ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
	if (!ret) {
		attach.counter_desc = IBV_COUNTER_BYTES;
		attach.index = 1;
		ret = mlx5_glue->attach_counters
					(counter->cs, &attach, NULL);
	}
	if (ret) {
		/* Attach failed: roll back the counters object so the
		 * caller never sees a half-initialized handle.
		 */
		claim_zero(mlx5_glue->destroy_counters(counter->cs));
		counter->cs = NULL;
		rte_errno = ret;
		return -ret;
	}
	return 0;
#else
	/* Built without counter support: always report ENOTSUP. */
	(void)dev;
	(void)counter;
	rte_errno = ENOTSUP;
	return -ENOTSUP;
#endif
}
105 
106 /**
107  * Get a flow counter.
108  *
109  * @param[in] dev
110  *   Pointer to the Ethernet device structure.
111  * @param[in] shared
112  *   Indicate if this counter is shared with other flows.
113  * @param[in] id
114  *   Counter identifier.
115  *
116  * @return
117  *   A pointer to the counter, NULL otherwise and rte_errno is set.
118  */
119 static struct mlx5_flow_counter *
120 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
121 {
122 	struct mlx5_priv *priv = dev->data->dev_private;
123 	struct mlx5_flow_counter *cnt;
124 	int ret;
125 
126 	if (shared) {
127 		LIST_FOREACH(cnt, &priv->flow_counters, next) {
128 			if (cnt->shared && cnt->id == id) {
129 				cnt->ref_cnt++;
130 				return cnt;
131 			}
132 		}
133 	}
134 	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
135 	if (!cnt) {
136 		rte_errno = ENOMEM;
137 		return NULL;
138 	}
139 	cnt->id = id;
140 	cnt->shared = shared;
141 	cnt->ref_cnt = 1;
142 	cnt->hits = 0;
143 	cnt->bytes = 0;
144 	/* Create counter with Verbs. */
145 	ret = flow_verbs_counter_create(dev, cnt);
146 	if (!ret) {
147 		LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
148 		return cnt;
149 	}
150 	/* Some error occurred in Verbs library. */
151 	rte_free(cnt);
152 	rte_errno = -ret;
153 	return NULL;
154 }
155 
156 /**
157  * Release a flow counter.
158  *
159  * @param[in] counter
160  *   Pointer to the counter handler.
161  */
162 static void
163 flow_verbs_counter_release(struct mlx5_flow_counter *counter)
164 {
165 	if (--counter->ref_cnt == 0) {
166 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
167 		claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
168 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
169 		claim_zero(mlx5_glue->destroy_counters(counter->cs));
170 #endif
171 		LIST_REMOVE(counter, next);
172 		rte_free(counter);
173 	}
174 }
175 
176 /**
177  * Query a flow counter via Verbs library call.
178  *
179  * @see rte_flow_query()
180  * @see rte_flow_ops
181  */
static int
flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
			 struct rte_flow *flow, void *data,
			 struct rte_flow_error *error)
{
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
		struct rte_flow_query_count *qc = data;
		/* counters[0] = packets, counters[1] = bytes — the order
		 * set up by flow_verbs_counter_create().
		 */
		uint64_t counters[2] = {0, 0};
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
		struct ibv_query_counter_set_attr query_cs_attr = {
			.cs = flow->counter->cs,
			.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
		};
		struct ibv_counter_set_data query_out = {
			.out = counters,
			.outlen = 2 * sizeof(uint64_t),
		};
		int err = mlx5_glue->query_counter_set(&query_cs_attr,
						       &query_out);
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
		int err = mlx5_glue->query_counters
			       (flow->counter->cs, counters,
				RTE_DIM(counters),
				IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
#endif
		if (err)
			return rte_flow_error_set
				(error, err,
				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				 NULL,
				 "cannot read counter");
		qc->hits_set = 1;
		qc->bytes_set = 1;
		/* Report deltas against the software baseline stored in
		 * the counter descriptor.
		 */
		qc->hits = counters[0] - flow->counter->hits;
		qc->bytes = counters[1] - flow->counter->bytes;
		if (qc->reset) {
			/* "Reset" moves the baseline forward; the hardware
			 * counter itself keeps accumulating.
			 */
			flow->counter->hits = counters[0];
			flow->counter->bytes = counters[1];
		}
		return 0;
	}
	return rte_flow_error_set(error, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL,
				  "flow does not have counter");
#else
	(void)flow;
	(void)data;
	return rte_flow_error_set(error, ENOTSUP,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL,
				  "counters are not available");
#endif
}
238 
239 /**
240  * Add a verbs item specification into @p verbs.
241  *
242  * @param[out] verbs
243  *   Pointer to verbs structure.
244  * @param[in] src
245  *   Create specification.
246  * @param[in] size
247  *   Size in bytes of the specification to copy.
248  */
249 static void
250 flow_verbs_spec_add(struct mlx5_flow_verbs *verbs, void *src, unsigned int size)
251 {
252 	void *dst;
253 
254 	if (!verbs)
255 		return;
256 	assert(verbs->specs);
257 	dst = (void *)(verbs->specs + verbs->size);
258 	memcpy(dst, src, size);
259 	++verbs->attr->num_of_specs;
260 	verbs->size += size;
261 }
262 
263 /**
264  * Convert the @p item into a Verbs specification. This function assumes that
265  * the input is valid and that there is space to insert the requested item
266  * into the flow.
267  *
268  * @param[in, out] dev_flow
269  *   Pointer to dev_flow structure.
270  * @param[in] item
271  *   Item specification.
272  * @param[in] item_flags
273  *   Parsed item flags.
274  */
275 static void
276 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
277 			      const struct rte_flow_item *item,
278 			      uint64_t item_flags)
279 {
280 	const struct rte_flow_item_eth *spec = item->spec;
281 	const struct rte_flow_item_eth *mask = item->mask;
282 	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
283 	struct ibv_flow_spec_eth eth = {
284 		.type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
285 		.size = size,
286 	};
287 
288 	if (!mask)
289 		mask = &rte_flow_item_eth_mask;
290 	if (spec) {
291 		unsigned int i;
292 
293 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
294 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
295 		eth.val.ether_type = spec->type;
296 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
297 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
298 		eth.mask.ether_type = mask->type;
299 		/* Remove unwanted bits from values. */
300 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
301 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
302 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
303 		}
304 		eth.val.ether_type &= eth.mask.ether_type;
305 	}
306 	flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
307 }
308 
309 /**
310  * Update the VLAN tag in the Verbs Ethernet specification.
311  * This function assumes that the input is valid and there is space to add
312  * the requested item.
313  *
314  * @param[in, out] attr
315  *   Pointer to Verbs attributes structure.
316  * @param[in] eth
317  *   Verbs structure containing the VLAN information to copy.
318  */
319 static void
320 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
321 			    struct ibv_flow_spec_eth *eth)
322 {
323 	unsigned int i;
324 	const enum ibv_flow_spec_type search = eth->type;
325 	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
326 		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
327 
328 	for (i = 0; i != attr->num_of_specs; ++i) {
329 		if (hdr->type == search) {
330 			struct ibv_flow_spec_eth *e =
331 				(struct ibv_flow_spec_eth *)hdr;
332 
333 			e->val.vlan_tag = eth->val.vlan_tag;
334 			e->mask.vlan_tag = eth->mask.vlan_tag;
335 			e->val.ether_type = eth->val.ether_type;
336 			e->mask.ether_type = eth->mask.ether_type;
337 			break;
338 		}
339 		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
340 	}
341 }
342 
343 /**
344  * Convert the @p item into a Verbs specification. This function assumes that
345  * the input is valid and that there is space to insert the requested item
346  * into the flow.
347  *
348  * @param[in, out] dev_flow
349  *   Pointer to dev_flow structure.
350  * @param[in] item
351  *   Item specification.
352  * @param[in] item_flags
353  *   Parsed item flags.
354  */
355 static void
356 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
357 			       const struct rte_flow_item *item,
358 			       uint64_t item_flags)
359 {
360 	const struct rte_flow_item_vlan *spec = item->spec;
361 	const struct rte_flow_item_vlan *mask = item->mask;
362 	unsigned int size = sizeof(struct ibv_flow_spec_eth);
363 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
364 	struct ibv_flow_spec_eth eth = {
365 		.type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
366 		.size = size,
367 	};
368 	const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
369 				      MLX5_FLOW_LAYER_OUTER_L2;
370 
371 	if (!mask)
372 		mask = &rte_flow_item_vlan_mask;
373 	if (spec) {
374 		eth.val.vlan_tag = spec->tci;
375 		eth.mask.vlan_tag = mask->tci;
376 		eth.val.vlan_tag &= eth.mask.vlan_tag;
377 		eth.val.ether_type = spec->inner_type;
378 		eth.mask.ether_type = mask->inner_type;
379 		eth.val.ether_type &= eth.mask.ether_type;
380 	}
381 	if (!(item_flags & l2m))
382 		flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
383 	else
384 		flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
385 }
386 
387 /**
388  * Convert the @p item into a Verbs specification. This function assumes that
389  * the input is valid and that there is space to insert the requested item
390  * into the flow.
391  *
392  * @param[in, out] dev_flow
393  *   Pointer to dev_flow structure.
394  * @param[in] item
395  *   Item specification.
396  * @param[in] item_flags
397  *   Parsed item flags.
398  */
399 static void
400 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
401 			       const struct rte_flow_item *item,
402 			       uint64_t item_flags)
403 {
404 	const struct rte_flow_item_ipv4 *spec = item->spec;
405 	const struct rte_flow_item_ipv4 *mask = item->mask;
406 	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
407 	struct ibv_flow_spec_ipv4_ext ipv4 = {
408 		.type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
409 		.size = size,
410 	};
411 
412 	if (!mask)
413 		mask = &rte_flow_item_ipv4_mask;
414 	if (spec) {
415 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
416 			.src_ip = spec->hdr.src_addr,
417 			.dst_ip = spec->hdr.dst_addr,
418 			.proto = spec->hdr.next_proto_id,
419 			.tos = spec->hdr.type_of_service,
420 		};
421 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
422 			.src_ip = mask->hdr.src_addr,
423 			.dst_ip = mask->hdr.dst_addr,
424 			.proto = mask->hdr.next_proto_id,
425 			.tos = mask->hdr.type_of_service,
426 		};
427 		/* Remove unwanted bits from values. */
428 		ipv4.val.src_ip &= ipv4.mask.src_ip;
429 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
430 		ipv4.val.proto &= ipv4.mask.proto;
431 		ipv4.val.tos &= ipv4.mask.tos;
432 	}
433 	flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
434 }
435 
436 /**
437  * Convert the @p item into a Verbs specification. This function assumes that
438  * the input is valid and that there is space to insert the requested item
439  * into the flow.
440  *
441  * @param[in, out] dev_flow
442  *   Pointer to dev_flow structure.
443  * @param[in] item
444  *   Item specification.
445  * @param[in] item_flags
446  *   Parsed item flags.
447  */
static void
flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
			       const struct rte_flow_item *item,
			       uint64_t item_flags)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
		.size = size,
	};

	if (!mask)
		mask = &rte_flow_item_ipv6_mask;
	if (spec) {
		unsigned int i;
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;

		/* RTE_DIM() is used as a byte count here — assumes the
		 * Verbs src_ip/dst_ip fields are byte arrays (uint8_t[16])
		 * so element count equals size; confirm against rdma-core.
		 */
		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		/* Split the big-endian vtc_flow word into traffic class
		 * and flow label; the label goes back out big-endian.
		 */
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
					 IPV6_HDR_TC_SHIFT;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
					  IPV6_HDR_TC_SHIFT;
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	}
	flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
}
504 
505 /**
506  * Convert the @p item into a Verbs specification. This function assumes that
507  * the input is valid and that there is space to insert the requested item
508  * into the flow.
509  *
510  * @param[in, out] dev_flow
511  *   Pointer to dev_flow structure.
512  * @param[in] item
513  *   Item specification.
514  * @param[in] item_flags
515  *   Parsed item flags.
516  */
517 static void
518 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
519 			      const struct rte_flow_item *item,
520 			      uint64_t item_flags __rte_unused)
521 {
522 	const struct rte_flow_item_tcp *spec = item->spec;
523 	const struct rte_flow_item_tcp *mask = item->mask;
524 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
525 	struct ibv_flow_spec_tcp_udp tcp = {
526 		.type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
527 		.size = size,
528 	};
529 
530 	if (!mask)
531 		mask = &rte_flow_item_tcp_mask;
532 	if (spec) {
533 		tcp.val.dst_port = spec->hdr.dst_port;
534 		tcp.val.src_port = spec->hdr.src_port;
535 		tcp.mask.dst_port = mask->hdr.dst_port;
536 		tcp.mask.src_port = mask->hdr.src_port;
537 		/* Remove unwanted bits from values. */
538 		tcp.val.src_port &= tcp.mask.src_port;
539 		tcp.val.dst_port &= tcp.mask.dst_port;
540 	}
541 	flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
542 }
543 
544 /**
545  * Convert the @p item into a Verbs specification. This function assumes that
546  * the input is valid and that there is space to insert the requested item
547  * into the flow.
548  *
549  * @param[in, out] dev_flow
550  *   Pointer to dev_flow structure.
551  * @param[in] item
552  *   Item specification.
553  * @param[in] item_flags
554  *   Parsed item flags.
555  */
556 static void
557 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
558 			      const struct rte_flow_item *item,
559 			      uint64_t item_flags __rte_unused)
560 {
561 	const struct rte_flow_item_udp *spec = item->spec;
562 	const struct rte_flow_item_udp *mask = item->mask;
563 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
564 	struct ibv_flow_spec_tcp_udp udp = {
565 		.type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
566 		.size = size,
567 	};
568 
569 	if (!mask)
570 		mask = &rte_flow_item_udp_mask;
571 	if (spec) {
572 		udp.val.dst_port = spec->hdr.dst_port;
573 		udp.val.src_port = spec->hdr.src_port;
574 		udp.mask.dst_port = mask->hdr.dst_port;
575 		udp.mask.src_port = mask->hdr.src_port;
576 		/* Remove unwanted bits from values. */
577 		udp.val.src_port &= udp.mask.src_port;
578 		udp.val.dst_port &= udp.mask.dst_port;
579 	}
580 	flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
581 }
582 
583 /**
584  * Convert the @p item into a Verbs specification. This function assumes that
585  * the input is valid and that there is space to insert the requested item
586  * into the flow.
587  *
588  * @param[in, out] dev_flow
589  *   Pointer to dev_flow structure.
590  * @param[in] item
591  *   Item specification.
592  * @param[in] item_flags
593  *   Parsed item flags.
594  */
595 static void
596 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
597 				const struct rte_flow_item *item,
598 				uint64_t item_flags __rte_unused)
599 {
600 	const struct rte_flow_item_vxlan *spec = item->spec;
601 	const struct rte_flow_item_vxlan *mask = item->mask;
602 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
603 	struct ibv_flow_spec_tunnel vxlan = {
604 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
605 		.size = size,
606 	};
607 	union vni {
608 		uint32_t vlan_id;
609 		uint8_t vni[4];
610 	} id = { .vlan_id = 0, };
611 
612 	if (!mask)
613 		mask = &rte_flow_item_vxlan_mask;
614 	if (spec) {
615 		memcpy(&id.vni[1], spec->vni, 3);
616 		vxlan.val.tunnel_id = id.vlan_id;
617 		memcpy(&id.vni[1], mask->vni, 3);
618 		vxlan.mask.tunnel_id = id.vlan_id;
619 		/* Remove unwanted bits from values. */
620 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
621 	}
622 	flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
623 }
624 
625 /**
626  * Convert the @p item into a Verbs specification. This function assumes that
627  * the input is valid and that there is space to insert the requested item
628  * into the flow.
629  *
630  * @param[in, out] dev_flow
631  *   Pointer to dev_flow structure.
632  * @param[in] item
633  *   Item specification.
634  * @param[in] item_flags
635  *   Parsed item flags.
636  */
637 static void
638 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
639 				    const struct rte_flow_item *item,
640 				    uint64_t item_flags __rte_unused)
641 {
642 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
643 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
644 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
645 	struct ibv_flow_spec_tunnel vxlan_gpe = {
646 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
647 		.size = size,
648 	};
649 	union vni {
650 		uint32_t vlan_id;
651 		uint8_t vni[4];
652 	} id = { .vlan_id = 0, };
653 
654 	if (!mask)
655 		mask = &rte_flow_item_vxlan_gpe_mask;
656 	if (spec) {
657 		memcpy(&id.vni[1], spec->vni, 3);
658 		vxlan_gpe.val.tunnel_id = id.vlan_id;
659 		memcpy(&id.vni[1], mask->vni, 3);
660 		vxlan_gpe.mask.tunnel_id = id.vlan_id;
661 		/* Remove unwanted bits from values. */
662 		vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
663 	}
664 	flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
665 }
666 
667 /**
668  * Update the protocol in Verbs IPv4/IPv6 spec.
669  *
670  * @param[in, out] attr
671  *   Pointer to Verbs attributes structure.
672  * @param[in] search
673  *   Specification type to search in order to update the IP protocol.
674  * @param[in] protocol
675  *   Protocol value to set if none is present in the specification.
676  */
677 static void
678 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
679 				       enum ibv_flow_spec_type search,
680 				       uint8_t protocol)
681 {
682 	unsigned int i;
683 	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
684 		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
685 
686 	if (!attr)
687 		return;
688 	for (i = 0; i != attr->num_of_specs; ++i) {
689 		if (hdr->type == search) {
690 			union {
691 				struct ibv_flow_spec_ipv4_ext *ipv4;
692 				struct ibv_flow_spec_ipv6 *ipv6;
693 			} ip;
694 
695 			switch (search) {
696 			case IBV_FLOW_SPEC_IPV4_EXT:
697 				ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
698 				if (!ip.ipv4->val.proto) {
699 					ip.ipv4->val.proto = protocol;
700 					ip.ipv4->mask.proto = 0xff;
701 				}
702 				break;
703 			case IBV_FLOW_SPEC_IPV6:
704 				ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
705 				if (!ip.ipv6->val.next_hdr) {
706 					ip.ipv6->val.next_hdr = protocol;
707 					ip.ipv6->mask.next_hdr = 0xff;
708 				}
709 				break;
710 			default:
711 				break;
712 			}
713 			break;
714 		}
715 		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
716 	}
717 }
718 
719 /**
720  * Convert the @p item into a Verbs specification. This function assumes that
721  * the input is valid and that there is space to insert the requested item
722  * into the flow.
723  *
724  * @param[in, out] dev_flow
725  *   Pointer to dev_flow structure.
726  * @param[in] item
727  *   Item specification.
728  * @param[in] item_flags
729  *   Parsed item flags.
730  */
static void
flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
			      const struct rte_flow_item *item __rte_unused,
			      uint64_t item_flags)
{
	struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
	/* Without MPLS support rdma-core has no GRE spec; fall back to a
	 * generic tunnel spec, so the item's spec/mask cannot be matched.
	 */
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel tunnel = {
		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
#else
	const struct rte_flow_item_gre *spec = item->spec;
	const struct rte_flow_item_gre *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_gre);
	struct ibv_flow_spec_gre tunnel = {
		.type = IBV_FLOW_SPEC_GRE,
		.size = size,
	};

	if (!mask)
		mask = &rte_flow_item_gre_mask;
	if (spec) {
		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
		tunnel.val.protocol = spec->protocol;
		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
		tunnel.mask.protocol = mask->protocol;
		/* Remove unwanted bits from values. */
		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
		tunnel.val.protocol &= tunnel.mask.protocol;
		/* NOTE(review): val.key/mask.key are never filled from the
		 * item, so this AND only keeps them zero-initialized —
		 * presumably the GRE key is not matchable here; confirm.
		 */
		tunnel.val.key &= tunnel.mask.key;
	}
#endif
	/* Pin the preceding L3 spec's IP protocol to GRE when the pattern
	 * did not specify one explicitly.
	 */
	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
		flow_verbs_item_gre_ip_protocol_update(verbs->attr,
						       IBV_FLOW_SPEC_IPV4_EXT,
						       IPPROTO_GRE);
	else
		flow_verbs_item_gre_ip_protocol_update(verbs->attr,
						       IBV_FLOW_SPEC_IPV6,
						       IPPROTO_GRE);
	flow_verbs_spec_add(verbs, &tunnel, size);
}
775 
776 /**
777  * Convert the @p action into a Verbs specification. This function assumes that
778  * the input is valid and that there is space to insert the requested action
779  * into the flow. This function also return the action that was added.
780  *
781  * @param[in, out] dev_flow
782  *   Pointer to dev_flow structure.
783  * @param[in] item
784  *   Item specification.
785  * @param[in] item_flags
786  *   Parsed item flags.
787  */
static void
flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
			       const struct rte_flow_item *item __rte_unused,
			       uint64_t item_flags __rte_unused)
{
#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
	const struct rte_flow_item_mpls *spec = item->spec;
	const struct rte_flow_item_mpls *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_mpls);
	struct ibv_flow_spec_mpls mpls = {
		.type = IBV_FLOW_SPEC_MPLS,
		.size = size,
	};

	if (!mask)
		mask = &rte_flow_item_mpls_mask;
	if (spec) {
		/* Raw copy of the whole item into the 4-byte label field —
		 * assumes rte_flow_item_mpls and the Verbs label share the
		 * same big-endian layout; confirm against rdma-core.
		 */
		memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
		memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
		/* Remove unwanted bits from values.  */
		mpls.val.label &= mpls.mask.label;
	}
	flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
#endif
}
813 
814 /**
815  * Convert the @p action into a Verbs specification. This function assumes that
816  * the input is valid and that there is space to insert the requested action
817  * into the flow.
818  *
819  * @param[in] dev_flow
820  *   Pointer to mlx5_flow.
821  * @param[in] action
822  *   Action configuration.
823  */
824 static void
825 flow_verbs_translate_action_drop
826 	(struct mlx5_flow *dev_flow,
827 	 const struct rte_flow_action *action __rte_unused)
828 {
829 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
830 	struct ibv_flow_spec_action_drop drop = {
831 			.type = IBV_FLOW_SPEC_ACTION_DROP,
832 			.size = size,
833 	};
834 
835 	flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
836 }
837 
838 /**
839  * Convert the @p action into a Verbs specification. This function assumes that
840  * the input is valid and that there is space to insert the requested action
841  * into the flow.
842  *
843  * @param[in] dev_flow
844  *   Pointer to mlx5_flow.
845  * @param[in] action
846  *   Action configuration.
847  */
848 static void
849 flow_verbs_translate_action_queue(struct mlx5_flow *dev_flow,
850 				  const struct rte_flow_action *action)
851 {
852 	const struct rte_flow_action_queue *queue = action->conf;
853 	struct rte_flow *flow = dev_flow->flow;
854 
855 	if (flow->queue)
856 		(*flow->queue)[0] = queue->index;
857 	flow->rss.queue_num = 1;
858 }
859 
860 /**
861  * Convert the @p action into a Verbs specification. This function assumes that
862  * the input is valid and that there is space to insert the requested action
863  * into the flow.
864  *
865  * @param[in] action
866  *   Action configuration.
867  * @param[in, out] action_flags
868  *   Pointer to the detected actions.
869  * @param[in] dev_flow
870  *   Pointer to mlx5_flow.
871  */
872 static void
873 flow_verbs_translate_action_rss(struct mlx5_flow *dev_flow,
874 				const struct rte_flow_action *action)
875 {
876 	const struct rte_flow_action_rss *rss = action->conf;
877 	const uint8_t *rss_key;
878 	struct rte_flow *flow = dev_flow->flow;
879 
880 	if (flow->queue)
881 		memcpy((*flow->queue), rss->queue,
882 		       rss->queue_num * sizeof(uint16_t));
883 	flow->rss.queue_num = rss->queue_num;
884 	/* NULL RSS key indicates default RSS key. */
885 	rss_key = !rss->key ? rss_hash_default_key : rss->key;
886 	memcpy(flow->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
887 	/* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
888 	flow->rss.types = !rss->types ? ETH_RSS_IP : rss->types;
889 	flow->rss.level = rss->level;
890 }
891 
892 /**
893  * Convert the @p action into a Verbs specification. This function assumes that
894  * the input is valid and that there is space to insert the requested action
895  * into the flow.
896  *
897  * @param[in] dev_flow
898  *   Pointer to mlx5_flow.
899  * @param[in] action
900  *   Action configuration.
901  */
902 static void
903 flow_verbs_translate_action_flag
904 	(struct mlx5_flow *dev_flow,
905 	 const struct rte_flow_action *action __rte_unused)
906 {
907 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
908 	struct ibv_flow_spec_action_tag tag = {
909 		.type = IBV_FLOW_SPEC_ACTION_TAG,
910 		.size = size,
911 		.tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
912 	};
913 
914 	flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
915 }
916 
917 /**
918  * Convert the @p action into a Verbs specification. This function assumes that
919  * the input is valid and that there is space to insert the requested action
920  * into the flow.
921  *
922  * @param[in] dev_flow
923  *   Pointer to mlx5_flow.
924  * @param[in] action
925  *   Action configuration.
926  */
927 static void
928 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
929 				 const struct rte_flow_action *action)
930 {
931 	const struct rte_flow_action_mark *mark = action->conf;
932 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
933 	struct ibv_flow_spec_action_tag tag = {
934 		.type = IBV_FLOW_SPEC_ACTION_TAG,
935 		.size = size,
936 		.tag_id = mlx5_flow_mark_set(mark->id),
937 	};
938 
939 	flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
940 }
941 
942 /**
943  * Convert the @p action into a Verbs specification. This function assumes that
944  * the input is valid and that there is space to insert the requested action
945  * into the flow.
946  *
947  * @param[in] dev
948  *   Pointer to the Ethernet device structure.
949  * @param[in] action
950  *   Action configuration.
951  * @param[in] dev_flow
952  *   Pointer to mlx5_flow.
953  * @param[out] error
954  *   Pointer to error structure.
955  *
956  * @return
957  *   0 On success else a negative errno value is returned and rte_errno is set.
958  */
static int
flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
				  const struct rte_flow_action *action,
				  struct rte_eth_dev *dev,
				  struct rte_flow_error *error)
{
	const struct rte_flow_action_count *count = action->conf;
	struct rte_flow *flow = dev_flow->flow;
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.size = size,
	};
#endif

	/* Allocate (or reuse a shared) counter on first use for this flow. */
	if (!flow->counter) {
		flow->counter = flow_verbs_counter_new(dev, count->shared,
						       count->id);
		if (!flow->counter)
			return rte_flow_error_set(error, rte_errno,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  action,
						  "cannot get counter"
						  " context.");
	}
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
	/* rdma-core v4.2 references the counter set by its handle. */
	counter.counter_set_handle = flow->counter->cs->handle;
	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	/* rdma-core v4.5 references the counters object directly. */
	counter.counters = flow->counter->cs;
	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
#endif
	return 0;
}
995 
/**
 * Internal validation function. For validating both actions and items.
 *
 * Walks the pattern once, accumulating detected layers in @c item_flags,
 * then walks the action list, accumulating @c action_flags. Fails if any
 * item/action is unsupported by the Verbs engine or if no fate action
 * (drop/queue/rss) is present.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_verbs_validate(struct rte_eth_dev *dev,
		    const struct rte_flow_attr *attr,
		    const struct rte_flow_item items[],
		    const struct rte_flow_action actions[],
		    struct rte_flow_error *error)
{
	int ret;
	uint64_t action_flags = 0;
	uint64_t item_flags = 0;
	uint64_t last_item = 0;
	/* L4 protocol announced by the innermost L3 header seen so far. */
	uint8_t next_protocol = 0xff;

	if (items == NULL)
		return -1;
	ret = mlx5_flow_validate_attributes(dev, attr, error);
	if (ret < 0)
		return ret;
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
		/* NOTE(review): shadows the outer "ret"; both are local. */
		int ret = 0;

		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			ret = mlx5_flow_validate_item_eth(items, item_flags,
							  error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
					     MLX5_FLOW_LAYER_OUTER_L2;
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			ret = mlx5_flow_validate_item_vlan(items, item_flags,
							   error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
					      MLX5_FLOW_LAYER_INNER_VLAN) :
					     (MLX5_FLOW_LAYER_OUTER_L2 |
					      MLX5_FLOW_LAYER_OUTER_VLAN);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
							   NULL, error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
					     MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			/*
			 * Remember the masked next-protocol so the following
			 * L4/tunnel item can be validated against it.
			 */
			if (items->mask != NULL &&
			    ((const struct rte_flow_item_ipv4 *)
			     items->mask)->hdr.next_proto_id) {
				next_protocol =
					((const struct rte_flow_item_ipv4 *)
					 (items->spec))->hdr.next_proto_id;
				next_protocol &=
					((const struct rte_flow_item_ipv4 *)
					 (items->mask))->hdr.next_proto_id;
			} else {
				/* Reset for inner layer. */
				next_protocol = 0xff;
			}
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
							   NULL, error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
					     MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			if (items->mask != NULL &&
			    ((const struct rte_flow_item_ipv6 *)
			     items->mask)->hdr.proto) {
				next_protocol =
					((const struct rte_flow_item_ipv6 *)
					 items->spec)->hdr.proto;
				next_protocol &=
					((const struct rte_flow_item_ipv6 *)
					 items->mask)->hdr.proto;
			} else {
				/* Reset for inner layer. */
				next_protocol = 0xff;
			}
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			ret = mlx5_flow_validate_item_udp(items, item_flags,
							  next_protocol,
							  error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
					     MLX5_FLOW_LAYER_OUTER_L4_UDP;
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			ret = mlx5_flow_validate_item_tcp
						(items, item_flags,
						 next_protocol,
						 &rte_flow_item_tcp_mask,
						 error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
					     MLX5_FLOW_LAYER_OUTER_L4_TCP;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN:
			ret = mlx5_flow_validate_item_vxlan(items, item_flags,
							    error);
			if (ret < 0)
				return ret;
			last_item = MLX5_FLOW_LAYER_VXLAN;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
			ret = mlx5_flow_validate_item_vxlan_gpe(items,
								item_flags,
								dev, error);
			if (ret < 0)
				return ret;
			last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
			break;
		case RTE_FLOW_ITEM_TYPE_GRE:
			ret = mlx5_flow_validate_item_gre(items, item_flags,
							  next_protocol, error);
			if (ret < 0)
				return ret;
			last_item = MLX5_FLOW_LAYER_GRE;
			break;
		case RTE_FLOW_ITEM_TYPE_MPLS:
			ret = mlx5_flow_validate_item_mpls(dev, items,
							   item_flags,
							   last_item, error);
			if (ret < 0)
				return ret;
			last_item = MLX5_FLOW_LAYER_MPLS;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "item not supported");
		}
		/* Commit only after the item validated successfully. */
		item_flags |= last_item;
	}
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_FLAG:
			ret = mlx5_flow_validate_action_flag(action_flags,
							     attr,
							     error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_FLAG;
			break;
		case RTE_FLOW_ACTION_TYPE_MARK:
			ret = mlx5_flow_validate_action_mark(actions,
							     action_flags,
							     attr,
							     error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_MARK;
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			ret = mlx5_flow_validate_action_drop(action_flags,
							     attr,
							     error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_DROP;
			break;
		case RTE_FLOW_ACTION_TYPE_QUEUE:
			ret = mlx5_flow_validate_action_queue(actions,
							      action_flags, dev,
							      attr,
							      error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_QUEUE;
			break;
		case RTE_FLOW_ACTION_TYPE_RSS:
			ret = mlx5_flow_validate_action_rss(actions,
							    action_flags, dev,
							    attr,
							    error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_RSS;
			break;
		case RTE_FLOW_ACTION_TYPE_COUNT:
			ret = mlx5_flow_validate_action_count(dev, attr, error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_COUNT;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
	}
	/* Every flow must decide the packet fate: drop, queue or RSS. */
	if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
					  "no fate action is found");
	return 0;
}
1221 
1222 /**
1223  * Calculate the required bytes that are needed for the action part of the verbs
1224  * flow.
1225  *
1226  * @param[in] actions
1227  *   Pointer to the list of actions.
1228  *
1229  * @return
1230  *   The size of the memory needed for all actions.
1231  */
1232 static int
1233 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1234 {
1235 	int size = 0;
1236 
1237 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1238 		switch (actions->type) {
1239 		case RTE_FLOW_ACTION_TYPE_VOID:
1240 			break;
1241 		case RTE_FLOW_ACTION_TYPE_FLAG:
1242 			size += sizeof(struct ibv_flow_spec_action_tag);
1243 			break;
1244 		case RTE_FLOW_ACTION_TYPE_MARK:
1245 			size += sizeof(struct ibv_flow_spec_action_tag);
1246 			break;
1247 		case RTE_FLOW_ACTION_TYPE_DROP:
1248 			size += sizeof(struct ibv_flow_spec_action_drop);
1249 			break;
1250 		case RTE_FLOW_ACTION_TYPE_QUEUE:
1251 			break;
1252 		case RTE_FLOW_ACTION_TYPE_RSS:
1253 			break;
1254 		case RTE_FLOW_ACTION_TYPE_COUNT:
1255 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1256 	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1257 			size += sizeof(struct ibv_flow_spec_counter_action);
1258 #endif
1259 			break;
1260 		default:
1261 			break;
1262 		}
1263 	}
1264 	return size;
1265 }
1266 
1267 /**
1268  * Calculate the required bytes that are needed for the item part of the verbs
1269  * flow.
1270  *
1271  * @param[in] items
1272  *   Pointer to the list of items.
1273  *
1274  * @return
1275  *   The size of the memory needed for all items.
1276  */
1277 static int
1278 flow_verbs_get_items_size(const struct rte_flow_item items[])
1279 {
1280 	int size = 0;
1281 
1282 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1283 		switch (items->type) {
1284 		case RTE_FLOW_ITEM_TYPE_VOID:
1285 			break;
1286 		case RTE_FLOW_ITEM_TYPE_ETH:
1287 			size += sizeof(struct ibv_flow_spec_eth);
1288 			break;
1289 		case RTE_FLOW_ITEM_TYPE_VLAN:
1290 			size += sizeof(struct ibv_flow_spec_eth);
1291 			break;
1292 		case RTE_FLOW_ITEM_TYPE_IPV4:
1293 			size += sizeof(struct ibv_flow_spec_ipv4_ext);
1294 			break;
1295 		case RTE_FLOW_ITEM_TYPE_IPV6:
1296 			size += sizeof(struct ibv_flow_spec_ipv6);
1297 			break;
1298 		case RTE_FLOW_ITEM_TYPE_UDP:
1299 			size += sizeof(struct ibv_flow_spec_tcp_udp);
1300 			break;
1301 		case RTE_FLOW_ITEM_TYPE_TCP:
1302 			size += sizeof(struct ibv_flow_spec_tcp_udp);
1303 			break;
1304 		case RTE_FLOW_ITEM_TYPE_VXLAN:
1305 			size += sizeof(struct ibv_flow_spec_tunnel);
1306 			break;
1307 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1308 			size += sizeof(struct ibv_flow_spec_tunnel);
1309 			break;
1310 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1311 		case RTE_FLOW_ITEM_TYPE_GRE:
1312 			size += sizeof(struct ibv_flow_spec_gre);
1313 			break;
1314 		case RTE_FLOW_ITEM_TYPE_MPLS:
1315 			size += sizeof(struct ibv_flow_spec_mpls);
1316 			break;
1317 #else
1318 		case RTE_FLOW_ITEM_TYPE_GRE:
1319 			size += sizeof(struct ibv_flow_spec_tunnel);
1320 			break;
1321 #endif
1322 		default:
1323 			break;
1324 		}
1325 	}
1326 	return size;
1327 }
1328 
1329 /**
1330  * Internal preparation function. Allocate mlx5_flow with the required size.
1331  * The required size is calculate based on the actions and items. This function
1332  * also returns the detected actions and items for later use.
1333  *
1334  * @param[in] attr
1335  *   Pointer to the flow attributes.
1336  * @param[in] items
1337  *   Pointer to the list of items.
1338  * @param[in] actions
1339  *   Pointer to the list of actions.
1340  * @param[out] error
1341  *   Pointer to the error structure.
1342  *
1343  * @return
1344  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1345  *   is set.
1346  */
1347 static struct mlx5_flow *
1348 flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
1349 		   const struct rte_flow_item items[],
1350 		   const struct rte_flow_action actions[],
1351 		   struct rte_flow_error *error)
1352 {
1353 	uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
1354 	struct mlx5_flow *flow;
1355 
1356 	size += flow_verbs_get_actions_size(actions);
1357 	size += flow_verbs_get_items_size(items);
1358 	flow = rte_calloc(__func__, 1, size, 0);
1359 	if (!flow) {
1360 		rte_flow_error_set(error, ENOMEM,
1361 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1362 				   "not enough memory to create flow");
1363 		return NULL;
1364 	}
1365 	flow->verbs.attr = (void *)(flow + 1);
1366 	flow->verbs.specs =
1367 		(uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
1368 	return flow;
1369 }
1370 
/**
 * Fill the flow with verb spec.
 *
 * Two passes: first every action is converted into a Verbs spec (collecting
 * action flags), then every pattern item (collecting layer flags, the RSS
 * hash fields and the matching subpriority).
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] dev_flow
 *   Pointer to the mlx5 flow.
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, else a negative errno value otherwise and rte_ernno is set.
 */
static int
flow_verbs_translate(struct rte_eth_dev *dev,
		     struct mlx5_flow *dev_flow,
		     const struct rte_flow_attr *attr,
		     const struct rte_flow_item items[],
		     const struct rte_flow_action actions[],
		     struct rte_flow_error *error)
{
	struct rte_flow *flow = dev_flow->flow;
	uint64_t item_flags = 0;
	uint64_t action_flags = 0;
	uint64_t priority = attr->priority;
	uint32_t subpriority = 0;
	struct mlx5_priv *priv = dev->data->dev_private;

	/* MLX5_FLOW_PRIO_RSVD means "use the lowest configured priority". */
	if (priority == MLX5_FLOW_PRIO_RSVD)
		priority = priv->config.flow_prio - 1;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		int ret;

		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_FLAG:
			flow_verbs_translate_action_flag(dev_flow, actions);
			action_flags |= MLX5_FLOW_ACTION_FLAG;
			break;
		case RTE_FLOW_ACTION_TYPE_MARK:
			flow_verbs_translate_action_mark(dev_flow, actions);
			action_flags |= MLX5_FLOW_ACTION_MARK;
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			flow_verbs_translate_action_drop(dev_flow, actions);
			action_flags |= MLX5_FLOW_ACTION_DROP;
			break;
		case RTE_FLOW_ACTION_TYPE_QUEUE:
			flow_verbs_translate_action_queue(dev_flow, actions);
			action_flags |= MLX5_FLOW_ACTION_QUEUE;
			break;
		case RTE_FLOW_ACTION_TYPE_RSS:
			flow_verbs_translate_action_rss(dev_flow, actions);
			action_flags |= MLX5_FLOW_ACTION_RSS;
			break;
		case RTE_FLOW_ACTION_TYPE_COUNT:
			/* Only translation step that may fail (counter alloc). */
			ret = flow_verbs_translate_action_count(dev_flow,
								actions,
								dev, error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_COUNT;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
	}
	flow->actions = action_flags;
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);

		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			flow_verbs_translate_item_eth(dev_flow, items,
						      item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
					       MLX5_FLOW_LAYER_OUTER_L2;
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			flow_verbs_translate_item_vlan(dev_flow, items,
						       item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
						MLX5_FLOW_LAYER_INNER_VLAN) :
					       (MLX5_FLOW_LAYER_OUTER_L2 |
						MLX5_FLOW_LAYER_OUTER_VLAN);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			flow_verbs_translate_item_ipv4(dev_flow, items,
						       item_flags);
			subpriority = MLX5_PRIORITY_MAP_L3;
			/* Accumulate RSS hash fields for this layer. */
			dev_flow->verbs.hash_fields |=
				mlx5_flow_hashfields_adjust
					(dev_flow, tunnel,
					 MLX5_IPV4_LAYER_TYPES,
					 MLX5_IPV4_IBV_RX_HASH);
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
					       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			flow_verbs_translate_item_ipv6(dev_flow, items,
						       item_flags);
			subpriority = MLX5_PRIORITY_MAP_L3;
			dev_flow->verbs.hash_fields |=
				mlx5_flow_hashfields_adjust
					(dev_flow, tunnel,
					 MLX5_IPV6_LAYER_TYPES,
					 MLX5_IPV6_IBV_RX_HASH);
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
					       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			flow_verbs_translate_item_tcp(dev_flow, items,
						      item_flags);
			subpriority = MLX5_PRIORITY_MAP_L4;
			dev_flow->verbs.hash_fields |=
				mlx5_flow_hashfields_adjust
					(dev_flow, tunnel, ETH_RSS_TCP,
					 (IBV_RX_HASH_SRC_PORT_TCP |
					  IBV_RX_HASH_DST_PORT_TCP));
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
					       MLX5_FLOW_LAYER_OUTER_L4_TCP;
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			flow_verbs_translate_item_udp(dev_flow, items,
						      item_flags);
			subpriority = MLX5_PRIORITY_MAP_L4;
			dev_flow->verbs.hash_fields |=
				mlx5_flow_hashfields_adjust
					(dev_flow, tunnel, ETH_RSS_UDP,
					 (IBV_RX_HASH_SRC_PORT_UDP |
					  IBV_RX_HASH_DST_PORT_UDP));
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
					       MLX5_FLOW_LAYER_OUTER_L4_UDP;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN:
			flow_verbs_translate_item_vxlan(dev_flow, items,
							item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= MLX5_FLOW_LAYER_VXLAN;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
			flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
							    item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
			break;
		case RTE_FLOW_ITEM_TYPE_GRE:
			flow_verbs_translate_item_gre(dev_flow, items,
						      item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= MLX5_FLOW_LAYER_GRE;
			break;
		case RTE_FLOW_ITEM_TYPE_MPLS:
			flow_verbs_translate_item_mpls(dev_flow, items,
						       item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= MLX5_FLOW_LAYER_MPLS;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL,
						  "item not supported");
		}
	}
	dev_flow->layers = item_flags;
	/* Final priority combines the flow priority with the deepest layer. */
	dev_flow->verbs.attr->priority =
		mlx5_flow_adjust_priority(dev, priority, subpriority);
	dev_flow->verbs.attr->port = (uint8_t)priv->ibv_port;
	return 0;
}
1556 
1557 /**
1558  * Remove the flow from the NIC but keeps it in memory.
1559  *
1560  * @param[in] dev
1561  *   Pointer to the Ethernet device structure.
1562  * @param[in, out] flow
1563  *   Pointer to flow structure.
1564  */
1565 static void
1566 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1567 {
1568 	struct mlx5_flow_verbs *verbs;
1569 	struct mlx5_flow *dev_flow;
1570 
1571 	if (!flow)
1572 		return;
1573 	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1574 		verbs = &dev_flow->verbs;
1575 		if (verbs->flow) {
1576 			claim_zero(mlx5_glue->destroy_flow(verbs->flow));
1577 			verbs->flow = NULL;
1578 		}
1579 		if (verbs->hrxq) {
1580 			if (flow->actions & MLX5_FLOW_ACTION_DROP)
1581 				mlx5_hrxq_drop_release(dev);
1582 			else
1583 				mlx5_hrxq_release(dev, verbs->hrxq);
1584 			verbs->hrxq = NULL;
1585 		}
1586 	}
1587 }
1588 
1589 /**
1590  * Remove the flow from the NIC and the memory.
1591  *
1592  * @param[in] dev
1593  *   Pointer to the Ethernet device structure.
1594  * @param[in, out] flow
1595  *   Pointer to flow structure.
1596  */
1597 static void
1598 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1599 {
1600 	struct mlx5_flow *dev_flow;
1601 
1602 	if (!flow)
1603 		return;
1604 	flow_verbs_remove(dev, flow);
1605 	while (!LIST_EMPTY(&flow->dev_flows)) {
1606 		dev_flow = LIST_FIRST(&flow->dev_flows);
1607 		LIST_REMOVE(dev_flow, next);
1608 		rte_free(dev_flow);
1609 	}
1610 	if (flow->counter) {
1611 		flow_verbs_counter_release(flow->counter);
1612 		flow->counter = NULL;
1613 	}
1614 }
1615 
/**
 * Apply the flow to the NIC.
 *
 * For every device sub-flow: acquire a hash Rx queue (the drop queue for
 * drop flows, a shared/new RSS queue otherwise), then create the Verbs
 * flow rule on its QP. On any failure all references taken so far are
 * rolled back.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
		 struct rte_flow_error *error)
{
	struct mlx5_flow_verbs *verbs;
	struct mlx5_flow *dev_flow;
	int err;

	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
		verbs = &dev_flow->verbs;
		if (flow->actions & MLX5_FLOW_ACTION_DROP) {
			verbs->hrxq = mlx5_hrxq_drop_new(dev);
			if (!verbs->hrxq) {
				rte_flow_error_set
					(error, errno,
					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					 "cannot get drop hash queue");
				goto error;
			}
		} else {
			struct mlx5_hrxq *hrxq;

			/* Reuse an existing hrxq when one matches. */
			hrxq = mlx5_hrxq_get(dev, flow->key,
					     MLX5_RSS_HASH_KEY_LEN,
					     verbs->hash_fields,
					     (*flow->queue),
					     flow->rss.queue_num);
			if (!hrxq)
				hrxq = mlx5_hrxq_new(dev, flow->key,
						     MLX5_RSS_HASH_KEY_LEN,
						     verbs->hash_fields,
						     (*flow->queue),
						     flow->rss.queue_num,
						     !!(dev_flow->layers &
						      MLX5_FLOW_LAYER_TUNNEL));
			if (!hrxq) {
				rte_flow_error_set
					(error, rte_errno,
					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					 "cannot get hash queue");
				goto error;
			}
			verbs->hrxq = hrxq;
		}
		verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
						     verbs->attr);
		if (!verbs->flow) {
			rte_flow_error_set(error, errno,
					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					   NULL,
					   "hardware refuses to create flow");
			goto error;
		}
	}
	return 0;
error:
	err = rte_errno; /* Save rte_errno before cleanup. */
	/* Release every queue reference taken by the loop above. */
	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
		verbs = &dev_flow->verbs;
		if (verbs->hrxq) {
			if (flow->actions & MLX5_FLOW_ACTION_DROP)
				mlx5_hrxq_drop_release(dev);
			else
				mlx5_hrxq_release(dev, verbs->hrxq);
			verbs->hrxq = NULL;
		}
	}
	rte_errno = err; /* Restore rte_errno. */
	return -rte_errno;
}
1699 
1700 /**
1701  * Query a flow.
1702  *
1703  * @see rte_flow_query()
1704  * @see rte_flow_ops
1705  */
1706 static int
1707 flow_verbs_query(struct rte_eth_dev *dev,
1708 		 struct rte_flow *flow,
1709 		 const struct rte_flow_action *actions,
1710 		 void *data,
1711 		 struct rte_flow_error *error)
1712 {
1713 	int ret = -EINVAL;
1714 
1715 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1716 		switch (actions->type) {
1717 		case RTE_FLOW_ACTION_TYPE_VOID:
1718 			break;
1719 		case RTE_FLOW_ACTION_TYPE_COUNT:
1720 			ret = flow_verbs_counter_query(dev, flow, data, error);
1721 			break;
1722 		default:
1723 			return rte_flow_error_set(error, ENOTSUP,
1724 						  RTE_FLOW_ERROR_TYPE_ACTION,
1725 						  actions,
1726 						  "action not supported");
1727 		}
1728 	}
1729 	return ret;
1730 }
1731 
/* Verbs flow engine callbacks registered with the generic mlx5 flow layer. */
const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
	.validate = flow_verbs_validate,
	.prepare = flow_verbs_prepare,
	.translate = flow_verbs_translate,
	.apply = flow_verbs_apply,
	.remove = flow_verbs_remove,
	.destroy = flow_verbs_destroy,
	.query = flow_verbs_query,
};
1741