xref: /dpdk/drivers/net/mlx5/mlx5_flow_verbs.c (revision 5ecb687a5698d2d8ec1f3b3b5a7a16bceca3e29c)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4 
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10 
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20 
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_eth_ctrl.h>
24 #include <rte_ethdev_driver.h>
25 #include <rte_flow.h>
26 #include <rte_flow_driver.h>
27 #include <rte_malloc.h>
28 #include <rte_ip.h>
29 
30 #include "mlx5.h"
31 #include "mlx5_defs.h"
32 #include "mlx5_flow.h"
33 #include "mlx5_glue.h"
34 #include "mlx5_prm.h"
35 #include "mlx5_rxtx.h"
36 
/*
 * Verbs spec type modifier: evaluates to IBV_FLOW_SPEC_INNER when
 * @p item_flags has any MLX5_FLOW_LAYER_TUNNEL bit set, 0 otherwise.
 */
#define VERBS_SPEC_INNER(item_flags) \
	((((item_flags) & MLX5_FLOW_LAYER_TUNNEL) != 0) ? \
	 IBV_FLOW_SPEC_INNER : 0)
39 
40 /**
41  * Create Verbs flow counter with Verbs library.
42  *
43  * @param[in] dev
44  *   Pointer to the Ethernet device structure.
45  * @param[in, out] counter
46  *   mlx5 flow counter object, contains the counter id,
47  *   handle of created Verbs flow counter is returned
48  *   in cs field (if counters are supported).
49  *
50  * @return
51  *   0 On success else a negative errno value is returned
52  *   and rte_errno is set.
53  */
54 static int
55 flow_verbs_counter_create(struct rte_eth_dev *dev,
56 			  struct mlx5_flow_counter *counter)
57 {
58 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
59 	struct mlx5_priv *priv = dev->data->dev_private;
60 	struct ibv_context *ctx = priv->sh->ctx;
61 	struct ibv_counter_set_init_attr init = {
62 			 .counter_set_id = counter->id};
63 
64 	counter->cs = mlx5_glue->create_counter_set(ctx, &init);
65 	if (!counter->cs) {
66 		rte_errno = ENOTSUP;
67 		return -ENOTSUP;
68 	}
69 	return 0;
70 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
71 	struct mlx5_priv *priv = dev->data->dev_private;
72 	struct ibv_context *ctx = priv->sh->ctx;
73 	struct ibv_counters_init_attr init = {0};
74 	struct ibv_counter_attach_attr attach;
75 	int ret;
76 
77 	memset(&attach, 0, sizeof(attach));
78 	counter->cs = mlx5_glue->create_counters(ctx, &init);
79 	if (!counter->cs) {
80 		rte_errno = ENOTSUP;
81 		return -ENOTSUP;
82 	}
83 	attach.counter_desc = IBV_COUNTER_PACKETS;
84 	attach.index = 0;
85 	ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
86 	if (!ret) {
87 		attach.counter_desc = IBV_COUNTER_BYTES;
88 		attach.index = 1;
89 		ret = mlx5_glue->attach_counters
90 					(counter->cs, &attach, NULL);
91 	}
92 	if (ret) {
93 		claim_zero(mlx5_glue->destroy_counters(counter->cs));
94 		counter->cs = NULL;
95 		rte_errno = ret;
96 		return -ret;
97 	}
98 	return 0;
99 #else
100 	(void)dev;
101 	(void)counter;
102 	rte_errno = ENOTSUP;
103 	return -ENOTSUP;
104 #endif
105 }
106 
107 /**
108  * Get a flow counter.
109  *
110  * @param[in] dev
111  *   Pointer to the Ethernet device structure.
112  * @param[in] shared
113  *   Indicate if this counter is shared with other flows.
114  * @param[in] id
115  *   Counter identifier.
116  *
117  * @return
118  *   A pointer to the counter, NULL otherwise and rte_errno is set.
119  */
120 static struct mlx5_flow_counter *
121 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
122 {
123 	struct mlx5_priv *priv = dev->data->dev_private;
124 	struct mlx5_flow_counter *cnt;
125 	int ret;
126 
127 	if (shared) {
128 		LIST_FOREACH(cnt, &priv->flow_counters, next) {
129 			if (cnt->shared && cnt->id == id) {
130 				cnt->ref_cnt++;
131 				return cnt;
132 			}
133 		}
134 	}
135 	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
136 	if (!cnt) {
137 		rte_errno = ENOMEM;
138 		return NULL;
139 	}
140 	cnt->id = id;
141 	cnt->shared = shared;
142 	cnt->ref_cnt = 1;
143 	cnt->hits = 0;
144 	cnt->bytes = 0;
145 	/* Create counter with Verbs. */
146 	ret = flow_verbs_counter_create(dev, cnt);
147 	if (!ret) {
148 		LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
149 		return cnt;
150 	}
151 	/* Some error occurred in Verbs library. */
152 	rte_free(cnt);
153 	rte_errno = -ret;
154 	return NULL;
155 }
156 
157 /**
158  * Release a flow counter.
159  *
160  * @param[in] counter
161  *   Pointer to the counter handler.
162  */
163 static void
164 flow_verbs_counter_release(struct mlx5_flow_counter *counter)
165 {
166 	if (--counter->ref_cnt == 0) {
167 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
168 		claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
169 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
170 		claim_zero(mlx5_glue->destroy_counters(counter->cs));
171 #endif
172 		LIST_REMOVE(counter, next);
173 		rte_free(counter);
174 	}
175 }
176 
177 /**
178  * Query a flow counter via Verbs library call.
179  *
180  * @see rte_flow_query()
181  * @see rte_flow_ops
182  */
183 static int
184 flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
185 			 struct rte_flow *flow, void *data,
186 			 struct rte_flow_error *error)
187 {
188 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
189 	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
190 	if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
191 		struct rte_flow_query_count *qc = data;
192 		uint64_t counters[2] = {0, 0};
193 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
194 		struct ibv_query_counter_set_attr query_cs_attr = {
195 			.cs = flow->counter->cs,
196 			.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
197 		};
198 		struct ibv_counter_set_data query_out = {
199 			.out = counters,
200 			.outlen = 2 * sizeof(uint64_t),
201 		};
202 		int err = mlx5_glue->query_counter_set(&query_cs_attr,
203 						       &query_out);
204 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
205 		int err = mlx5_glue->query_counters
206 			       (flow->counter->cs, counters,
207 				RTE_DIM(counters),
208 				IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
209 #endif
210 		if (err)
211 			return rte_flow_error_set
212 				(error, err,
213 				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
214 				 NULL,
215 				 "cannot read counter");
216 		qc->hits_set = 1;
217 		qc->bytes_set = 1;
218 		qc->hits = counters[0] - flow->counter->hits;
219 		qc->bytes = counters[1] - flow->counter->bytes;
220 		if (qc->reset) {
221 			flow->counter->hits = counters[0];
222 			flow->counter->bytes = counters[1];
223 		}
224 		return 0;
225 	}
226 	return rte_flow_error_set(error, EINVAL,
227 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
228 				  NULL,
229 				  "flow does not have counter");
230 #else
231 	(void)flow;
232 	(void)data;
233 	return rte_flow_error_set(error, ENOTSUP,
234 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
235 				  NULL,
236 				  "counters are not available");
237 #endif
238 }
239 
240 /**
241  * Add a verbs item specification into @p verbs.
242  *
243  * @param[out] verbs
244  *   Pointer to verbs structure.
245  * @param[in] src
246  *   Create specification.
247  * @param[in] size
248  *   Size in bytes of the specification to copy.
249  */
250 static void
251 flow_verbs_spec_add(struct mlx5_flow_verbs *verbs, void *src, unsigned int size)
252 {
253 	void *dst;
254 
255 	if (!verbs)
256 		return;
257 	assert(verbs->specs);
258 	dst = (void *)(verbs->specs + verbs->size);
259 	memcpy(dst, src, size);
260 	++verbs->attr->num_of_specs;
261 	verbs->size += size;
262 }
263 
264 /**
265  * Convert the @p item into a Verbs specification. This function assumes that
266  * the input is valid and that there is space to insert the requested item
267  * into the flow.
268  *
269  * @param[in, out] dev_flow
270  *   Pointer to dev_flow structure.
271  * @param[in] item
272  *   Item specification.
273  * @param[in] item_flags
274  *   Parsed item flags.
275  */
276 static void
277 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
278 			      const struct rte_flow_item *item,
279 			      uint64_t item_flags)
280 {
281 	const struct rte_flow_item_eth *spec = item->spec;
282 	const struct rte_flow_item_eth *mask = item->mask;
283 	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
284 	struct ibv_flow_spec_eth eth = {
285 		.type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
286 		.size = size,
287 	};
288 
289 	if (!mask)
290 		mask = &rte_flow_item_eth_mask;
291 	if (spec) {
292 		unsigned int i;
293 
294 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
295 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
296 		eth.val.ether_type = spec->type;
297 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
298 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
299 		eth.mask.ether_type = mask->type;
300 		/* Remove unwanted bits from values. */
301 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
302 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
303 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
304 		}
305 		eth.val.ether_type &= eth.mask.ether_type;
306 	}
307 	flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
308 }
309 
310 /**
311  * Update the VLAN tag in the Verbs Ethernet specification.
312  * This function assumes that the input is valid and there is space to add
313  * the requested item.
314  *
315  * @param[in, out] attr
316  *   Pointer to Verbs attributes structure.
317  * @param[in] eth
318  *   Verbs structure containing the VLAN information to copy.
319  */
320 static void
321 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
322 			    struct ibv_flow_spec_eth *eth)
323 {
324 	unsigned int i;
325 	const enum ibv_flow_spec_type search = eth->type;
326 	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
327 		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
328 
329 	for (i = 0; i != attr->num_of_specs; ++i) {
330 		if (hdr->type == search) {
331 			struct ibv_flow_spec_eth *e =
332 				(struct ibv_flow_spec_eth *)hdr;
333 
334 			e->val.vlan_tag = eth->val.vlan_tag;
335 			e->mask.vlan_tag = eth->mask.vlan_tag;
336 			e->val.ether_type = eth->val.ether_type;
337 			e->mask.ether_type = eth->mask.ether_type;
338 			break;
339 		}
340 		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
341 	}
342 }
343 
344 /**
345  * Convert the @p item into a Verbs specification. This function assumes that
346  * the input is valid and that there is space to insert the requested item
347  * into the flow.
348  *
349  * @param[in, out] dev_flow
350  *   Pointer to dev_flow structure.
351  * @param[in] item
352  *   Item specification.
353  * @param[in] item_flags
354  *   Parsed item flags.
355  */
356 static void
357 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
358 			       const struct rte_flow_item *item,
359 			       uint64_t item_flags)
360 {
361 	const struct rte_flow_item_vlan *spec = item->spec;
362 	const struct rte_flow_item_vlan *mask = item->mask;
363 	unsigned int size = sizeof(struct ibv_flow_spec_eth);
364 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
365 	struct ibv_flow_spec_eth eth = {
366 		.type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
367 		.size = size,
368 	};
369 	const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
370 				      MLX5_FLOW_LAYER_OUTER_L2;
371 
372 	if (!mask)
373 		mask = &rte_flow_item_vlan_mask;
374 	if (spec) {
375 		eth.val.vlan_tag = spec->tci;
376 		eth.mask.vlan_tag = mask->tci;
377 		eth.val.vlan_tag &= eth.mask.vlan_tag;
378 		eth.val.ether_type = spec->inner_type;
379 		eth.mask.ether_type = mask->inner_type;
380 		eth.val.ether_type &= eth.mask.ether_type;
381 	}
382 	if (!(item_flags & l2m))
383 		flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
384 	else
385 		flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
386 }
387 
388 /**
389  * Convert the @p item into a Verbs specification. This function assumes that
390  * the input is valid and that there is space to insert the requested item
391  * into the flow.
392  *
393  * @param[in, out] dev_flow
394  *   Pointer to dev_flow structure.
395  * @param[in] item
396  *   Item specification.
397  * @param[in] item_flags
398  *   Parsed item flags.
399  */
400 static void
401 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
402 			       const struct rte_flow_item *item,
403 			       uint64_t item_flags)
404 {
405 	const struct rte_flow_item_ipv4 *spec = item->spec;
406 	const struct rte_flow_item_ipv4 *mask = item->mask;
407 	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
408 	struct ibv_flow_spec_ipv4_ext ipv4 = {
409 		.type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
410 		.size = size,
411 	};
412 
413 	if (!mask)
414 		mask = &rte_flow_item_ipv4_mask;
415 	if (spec) {
416 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
417 			.src_ip = spec->hdr.src_addr,
418 			.dst_ip = spec->hdr.dst_addr,
419 			.proto = spec->hdr.next_proto_id,
420 			.tos = spec->hdr.type_of_service,
421 		};
422 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
423 			.src_ip = mask->hdr.src_addr,
424 			.dst_ip = mask->hdr.dst_addr,
425 			.proto = mask->hdr.next_proto_id,
426 			.tos = mask->hdr.type_of_service,
427 		};
428 		/* Remove unwanted bits from values. */
429 		ipv4.val.src_ip &= ipv4.mask.src_ip;
430 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
431 		ipv4.val.proto &= ipv4.mask.proto;
432 		ipv4.val.tos &= ipv4.mask.tos;
433 	}
434 	flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
435 }
436 
437 /**
438  * Convert the @p item into a Verbs specification. This function assumes that
439  * the input is valid and that there is space to insert the requested item
440  * into the flow.
441  *
442  * @param[in, out] dev_flow
443  *   Pointer to dev_flow structure.
444  * @param[in] item
445  *   Item specification.
446  * @param[in] item_flags
447  *   Parsed item flags.
448  */
449 static void
450 flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
451 			       const struct rte_flow_item *item,
452 			       uint64_t item_flags)
453 {
454 	const struct rte_flow_item_ipv6 *spec = item->spec;
455 	const struct rte_flow_item_ipv6 *mask = item->mask;
456 	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
457 	struct ibv_flow_spec_ipv6 ipv6 = {
458 		.type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
459 		.size = size,
460 	};
461 
462 	if (!mask)
463 		mask = &rte_flow_item_ipv6_mask;
464 	if (spec) {
465 		unsigned int i;
466 		uint32_t vtc_flow_val;
467 		uint32_t vtc_flow_mask;
468 
469 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
470 		       RTE_DIM(ipv6.val.src_ip));
471 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
472 		       RTE_DIM(ipv6.val.dst_ip));
473 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
474 		       RTE_DIM(ipv6.mask.src_ip));
475 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
476 		       RTE_DIM(ipv6.mask.dst_ip));
477 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
478 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
479 		ipv6.val.flow_label =
480 			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
481 					 IPV6_HDR_FL_SHIFT);
482 		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
483 					 IPV6_HDR_TC_SHIFT;
484 		ipv6.val.next_hdr = spec->hdr.proto;
485 		ipv6.val.hop_limit = spec->hdr.hop_limits;
486 		ipv6.mask.flow_label =
487 			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
488 					 IPV6_HDR_FL_SHIFT);
489 		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
490 					  IPV6_HDR_TC_SHIFT;
491 		ipv6.mask.next_hdr = mask->hdr.proto;
492 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
493 		/* Remove unwanted bits from values. */
494 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
495 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
496 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
497 		}
498 		ipv6.val.flow_label &= ipv6.mask.flow_label;
499 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
500 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
501 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
502 	}
503 	flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
504 }
505 
506 /**
507  * Convert the @p item into a Verbs specification. This function assumes that
508  * the input is valid and that there is space to insert the requested item
509  * into the flow.
510  *
511  * @param[in, out] dev_flow
512  *   Pointer to dev_flow structure.
513  * @param[in] item
514  *   Item specification.
515  * @param[in] item_flags
516  *   Parsed item flags.
517  */
518 static void
519 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
520 			      const struct rte_flow_item *item,
521 			      uint64_t item_flags __rte_unused)
522 {
523 	const struct rte_flow_item_tcp *spec = item->spec;
524 	const struct rte_flow_item_tcp *mask = item->mask;
525 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
526 	struct ibv_flow_spec_tcp_udp tcp = {
527 		.type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
528 		.size = size,
529 	};
530 
531 	if (!mask)
532 		mask = &rte_flow_item_tcp_mask;
533 	if (spec) {
534 		tcp.val.dst_port = spec->hdr.dst_port;
535 		tcp.val.src_port = spec->hdr.src_port;
536 		tcp.mask.dst_port = mask->hdr.dst_port;
537 		tcp.mask.src_port = mask->hdr.src_port;
538 		/* Remove unwanted bits from values. */
539 		tcp.val.src_port &= tcp.mask.src_port;
540 		tcp.val.dst_port &= tcp.mask.dst_port;
541 	}
542 	flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
543 }
544 
545 /**
546  * Convert the @p item into a Verbs specification. This function assumes that
547  * the input is valid and that there is space to insert the requested item
548  * into the flow.
549  *
550  * @param[in, out] dev_flow
551  *   Pointer to dev_flow structure.
552  * @param[in] item
553  *   Item specification.
554  * @param[in] item_flags
555  *   Parsed item flags.
556  */
557 static void
558 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
559 			      const struct rte_flow_item *item,
560 			      uint64_t item_flags __rte_unused)
561 {
562 	const struct rte_flow_item_udp *spec = item->spec;
563 	const struct rte_flow_item_udp *mask = item->mask;
564 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
565 	struct ibv_flow_spec_tcp_udp udp = {
566 		.type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
567 		.size = size,
568 	};
569 
570 	if (!mask)
571 		mask = &rte_flow_item_udp_mask;
572 	if (spec) {
573 		udp.val.dst_port = spec->hdr.dst_port;
574 		udp.val.src_port = spec->hdr.src_port;
575 		udp.mask.dst_port = mask->hdr.dst_port;
576 		udp.mask.src_port = mask->hdr.src_port;
577 		/* Remove unwanted bits from values. */
578 		udp.val.src_port &= udp.mask.src_port;
579 		udp.val.dst_port &= udp.mask.dst_port;
580 	}
581 	flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
582 }
583 
584 /**
585  * Convert the @p item into a Verbs specification. This function assumes that
586  * the input is valid and that there is space to insert the requested item
587  * into the flow.
588  *
589  * @param[in, out] dev_flow
590  *   Pointer to dev_flow structure.
591  * @param[in] item
592  *   Item specification.
593  * @param[in] item_flags
594  *   Parsed item flags.
595  */
596 static void
597 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
598 				const struct rte_flow_item *item,
599 				uint64_t item_flags __rte_unused)
600 {
601 	const struct rte_flow_item_vxlan *spec = item->spec;
602 	const struct rte_flow_item_vxlan *mask = item->mask;
603 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
604 	struct ibv_flow_spec_tunnel vxlan = {
605 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
606 		.size = size,
607 	};
608 	union vni {
609 		uint32_t vlan_id;
610 		uint8_t vni[4];
611 	} id = { .vlan_id = 0, };
612 
613 	if (!mask)
614 		mask = &rte_flow_item_vxlan_mask;
615 	if (spec) {
616 		memcpy(&id.vni[1], spec->vni, 3);
617 		vxlan.val.tunnel_id = id.vlan_id;
618 		memcpy(&id.vni[1], mask->vni, 3);
619 		vxlan.mask.tunnel_id = id.vlan_id;
620 		/* Remove unwanted bits from values. */
621 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
622 	}
623 	flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
624 }
625 
626 /**
627  * Convert the @p item into a Verbs specification. This function assumes that
628  * the input is valid and that there is space to insert the requested item
629  * into the flow.
630  *
631  * @param[in, out] dev_flow
632  *   Pointer to dev_flow structure.
633  * @param[in] item
634  *   Item specification.
635  * @param[in] item_flags
636  *   Parsed item flags.
637  */
638 static void
639 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
640 				    const struct rte_flow_item *item,
641 				    uint64_t item_flags __rte_unused)
642 {
643 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
644 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
645 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
646 	struct ibv_flow_spec_tunnel vxlan_gpe = {
647 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
648 		.size = size,
649 	};
650 	union vni {
651 		uint32_t vlan_id;
652 		uint8_t vni[4];
653 	} id = { .vlan_id = 0, };
654 
655 	if (!mask)
656 		mask = &rte_flow_item_vxlan_gpe_mask;
657 	if (spec) {
658 		memcpy(&id.vni[1], spec->vni, 3);
659 		vxlan_gpe.val.tunnel_id = id.vlan_id;
660 		memcpy(&id.vni[1], mask->vni, 3);
661 		vxlan_gpe.mask.tunnel_id = id.vlan_id;
662 		/* Remove unwanted bits from values. */
663 		vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
664 	}
665 	flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
666 }
667 
668 /**
669  * Update the protocol in Verbs IPv4/IPv6 spec.
670  *
671  * @param[in, out] attr
672  *   Pointer to Verbs attributes structure.
673  * @param[in] search
674  *   Specification type to search in order to update the IP protocol.
675  * @param[in] protocol
676  *   Protocol value to set if none is present in the specification.
677  */
678 static void
679 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
680 				       enum ibv_flow_spec_type search,
681 				       uint8_t protocol)
682 {
683 	unsigned int i;
684 	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
685 		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
686 
687 	if (!attr)
688 		return;
689 	for (i = 0; i != attr->num_of_specs; ++i) {
690 		if (hdr->type == search) {
691 			union {
692 				struct ibv_flow_spec_ipv4_ext *ipv4;
693 				struct ibv_flow_spec_ipv6 *ipv6;
694 			} ip;
695 
696 			switch (search) {
697 			case IBV_FLOW_SPEC_IPV4_EXT:
698 				ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
699 				if (!ip.ipv4->val.proto) {
700 					ip.ipv4->val.proto = protocol;
701 					ip.ipv4->mask.proto = 0xff;
702 				}
703 				break;
704 			case IBV_FLOW_SPEC_IPV6:
705 				ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
706 				if (!ip.ipv6->val.next_hdr) {
707 					ip.ipv6->val.next_hdr = protocol;
708 					ip.ipv6->mask.next_hdr = 0xff;
709 				}
710 				break;
711 			default:
712 				break;
713 			}
714 			break;
715 		}
716 		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
717 	}
718 }
719 
720 /**
721  * Convert the @p item into a Verbs specification. This function assumes that
722  * the input is valid and that there is space to insert the requested item
723  * into the flow.
724  *
725  * @param[in, out] dev_flow
726  *   Pointer to dev_flow structure.
727  * @param[in] item
728  *   Item specification.
729  * @param[in] item_flags
730  *   Parsed item flags.
731  */
732 static void
733 flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
734 			      const struct rte_flow_item *item __rte_unused,
735 			      uint64_t item_flags)
736 {
737 	struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
738 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
739 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
740 	struct ibv_flow_spec_tunnel tunnel = {
741 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
742 		.size = size,
743 	};
744 #else
745 	const struct rte_flow_item_gre *spec = item->spec;
746 	const struct rte_flow_item_gre *mask = item->mask;
747 	unsigned int size = sizeof(struct ibv_flow_spec_gre);
748 	struct ibv_flow_spec_gre tunnel = {
749 		.type = IBV_FLOW_SPEC_GRE,
750 		.size = size,
751 	};
752 
753 	if (!mask)
754 		mask = &rte_flow_item_gre_mask;
755 	if (spec) {
756 		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
757 		tunnel.val.protocol = spec->protocol;
758 		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
759 		tunnel.mask.protocol = mask->protocol;
760 		/* Remove unwanted bits from values. */
761 		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
762 		tunnel.val.protocol &= tunnel.mask.protocol;
763 		tunnel.val.key &= tunnel.mask.key;
764 	}
765 #endif
766 	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
767 		flow_verbs_item_gre_ip_protocol_update(verbs->attr,
768 						       IBV_FLOW_SPEC_IPV4_EXT,
769 						       IPPROTO_GRE);
770 	else
771 		flow_verbs_item_gre_ip_protocol_update(verbs->attr,
772 						       IBV_FLOW_SPEC_IPV6,
773 						       IPPROTO_GRE);
774 	flow_verbs_spec_add(verbs, &tunnel, size);
775 }
776 
777 /**
778  * Convert the @p action into a Verbs specification. This function assumes that
779  * the input is valid and that there is space to insert the requested action
780  * into the flow. This function also return the action that was added.
781  *
782  * @param[in, out] dev_flow
783  *   Pointer to dev_flow structure.
784  * @param[in] item
785  *   Item specification.
786  * @param[in] item_flags
787  *   Parsed item flags.
788  */
789 static void
790 flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
791 			       const struct rte_flow_item *item __rte_unused,
792 			       uint64_t item_flags __rte_unused)
793 {
794 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
795 	const struct rte_flow_item_mpls *spec = item->spec;
796 	const struct rte_flow_item_mpls *mask = item->mask;
797 	unsigned int size = sizeof(struct ibv_flow_spec_mpls);
798 	struct ibv_flow_spec_mpls mpls = {
799 		.type = IBV_FLOW_SPEC_MPLS,
800 		.size = size,
801 	};
802 
803 	if (!mask)
804 		mask = &rte_flow_item_mpls_mask;
805 	if (spec) {
806 		memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
807 		memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
808 		/* Remove unwanted bits from values.  */
809 		mpls.val.label &= mpls.mask.label;
810 	}
811 	flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
812 #endif
813 }
814 
815 /**
816  * Convert the @p action into a Verbs specification. This function assumes that
817  * the input is valid and that there is space to insert the requested action
818  * into the flow.
819  *
820  * @param[in] dev_flow
821  *   Pointer to mlx5_flow.
822  * @param[in] action
823  *   Action configuration.
824  */
825 static void
826 flow_verbs_translate_action_drop
827 	(struct mlx5_flow *dev_flow,
828 	 const struct rte_flow_action *action __rte_unused)
829 {
830 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
831 	struct ibv_flow_spec_action_drop drop = {
832 			.type = IBV_FLOW_SPEC_ACTION_DROP,
833 			.size = size,
834 	};
835 
836 	flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
837 }
838 
839 /**
840  * Convert the @p action into a Verbs specification. This function assumes that
841  * the input is valid and that there is space to insert the requested action
842  * into the flow.
843  *
844  * @param[in] dev_flow
845  *   Pointer to mlx5_flow.
846  * @param[in] action
847  *   Action configuration.
848  */
849 static void
850 flow_verbs_translate_action_queue(struct mlx5_flow *dev_flow,
851 				  const struct rte_flow_action *action)
852 {
853 	const struct rte_flow_action_queue *queue = action->conf;
854 	struct rte_flow *flow = dev_flow->flow;
855 
856 	if (flow->queue)
857 		(*flow->queue)[0] = queue->index;
858 	flow->rss.queue_num = 1;
859 }
860 
861 /**
862  * Convert the @p action into a Verbs specification. This function assumes that
863  * the input is valid and that there is space to insert the requested action
864  * into the flow.
865  *
866  * @param[in] action
867  *   Action configuration.
868  * @param[in, out] action_flags
869  *   Pointer to the detected actions.
870  * @param[in] dev_flow
871  *   Pointer to mlx5_flow.
872  */
873 static void
874 flow_verbs_translate_action_rss(struct mlx5_flow *dev_flow,
875 				const struct rte_flow_action *action)
876 {
877 	const struct rte_flow_action_rss *rss = action->conf;
878 	const uint8_t *rss_key;
879 	struct rte_flow *flow = dev_flow->flow;
880 
881 	if (flow->queue)
882 		memcpy((*flow->queue), rss->queue,
883 		       rss->queue_num * sizeof(uint16_t));
884 	flow->rss.queue_num = rss->queue_num;
885 	/* NULL RSS key indicates default RSS key. */
886 	rss_key = !rss->key ? rss_hash_default_key : rss->key;
887 	memcpy(flow->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
888 	/* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
889 	flow->rss.types = !rss->types ? ETH_RSS_IP : rss->types;
890 	flow->rss.level = rss->level;
891 }
892 
893 /**
894  * Convert the @p action into a Verbs specification. This function assumes that
895  * the input is valid and that there is space to insert the requested action
896  * into the flow.
897  *
898  * @param[in] dev_flow
899  *   Pointer to mlx5_flow.
900  * @param[in] action
901  *   Action configuration.
902  */
903 static void
904 flow_verbs_translate_action_flag
905 	(struct mlx5_flow *dev_flow,
906 	 const struct rte_flow_action *action __rte_unused)
907 {
908 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
909 	struct ibv_flow_spec_action_tag tag = {
910 		.type = IBV_FLOW_SPEC_ACTION_TAG,
911 		.size = size,
912 		.tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
913 	};
914 
915 	flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
916 }
917 
918 /**
919  * Convert the @p action into a Verbs specification. This function assumes that
920  * the input is valid and that there is space to insert the requested action
921  * into the flow.
922  *
923  * @param[in] dev_flow
924  *   Pointer to mlx5_flow.
925  * @param[in] action
926  *   Action configuration.
927  */
928 static void
929 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
930 				 const struct rte_flow_action *action)
931 {
932 	const struct rte_flow_action_mark *mark = action->conf;
933 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
934 	struct ibv_flow_spec_action_tag tag = {
935 		.type = IBV_FLOW_SPEC_ACTION_TAG,
936 		.size = size,
937 		.tag_id = mlx5_flow_mark_set(mark->id),
938 	};
939 
940 	flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
941 }
942 
943 /**
944  * Convert the @p action into a Verbs specification. This function assumes that
945  * the input is valid and that there is space to insert the requested action
946  * into the flow.
947  *
948  * @param[in] dev
949  *   Pointer to the Ethernet device structure.
950  * @param[in] action
951  *   Action configuration.
952  * @param[in] dev_flow
953  *   Pointer to mlx5_flow.
954  * @param[out] error
955  *   Pointer to error structure.
956  *
957  * @return
958  *   0 On success else a negative errno value is returned and rte_errno is set.
959  */
960 static int
961 flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
962 				  const struct rte_flow_action *action,
963 				  struct rte_eth_dev *dev,
964 				  struct rte_flow_error *error)
965 {
966 	const struct rte_flow_action_count *count = action->conf;
967 	struct rte_flow *flow = dev_flow->flow;
968 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
969 	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
970 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
971 	struct ibv_flow_spec_counter_action counter = {
972 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
973 		.size = size,
974 	};
975 #endif
976 
977 	if (!flow->counter) {
978 		flow->counter = flow_verbs_counter_new(dev, count->shared,
979 						       count->id);
980 		if (!flow->counter)
981 			return rte_flow_error_set(error, rte_errno,
982 						  RTE_FLOW_ERROR_TYPE_ACTION,
983 						  action,
984 						  "cannot get counter"
985 						  " context.");
986 	}
987 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
988 	counter.counter_set_handle = flow->counter->cs->handle;
989 	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
990 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
991 	counter.counters = flow->counter->cs;
992 	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
993 #endif
994 	return 0;
995 }
996 
997 /**
998  * Internal validation function. For validating both actions and items.
999  *
1000  * @param[in] dev
1001  *   Pointer to the Ethernet device structure.
1002  * @param[in] attr
1003  *   Pointer to the flow attributes.
1004  * @param[in] items
1005  *   Pointer to the list of items.
1006  * @param[in] actions
1007  *   Pointer to the list of actions.
1008  * @param[out] error
1009  *   Pointer to the error structure.
1010  *
1011  * @return
1012  *   0 on success, a negative errno value otherwise and rte_errno is set.
1013  */
1014 static int
1015 flow_verbs_validate(struct rte_eth_dev *dev,
1016 		    const struct rte_flow_attr *attr,
1017 		    const struct rte_flow_item items[],
1018 		    const struct rte_flow_action actions[],
1019 		    struct rte_flow_error *error)
1020 {
1021 	int ret;
1022 	uint64_t action_flags = 0;
1023 	uint64_t item_flags = 0;
1024 	uint64_t last_item = 0;
1025 	uint8_t next_protocol = 0xff;
1026 
1027 	if (items == NULL)
1028 		return -1;
1029 	ret = mlx5_flow_validate_attributes(dev, attr, error);
1030 	if (ret < 0)
1031 		return ret;
1032 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1033 		int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1034 		int ret = 0;
1035 
1036 		switch (items->type) {
1037 		case RTE_FLOW_ITEM_TYPE_VOID:
1038 			break;
1039 		case RTE_FLOW_ITEM_TYPE_ETH:
1040 			ret = mlx5_flow_validate_item_eth(items, item_flags,
1041 							  error);
1042 			if (ret < 0)
1043 				return ret;
1044 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1045 					     MLX5_FLOW_LAYER_OUTER_L2;
1046 			break;
1047 		case RTE_FLOW_ITEM_TYPE_VLAN:
1048 			ret = mlx5_flow_validate_item_vlan(items, item_flags,
1049 							   error);
1050 			if (ret < 0)
1051 				return ret;
1052 			last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1053 					      MLX5_FLOW_LAYER_INNER_VLAN) :
1054 					     (MLX5_FLOW_LAYER_OUTER_L2 |
1055 					      MLX5_FLOW_LAYER_OUTER_VLAN);
1056 			break;
1057 		case RTE_FLOW_ITEM_TYPE_IPV4:
1058 			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1059 							   NULL, error);
1060 			if (ret < 0)
1061 				return ret;
1062 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1063 					     MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1064 			if (items->mask != NULL &&
1065 			    ((const struct rte_flow_item_ipv4 *)
1066 			     items->mask)->hdr.next_proto_id) {
1067 				next_protocol =
1068 					((const struct rte_flow_item_ipv4 *)
1069 					 (items->spec))->hdr.next_proto_id;
1070 				next_protocol &=
1071 					((const struct rte_flow_item_ipv4 *)
1072 					 (items->mask))->hdr.next_proto_id;
1073 			} else {
1074 				/* Reset for inner layer. */
1075 				next_protocol = 0xff;
1076 			}
1077 			break;
1078 		case RTE_FLOW_ITEM_TYPE_IPV6:
1079 			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1080 							   NULL, error);
1081 			if (ret < 0)
1082 				return ret;
1083 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1084 					     MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1085 			if (items->mask != NULL &&
1086 			    ((const struct rte_flow_item_ipv6 *)
1087 			     items->mask)->hdr.proto) {
1088 				next_protocol =
1089 					((const struct rte_flow_item_ipv6 *)
1090 					 items->spec)->hdr.proto;
1091 				next_protocol &=
1092 					((const struct rte_flow_item_ipv6 *)
1093 					 items->mask)->hdr.proto;
1094 			} else {
1095 				/* Reset for inner layer. */
1096 				next_protocol = 0xff;
1097 			}
1098 			break;
1099 		case RTE_FLOW_ITEM_TYPE_UDP:
1100 			ret = mlx5_flow_validate_item_udp(items, item_flags,
1101 							  next_protocol,
1102 							  error);
1103 			if (ret < 0)
1104 				return ret;
1105 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1106 					     MLX5_FLOW_LAYER_OUTER_L4_UDP;
1107 			break;
1108 		case RTE_FLOW_ITEM_TYPE_TCP:
1109 			ret = mlx5_flow_validate_item_tcp
1110 						(items, item_flags,
1111 						 next_protocol,
1112 						 &rte_flow_item_tcp_mask,
1113 						 error);
1114 			if (ret < 0)
1115 				return ret;
1116 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1117 					     MLX5_FLOW_LAYER_OUTER_L4_TCP;
1118 			break;
1119 		case RTE_FLOW_ITEM_TYPE_VXLAN:
1120 			ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1121 							    error);
1122 			if (ret < 0)
1123 				return ret;
1124 			last_item = MLX5_FLOW_LAYER_VXLAN;
1125 			break;
1126 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1127 			ret = mlx5_flow_validate_item_vxlan_gpe(items,
1128 								item_flags,
1129 								dev, error);
1130 			if (ret < 0)
1131 				return ret;
1132 			last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
1133 			break;
1134 		case RTE_FLOW_ITEM_TYPE_GRE:
1135 			ret = mlx5_flow_validate_item_gre(items, item_flags,
1136 							  next_protocol, error);
1137 			if (ret < 0)
1138 				return ret;
1139 			last_item = MLX5_FLOW_LAYER_GRE;
1140 			break;
1141 		case RTE_FLOW_ITEM_TYPE_MPLS:
1142 			ret = mlx5_flow_validate_item_mpls(dev, items,
1143 							   item_flags,
1144 							   last_item, error);
1145 			if (ret < 0)
1146 				return ret;
1147 			last_item = MLX5_FLOW_LAYER_MPLS;
1148 			break;
1149 		default:
1150 			return rte_flow_error_set(error, ENOTSUP,
1151 						  RTE_FLOW_ERROR_TYPE_ITEM,
1152 						  NULL, "item not supported");
1153 		}
1154 		item_flags |= last_item;
1155 	}
1156 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1157 		switch (actions->type) {
1158 		case RTE_FLOW_ACTION_TYPE_VOID:
1159 			break;
1160 		case RTE_FLOW_ACTION_TYPE_FLAG:
1161 			ret = mlx5_flow_validate_action_flag(action_flags,
1162 							     attr,
1163 							     error);
1164 			if (ret < 0)
1165 				return ret;
1166 			action_flags |= MLX5_FLOW_ACTION_FLAG;
1167 			break;
1168 		case RTE_FLOW_ACTION_TYPE_MARK:
1169 			ret = mlx5_flow_validate_action_mark(actions,
1170 							     action_flags,
1171 							     attr,
1172 							     error);
1173 			if (ret < 0)
1174 				return ret;
1175 			action_flags |= MLX5_FLOW_ACTION_MARK;
1176 			break;
1177 		case RTE_FLOW_ACTION_TYPE_DROP:
1178 			ret = mlx5_flow_validate_action_drop(action_flags,
1179 							     attr,
1180 							     error);
1181 			if (ret < 0)
1182 				return ret;
1183 			action_flags |= MLX5_FLOW_ACTION_DROP;
1184 			break;
1185 		case RTE_FLOW_ACTION_TYPE_QUEUE:
1186 			ret = mlx5_flow_validate_action_queue(actions,
1187 							      action_flags, dev,
1188 							      attr,
1189 							      error);
1190 			if (ret < 0)
1191 				return ret;
1192 			action_flags |= MLX5_FLOW_ACTION_QUEUE;
1193 			break;
1194 		case RTE_FLOW_ACTION_TYPE_RSS:
1195 			ret = mlx5_flow_validate_action_rss(actions,
1196 							    action_flags, dev,
1197 							    attr,
1198 							    error);
1199 			if (ret < 0)
1200 				return ret;
1201 			action_flags |= MLX5_FLOW_ACTION_RSS;
1202 			break;
1203 		case RTE_FLOW_ACTION_TYPE_COUNT:
1204 			ret = mlx5_flow_validate_action_count(dev, attr, error);
1205 			if (ret < 0)
1206 				return ret;
1207 			action_flags |= MLX5_FLOW_ACTION_COUNT;
1208 			break;
1209 		default:
1210 			return rte_flow_error_set(error, ENOTSUP,
1211 						  RTE_FLOW_ERROR_TYPE_ACTION,
1212 						  actions,
1213 						  "action not supported");
1214 		}
1215 	}
1216 	if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1217 		return rte_flow_error_set(error, EINVAL,
1218 					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
1219 					  "no fate action is found");
1220 	return 0;
1221 }
1222 
1223 /**
1224  * Calculate the required bytes that are needed for the action part of the verbs
1225  * flow.
1226  *
1227  * @param[in] actions
1228  *   Pointer to the list of actions.
1229  *
1230  * @return
1231  *   The size of the memory needed for all actions.
1232  */
1233 static int
1234 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1235 {
1236 	int size = 0;
1237 
1238 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1239 		switch (actions->type) {
1240 		case RTE_FLOW_ACTION_TYPE_VOID:
1241 			break;
1242 		case RTE_FLOW_ACTION_TYPE_FLAG:
1243 			size += sizeof(struct ibv_flow_spec_action_tag);
1244 			break;
1245 		case RTE_FLOW_ACTION_TYPE_MARK:
1246 			size += sizeof(struct ibv_flow_spec_action_tag);
1247 			break;
1248 		case RTE_FLOW_ACTION_TYPE_DROP:
1249 			size += sizeof(struct ibv_flow_spec_action_drop);
1250 			break;
1251 		case RTE_FLOW_ACTION_TYPE_QUEUE:
1252 			break;
1253 		case RTE_FLOW_ACTION_TYPE_RSS:
1254 			break;
1255 		case RTE_FLOW_ACTION_TYPE_COUNT:
1256 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1257 	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1258 			size += sizeof(struct ibv_flow_spec_counter_action);
1259 #endif
1260 			break;
1261 		default:
1262 			break;
1263 		}
1264 	}
1265 	return size;
1266 }
1267 
1268 /**
1269  * Calculate the required bytes that are needed for the item part of the verbs
1270  * flow.
1271  *
1272  * @param[in] items
1273  *   Pointer to the list of items.
1274  *
1275  * @return
1276  *   The size of the memory needed for all items.
1277  */
1278 static int
1279 flow_verbs_get_items_size(const struct rte_flow_item items[])
1280 {
1281 	int size = 0;
1282 
1283 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1284 		switch (items->type) {
1285 		case RTE_FLOW_ITEM_TYPE_VOID:
1286 			break;
1287 		case RTE_FLOW_ITEM_TYPE_ETH:
1288 			size += sizeof(struct ibv_flow_spec_eth);
1289 			break;
1290 		case RTE_FLOW_ITEM_TYPE_VLAN:
1291 			size += sizeof(struct ibv_flow_spec_eth);
1292 			break;
1293 		case RTE_FLOW_ITEM_TYPE_IPV4:
1294 			size += sizeof(struct ibv_flow_spec_ipv4_ext);
1295 			break;
1296 		case RTE_FLOW_ITEM_TYPE_IPV6:
1297 			size += sizeof(struct ibv_flow_spec_ipv6);
1298 			break;
1299 		case RTE_FLOW_ITEM_TYPE_UDP:
1300 			size += sizeof(struct ibv_flow_spec_tcp_udp);
1301 			break;
1302 		case RTE_FLOW_ITEM_TYPE_TCP:
1303 			size += sizeof(struct ibv_flow_spec_tcp_udp);
1304 			break;
1305 		case RTE_FLOW_ITEM_TYPE_VXLAN:
1306 			size += sizeof(struct ibv_flow_spec_tunnel);
1307 			break;
1308 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1309 			size += sizeof(struct ibv_flow_spec_tunnel);
1310 			break;
1311 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1312 		case RTE_FLOW_ITEM_TYPE_GRE:
1313 			size += sizeof(struct ibv_flow_spec_gre);
1314 			break;
1315 		case RTE_FLOW_ITEM_TYPE_MPLS:
1316 			size += sizeof(struct ibv_flow_spec_mpls);
1317 			break;
1318 #else
1319 		case RTE_FLOW_ITEM_TYPE_GRE:
1320 			size += sizeof(struct ibv_flow_spec_tunnel);
1321 			break;
1322 #endif
1323 		default:
1324 			break;
1325 		}
1326 	}
1327 	return size;
1328 }
1329 
1330 /**
1331  * Internal preparation function. Allocate mlx5_flow with the required size.
1332  * The required size is calculate based on the actions and items. This function
1333  * also returns the detected actions and items for later use.
1334  *
1335  * @param[in] attr
1336  *   Pointer to the flow attributes.
1337  * @param[in] items
1338  *   Pointer to the list of items.
1339  * @param[in] actions
1340  *   Pointer to the list of actions.
1341  * @param[out] error
1342  *   Pointer to the error structure.
1343  *
1344  * @return
1345  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1346  *   is set.
1347  */
1348 static struct mlx5_flow *
1349 flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
1350 		   const struct rte_flow_item items[],
1351 		   const struct rte_flow_action actions[],
1352 		   struct rte_flow_error *error)
1353 {
1354 	uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
1355 	struct mlx5_flow *flow;
1356 
1357 	size += flow_verbs_get_actions_size(actions);
1358 	size += flow_verbs_get_items_size(items);
1359 	flow = rte_calloc(__func__, 1, size, 0);
1360 	if (!flow) {
1361 		rte_flow_error_set(error, ENOMEM,
1362 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1363 				   "not enough memory to create flow");
1364 		return NULL;
1365 	}
1366 	flow->verbs.attr = (void *)(flow + 1);
1367 	flow->verbs.specs =
1368 		(uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
1369 	return flow;
1370 }
1371 
1372 /**
1373  * Fill the flow with verb spec.
1374  *
1375  * @param[in] dev
1376  *   Pointer to Ethernet device.
1377  * @param[in, out] dev_flow
1378  *   Pointer to the mlx5 flow.
1379  * @param[in] attr
1380  *   Pointer to the flow attributes.
1381  * @param[in] items
1382  *   Pointer to the list of items.
1383  * @param[in] actions
1384  *   Pointer to the list of actions.
1385  * @param[out] error
1386  *   Pointer to the error structure.
1387  *
1388  * @return
1389  *   0 on success, else a negative errno value otherwise and rte_errno is set.
1390  */
1391 static int
1392 flow_verbs_translate(struct rte_eth_dev *dev,
1393 		     struct mlx5_flow *dev_flow,
1394 		     const struct rte_flow_attr *attr,
1395 		     const struct rte_flow_item items[],
1396 		     const struct rte_flow_action actions[],
1397 		     struct rte_flow_error *error)
1398 {
1399 	struct rte_flow *flow = dev_flow->flow;
1400 	uint64_t item_flags = 0;
1401 	uint64_t action_flags = 0;
1402 	uint64_t priority = attr->priority;
1403 	uint32_t subpriority = 0;
1404 	struct mlx5_priv *priv = dev->data->dev_private;
1405 
1406 	if (priority == MLX5_FLOW_PRIO_RSVD)
1407 		priority = priv->config.flow_prio - 1;
1408 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1409 		int ret;
1410 
1411 		switch (actions->type) {
1412 		case RTE_FLOW_ACTION_TYPE_VOID:
1413 			break;
1414 		case RTE_FLOW_ACTION_TYPE_FLAG:
1415 			flow_verbs_translate_action_flag(dev_flow, actions);
1416 			action_flags |= MLX5_FLOW_ACTION_FLAG;
1417 			break;
1418 		case RTE_FLOW_ACTION_TYPE_MARK:
1419 			flow_verbs_translate_action_mark(dev_flow, actions);
1420 			action_flags |= MLX5_FLOW_ACTION_MARK;
1421 			break;
1422 		case RTE_FLOW_ACTION_TYPE_DROP:
1423 			flow_verbs_translate_action_drop(dev_flow, actions);
1424 			action_flags |= MLX5_FLOW_ACTION_DROP;
1425 			break;
1426 		case RTE_FLOW_ACTION_TYPE_QUEUE:
1427 			flow_verbs_translate_action_queue(dev_flow, actions);
1428 			action_flags |= MLX5_FLOW_ACTION_QUEUE;
1429 			break;
1430 		case RTE_FLOW_ACTION_TYPE_RSS:
1431 			flow_verbs_translate_action_rss(dev_flow, actions);
1432 			action_flags |= MLX5_FLOW_ACTION_RSS;
1433 			break;
1434 		case RTE_FLOW_ACTION_TYPE_COUNT:
1435 			ret = flow_verbs_translate_action_count(dev_flow,
1436 								actions,
1437 								dev, error);
1438 			if (ret < 0)
1439 				return ret;
1440 			action_flags |= MLX5_FLOW_ACTION_COUNT;
1441 			break;
1442 		default:
1443 			return rte_flow_error_set(error, ENOTSUP,
1444 						  RTE_FLOW_ERROR_TYPE_ACTION,
1445 						  actions,
1446 						  "action not supported");
1447 		}
1448 	}
1449 	flow->actions = action_flags;
1450 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1451 		int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1452 
1453 		switch (items->type) {
1454 		case RTE_FLOW_ITEM_TYPE_VOID:
1455 			break;
1456 		case RTE_FLOW_ITEM_TYPE_ETH:
1457 			flow_verbs_translate_item_eth(dev_flow, items,
1458 						      item_flags);
1459 			subpriority = MLX5_PRIORITY_MAP_L2;
1460 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1461 					       MLX5_FLOW_LAYER_OUTER_L2;
1462 			break;
1463 		case RTE_FLOW_ITEM_TYPE_VLAN:
1464 			flow_verbs_translate_item_vlan(dev_flow, items,
1465 						       item_flags);
1466 			subpriority = MLX5_PRIORITY_MAP_L2;
1467 			item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1468 						MLX5_FLOW_LAYER_INNER_VLAN) :
1469 					       (MLX5_FLOW_LAYER_OUTER_L2 |
1470 						MLX5_FLOW_LAYER_OUTER_VLAN);
1471 			break;
1472 		case RTE_FLOW_ITEM_TYPE_IPV4:
1473 			flow_verbs_translate_item_ipv4(dev_flow, items,
1474 						       item_flags);
1475 			subpriority = MLX5_PRIORITY_MAP_L3;
1476 			dev_flow->verbs.hash_fields |=
1477 				mlx5_flow_hashfields_adjust
1478 					(dev_flow, tunnel,
1479 					 MLX5_IPV4_LAYER_TYPES,
1480 					 MLX5_IPV4_IBV_RX_HASH);
1481 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1482 					       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1483 			break;
1484 		case RTE_FLOW_ITEM_TYPE_IPV6:
1485 			flow_verbs_translate_item_ipv6(dev_flow, items,
1486 						       item_flags);
1487 			subpriority = MLX5_PRIORITY_MAP_L3;
1488 			dev_flow->verbs.hash_fields |=
1489 				mlx5_flow_hashfields_adjust
1490 					(dev_flow, tunnel,
1491 					 MLX5_IPV6_LAYER_TYPES,
1492 					 MLX5_IPV6_IBV_RX_HASH);
1493 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1494 					       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1495 			break;
1496 		case RTE_FLOW_ITEM_TYPE_TCP:
1497 			flow_verbs_translate_item_tcp(dev_flow, items,
1498 						      item_flags);
1499 			subpriority = MLX5_PRIORITY_MAP_L4;
1500 			dev_flow->verbs.hash_fields |=
1501 				mlx5_flow_hashfields_adjust
1502 					(dev_flow, tunnel, ETH_RSS_TCP,
1503 					 (IBV_RX_HASH_SRC_PORT_TCP |
1504 					  IBV_RX_HASH_DST_PORT_TCP));
1505 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1506 					       MLX5_FLOW_LAYER_OUTER_L4_TCP;
1507 			break;
1508 		case RTE_FLOW_ITEM_TYPE_UDP:
1509 			flow_verbs_translate_item_udp(dev_flow, items,
1510 						      item_flags);
1511 			subpriority = MLX5_PRIORITY_MAP_L4;
1512 			dev_flow->verbs.hash_fields |=
1513 				mlx5_flow_hashfields_adjust
1514 					(dev_flow, tunnel, ETH_RSS_UDP,
1515 					 (IBV_RX_HASH_SRC_PORT_UDP |
1516 					  IBV_RX_HASH_DST_PORT_UDP));
1517 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1518 					       MLX5_FLOW_LAYER_OUTER_L4_UDP;
1519 			break;
1520 		case RTE_FLOW_ITEM_TYPE_VXLAN:
1521 			flow_verbs_translate_item_vxlan(dev_flow, items,
1522 							item_flags);
1523 			subpriority = MLX5_PRIORITY_MAP_L2;
1524 			item_flags |= MLX5_FLOW_LAYER_VXLAN;
1525 			break;
1526 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1527 			flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
1528 							    item_flags);
1529 			subpriority = MLX5_PRIORITY_MAP_L2;
1530 			item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1531 			break;
1532 		case RTE_FLOW_ITEM_TYPE_GRE:
1533 			flow_verbs_translate_item_gre(dev_flow, items,
1534 						      item_flags);
1535 			subpriority = MLX5_PRIORITY_MAP_L2;
1536 			item_flags |= MLX5_FLOW_LAYER_GRE;
1537 			break;
1538 		case RTE_FLOW_ITEM_TYPE_MPLS:
1539 			flow_verbs_translate_item_mpls(dev_flow, items,
1540 						       item_flags);
1541 			subpriority = MLX5_PRIORITY_MAP_L2;
1542 			item_flags |= MLX5_FLOW_LAYER_MPLS;
1543 			break;
1544 		default:
1545 			return rte_flow_error_set(error, ENOTSUP,
1546 						  RTE_FLOW_ERROR_TYPE_ITEM,
1547 						  NULL,
1548 						  "item not supported");
1549 		}
1550 	}
1551 	dev_flow->layers = item_flags;
1552 	dev_flow->verbs.attr->priority =
1553 		mlx5_flow_adjust_priority(dev, priority, subpriority);
1554 	dev_flow->verbs.attr->port = (uint8_t)priv->ibv_port;
1555 	return 0;
1556 }
1557 
1558 /**
1559  * Remove the flow from the NIC but keeps it in memory.
1560  *
1561  * @param[in] dev
1562  *   Pointer to the Ethernet device structure.
1563  * @param[in, out] flow
1564  *   Pointer to flow structure.
1565  */
1566 static void
1567 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1568 {
1569 	struct mlx5_flow_verbs *verbs;
1570 	struct mlx5_flow *dev_flow;
1571 
1572 	if (!flow)
1573 		return;
1574 	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1575 		verbs = &dev_flow->verbs;
1576 		if (verbs->flow) {
1577 			claim_zero(mlx5_glue->destroy_flow(verbs->flow));
1578 			verbs->flow = NULL;
1579 		}
1580 		if (verbs->hrxq) {
1581 			if (flow->actions & MLX5_FLOW_ACTION_DROP)
1582 				mlx5_hrxq_drop_release(dev);
1583 			else
1584 				mlx5_hrxq_release(dev, verbs->hrxq);
1585 			verbs->hrxq = NULL;
1586 		}
1587 	}
1588 }
1589 
1590 /**
1591  * Remove the flow from the NIC and the memory.
1592  *
1593  * @param[in] dev
1594  *   Pointer to the Ethernet device structure.
1595  * @param[in, out] flow
1596  *   Pointer to flow structure.
1597  */
1598 static void
1599 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1600 {
1601 	struct mlx5_flow *dev_flow;
1602 
1603 	if (!flow)
1604 		return;
1605 	flow_verbs_remove(dev, flow);
1606 	while (!LIST_EMPTY(&flow->dev_flows)) {
1607 		dev_flow = LIST_FIRST(&flow->dev_flows);
1608 		LIST_REMOVE(dev_flow, next);
1609 		rte_free(dev_flow);
1610 	}
1611 	if (flow->counter) {
1612 		flow_verbs_counter_release(flow->counter);
1613 		flow->counter = NULL;
1614 	}
1615 }
1616 
1617 /**
1618  * Apply the flow to the NIC.
1619  *
1620  * @param[in] dev
1621  *   Pointer to the Ethernet device structure.
1622  * @param[in, out] flow
1623  *   Pointer to flow structure.
1624  * @param[out] error
1625  *   Pointer to error structure.
1626  *
1627  * @return
1628  *   0 on success, a negative errno value otherwise and rte_errno is set.
1629  */
1630 static int
1631 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1632 		 struct rte_flow_error *error)
1633 {
1634 	struct mlx5_flow_verbs *verbs;
1635 	struct mlx5_flow *dev_flow;
1636 	int err;
1637 
1638 	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1639 		verbs = &dev_flow->verbs;
1640 		if (flow->actions & MLX5_FLOW_ACTION_DROP) {
1641 			verbs->hrxq = mlx5_hrxq_drop_new(dev);
1642 			if (!verbs->hrxq) {
1643 				rte_flow_error_set
1644 					(error, errno,
1645 					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1646 					 "cannot get drop hash queue");
1647 				goto error;
1648 			}
1649 		} else {
1650 			struct mlx5_hrxq *hrxq;
1651 
1652 			hrxq = mlx5_hrxq_get(dev, flow->key,
1653 					     MLX5_RSS_HASH_KEY_LEN,
1654 					     verbs->hash_fields,
1655 					     (*flow->queue),
1656 					     flow->rss.queue_num);
1657 			if (!hrxq)
1658 				hrxq = mlx5_hrxq_new(dev, flow->key,
1659 						     MLX5_RSS_HASH_KEY_LEN,
1660 						     verbs->hash_fields,
1661 						     (*flow->queue),
1662 						     flow->rss.queue_num,
1663 						     !!(dev_flow->layers &
1664 						      MLX5_FLOW_LAYER_TUNNEL));
1665 			if (!hrxq) {
1666 				rte_flow_error_set
1667 					(error, rte_errno,
1668 					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1669 					 "cannot get hash queue");
1670 				goto error;
1671 			}
1672 			verbs->hrxq = hrxq;
1673 		}
1674 		verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
1675 						     verbs->attr);
1676 		if (!verbs->flow) {
1677 			rte_flow_error_set(error, errno,
1678 					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1679 					   NULL,
1680 					   "hardware refuses to create flow");
1681 			goto error;
1682 		}
1683 	}
1684 	return 0;
1685 error:
1686 	err = rte_errno; /* Save rte_errno before cleanup. */
1687 	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1688 		verbs = &dev_flow->verbs;
1689 		if (verbs->hrxq) {
1690 			if (flow->actions & MLX5_FLOW_ACTION_DROP)
1691 				mlx5_hrxq_drop_release(dev);
1692 			else
1693 				mlx5_hrxq_release(dev, verbs->hrxq);
1694 			verbs->hrxq = NULL;
1695 		}
1696 	}
1697 	rte_errno = err; /* Restore rte_errno. */
1698 	return -rte_errno;
1699 }
1700 
1701 /**
1702  * Query a flow.
1703  *
1704  * @see rte_flow_query()
1705  * @see rte_flow_ops
1706  */
1707 static int
1708 flow_verbs_query(struct rte_eth_dev *dev,
1709 		 struct rte_flow *flow,
1710 		 const struct rte_flow_action *actions,
1711 		 void *data,
1712 		 struct rte_flow_error *error)
1713 {
1714 	int ret = -EINVAL;
1715 
1716 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1717 		switch (actions->type) {
1718 		case RTE_FLOW_ACTION_TYPE_VOID:
1719 			break;
1720 		case RTE_FLOW_ACTION_TYPE_COUNT:
1721 			ret = flow_verbs_counter_query(dev, flow, data, error);
1722 			break;
1723 		default:
1724 			return rte_flow_error_set(error, ENOTSUP,
1725 						  RTE_FLOW_ERROR_TYPE_ACTION,
1726 						  actions,
1727 						  "action not supported");
1728 		}
1729 	}
1730 	return ret;
1731 }
1732 
1733 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
1734 	.validate = flow_verbs_validate,
1735 	.prepare = flow_verbs_prepare,
1736 	.translate = flow_verbs_translate,
1737 	.apply = flow_verbs_apply,
1738 	.remove = flow_verbs_remove,
1739 	.destroy = flow_verbs_destroy,
1740 	.query = flow_verbs_query,
1741 };
1742