xref: /dpdk/drivers/net/mlx5/mlx5_flow_verbs.c (revision e11bdd37745229bf26b557305c07d118c3dbaad7)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4 
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10 
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20 
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28 
29 #include <mlx5_glue.h>
30 #include <mlx5_prm.h>
31 
32 #include "mlx5_defs.h"
33 #include "mlx5.h"
34 #include "mlx5_flow.h"
35 #include "mlx5_rxtx.h"
36 
/*
 * Verbs spec type modifier: IBV_FLOW_SPEC_INNER when any tunnel layer bit is
 * set in the parsed item flags, 0 otherwise.
 */
#define VERBS_SPEC_INNER(item_flags) \
	((((item_flags) & MLX5_FLOW_LAYER_TUNNEL) != 0) ? IBV_FLOW_SPEC_INNER : 0)
39 
40 /**
41  * Get Verbs flow counter by index.
42  *
43  * @param[in] dev
44  *   Pointer to the Ethernet device structure.
45  * @param[in] idx
46  *   mlx5 flow counter index in the container.
47  * @param[out] ppool
48  *   mlx5 flow counter pool in the container,
49  *
50  * @return
51  *   A pointer to the counter, NULL otherwise.
52  */
53 static struct mlx5_flow_counter *
54 flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev,
55 			      uint32_t idx,
56 			      struct mlx5_flow_counter_pool **ppool)
57 {
58 	struct mlx5_priv *priv = dev->data->dev_private;
59 	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
60 									0);
61 	struct mlx5_flow_counter_pool *pool;
62 
63 	idx--;
64 	pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
65 	MLX5_ASSERT(pool);
66 	if (ppool)
67 		*ppool = pool;
68 	return MLX5_POOL_GET_CNT(pool, idx % MLX5_COUNTERS_PER_POOL);
69 }
70 
71 /**
72  * Create Verbs flow counter with Verbs library.
73  *
74  * @param[in] dev
75  *   Pointer to the Ethernet device structure.
76  * @param[in, out] counter
77  *   mlx5 flow counter object, contains the counter id,
78  *   handle of created Verbs flow counter is returned
79  *   in cs field (if counters are supported).
80  *
81  * @return
82  *   0 On success else a negative errno value is returned
83  *   and rte_errno is set.
84  */
85 static int
86 flow_verbs_counter_create(struct rte_eth_dev *dev,
87 			  struct mlx5_flow_counter_ext *counter)
88 {
89 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
90 	struct mlx5_priv *priv = dev->data->dev_private;
91 	struct ibv_context *ctx = priv->sh->ctx;
92 	struct ibv_counter_set_init_attr init = {
93 			 .counter_set_id = counter->id};
94 
95 	counter->cs = mlx5_glue->create_counter_set(ctx, &init);
96 	if (!counter->cs) {
97 		rte_errno = ENOTSUP;
98 		return -ENOTSUP;
99 	}
100 	return 0;
101 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
102 	struct mlx5_priv *priv = dev->data->dev_private;
103 	struct ibv_context *ctx = priv->sh->ctx;
104 	struct ibv_counters_init_attr init = {0};
105 	struct ibv_counter_attach_attr attach;
106 	int ret;
107 
108 	memset(&attach, 0, sizeof(attach));
109 	counter->cs = mlx5_glue->create_counters(ctx, &init);
110 	if (!counter->cs) {
111 		rte_errno = ENOTSUP;
112 		return -ENOTSUP;
113 	}
114 	attach.counter_desc = IBV_COUNTER_PACKETS;
115 	attach.index = 0;
116 	ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
117 	if (!ret) {
118 		attach.counter_desc = IBV_COUNTER_BYTES;
119 		attach.index = 1;
120 		ret = mlx5_glue->attach_counters
121 					(counter->cs, &attach, NULL);
122 	}
123 	if (ret) {
124 		claim_zero(mlx5_glue->destroy_counters(counter->cs));
125 		counter->cs = NULL;
126 		rte_errno = ret;
127 		return -ret;
128 	}
129 	return 0;
130 #else
131 	(void)dev;
132 	(void)counter;
133 	rte_errno = ENOTSUP;
134 	return -ENOTSUP;
135 #endif
136 }
137 
138 /**
139  * Get a flow counter.
140  *
141  * @param[in] dev
142  *   Pointer to the Ethernet device structure.
143  * @param[in] shared
144  *   Indicate if this counter is shared with other flows.
145  * @param[in] id
146  *   Counter identifier.
147  *
148  * @return
149  *   Index to the counter, 0 otherwise and rte_errno is set.
150  */
151 static uint32_t
152 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
153 {
154 	struct mlx5_priv *priv = dev->data->dev_private;
155 	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
156 									0);
157 	struct mlx5_flow_counter_pool *pool = NULL;
158 	struct mlx5_flow_counter_ext *cnt_ext = NULL;
159 	struct mlx5_flow_counter *cnt = NULL;
160 	uint32_t n_valid = rte_atomic16_read(&cont->n_valid);
161 	uint32_t pool_idx;
162 	uint32_t i;
163 	int ret;
164 
165 	if (shared) {
166 		for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) {
167 			pool = cont->pools[pool_idx];
168 			for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
169 				cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i);
170 				if (cnt_ext->shared && cnt_ext->id == id) {
171 					cnt_ext->ref_cnt++;
172 					return MLX5_MAKE_CNT_IDX(pool_idx, i);
173 				}
174 			}
175 		}
176 	}
177 	for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) {
178 		pool = cont->pools[pool_idx];
179 		if (!pool)
180 			continue;
181 		cnt = TAILQ_FIRST(&pool->counters);
182 		if (cnt)
183 			break;
184 	}
185 	if (!cnt) {
186 		struct mlx5_flow_counter_pool **pools;
187 		uint32_t size;
188 
189 		if (n_valid == cont->n) {
190 			/* Resize the container pool array. */
191 			size = sizeof(struct mlx5_flow_counter_pool *) *
192 				     (n_valid + MLX5_CNT_CONTAINER_RESIZE);
193 			pools = rte_zmalloc(__func__, size, 0);
194 			if (!pools)
195 				return 0;
196 			if (n_valid) {
197 				memcpy(pools, cont->pools,
198 				       sizeof(struct mlx5_flow_counter_pool *) *
199 				       n_valid);
200 				rte_free(cont->pools);
201 			}
202 			cont->pools = pools;
203 			cont->n += MLX5_CNT_CONTAINER_RESIZE;
204 		}
205 		/* Allocate memory for new pool*/
206 		size = sizeof(*pool) + sizeof(*cnt_ext) *
207 		       MLX5_COUNTERS_PER_POOL;
208 		pool = rte_calloc(__func__, 1, size, 0);
209 		if (!pool)
210 			return 0;
211 		for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
212 			cnt = MLX5_POOL_GET_CNT(pool, i);
213 			TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
214 		}
215 		cnt = MLX5_POOL_GET_CNT(pool, 0);
216 		cont->pools[n_valid] = pool;
217 		pool_idx = n_valid;
218 		rte_atomic16_add(&cont->n_valid, 1);
219 		TAILQ_INSERT_HEAD(&cont->pool_list, pool, next);
220 	}
221 	i = MLX5_CNT_ARRAY_IDX(pool, cnt);
222 	cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i);
223 	cnt_ext->id = id;
224 	cnt_ext->shared = shared;
225 	cnt_ext->ref_cnt = 1;
226 	cnt->hits = 0;
227 	cnt->bytes = 0;
228 	/* Create counter with Verbs. */
229 	ret = flow_verbs_counter_create(dev, cnt_ext);
230 	if (!ret) {
231 		TAILQ_REMOVE(&pool->counters, cnt, next);
232 		return MLX5_MAKE_CNT_IDX(pool_idx, i);
233 	}
234 	/* Some error occurred in Verbs library. */
235 	rte_errno = -ret;
236 	return 0;
237 }
238 
239 /**
240  * Release a flow counter.
241  *
242  * @param[in] dev
243  *   Pointer to the Ethernet device structure.
244  * @param[in] counter
245  *   Index to the counter handler.
246  */
247 static void
248 flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter)
249 {
250 	struct mlx5_flow_counter_pool *pool;
251 	struct mlx5_flow_counter *cnt;
252 	struct mlx5_flow_counter_ext *cnt_ext;
253 
254 	cnt = flow_verbs_counter_get_by_idx(dev, counter,
255 					    &pool);
256 	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
257 	if (--cnt_ext->ref_cnt == 0) {
258 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
259 		claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs));
260 		cnt_ext->cs = NULL;
261 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
262 		claim_zero(mlx5_glue->destroy_counters(cnt_ext->cs));
263 		cnt_ext->cs = NULL;
264 #endif
265 		TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
266 	}
267 }
268 
269 /**
270  * Query a flow counter via Verbs library call.
271  *
272  * @see rte_flow_query()
273  * @see rte_flow_ops
274  */
275 static int
276 flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
277 			 struct rte_flow *flow, void *data,
278 			 struct rte_flow_error *error)
279 {
280 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
281 	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
282 	if (flow->counter) {
283 		struct mlx5_flow_counter_pool *pool;
284 		struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx
285 						(dev, flow->counter, &pool);
286 		struct mlx5_flow_counter_ext *cnt_ext = MLX5_CNT_TO_CNT_EXT
287 						(pool, cnt);
288 		struct rte_flow_query_count *qc = data;
289 		uint64_t counters[2] = {0, 0};
290 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
291 		struct ibv_query_counter_set_attr query_cs_attr = {
292 			.cs = cnt_ext->cs,
293 			.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
294 		};
295 		struct ibv_counter_set_data query_out = {
296 			.out = counters,
297 			.outlen = 2 * sizeof(uint64_t),
298 		};
299 		int err = mlx5_glue->query_counter_set(&query_cs_attr,
300 						       &query_out);
301 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
302 		int err = mlx5_glue->query_counters
303 			       (cnt_ext->cs, counters,
304 				RTE_DIM(counters),
305 				IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
306 #endif
307 		if (err)
308 			return rte_flow_error_set
309 				(error, err,
310 				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
311 				 NULL,
312 				 "cannot read counter");
313 		qc->hits_set = 1;
314 		qc->bytes_set = 1;
315 		qc->hits = counters[0] - cnt->hits;
316 		qc->bytes = counters[1] - cnt->bytes;
317 		if (qc->reset) {
318 			cnt->hits = counters[0];
319 			cnt->bytes = counters[1];
320 		}
321 		return 0;
322 	}
323 	return rte_flow_error_set(error, EINVAL,
324 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
325 				  NULL,
326 				  "flow does not have counter");
327 #else
328 	(void)flow;
329 	(void)data;
330 	return rte_flow_error_set(error, ENOTSUP,
331 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
332 				  NULL,
333 				  "counters are not available");
334 #endif
335 }
336 
337 /**
338  * Add a verbs item specification into @p verbs.
339  *
340  * @param[out] verbs
341  *   Pointer to verbs structure.
342  * @param[in] src
343  *   Create specification.
344  * @param[in] size
345  *   Size in bytes of the specification to copy.
346  */
347 static void
348 flow_verbs_spec_add(struct mlx5_flow_verbs_workspace *verbs,
349 		    void *src, unsigned int size)
350 {
351 	void *dst;
352 
353 	if (!verbs)
354 		return;
355 	MLX5_ASSERT(verbs->specs);
356 	dst = (void *)(verbs->specs + verbs->size);
357 	memcpy(dst, src, size);
358 	++verbs->attr.num_of_specs;
359 	verbs->size += size;
360 }
361 
362 /**
363  * Convert the @p item into a Verbs specification. This function assumes that
364  * the input is valid and that there is space to insert the requested item
365  * into the flow.
366  *
367  * @param[in, out] dev_flow
368  *   Pointer to dev_flow structure.
369  * @param[in] item
370  *   Item specification.
371  * @param[in] item_flags
372  *   Parsed item flags.
373  */
374 static void
375 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
376 			      const struct rte_flow_item *item,
377 			      uint64_t item_flags)
378 {
379 	const struct rte_flow_item_eth *spec = item->spec;
380 	const struct rte_flow_item_eth *mask = item->mask;
381 	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
382 	struct ibv_flow_spec_eth eth = {
383 		.type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
384 		.size = size,
385 	};
386 
387 	if (!mask)
388 		mask = &rte_flow_item_eth_mask;
389 	if (spec) {
390 		unsigned int i;
391 
392 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes,
393 			RTE_ETHER_ADDR_LEN);
394 		memcpy(&eth.val.src_mac, spec->src.addr_bytes,
395 			RTE_ETHER_ADDR_LEN);
396 		eth.val.ether_type = spec->type;
397 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes,
398 			RTE_ETHER_ADDR_LEN);
399 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes,
400 			RTE_ETHER_ADDR_LEN);
401 		eth.mask.ether_type = mask->type;
402 		/* Remove unwanted bits from values. */
403 		for (i = 0; i < RTE_ETHER_ADDR_LEN; ++i) {
404 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
405 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
406 		}
407 		eth.val.ether_type &= eth.mask.ether_type;
408 	}
409 	flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
410 }
411 
412 /**
413  * Update the VLAN tag in the Verbs Ethernet specification.
414  * This function assumes that the input is valid and there is space to add
415  * the requested item.
416  *
417  * @param[in, out] attr
418  *   Pointer to Verbs attributes structure.
419  * @param[in] eth
420  *   Verbs structure containing the VLAN information to copy.
421  */
422 static void
423 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
424 			    struct ibv_flow_spec_eth *eth)
425 {
426 	unsigned int i;
427 	const enum ibv_flow_spec_type search = eth->type;
428 	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
429 		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
430 
431 	for (i = 0; i != attr->num_of_specs; ++i) {
432 		if (hdr->type == search) {
433 			struct ibv_flow_spec_eth *e =
434 				(struct ibv_flow_spec_eth *)hdr;
435 
436 			e->val.vlan_tag = eth->val.vlan_tag;
437 			e->mask.vlan_tag = eth->mask.vlan_tag;
438 			e->val.ether_type = eth->val.ether_type;
439 			e->mask.ether_type = eth->mask.ether_type;
440 			break;
441 		}
442 		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
443 	}
444 }
445 
446 /**
447  * Convert the @p item into a Verbs specification. This function assumes that
448  * the input is valid and that there is space to insert the requested item
449  * into the flow.
450  *
451  * @param[in, out] dev_flow
452  *   Pointer to dev_flow structure.
453  * @param[in] item
454  *   Item specification.
455  * @param[in] item_flags
456  *   Parsed item flags.
457  */
458 static void
459 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
460 			       const struct rte_flow_item *item,
461 			       uint64_t item_flags)
462 {
463 	const struct rte_flow_item_vlan *spec = item->spec;
464 	const struct rte_flow_item_vlan *mask = item->mask;
465 	unsigned int size = sizeof(struct ibv_flow_spec_eth);
466 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
467 	struct ibv_flow_spec_eth eth = {
468 		.type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
469 		.size = size,
470 	};
471 	const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
472 				      MLX5_FLOW_LAYER_OUTER_L2;
473 
474 	if (!mask)
475 		mask = &rte_flow_item_vlan_mask;
476 	if (spec) {
477 		eth.val.vlan_tag = spec->tci;
478 		eth.mask.vlan_tag = mask->tci;
479 		eth.val.vlan_tag &= eth.mask.vlan_tag;
480 		eth.val.ether_type = spec->inner_type;
481 		eth.mask.ether_type = mask->inner_type;
482 		eth.val.ether_type &= eth.mask.ether_type;
483 	}
484 	if (!(item_flags & l2m))
485 		flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
486 	else
487 		flow_verbs_item_vlan_update(&dev_flow->verbs.attr, &eth);
488 	if (!tunnel)
489 		dev_flow->handle->vf_vlan.tag =
490 			rte_be_to_cpu_16(spec->tci) & 0x0fff;
491 }
492 
493 /**
494  * Convert the @p item into a Verbs specification. This function assumes that
495  * the input is valid and that there is space to insert the requested item
496  * into the flow.
497  *
498  * @param[in, out] dev_flow
499  *   Pointer to dev_flow structure.
500  * @param[in] item
501  *   Item specification.
502  * @param[in] item_flags
503  *   Parsed item flags.
504  */
505 static void
506 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
507 			       const struct rte_flow_item *item,
508 			       uint64_t item_flags)
509 {
510 	const struct rte_flow_item_ipv4 *spec = item->spec;
511 	const struct rte_flow_item_ipv4 *mask = item->mask;
512 	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
513 	struct ibv_flow_spec_ipv4_ext ipv4 = {
514 		.type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
515 		.size = size,
516 	};
517 
518 	if (!mask)
519 		mask = &rte_flow_item_ipv4_mask;
520 	if (spec) {
521 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
522 			.src_ip = spec->hdr.src_addr,
523 			.dst_ip = spec->hdr.dst_addr,
524 			.proto = spec->hdr.next_proto_id,
525 			.tos = spec->hdr.type_of_service,
526 		};
527 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
528 			.src_ip = mask->hdr.src_addr,
529 			.dst_ip = mask->hdr.dst_addr,
530 			.proto = mask->hdr.next_proto_id,
531 			.tos = mask->hdr.type_of_service,
532 		};
533 		/* Remove unwanted bits from values. */
534 		ipv4.val.src_ip &= ipv4.mask.src_ip;
535 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
536 		ipv4.val.proto &= ipv4.mask.proto;
537 		ipv4.val.tos &= ipv4.mask.tos;
538 	}
539 	flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
540 }
541 
542 /**
543  * Convert the @p item into a Verbs specification. This function assumes that
544  * the input is valid and that there is space to insert the requested item
545  * into the flow.
546  *
547  * @param[in, out] dev_flow
548  *   Pointer to dev_flow structure.
549  * @param[in] item
550  *   Item specification.
551  * @param[in] item_flags
552  *   Parsed item flags.
553  */
554 static void
555 flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
556 			       const struct rte_flow_item *item,
557 			       uint64_t item_flags)
558 {
559 	const struct rte_flow_item_ipv6 *spec = item->spec;
560 	const struct rte_flow_item_ipv6 *mask = item->mask;
561 	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
562 	struct ibv_flow_spec_ipv6 ipv6 = {
563 		.type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
564 		.size = size,
565 	};
566 
567 	if (!mask)
568 		mask = &rte_flow_item_ipv6_mask;
569 	if (spec) {
570 		unsigned int i;
571 		uint32_t vtc_flow_val;
572 		uint32_t vtc_flow_mask;
573 
574 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
575 		       RTE_DIM(ipv6.val.src_ip));
576 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
577 		       RTE_DIM(ipv6.val.dst_ip));
578 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
579 		       RTE_DIM(ipv6.mask.src_ip));
580 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
581 		       RTE_DIM(ipv6.mask.dst_ip));
582 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
583 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
584 		ipv6.val.flow_label =
585 			rte_cpu_to_be_32((vtc_flow_val & RTE_IPV6_HDR_FL_MASK) >>
586 					 RTE_IPV6_HDR_FL_SHIFT);
587 		ipv6.val.traffic_class = (vtc_flow_val & RTE_IPV6_HDR_TC_MASK) >>
588 					 RTE_IPV6_HDR_TC_SHIFT;
589 		ipv6.val.next_hdr = spec->hdr.proto;
590 		ipv6.mask.flow_label =
591 			rte_cpu_to_be_32((vtc_flow_mask & RTE_IPV6_HDR_FL_MASK) >>
592 					 RTE_IPV6_HDR_FL_SHIFT);
593 		ipv6.mask.traffic_class = (vtc_flow_mask & RTE_IPV6_HDR_TC_MASK) >>
594 					  RTE_IPV6_HDR_TC_SHIFT;
595 		ipv6.mask.next_hdr = mask->hdr.proto;
596 		/* Remove unwanted bits from values. */
597 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
598 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
599 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
600 		}
601 		ipv6.val.flow_label &= ipv6.mask.flow_label;
602 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
603 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
604 	}
605 	flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
606 }
607 
608 /**
609  * Convert the @p item into a Verbs specification. This function assumes that
610  * the input is valid and that there is space to insert the requested item
611  * into the flow.
612  *
613  * @param[in, out] dev_flow
614  *   Pointer to dev_flow structure.
615  * @param[in] item
616  *   Item specification.
617  * @param[in] item_flags
618  *   Parsed item flags.
619  */
620 static void
621 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
622 			      const struct rte_flow_item *item,
623 			      uint64_t item_flags __rte_unused)
624 {
625 	const struct rte_flow_item_tcp *spec = item->spec;
626 	const struct rte_flow_item_tcp *mask = item->mask;
627 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
628 	struct ibv_flow_spec_tcp_udp tcp = {
629 		.type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
630 		.size = size,
631 	};
632 
633 	if (!mask)
634 		mask = &rte_flow_item_tcp_mask;
635 	if (spec) {
636 		tcp.val.dst_port = spec->hdr.dst_port;
637 		tcp.val.src_port = spec->hdr.src_port;
638 		tcp.mask.dst_port = mask->hdr.dst_port;
639 		tcp.mask.src_port = mask->hdr.src_port;
640 		/* Remove unwanted bits from values. */
641 		tcp.val.src_port &= tcp.mask.src_port;
642 		tcp.val.dst_port &= tcp.mask.dst_port;
643 	}
644 	flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
645 }
646 
647 /**
648  * Convert the @p item into a Verbs specification. This function assumes that
649  * the input is valid and that there is space to insert the requested item
650  * into the flow.
651  *
652  * @param[in, out] dev_flow
653  *   Pointer to dev_flow structure.
654  * @param[in] item
655  *   Item specification.
656  * @param[in] item_flags
657  *   Parsed item flags.
658  */
659 static void
660 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
661 			      const struct rte_flow_item *item,
662 			      uint64_t item_flags __rte_unused)
663 {
664 	const struct rte_flow_item_udp *spec = item->spec;
665 	const struct rte_flow_item_udp *mask = item->mask;
666 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
667 	struct ibv_flow_spec_tcp_udp udp = {
668 		.type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
669 		.size = size,
670 	};
671 
672 	if (!mask)
673 		mask = &rte_flow_item_udp_mask;
674 	if (spec) {
675 		udp.val.dst_port = spec->hdr.dst_port;
676 		udp.val.src_port = spec->hdr.src_port;
677 		udp.mask.dst_port = mask->hdr.dst_port;
678 		udp.mask.src_port = mask->hdr.src_port;
679 		/* Remove unwanted bits from values. */
680 		udp.val.src_port &= udp.mask.src_port;
681 		udp.val.dst_port &= udp.mask.dst_port;
682 	}
683 	flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
684 }
685 
686 /**
687  * Convert the @p item into a Verbs specification. This function assumes that
688  * the input is valid and that there is space to insert the requested item
689  * into the flow.
690  *
691  * @param[in, out] dev_flow
692  *   Pointer to dev_flow structure.
693  * @param[in] item
694  *   Item specification.
695  * @param[in] item_flags
696  *   Parsed item flags.
697  */
698 static void
699 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
700 				const struct rte_flow_item *item,
701 				uint64_t item_flags __rte_unused)
702 {
703 	const struct rte_flow_item_vxlan *spec = item->spec;
704 	const struct rte_flow_item_vxlan *mask = item->mask;
705 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
706 	struct ibv_flow_spec_tunnel vxlan = {
707 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
708 		.size = size,
709 	};
710 	union vni {
711 		uint32_t vlan_id;
712 		uint8_t vni[4];
713 	} id = { .vlan_id = 0, };
714 
715 	if (!mask)
716 		mask = &rte_flow_item_vxlan_mask;
717 	if (spec) {
718 		memcpy(&id.vni[1], spec->vni, 3);
719 		vxlan.val.tunnel_id = id.vlan_id;
720 		memcpy(&id.vni[1], mask->vni, 3);
721 		vxlan.mask.tunnel_id = id.vlan_id;
722 		/* Remove unwanted bits from values. */
723 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
724 	}
725 	flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
726 }
727 
728 /**
729  * Convert the @p item into a Verbs specification. This function assumes that
730  * the input is valid and that there is space to insert the requested item
731  * into the flow.
732  *
733  * @param[in, out] dev_flow
734  *   Pointer to dev_flow structure.
735  * @param[in] item
736  *   Item specification.
737  * @param[in] item_flags
738  *   Parsed item flags.
739  */
740 static void
741 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
742 				    const struct rte_flow_item *item,
743 				    uint64_t item_flags __rte_unused)
744 {
745 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
746 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
747 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
748 	struct ibv_flow_spec_tunnel vxlan_gpe = {
749 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
750 		.size = size,
751 	};
752 	union vni {
753 		uint32_t vlan_id;
754 		uint8_t vni[4];
755 	} id = { .vlan_id = 0, };
756 
757 	if (!mask)
758 		mask = &rte_flow_item_vxlan_gpe_mask;
759 	if (spec) {
760 		memcpy(&id.vni[1], spec->vni, 3);
761 		vxlan_gpe.val.tunnel_id = id.vlan_id;
762 		memcpy(&id.vni[1], mask->vni, 3);
763 		vxlan_gpe.mask.tunnel_id = id.vlan_id;
764 		/* Remove unwanted bits from values. */
765 		vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
766 	}
767 	flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
768 }
769 
770 /**
771  * Update the protocol in Verbs IPv4/IPv6 spec.
772  *
773  * @param[in, out] attr
774  *   Pointer to Verbs attributes structure.
775  * @param[in] search
776  *   Specification type to search in order to update the IP protocol.
777  * @param[in] protocol
778  *   Protocol value to set if none is present in the specification.
779  */
780 static void
781 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
782 				       enum ibv_flow_spec_type search,
783 				       uint8_t protocol)
784 {
785 	unsigned int i;
786 	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
787 		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
788 
789 	if (!attr)
790 		return;
791 	for (i = 0; i != attr->num_of_specs; ++i) {
792 		if (hdr->type == search) {
793 			union {
794 				struct ibv_flow_spec_ipv4_ext *ipv4;
795 				struct ibv_flow_spec_ipv6 *ipv6;
796 			} ip;
797 
798 			switch (search) {
799 			case IBV_FLOW_SPEC_IPV4_EXT:
800 				ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
801 				if (!ip.ipv4->val.proto) {
802 					ip.ipv4->val.proto = protocol;
803 					ip.ipv4->mask.proto = 0xff;
804 				}
805 				break;
806 			case IBV_FLOW_SPEC_IPV6:
807 				ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
808 				if (!ip.ipv6->val.next_hdr) {
809 					ip.ipv6->val.next_hdr = protocol;
810 					ip.ipv6->mask.next_hdr = 0xff;
811 				}
812 				break;
813 			default:
814 				break;
815 			}
816 			break;
817 		}
818 		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
819 	}
820 }
821 
822 /**
823  * Convert the @p item into a Verbs specification. This function assumes that
824  * the input is valid and that there is space to insert the requested item
825  * into the flow.
826  *
827  * @param[in, out] dev_flow
828  *   Pointer to dev_flow structure.
829  * @param[in] item
830  *   Item specification.
831  * @param[in] item_flags
832  *   Parsed item flags.
833  */
834 static void
835 flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
836 			      const struct rte_flow_item *item __rte_unused,
837 			      uint64_t item_flags)
838 {
839 	struct mlx5_flow_verbs_workspace *verbs = &dev_flow->verbs;
840 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
841 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
842 	struct ibv_flow_spec_tunnel tunnel = {
843 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
844 		.size = size,
845 	};
846 #else
847 	const struct rte_flow_item_gre *spec = item->spec;
848 	const struct rte_flow_item_gre *mask = item->mask;
849 	unsigned int size = sizeof(struct ibv_flow_spec_gre);
850 	struct ibv_flow_spec_gre tunnel = {
851 		.type = IBV_FLOW_SPEC_GRE,
852 		.size = size,
853 	};
854 
855 	if (!mask)
856 		mask = &rte_flow_item_gre_mask;
857 	if (spec) {
858 		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
859 		tunnel.val.protocol = spec->protocol;
860 		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
861 		tunnel.mask.protocol = mask->protocol;
862 		/* Remove unwanted bits from values. */
863 		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
864 		tunnel.val.protocol &= tunnel.mask.protocol;
865 		tunnel.val.key &= tunnel.mask.key;
866 	}
867 #endif
868 	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
869 		flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
870 						       IBV_FLOW_SPEC_IPV4_EXT,
871 						       IPPROTO_GRE);
872 	else
873 		flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
874 						       IBV_FLOW_SPEC_IPV6,
875 						       IPPROTO_GRE);
876 	flow_verbs_spec_add(verbs, &tunnel, size);
877 }
878 
879 /**
880  * Convert the @p action into a Verbs specification. This function assumes that
881  * the input is valid and that there is space to insert the requested action
882  * into the flow. This function also return the action that was added.
883  *
884  * @param[in, out] dev_flow
885  *   Pointer to dev_flow structure.
886  * @param[in] item
887  *   Item specification.
888  * @param[in] item_flags
889  *   Parsed item flags.
890  */
891 static void
892 flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
893 			       const struct rte_flow_item *item __rte_unused,
894 			       uint64_t item_flags __rte_unused)
895 {
896 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
897 	const struct rte_flow_item_mpls *spec = item->spec;
898 	const struct rte_flow_item_mpls *mask = item->mask;
899 	unsigned int size = sizeof(struct ibv_flow_spec_mpls);
900 	struct ibv_flow_spec_mpls mpls = {
901 		.type = IBV_FLOW_SPEC_MPLS,
902 		.size = size,
903 	};
904 
905 	if (!mask)
906 		mask = &rte_flow_item_mpls_mask;
907 	if (spec) {
908 		memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
909 		memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
910 		/* Remove unwanted bits from values.  */
911 		mpls.val.label &= mpls.mask.label;
912 	}
913 	flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
914 #endif
915 }
916 
917 /**
918  * Convert the @p action into a Verbs specification. This function assumes that
919  * the input is valid and that there is space to insert the requested action
920  * into the flow.
921  *
922  * @param[in] dev_flow
923  *   Pointer to mlx5_flow.
924  * @param[in] action
925  *   Action configuration.
926  */
927 static void
928 flow_verbs_translate_action_drop
929 	(struct mlx5_flow *dev_flow,
930 	 const struct rte_flow_action *action __rte_unused)
931 {
932 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
933 	struct ibv_flow_spec_action_drop drop = {
934 			.type = IBV_FLOW_SPEC_ACTION_DROP,
935 			.size = size,
936 	};
937 
938 	flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
939 }
940 
941 /**
942  * Convert the @p action into a Verbs specification. This function assumes that
943  * the input is valid and that there is space to insert the requested action
944  * into the flow.
945  *
946  * @param[in] rss_desc
947  *   Pointer to mlx5_flow_rss_desc.
948  * @param[in] action
949  *   Action configuration.
950  */
951 static void
952 flow_verbs_translate_action_queue(struct mlx5_flow_rss_desc *rss_desc,
953 				  const struct rte_flow_action *action)
954 {
955 	const struct rte_flow_action_queue *queue = action->conf;
956 
957 	rss_desc->queue[0] = queue->index;
958 	rss_desc->queue_num = 1;
959 }
960 
961 /**
962  * Convert the @p action into a Verbs specification. This function assumes that
963  * the input is valid and that there is space to insert the requested action
964  * into the flow.
965  *
966  * @param[in] rss_desc
967  *   Pointer to mlx5_flow_rss_desc.
968  * @param[in] action
969  *   Action configuration.
970  */
971 static void
972 flow_verbs_translate_action_rss(struct mlx5_flow_rss_desc *rss_desc,
973 				const struct rte_flow_action *action)
974 {
975 	const struct rte_flow_action_rss *rss = action->conf;
976 	const uint8_t *rss_key;
977 
978 	memcpy(rss_desc->queue, rss->queue, rss->queue_num * sizeof(uint16_t));
979 	rss_desc->queue_num = rss->queue_num;
980 	/* NULL RSS key indicates default RSS key. */
981 	rss_key = !rss->key ? rss_hash_default_key : rss->key;
982 	memcpy(rss_desc->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
983 	/*
984 	 * rss->level and rss.types should be set in advance when expanding
985 	 * items for RSS.
986 	 */
987 }
988 
989 /**
990  * Convert the @p action into a Verbs specification. This function assumes that
991  * the input is valid and that there is space to insert the requested action
992  * into the flow.
993  *
994  * @param[in] dev_flow
995  *   Pointer to mlx5_flow.
996  * @param[in] action
997  *   Action configuration.
998  */
999 static void
1000 flow_verbs_translate_action_flag
1001 	(struct mlx5_flow *dev_flow,
1002 	 const struct rte_flow_action *action __rte_unused)
1003 {
1004 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1005 	struct ibv_flow_spec_action_tag tag = {
1006 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1007 		.size = size,
1008 		.tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1009 	};
1010 
1011 	flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1012 }
1013 
1014 /**
1015  * Convert the @p action into a Verbs specification. This function assumes that
1016  * the input is valid and that there is space to insert the requested action
1017  * into the flow.
1018  *
1019  * @param[in] dev_flow
1020  *   Pointer to mlx5_flow.
1021  * @param[in] action
1022  *   Action configuration.
1023  */
1024 static void
1025 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
1026 				 const struct rte_flow_action *action)
1027 {
1028 	const struct rte_flow_action_mark *mark = action->conf;
1029 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1030 	struct ibv_flow_spec_action_tag tag = {
1031 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1032 		.size = size,
1033 		.tag_id = mlx5_flow_mark_set(mark->id),
1034 	};
1035 
1036 	flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1037 }
1038 
1039 /**
1040  * Convert the @p action into a Verbs specification. This function assumes that
1041  * the input is valid and that there is space to insert the requested action
1042  * into the flow.
1043  *
1044  * @param[in] dev
1045  *   Pointer to the Ethernet device structure.
1046  * @param[in] action
1047  *   Action configuration.
1048  * @param[in] dev_flow
1049  *   Pointer to mlx5_flow.
1050  * @param[out] error
1051  *   Pointer to error structure.
1052  *
1053  * @return
1054  *   0 On success else a negative errno value is returned and rte_errno is set.
1055  */
1056 static int
1057 flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
1058 				  const struct rte_flow_action *action,
1059 				  struct rte_eth_dev *dev,
1060 				  struct rte_flow_error *error)
1061 {
1062 	const struct rte_flow_action_count *count = action->conf;
1063 	struct rte_flow *flow = dev_flow->flow;
1064 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1065 	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1066 	struct mlx5_flow_counter_pool *pool;
1067 	struct mlx5_flow_counter *cnt = NULL;
1068 	struct mlx5_flow_counter_ext *cnt_ext;
1069 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1070 	struct ibv_flow_spec_counter_action counter = {
1071 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
1072 		.size = size,
1073 	};
1074 #endif
1075 
1076 	if (!flow->counter) {
1077 		flow->counter = flow_verbs_counter_new(dev, count->shared,
1078 						       count->id);
1079 		if (!flow->counter)
1080 			return rte_flow_error_set(error, rte_errno,
1081 						  RTE_FLOW_ERROR_TYPE_ACTION,
1082 						  action,
1083 						  "cannot get counter"
1084 						  " context.");
1085 	}
1086 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
1087 	cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
1088 	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
1089 	counter.counter_set_handle = cnt_ext->cs->handle;
1090 	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
1091 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1092 	cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
1093 	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
1094 	counter.counters = cnt_ext->cs;
1095 	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
1096 #endif
1097 	return 0;
1098 }
1099 
1100 /**
1101  * Internal validation function. For validating both actions and items.
1102  *
1103  * @param[in] dev
1104  *   Pointer to the Ethernet device structure.
1105  * @param[in] attr
1106  *   Pointer to the flow attributes.
1107  * @param[in] items
1108  *   Pointer to the list of items.
1109  * @param[in] actions
1110  *   Pointer to the list of actions.
1111  * @param[in] external
1112  *   This flow rule is created by request external to PMD.
1113  * @param[in] hairpin
1114  *   Number of hairpin TX actions, 0 means classic flow.
1115  * @param[out] error
1116  *   Pointer to the error structure.
1117  *
1118  * @return
1119  *   0 on success, a negative errno value otherwise and rte_errno is set.
1120  */
1121 static int
1122 flow_verbs_validate(struct rte_eth_dev *dev,
1123 		    const struct rte_flow_attr *attr,
1124 		    const struct rte_flow_item items[],
1125 		    const struct rte_flow_action actions[],
1126 		    bool external __rte_unused,
1127 		    int hairpin __rte_unused,
1128 		    struct rte_flow_error *error)
1129 {
1130 	int ret;
1131 	uint64_t action_flags = 0;
1132 	uint64_t item_flags = 0;
1133 	uint64_t last_item = 0;
1134 	uint8_t next_protocol = 0xff;
1135 	uint16_t ether_type = 0;
1136 
1137 	if (items == NULL)
1138 		return -1;
1139 	ret = mlx5_flow_validate_attributes(dev, attr, error);
1140 	if (ret < 0)
1141 		return ret;
1142 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1143 		int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1144 		int ret = 0;
1145 
1146 		switch (items->type) {
1147 		case RTE_FLOW_ITEM_TYPE_VOID:
1148 			break;
1149 		case RTE_FLOW_ITEM_TYPE_ETH:
1150 			ret = mlx5_flow_validate_item_eth(items, item_flags,
1151 							  error);
1152 			if (ret < 0)
1153 				return ret;
1154 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1155 					     MLX5_FLOW_LAYER_OUTER_L2;
1156 			if (items->mask != NULL && items->spec != NULL) {
1157 				ether_type =
1158 					((const struct rte_flow_item_eth *)
1159 					 items->spec)->type;
1160 				ether_type &=
1161 					((const struct rte_flow_item_eth *)
1162 					 items->mask)->type;
1163 				ether_type = rte_be_to_cpu_16(ether_type);
1164 			} else {
1165 				ether_type = 0;
1166 			}
1167 			break;
1168 		case RTE_FLOW_ITEM_TYPE_VLAN:
1169 			ret = mlx5_flow_validate_item_vlan(items, item_flags,
1170 							   dev, error);
1171 			if (ret < 0)
1172 				return ret;
1173 			last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1174 					      MLX5_FLOW_LAYER_INNER_VLAN) :
1175 					     (MLX5_FLOW_LAYER_OUTER_L2 |
1176 					      MLX5_FLOW_LAYER_OUTER_VLAN);
1177 			if (items->mask != NULL && items->spec != NULL) {
1178 				ether_type =
1179 					((const struct rte_flow_item_vlan *)
1180 					 items->spec)->inner_type;
1181 				ether_type &=
1182 					((const struct rte_flow_item_vlan *)
1183 					 items->mask)->inner_type;
1184 				ether_type = rte_be_to_cpu_16(ether_type);
1185 			} else {
1186 				ether_type = 0;
1187 			}
1188 			break;
1189 		case RTE_FLOW_ITEM_TYPE_IPV4:
1190 			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1191 							   last_item,
1192 							   ether_type, NULL,
1193 							   error);
1194 			if (ret < 0)
1195 				return ret;
1196 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1197 					     MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1198 			if (items->mask != NULL &&
1199 			    ((const struct rte_flow_item_ipv4 *)
1200 			     items->mask)->hdr.next_proto_id) {
1201 				next_protocol =
1202 					((const struct rte_flow_item_ipv4 *)
1203 					 (items->spec))->hdr.next_proto_id;
1204 				next_protocol &=
1205 					((const struct rte_flow_item_ipv4 *)
1206 					 (items->mask))->hdr.next_proto_id;
1207 			} else {
1208 				/* Reset for inner layer. */
1209 				next_protocol = 0xff;
1210 			}
1211 			break;
1212 		case RTE_FLOW_ITEM_TYPE_IPV6:
1213 			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1214 							   last_item,
1215 							   ether_type, NULL,
1216 							   error);
1217 			if (ret < 0)
1218 				return ret;
1219 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1220 					     MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1221 			if (items->mask != NULL &&
1222 			    ((const struct rte_flow_item_ipv6 *)
1223 			     items->mask)->hdr.proto) {
1224 				next_protocol =
1225 					((const struct rte_flow_item_ipv6 *)
1226 					 items->spec)->hdr.proto;
1227 				next_protocol &=
1228 					((const struct rte_flow_item_ipv6 *)
1229 					 items->mask)->hdr.proto;
1230 			} else {
1231 				/* Reset for inner layer. */
1232 				next_protocol = 0xff;
1233 			}
1234 			break;
1235 		case RTE_FLOW_ITEM_TYPE_UDP:
1236 			ret = mlx5_flow_validate_item_udp(items, item_flags,
1237 							  next_protocol,
1238 							  error);
1239 			if (ret < 0)
1240 				return ret;
1241 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1242 					     MLX5_FLOW_LAYER_OUTER_L4_UDP;
1243 			break;
1244 		case RTE_FLOW_ITEM_TYPE_TCP:
1245 			ret = mlx5_flow_validate_item_tcp
1246 						(items, item_flags,
1247 						 next_protocol,
1248 						 &rte_flow_item_tcp_mask,
1249 						 error);
1250 			if (ret < 0)
1251 				return ret;
1252 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1253 					     MLX5_FLOW_LAYER_OUTER_L4_TCP;
1254 			break;
1255 		case RTE_FLOW_ITEM_TYPE_VXLAN:
1256 			ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1257 							    error);
1258 			if (ret < 0)
1259 				return ret;
1260 			last_item = MLX5_FLOW_LAYER_VXLAN;
1261 			break;
1262 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1263 			ret = mlx5_flow_validate_item_vxlan_gpe(items,
1264 								item_flags,
1265 								dev, error);
1266 			if (ret < 0)
1267 				return ret;
1268 			last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
1269 			break;
1270 		case RTE_FLOW_ITEM_TYPE_GRE:
1271 			ret = mlx5_flow_validate_item_gre(items, item_flags,
1272 							  next_protocol, error);
1273 			if (ret < 0)
1274 				return ret;
1275 			last_item = MLX5_FLOW_LAYER_GRE;
1276 			break;
1277 		case RTE_FLOW_ITEM_TYPE_MPLS:
1278 			ret = mlx5_flow_validate_item_mpls(dev, items,
1279 							   item_flags,
1280 							   last_item, error);
1281 			if (ret < 0)
1282 				return ret;
1283 			last_item = MLX5_FLOW_LAYER_MPLS;
1284 			break;
1285 		default:
1286 			return rte_flow_error_set(error, ENOTSUP,
1287 						  RTE_FLOW_ERROR_TYPE_ITEM,
1288 						  NULL, "item not supported");
1289 		}
1290 		item_flags |= last_item;
1291 	}
1292 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1293 		switch (actions->type) {
1294 		case RTE_FLOW_ACTION_TYPE_VOID:
1295 			break;
1296 		case RTE_FLOW_ACTION_TYPE_FLAG:
1297 			ret = mlx5_flow_validate_action_flag(action_flags,
1298 							     attr,
1299 							     error);
1300 			if (ret < 0)
1301 				return ret;
1302 			action_flags |= MLX5_FLOW_ACTION_FLAG;
1303 			break;
1304 		case RTE_FLOW_ACTION_TYPE_MARK:
1305 			ret = mlx5_flow_validate_action_mark(actions,
1306 							     action_flags,
1307 							     attr,
1308 							     error);
1309 			if (ret < 0)
1310 				return ret;
1311 			action_flags |= MLX5_FLOW_ACTION_MARK;
1312 			break;
1313 		case RTE_FLOW_ACTION_TYPE_DROP:
1314 			ret = mlx5_flow_validate_action_drop(action_flags,
1315 							     attr,
1316 							     error);
1317 			if (ret < 0)
1318 				return ret;
1319 			action_flags |= MLX5_FLOW_ACTION_DROP;
1320 			break;
1321 		case RTE_FLOW_ACTION_TYPE_QUEUE:
1322 			ret = mlx5_flow_validate_action_queue(actions,
1323 							      action_flags, dev,
1324 							      attr,
1325 							      error);
1326 			if (ret < 0)
1327 				return ret;
1328 			action_flags |= MLX5_FLOW_ACTION_QUEUE;
1329 			break;
1330 		case RTE_FLOW_ACTION_TYPE_RSS:
1331 			ret = mlx5_flow_validate_action_rss(actions,
1332 							    action_flags, dev,
1333 							    attr, item_flags,
1334 							    error);
1335 			if (ret < 0)
1336 				return ret;
1337 			action_flags |= MLX5_FLOW_ACTION_RSS;
1338 			break;
1339 		case RTE_FLOW_ACTION_TYPE_COUNT:
1340 			ret = mlx5_flow_validate_action_count(dev, attr, error);
1341 			if (ret < 0)
1342 				return ret;
1343 			action_flags |= MLX5_FLOW_ACTION_COUNT;
1344 			break;
1345 		default:
1346 			return rte_flow_error_set(error, ENOTSUP,
1347 						  RTE_FLOW_ERROR_TYPE_ACTION,
1348 						  actions,
1349 						  "action not supported");
1350 		}
1351 	}
1352 	/*
1353 	 * Validate the drop action mutual exclusion with other actions.
1354 	 * Drop action is mutually-exclusive with any other action, except for
1355 	 * Count action.
1356 	 */
1357 	if ((action_flags & MLX5_FLOW_ACTION_DROP) &&
1358 	    (action_flags & ~(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_COUNT)))
1359 		return rte_flow_error_set(error, EINVAL,
1360 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1361 					  "Drop action is mutually-exclusive "
1362 					  "with any other action, except for "
1363 					  "Count action");
1364 	if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1365 		return rte_flow_error_set(error, EINVAL,
1366 					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
1367 					  "no fate action is found");
1368 	return 0;
1369 }
1370 
1371 /**
1372  * Calculate the required bytes that are needed for the action part of the verbs
1373  * flow.
1374  *
1375  * @param[in] actions
1376  *   Pointer to the list of actions.
1377  *
1378  * @return
1379  *   The size of the memory needed for all actions.
1380  */
1381 static int
1382 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1383 {
1384 	int size = 0;
1385 
1386 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1387 		switch (actions->type) {
1388 		case RTE_FLOW_ACTION_TYPE_VOID:
1389 			break;
1390 		case RTE_FLOW_ACTION_TYPE_FLAG:
1391 			size += sizeof(struct ibv_flow_spec_action_tag);
1392 			break;
1393 		case RTE_FLOW_ACTION_TYPE_MARK:
1394 			size += sizeof(struct ibv_flow_spec_action_tag);
1395 			break;
1396 		case RTE_FLOW_ACTION_TYPE_DROP:
1397 			size += sizeof(struct ibv_flow_spec_action_drop);
1398 			break;
1399 		case RTE_FLOW_ACTION_TYPE_QUEUE:
1400 			break;
1401 		case RTE_FLOW_ACTION_TYPE_RSS:
1402 			break;
1403 		case RTE_FLOW_ACTION_TYPE_COUNT:
1404 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1405 	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1406 			size += sizeof(struct ibv_flow_spec_counter_action);
1407 #endif
1408 			break;
1409 		default:
1410 			break;
1411 		}
1412 	}
1413 	return size;
1414 }
1415 
1416 /**
1417  * Calculate the required bytes that are needed for the item part of the verbs
1418  * flow.
1419  *
1420  * @param[in] items
1421  *   Pointer to the list of items.
1422  *
1423  * @return
1424  *   The size of the memory needed for all items.
1425  */
1426 static int
1427 flow_verbs_get_items_size(const struct rte_flow_item items[])
1428 {
1429 	int size = 0;
1430 
1431 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1432 		switch (items->type) {
1433 		case RTE_FLOW_ITEM_TYPE_VOID:
1434 			break;
1435 		case RTE_FLOW_ITEM_TYPE_ETH:
1436 			size += sizeof(struct ibv_flow_spec_eth);
1437 			break;
1438 		case RTE_FLOW_ITEM_TYPE_VLAN:
1439 			size += sizeof(struct ibv_flow_spec_eth);
1440 			break;
1441 		case RTE_FLOW_ITEM_TYPE_IPV4:
1442 			size += sizeof(struct ibv_flow_spec_ipv4_ext);
1443 			break;
1444 		case RTE_FLOW_ITEM_TYPE_IPV6:
1445 			size += sizeof(struct ibv_flow_spec_ipv6);
1446 			break;
1447 		case RTE_FLOW_ITEM_TYPE_UDP:
1448 			size += sizeof(struct ibv_flow_spec_tcp_udp);
1449 			break;
1450 		case RTE_FLOW_ITEM_TYPE_TCP:
1451 			size += sizeof(struct ibv_flow_spec_tcp_udp);
1452 			break;
1453 		case RTE_FLOW_ITEM_TYPE_VXLAN:
1454 			size += sizeof(struct ibv_flow_spec_tunnel);
1455 			break;
1456 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1457 			size += sizeof(struct ibv_flow_spec_tunnel);
1458 			break;
1459 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1460 		case RTE_FLOW_ITEM_TYPE_GRE:
1461 			size += sizeof(struct ibv_flow_spec_gre);
1462 			break;
1463 		case RTE_FLOW_ITEM_TYPE_MPLS:
1464 			size += sizeof(struct ibv_flow_spec_mpls);
1465 			break;
1466 #else
1467 		case RTE_FLOW_ITEM_TYPE_GRE:
1468 			size += sizeof(struct ibv_flow_spec_tunnel);
1469 			break;
1470 #endif
1471 		default:
1472 			break;
1473 		}
1474 	}
1475 	return size;
1476 }
1477 
1478 /**
1479  * Internal preparation function. Allocate mlx5_flow with the required size.
1480  * The required size is calculate based on the actions and items. This function
1481  * also returns the detected actions and items for later use.
1482  *
1483  * @param[in] dev
1484  *   Pointer to Ethernet device.
1485  * @param[in] attr
1486  *   Pointer to the flow attributes.
1487  * @param[in] items
1488  *   Pointer to the list of items.
1489  * @param[in] actions
1490  *   Pointer to the list of actions.
1491  * @param[out] error
1492  *   Pointer to the error structure.
1493  *
1494  * @return
1495  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1496  *   is set.
1497  */
1498 static struct mlx5_flow *
1499 flow_verbs_prepare(struct rte_eth_dev *dev,
1500 		   const struct rte_flow_attr *attr __rte_unused,
1501 		   const struct rte_flow_item items[],
1502 		   const struct rte_flow_action actions[],
1503 		   struct rte_flow_error *error)
1504 {
1505 	size_t size = 0;
1506 	uint32_t handle_idx = 0;
1507 	struct mlx5_flow *dev_flow;
1508 	struct mlx5_flow_handle *dev_handle;
1509 	struct mlx5_priv *priv = dev->data->dev_private;
1510 
1511 	size += flow_verbs_get_actions_size(actions);
1512 	size += flow_verbs_get_items_size(items);
1513 	if (size > MLX5_VERBS_MAX_SPEC_ACT_SIZE) {
1514 		rte_flow_error_set(error, E2BIG,
1515 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1516 				   "Verbs spec/action size too large");
1517 		return NULL;
1518 	}
1519 	/* In case of corrupting the memory. */
1520 	if (priv->flow_idx >= MLX5_NUM_MAX_DEV_FLOWS) {
1521 		rte_flow_error_set(error, ENOSPC,
1522 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1523 				   "not free temporary device flow");
1524 		return NULL;
1525 	}
1526 	dev_handle = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1527 				   &handle_idx);
1528 	if (!dev_handle) {
1529 		rte_flow_error_set(error, ENOMEM,
1530 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1531 				   "not enough memory to create flow handle");
1532 		return NULL;
1533 	}
1534 	/* No multi-thread supporting. */
1535 	dev_flow = &((struct mlx5_flow *)priv->inter_flows)[priv->flow_idx++];
1536 	dev_flow->handle = dev_handle;
1537 	dev_flow->handle_idx = handle_idx;
1538 	/* Memcpy is used, only size needs to be cleared to 0. */
1539 	dev_flow->verbs.size = 0;
1540 	dev_flow->verbs.attr.num_of_specs = 0;
1541 	dev_flow->ingress = attr->ingress;
1542 	dev_flow->hash_fields = 0;
1543 	/* Need to set transfer attribute: not supported in Verbs mode. */
1544 	return dev_flow;
1545 }
1546 
1547 /**
1548  * Fill the flow with verb spec.
1549  *
1550  * @param[in] dev
1551  *   Pointer to Ethernet device.
1552  * @param[in, out] dev_flow
1553  *   Pointer to the mlx5 flow.
1554  * @param[in] attr
1555  *   Pointer to the flow attributes.
1556  * @param[in] items
1557  *   Pointer to the list of items.
1558  * @param[in] actions
1559  *   Pointer to the list of actions.
1560  * @param[out] error
1561  *   Pointer to the error structure.
1562  *
1563  * @return
1564  *   0 on success, else a negative errno value otherwise and rte_errno is set.
1565  */
1566 static int
1567 flow_verbs_translate(struct rte_eth_dev *dev,
1568 		     struct mlx5_flow *dev_flow,
1569 		     const struct rte_flow_attr *attr,
1570 		     const struct rte_flow_item items[],
1571 		     const struct rte_flow_action actions[],
1572 		     struct rte_flow_error *error)
1573 {
1574 	uint64_t item_flags = 0;
1575 	uint64_t action_flags = 0;
1576 	uint64_t priority = attr->priority;
1577 	uint32_t subpriority = 0;
1578 	struct mlx5_priv *priv = dev->data->dev_private;
1579 	struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
1580 					      priv->rss_desc)
1581 					      [!!priv->flow_nested_idx];
1582 
1583 	if (priority == MLX5_FLOW_PRIO_RSVD)
1584 		priority = priv->config.flow_prio - 1;
1585 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1586 		int ret;
1587 
1588 		switch (actions->type) {
1589 		case RTE_FLOW_ACTION_TYPE_VOID:
1590 			break;
1591 		case RTE_FLOW_ACTION_TYPE_FLAG:
1592 			flow_verbs_translate_action_flag(dev_flow, actions);
1593 			action_flags |= MLX5_FLOW_ACTION_FLAG;
1594 			dev_flow->handle->mark = 1;
1595 			break;
1596 		case RTE_FLOW_ACTION_TYPE_MARK:
1597 			flow_verbs_translate_action_mark(dev_flow, actions);
1598 			action_flags |= MLX5_FLOW_ACTION_MARK;
1599 			dev_flow->handle->mark = 1;
1600 			break;
1601 		case RTE_FLOW_ACTION_TYPE_DROP:
1602 			flow_verbs_translate_action_drop(dev_flow, actions);
1603 			action_flags |= MLX5_FLOW_ACTION_DROP;
1604 			dev_flow->handle->fate_action = MLX5_FLOW_FATE_DROP;
1605 			break;
1606 		case RTE_FLOW_ACTION_TYPE_QUEUE:
1607 			flow_verbs_translate_action_queue(rss_desc, actions);
1608 			action_flags |= MLX5_FLOW_ACTION_QUEUE;
1609 			dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
1610 			break;
1611 		case RTE_FLOW_ACTION_TYPE_RSS:
1612 			flow_verbs_translate_action_rss(rss_desc, actions);
1613 			action_flags |= MLX5_FLOW_ACTION_RSS;
1614 			dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
1615 			break;
1616 		case RTE_FLOW_ACTION_TYPE_COUNT:
1617 			ret = flow_verbs_translate_action_count(dev_flow,
1618 								actions,
1619 								dev, error);
1620 			if (ret < 0)
1621 				return ret;
1622 			action_flags |= MLX5_FLOW_ACTION_COUNT;
1623 			break;
1624 		default:
1625 			return rte_flow_error_set(error, ENOTSUP,
1626 						  RTE_FLOW_ERROR_TYPE_ACTION,
1627 						  actions,
1628 						  "action not supported");
1629 		}
1630 	}
1631 	dev_flow->act_flags = action_flags;
1632 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1633 		int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1634 
1635 		switch (items->type) {
1636 		case RTE_FLOW_ITEM_TYPE_VOID:
1637 			break;
1638 		case RTE_FLOW_ITEM_TYPE_ETH:
1639 			flow_verbs_translate_item_eth(dev_flow, items,
1640 						      item_flags);
1641 			subpriority = MLX5_PRIORITY_MAP_L2;
1642 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1643 					       MLX5_FLOW_LAYER_OUTER_L2;
1644 			break;
1645 		case RTE_FLOW_ITEM_TYPE_VLAN:
1646 			flow_verbs_translate_item_vlan(dev_flow, items,
1647 						       item_flags);
1648 			subpriority = MLX5_PRIORITY_MAP_L2;
1649 			item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1650 						MLX5_FLOW_LAYER_INNER_VLAN) :
1651 					       (MLX5_FLOW_LAYER_OUTER_L2 |
1652 						MLX5_FLOW_LAYER_OUTER_VLAN);
1653 			break;
1654 		case RTE_FLOW_ITEM_TYPE_IPV4:
1655 			flow_verbs_translate_item_ipv4(dev_flow, items,
1656 						       item_flags);
1657 			subpriority = MLX5_PRIORITY_MAP_L3;
1658 			dev_flow->hash_fields |=
1659 				mlx5_flow_hashfields_adjust
1660 					(rss_desc, tunnel,
1661 					 MLX5_IPV4_LAYER_TYPES,
1662 					 MLX5_IPV4_IBV_RX_HASH);
1663 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1664 					       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1665 			break;
1666 		case RTE_FLOW_ITEM_TYPE_IPV6:
1667 			flow_verbs_translate_item_ipv6(dev_flow, items,
1668 						       item_flags);
1669 			subpriority = MLX5_PRIORITY_MAP_L3;
1670 			dev_flow->hash_fields |=
1671 				mlx5_flow_hashfields_adjust
1672 					(rss_desc, tunnel,
1673 					 MLX5_IPV6_LAYER_TYPES,
1674 					 MLX5_IPV6_IBV_RX_HASH);
1675 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1676 					       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1677 			break;
1678 		case RTE_FLOW_ITEM_TYPE_TCP:
1679 			flow_verbs_translate_item_tcp(dev_flow, items,
1680 						      item_flags);
1681 			subpriority = MLX5_PRIORITY_MAP_L4;
1682 			dev_flow->hash_fields |=
1683 				mlx5_flow_hashfields_adjust
1684 					(rss_desc, tunnel, ETH_RSS_TCP,
1685 					 (IBV_RX_HASH_SRC_PORT_TCP |
1686 					  IBV_RX_HASH_DST_PORT_TCP));
1687 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1688 					       MLX5_FLOW_LAYER_OUTER_L4_TCP;
1689 			break;
1690 		case RTE_FLOW_ITEM_TYPE_UDP:
1691 			flow_verbs_translate_item_udp(dev_flow, items,
1692 						      item_flags);
1693 			subpriority = MLX5_PRIORITY_MAP_L4;
1694 			dev_flow->hash_fields |=
1695 				mlx5_flow_hashfields_adjust
1696 					(rss_desc, tunnel, ETH_RSS_UDP,
1697 					 (IBV_RX_HASH_SRC_PORT_UDP |
1698 					  IBV_RX_HASH_DST_PORT_UDP));
1699 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1700 					       MLX5_FLOW_LAYER_OUTER_L4_UDP;
1701 			break;
1702 		case RTE_FLOW_ITEM_TYPE_VXLAN:
1703 			flow_verbs_translate_item_vxlan(dev_flow, items,
1704 							item_flags);
1705 			subpriority = MLX5_PRIORITY_MAP_L2;
1706 			item_flags |= MLX5_FLOW_LAYER_VXLAN;
1707 			break;
1708 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1709 			flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
1710 							    item_flags);
1711 			subpriority = MLX5_PRIORITY_MAP_L2;
1712 			item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1713 			break;
1714 		case RTE_FLOW_ITEM_TYPE_GRE:
1715 			flow_verbs_translate_item_gre(dev_flow, items,
1716 						      item_flags);
1717 			subpriority = MLX5_PRIORITY_MAP_L2;
1718 			item_flags |= MLX5_FLOW_LAYER_GRE;
1719 			break;
1720 		case RTE_FLOW_ITEM_TYPE_MPLS:
1721 			flow_verbs_translate_item_mpls(dev_flow, items,
1722 						       item_flags);
1723 			subpriority = MLX5_PRIORITY_MAP_L2;
1724 			item_flags |= MLX5_FLOW_LAYER_MPLS;
1725 			break;
1726 		default:
1727 			return rte_flow_error_set(error, ENOTSUP,
1728 						  RTE_FLOW_ERROR_TYPE_ITEM,
1729 						  NULL,
1730 						  "item not supported");
1731 		}
1732 	}
1733 	dev_flow->handle->layers = item_flags;
1734 	/* Other members of attr will be ignored. */
1735 	dev_flow->verbs.attr.priority =
1736 		mlx5_flow_adjust_priority(dev, priority, subpriority);
1737 	dev_flow->verbs.attr.port = (uint8_t)priv->ibv_port;
1738 	return 0;
1739 }
1740 
1741 /**
1742  * Remove the flow from the NIC but keeps it in memory.
1743  *
1744  * @param[in] dev
1745  *   Pointer to the Ethernet device structure.
1746  * @param[in, out] flow
1747  *   Pointer to flow structure.
1748  */
1749 static void
1750 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1751 {
1752 	struct mlx5_priv *priv = dev->data->dev_private;
1753 	struct mlx5_flow_handle *handle;
1754 	uint32_t handle_idx;
1755 
1756 	if (!flow)
1757 		return;
1758 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1759 		       handle_idx, handle, next) {
1760 		if (handle->ib_flow) {
1761 			claim_zero(mlx5_glue->destroy_flow(handle->ib_flow));
1762 			handle->ib_flow = NULL;
1763 		}
1764 		/* hrxq is union, don't touch it only the flag is set. */
1765 		if (handle->rix_hrxq) {
1766 			if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
1767 				mlx5_hrxq_drop_release(dev);
1768 				handle->rix_hrxq = 0;
1769 			} else if (handle->fate_action ==
1770 				   MLX5_FLOW_FATE_QUEUE) {
1771 				mlx5_hrxq_release(dev, handle->rix_hrxq);
1772 				handle->rix_hrxq = 0;
1773 			}
1774 		}
1775 		if (handle->vf_vlan.tag && handle->vf_vlan.created)
1776 			mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
1777 	}
1778 }
1779 
1780 /**
1781  * Remove the flow from the NIC and the memory.
1782  *
1783  * @param[in] dev
1784  *   Pointer to the Ethernet device structure.
1785  * @param[in, out] flow
1786  *   Pointer to flow structure.
1787  */
1788 static void
1789 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1790 {
1791 	struct mlx5_priv *priv = dev->data->dev_private;
1792 	struct mlx5_flow_handle *handle;
1793 
1794 	if (!flow)
1795 		return;
1796 	flow_verbs_remove(dev, flow);
1797 	while (flow->dev_handles) {
1798 		uint32_t tmp_idx = flow->dev_handles;
1799 
1800 		handle = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1801 				   tmp_idx);
1802 		if (!handle)
1803 			return;
1804 		flow->dev_handles = handle->next.next;
1805 		mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1806 			   tmp_idx);
1807 	}
1808 	if (flow->counter) {
1809 		flow_verbs_counter_release(dev, flow->counter);
1810 		flow->counter = 0;
1811 	}
1812 }
1813 
1814 /**
1815  * Apply the flow to the NIC.
1816  *
1817  * @param[in] dev
1818  *   Pointer to the Ethernet device structure.
1819  * @param[in, out] flow
1820  *   Pointer to flow structure.
1821  * @param[out] error
1822  *   Pointer to error structure.
1823  *
1824  * @return
1825  *   0 on success, a negative errno value otherwise and rte_errno is set.
1826  */
1827 static int
1828 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1829 		 struct rte_flow_error *error)
1830 {
1831 	struct mlx5_priv *priv = dev->data->dev_private;
1832 	struct mlx5_flow_handle *handle;
1833 	struct mlx5_flow *dev_flow;
1834 	struct mlx5_hrxq *hrxq;
1835 	uint32_t dev_handles;
1836 	int err;
1837 	int idx;
1838 
1839 	for (idx = priv->flow_idx - 1; idx >= priv->flow_nested_idx; idx--) {
1840 		dev_flow = &((struct mlx5_flow *)priv->inter_flows)[idx];
1841 		handle = dev_flow->handle;
1842 		if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
1843 			hrxq = mlx5_hrxq_drop_new(dev);
1844 			if (!hrxq) {
1845 				rte_flow_error_set
1846 					(error, errno,
1847 					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1848 					 "cannot get drop hash queue");
1849 				goto error;
1850 			}
1851 		} else {
1852 			uint32_t hrxq_idx;
1853 			struct mlx5_flow_rss_desc *rss_desc =
1854 				&((struct mlx5_flow_rss_desc *)priv->rss_desc)
1855 				[!!priv->flow_nested_idx];
1856 
1857 			MLX5_ASSERT(rss_desc->queue_num);
1858 			hrxq_idx = mlx5_hrxq_get(dev, rss_desc->key,
1859 					     MLX5_RSS_HASH_KEY_LEN,
1860 					     dev_flow->hash_fields,
1861 					     rss_desc->queue,
1862 					     rss_desc->queue_num);
1863 			if (!hrxq_idx)
1864 				hrxq_idx = mlx5_hrxq_new(dev, rss_desc->key,
1865 						MLX5_RSS_HASH_KEY_LEN,
1866 						dev_flow->hash_fields,
1867 						rss_desc->queue,
1868 						rss_desc->queue_num,
1869 						!!(handle->layers &
1870 						MLX5_FLOW_LAYER_TUNNEL));
1871 			hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1872 					 hrxq_idx);
1873 			if (!hrxq) {
1874 				rte_flow_error_set
1875 					(error, rte_errno,
1876 					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1877 					 "cannot get hash queue");
1878 				goto error;
1879 			}
1880 			handle->rix_hrxq = hrxq_idx;
1881 		}
1882 		MLX5_ASSERT(hrxq);
1883 		handle->ib_flow = mlx5_glue->create_flow(hrxq->qp,
1884 						     &dev_flow->verbs.attr);
1885 		if (!handle->ib_flow) {
1886 			rte_flow_error_set(error, errno,
1887 					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1888 					   NULL,
1889 					   "hardware refuses to create flow");
1890 			goto error;
1891 		}
1892 		if (priv->vmwa_context &&
1893 		    handle->vf_vlan.tag && !handle->vf_vlan.created) {
1894 			/*
1895 			 * The rule contains the VLAN pattern.
1896 			 * For VF we are going to create VLAN
1897 			 * interface to make hypervisor set correct
1898 			 * e-Switch vport context.
1899 			 */
1900 			mlx5_vlan_vmwa_acquire(dev, &handle->vf_vlan);
1901 		}
1902 	}
1903 	return 0;
1904 error:
1905 	err = rte_errno; /* Save rte_errno before cleanup. */
1906 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1907 		       dev_handles, handle, next) {
1908 		/* hrxq is union, don't touch it only the flag is set. */
1909 		if (handle->rix_hrxq) {
1910 			if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
1911 				mlx5_hrxq_drop_release(dev);
1912 				handle->rix_hrxq = 0;
1913 			} else if (handle->fate_action ==
1914 				   MLX5_FLOW_FATE_QUEUE) {
1915 				mlx5_hrxq_release(dev, handle->rix_hrxq);
1916 				handle->rix_hrxq = 0;
1917 			}
1918 		}
1919 		if (handle->vf_vlan.tag && handle->vf_vlan.created)
1920 			mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
1921 	}
1922 	rte_errno = err; /* Restore rte_errno. */
1923 	return -rte_errno;
1924 }
1925 
1926 /**
1927  * Query a flow.
1928  *
1929  * @see rte_flow_query()
1930  * @see rte_flow_ops
1931  */
1932 static int
1933 flow_verbs_query(struct rte_eth_dev *dev,
1934 		 struct rte_flow *flow,
1935 		 const struct rte_flow_action *actions,
1936 		 void *data,
1937 		 struct rte_flow_error *error)
1938 {
1939 	int ret = -EINVAL;
1940 
1941 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1942 		switch (actions->type) {
1943 		case RTE_FLOW_ACTION_TYPE_VOID:
1944 			break;
1945 		case RTE_FLOW_ACTION_TYPE_COUNT:
1946 			ret = flow_verbs_counter_query(dev, flow, data, error);
1947 			break;
1948 		default:
1949 			return rte_flow_error_set(error, ENOTSUP,
1950 						  RTE_FLOW_ERROR_TYPE_ACTION,
1951 						  actions,
1952 						  "action not supported");
1953 		}
1954 	}
1955 	return ret;
1956 }
1957 
1958 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
1959 	.validate = flow_verbs_validate,
1960 	.prepare = flow_verbs_prepare,
1961 	.translate = flow_verbs_translate,
1962 	.apply = flow_verbs_apply,
1963 	.remove = flow_verbs_remove,
1964 	.destroy = flow_verbs_destroy,
1965 	.query = flow_verbs_query,
1966 };
1967