xref: /dpdk/drivers/net/mlx5/mlx5_flow_verbs.c (revision cb440babbd45a80c059f8bc80e87c48d09086fd7)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4 
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10 
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20 
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28 
29 #include <mlx5_glue.h>
30 #include <mlx5_prm.h>
31 
32 #include "mlx5_defs.h"
33 #include "mlx5.h"
34 #include "mlx5_flow.h"
35 #include "mlx5_rxtx.h"
36 
/* Yield IBV_FLOW_SPEC_INNER when the item belongs to a tunneled (inner)
 * header stack, so the Verbs spec matches inner instead of outer headers.
 */
#define VERBS_SPEC_INNER(item_flags) \
	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
39 
40 /**
41  * Get Verbs flow counter by index.
42  *
43  * @param[in] dev
44  *   Pointer to the Ethernet device structure.
45  * @param[in] idx
46  *   mlx5 flow counter index in the container.
47  * @param[out] ppool
48  *   mlx5 flow counter pool in the container,
49  *
50  * @return
51  *   A pointer to the counter, NULL otherwise.
52  */
53 static struct mlx5_flow_counter *
54 flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev,
55 			      uint32_t idx,
56 			      struct mlx5_flow_counter_pool **ppool)
57 {
58 	struct mlx5_priv *priv = dev->data->dev_private;
59 	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
60 	struct mlx5_flow_counter_pool *pool;
61 
62 	idx--;
63 	pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
64 	MLX5_ASSERT(pool);
65 	if (ppool)
66 		*ppool = pool;
67 	return MLX5_POOL_GET_CNT(pool, idx % MLX5_COUNTERS_PER_POOL);
68 }
69 
70 /**
71  * Create Verbs flow counter with Verbs library.
72  *
73  * @param[in] dev
74  *   Pointer to the Ethernet device structure.
75  * @param[in, out] counter
76  *   mlx5 flow counter object, contains the counter id,
77  *   handle of created Verbs flow counter is returned
78  *   in cs field (if counters are supported).
79  *
80  * @return
81  *   0 On success else a negative errno value is returned
82  *   and rte_errno is set.
83  */
static int
flow_verbs_counter_create(struct rte_eth_dev *dev,
			  struct mlx5_flow_counter_ext *counter)
{
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
	/* Legacy (rdma-core v4.2) counter-set API: one set per counter id. */
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->ctx;
	struct ibv_counter_set_init_attr init = {
			 .counter_set_id = counter->id};

	counter->cs = mlx5_glue->create_counter_set(ctx, &init);
	if (!counter->cs) {
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	return 0;
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	/* Modern (rdma-core v4.5) counters API: create the object, then
	 * attach a packet counter at index 0 and a byte counter at index 1.
	 * The query path (flow_verbs_counter_query) relies on this layout.
	 */
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->ctx;
	struct ibv_counters_init_attr init = {0};
	struct ibv_counter_attach_attr attach;
	int ret;

	memset(&attach, 0, sizeof(attach));
	counter->cs = mlx5_glue->create_counters(ctx, &init);
	if (!counter->cs) {
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	attach.counter_desc = IBV_COUNTER_PACKETS;
	attach.index = 0;
	ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
	if (!ret) {
		attach.counter_desc = IBV_COUNTER_BYTES;
		attach.index = 1;
		ret = mlx5_glue->attach_counters
					(counter->cs, &attach, NULL);
	}
	if (ret) {
		/* Attach failed: release the half-initialized object so
		 * the caller never sees a dangling handle.
		 */
		claim_zero(mlx5_glue->destroy_counters(counter->cs));
		counter->cs = NULL;
		rte_errno = ret;
		return -ret;
	}
	return 0;
#else
	/* No Verbs counter support compiled into rdma-core. */
	(void)dev;
	(void)counter;
	rte_errno = ENOTSUP;
	return -ENOTSUP;
#endif
}
136 
137 /**
138  * Get a flow counter.
139  *
140  * @param[in] dev
141  *   Pointer to the Ethernet device structure.
142  * @param[in] shared
143  *   Indicate if this counter is shared with other flows.
144  * @param[in] id
145  *   Counter identifier.
146  *
147  * @return
148  *   Index to the counter, 0 otherwise and rte_errno is set.
149  */
static uint32_t
flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
	struct mlx5_flow_counter_pool *pool = NULL;
	struct mlx5_flow_counter_ext *cnt_ext = NULL;
	struct mlx5_flow_counter *cnt = NULL;
	uint32_t n_valid = rte_atomic16_read(&cont->n_valid);
	uint32_t pool_idx;
	uint32_t i;
	int ret;

	/* Shared counter: reuse an existing one with the same id, if any,
	 * by bumping its reference count.
	 */
	if (shared) {
		for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) {
			pool = cont->pools[pool_idx];
			for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
				cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i);
				if (cnt_ext->shared && cnt_ext->id == id) {
					cnt_ext->ref_cnt++;
					return MLX5_MAKE_CNT_IDX(pool_idx, i);
				}
			}
		}
	}
	/* Look for the first pool with a counter on its free list. */
	for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) {
		pool = cont->pools[pool_idx];
		if (!pool)
			continue;
		cnt = TAILQ_FIRST(&pool->counters[0]);
		if (cnt)
			break;
	}
	/* No free counter anywhere: grow the container with a new pool. */
	if (!cnt) {
		struct mlx5_flow_counter_pool **pools;
		uint32_t size;

		if (n_valid == cont->n) {
			/* Resize the container pool array. */
			size = sizeof(struct mlx5_flow_counter_pool *) *
				     (n_valid + MLX5_CNT_CONTAINER_RESIZE);
			pools = rte_zmalloc(__func__, size, 0);
			if (!pools)
				return 0;
			if (n_valid) {
				memcpy(pools, cont->pools,
				       sizeof(struct mlx5_flow_counter_pool *) *
				       n_valid);
				rte_free(cont->pools);
			}
			cont->pools = pools;
			cont->n += MLX5_CNT_CONTAINER_RESIZE;
		}
		/* Allocate memory for new pool*/
		size = sizeof(*pool) + (sizeof(*cnt_ext) + sizeof(*cnt)) *
		       MLX5_COUNTERS_PER_POOL;
		pool = rte_calloc(__func__, 1, size, 0);
		if (!pool)
			return 0;
		pool->type |= CNT_POOL_TYPE_EXT;
		/* Thread every counter of the new pool onto its free list. */
		for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
			cnt = MLX5_POOL_GET_CNT(pool, i);
			TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
		}
		cnt = MLX5_POOL_GET_CNT(pool, 0);
		cont->pools[n_valid] = pool;
		pool_idx = n_valid;
		rte_atomic16_add(&cont->n_valid, 1);
		TAILQ_INSERT_HEAD(&cont->pool_list, pool, next);
	}
	/* Initialize the selected counter and its extension record. */
	i = MLX5_CNT_ARRAY_IDX(pool, cnt);
	cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i);
	cnt_ext->id = id;
	cnt_ext->shared = shared;
	cnt_ext->ref_cnt = 1;
	cnt->hits = 0;
	cnt->bytes = 0;
	/* Create counter with Verbs. */
	ret = flow_verbs_counter_create(dev, cnt_ext);
	if (!ret) {
		/* Success: the counter leaves the free list until released. */
		TAILQ_REMOVE(&pool->counters[0], cnt, next);
		return MLX5_MAKE_CNT_IDX(pool_idx, i);
	}
	/* Some error occurred in Verbs library. */
	rte_errno = -ret;
	return 0;
}
237 
238 /**
239  * Release a flow counter.
240  *
241  * @param[in] dev
242  *   Pointer to the Ethernet device structure.
243  * @param[in] counter
244  *   Index to the counter handler.
245  */
static void
flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter)
{
	struct mlx5_flow_counter_pool *pool;
	struct mlx5_flow_counter *cnt;
	struct mlx5_flow_counter_ext *cnt_ext;

	cnt = flow_verbs_counter_get_by_idx(dev, counter,
					    &pool);
	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
	/* Destroy the Verbs object and return the counter to the pool's
	 * free list only when the last reference is dropped.
	 */
	if (--cnt_ext->ref_cnt == 0) {
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
		claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs));
		cnt_ext->cs = NULL;
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
		claim_zero(mlx5_glue->destroy_counters(cnt_ext->cs));
		cnt_ext->cs = NULL;
#endif
		TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
	}
}
267 
268 /**
269  * Query a flow counter via Verbs library call.
270  *
271  * @see rte_flow_query()
272  * @see rte_flow_ops
273  */
static int
flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
			 struct rte_flow *flow, void *data,
			 struct rte_flow_error *error)
{
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	if (flow->counter) {
		struct mlx5_flow_counter_pool *pool;
		struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx
						(dev, flow->counter, &pool);
		struct mlx5_flow_counter_ext *cnt_ext = MLX5_CNT_TO_CNT_EXT
						(pool, cnt);
		struct rte_flow_query_count *qc = data;
		/* counters[0] = packets, counters[1] = bytes — this matches
		 * the attach order in flow_verbs_counter_create().
		 */
		uint64_t counters[2] = {0, 0};
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
		struct ibv_query_counter_set_attr query_cs_attr = {
			.cs = cnt_ext->cs,
			.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
		};
		struct ibv_counter_set_data query_out = {
			.out = counters,
			.outlen = 2 * sizeof(uint64_t),
		};
		int err = mlx5_glue->query_counter_set(&query_cs_attr,
						       &query_out);
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
		int err = mlx5_glue->query_counters
			       (cnt_ext->cs, counters,
				RTE_DIM(counters),
				IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
#endif
		if (err)
			return rte_flow_error_set
				(error, err,
				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				 NULL,
				 "cannot read counter");
		qc->hits_set = 1;
		qc->bytes_set = 1;
		/* Hardware counters are cumulative; report values relative
		 * to the last reset snapshot stored in cnt.
		 */
		qc->hits = counters[0] - cnt->hits;
		qc->bytes = counters[1] - cnt->bytes;
		if (qc->reset) {
			cnt->hits = counters[0];
			cnt->bytes = counters[1];
		}
		return 0;
	}
	return rte_flow_error_set(error, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL,
				  "flow does not have counter");
#else
	(void)flow;
	(void)data;
	return rte_flow_error_set(error, ENOTSUP,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL,
				  "counters are not available");
#endif
}
335 
336 /**
337  * Add a verbs item specification into @p verbs.
338  *
339  * @param[out] verbs
340  *   Pointer to verbs structure.
341  * @param[in] src
342  *   Create specification.
343  * @param[in] size
344  *   Size in bytes of the specification to copy.
345  */
346 static void
347 flow_verbs_spec_add(struct mlx5_flow_verbs_workspace *verbs,
348 		    void *src, unsigned int size)
349 {
350 	void *dst;
351 
352 	if (!verbs)
353 		return;
354 	MLX5_ASSERT(verbs->specs);
355 	dst = (void *)(verbs->specs + verbs->size);
356 	memcpy(dst, src, size);
357 	++verbs->attr.num_of_specs;
358 	verbs->size += size;
359 }
360 
361 /**
362  * Convert the @p item into a Verbs specification. This function assumes that
363  * the input is valid and that there is space to insert the requested item
364  * into the flow.
365  *
366  * @param[in, out] dev_flow
367  *   Pointer to dev_flow structure.
368  * @param[in] item
369  *   Item specification.
370  * @param[in] item_flags
371  *   Parsed item flags.
372  */
373 static void
374 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
375 			      const struct rte_flow_item *item,
376 			      uint64_t item_flags)
377 {
378 	const struct rte_flow_item_eth *spec = item->spec;
379 	const struct rte_flow_item_eth *mask = item->mask;
380 	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
381 	struct ibv_flow_spec_eth eth = {
382 		.type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
383 		.size = size,
384 	};
385 
386 	if (!mask)
387 		mask = &rte_flow_item_eth_mask;
388 	if (spec) {
389 		unsigned int i;
390 
391 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes,
392 			RTE_ETHER_ADDR_LEN);
393 		memcpy(&eth.val.src_mac, spec->src.addr_bytes,
394 			RTE_ETHER_ADDR_LEN);
395 		eth.val.ether_type = spec->type;
396 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes,
397 			RTE_ETHER_ADDR_LEN);
398 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes,
399 			RTE_ETHER_ADDR_LEN);
400 		eth.mask.ether_type = mask->type;
401 		/* Remove unwanted bits from values. */
402 		for (i = 0; i < RTE_ETHER_ADDR_LEN; ++i) {
403 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
404 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
405 		}
406 		eth.val.ether_type &= eth.mask.ether_type;
407 	}
408 	flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
409 }
410 
411 /**
412  * Update the VLAN tag in the Verbs Ethernet specification.
413  * This function assumes that the input is valid and there is space to add
414  * the requested item.
415  *
416  * @param[in, out] attr
417  *   Pointer to Verbs attributes structure.
418  * @param[in] eth
419  *   Verbs structure containing the VLAN information to copy.
420  */
421 static void
422 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
423 			    struct ibv_flow_spec_eth *eth)
424 {
425 	unsigned int i;
426 	const enum ibv_flow_spec_type search = eth->type;
427 	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
428 		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
429 
430 	for (i = 0; i != attr->num_of_specs; ++i) {
431 		if (hdr->type == search) {
432 			struct ibv_flow_spec_eth *e =
433 				(struct ibv_flow_spec_eth *)hdr;
434 
435 			e->val.vlan_tag = eth->val.vlan_tag;
436 			e->mask.vlan_tag = eth->mask.vlan_tag;
437 			e->val.ether_type = eth->val.ether_type;
438 			e->mask.ether_type = eth->mask.ether_type;
439 			break;
440 		}
441 		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
442 	}
443 }
444 
445 /**
446  * Convert the @p item into a Verbs specification. This function assumes that
447  * the input is valid and that there is space to insert the requested item
448  * into the flow.
449  *
450  * @param[in, out] dev_flow
451  *   Pointer to dev_flow structure.
452  * @param[in] item
453  *   Item specification.
454  * @param[in] item_flags
455  *   Parsed item flags.
456  */
457 static void
458 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
459 			       const struct rte_flow_item *item,
460 			       uint64_t item_flags)
461 {
462 	const struct rte_flow_item_vlan *spec = item->spec;
463 	const struct rte_flow_item_vlan *mask = item->mask;
464 	unsigned int size = sizeof(struct ibv_flow_spec_eth);
465 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
466 	struct ibv_flow_spec_eth eth = {
467 		.type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
468 		.size = size,
469 	};
470 	const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
471 				      MLX5_FLOW_LAYER_OUTER_L2;
472 
473 	if (!mask)
474 		mask = &rte_flow_item_vlan_mask;
475 	if (spec) {
476 		eth.val.vlan_tag = spec->tci;
477 		eth.mask.vlan_tag = mask->tci;
478 		eth.val.vlan_tag &= eth.mask.vlan_tag;
479 		eth.val.ether_type = spec->inner_type;
480 		eth.mask.ether_type = mask->inner_type;
481 		eth.val.ether_type &= eth.mask.ether_type;
482 	}
483 	if (!(item_flags & l2m))
484 		flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
485 	else
486 		flow_verbs_item_vlan_update(&dev_flow->verbs.attr, &eth);
487 	if (!tunnel)
488 		dev_flow->handle->vf_vlan.tag =
489 			rte_be_to_cpu_16(spec->tci) & 0x0fff;
490 }
491 
492 /**
493  * Convert the @p item into a Verbs specification. This function assumes that
494  * the input is valid and that there is space to insert the requested item
495  * into the flow.
496  *
497  * @param[in, out] dev_flow
498  *   Pointer to dev_flow structure.
499  * @param[in] item
500  *   Item specification.
501  * @param[in] item_flags
502  *   Parsed item flags.
503  */
504 static void
505 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
506 			       const struct rte_flow_item *item,
507 			       uint64_t item_flags)
508 {
509 	const struct rte_flow_item_ipv4 *spec = item->spec;
510 	const struct rte_flow_item_ipv4 *mask = item->mask;
511 	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
512 	struct ibv_flow_spec_ipv4_ext ipv4 = {
513 		.type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
514 		.size = size,
515 	};
516 
517 	if (!mask)
518 		mask = &rte_flow_item_ipv4_mask;
519 	if (spec) {
520 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
521 			.src_ip = spec->hdr.src_addr,
522 			.dst_ip = spec->hdr.dst_addr,
523 			.proto = spec->hdr.next_proto_id,
524 			.tos = spec->hdr.type_of_service,
525 		};
526 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
527 			.src_ip = mask->hdr.src_addr,
528 			.dst_ip = mask->hdr.dst_addr,
529 			.proto = mask->hdr.next_proto_id,
530 			.tos = mask->hdr.type_of_service,
531 		};
532 		/* Remove unwanted bits from values. */
533 		ipv4.val.src_ip &= ipv4.mask.src_ip;
534 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
535 		ipv4.val.proto &= ipv4.mask.proto;
536 		ipv4.val.tos &= ipv4.mask.tos;
537 	}
538 	flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
539 }
540 
541 /**
542  * Convert the @p item into a Verbs specification. This function assumes that
543  * the input is valid and that there is space to insert the requested item
544  * into the flow.
545  *
546  * @param[in, out] dev_flow
547  *   Pointer to dev_flow structure.
548  * @param[in] item
549  *   Item specification.
550  * @param[in] item_flags
551  *   Parsed item flags.
552  */
553 static void
554 flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
555 			       const struct rte_flow_item *item,
556 			       uint64_t item_flags)
557 {
558 	const struct rte_flow_item_ipv6 *spec = item->spec;
559 	const struct rte_flow_item_ipv6 *mask = item->mask;
560 	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
561 	struct ibv_flow_spec_ipv6 ipv6 = {
562 		.type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
563 		.size = size,
564 	};
565 
566 	if (!mask)
567 		mask = &rte_flow_item_ipv6_mask;
568 	if (spec) {
569 		unsigned int i;
570 		uint32_t vtc_flow_val;
571 		uint32_t vtc_flow_mask;
572 
573 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
574 		       RTE_DIM(ipv6.val.src_ip));
575 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
576 		       RTE_DIM(ipv6.val.dst_ip));
577 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
578 		       RTE_DIM(ipv6.mask.src_ip));
579 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
580 		       RTE_DIM(ipv6.mask.dst_ip));
581 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
582 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
583 		ipv6.val.flow_label =
584 			rte_cpu_to_be_32((vtc_flow_val & RTE_IPV6_HDR_FL_MASK) >>
585 					 RTE_IPV6_HDR_FL_SHIFT);
586 		ipv6.val.traffic_class = (vtc_flow_val & RTE_IPV6_HDR_TC_MASK) >>
587 					 RTE_IPV6_HDR_TC_SHIFT;
588 		ipv6.val.next_hdr = spec->hdr.proto;
589 		ipv6.mask.flow_label =
590 			rte_cpu_to_be_32((vtc_flow_mask & RTE_IPV6_HDR_FL_MASK) >>
591 					 RTE_IPV6_HDR_FL_SHIFT);
592 		ipv6.mask.traffic_class = (vtc_flow_mask & RTE_IPV6_HDR_TC_MASK) >>
593 					  RTE_IPV6_HDR_TC_SHIFT;
594 		ipv6.mask.next_hdr = mask->hdr.proto;
595 		/* Remove unwanted bits from values. */
596 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
597 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
598 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
599 		}
600 		ipv6.val.flow_label &= ipv6.mask.flow_label;
601 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
602 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
603 	}
604 	flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
605 }
606 
607 /**
608  * Convert the @p item into a Verbs specification. This function assumes that
609  * the input is valid and that there is space to insert the requested item
610  * into the flow.
611  *
612  * @param[in, out] dev_flow
613  *   Pointer to dev_flow structure.
614  * @param[in] item
615  *   Item specification.
616  * @param[in] item_flags
617  *   Parsed item flags.
618  */
619 static void
620 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
621 			      const struct rte_flow_item *item,
622 			      uint64_t item_flags __rte_unused)
623 {
624 	const struct rte_flow_item_tcp *spec = item->spec;
625 	const struct rte_flow_item_tcp *mask = item->mask;
626 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
627 	struct ibv_flow_spec_tcp_udp tcp = {
628 		.type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
629 		.size = size,
630 	};
631 
632 	if (!mask)
633 		mask = &rte_flow_item_tcp_mask;
634 	if (spec) {
635 		tcp.val.dst_port = spec->hdr.dst_port;
636 		tcp.val.src_port = spec->hdr.src_port;
637 		tcp.mask.dst_port = mask->hdr.dst_port;
638 		tcp.mask.src_port = mask->hdr.src_port;
639 		/* Remove unwanted bits from values. */
640 		tcp.val.src_port &= tcp.mask.src_port;
641 		tcp.val.dst_port &= tcp.mask.dst_port;
642 	}
643 	flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
644 }
645 
646 /**
647  * Convert the @p item into a Verbs specification. This function assumes that
648  * the input is valid and that there is space to insert the requested item
649  * into the flow.
650  *
651  * @param[in, out] dev_flow
652  *   Pointer to dev_flow structure.
653  * @param[in] item
654  *   Item specification.
655  * @param[in] item_flags
656  *   Parsed item flags.
657  */
658 static void
659 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
660 			      const struct rte_flow_item *item,
661 			      uint64_t item_flags __rte_unused)
662 {
663 	const struct rte_flow_item_udp *spec = item->spec;
664 	const struct rte_flow_item_udp *mask = item->mask;
665 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
666 	struct ibv_flow_spec_tcp_udp udp = {
667 		.type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
668 		.size = size,
669 	};
670 
671 	if (!mask)
672 		mask = &rte_flow_item_udp_mask;
673 	if (spec) {
674 		udp.val.dst_port = spec->hdr.dst_port;
675 		udp.val.src_port = spec->hdr.src_port;
676 		udp.mask.dst_port = mask->hdr.dst_port;
677 		udp.mask.src_port = mask->hdr.src_port;
678 		/* Remove unwanted bits from values. */
679 		udp.val.src_port &= udp.mask.src_port;
680 		udp.val.dst_port &= udp.mask.dst_port;
681 	}
682 	item++;
683 	while (item->type == RTE_FLOW_ITEM_TYPE_VOID)
684 		item++;
685 	if (!(udp.val.dst_port & udp.mask.dst_port)) {
686 		switch ((item)->type) {
687 		case RTE_FLOW_ITEM_TYPE_VXLAN:
688 			udp.val.dst_port = htons(MLX5_UDP_PORT_VXLAN);
689 			udp.mask.dst_port = 0xffff;
690 			break;
691 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
692 			udp.val.dst_port = htons(MLX5_UDP_PORT_VXLAN_GPE);
693 			udp.mask.dst_port = 0xffff;
694 			break;
695 		case RTE_FLOW_ITEM_TYPE_MPLS:
696 			udp.val.dst_port = htons(MLX5_UDP_PORT_MPLS);
697 			udp.mask.dst_port = 0xffff;
698 			break;
699 		default:
700 			break;
701 		}
702 	}
703 
704 	flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
705 }
706 
707 /**
708  * Convert the @p item into a Verbs specification. This function assumes that
709  * the input is valid and that there is space to insert the requested item
710  * into the flow.
711  *
712  * @param[in, out] dev_flow
713  *   Pointer to dev_flow structure.
714  * @param[in] item
715  *   Item specification.
716  * @param[in] item_flags
717  *   Parsed item flags.
718  */
719 static void
720 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
721 				const struct rte_flow_item *item,
722 				uint64_t item_flags __rte_unused)
723 {
724 	const struct rte_flow_item_vxlan *spec = item->spec;
725 	const struct rte_flow_item_vxlan *mask = item->mask;
726 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
727 	struct ibv_flow_spec_tunnel vxlan = {
728 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
729 		.size = size,
730 	};
731 	union vni {
732 		uint32_t vlan_id;
733 		uint8_t vni[4];
734 	} id = { .vlan_id = 0, };
735 
736 	if (!mask)
737 		mask = &rte_flow_item_vxlan_mask;
738 	if (spec) {
739 		memcpy(&id.vni[1], spec->vni, 3);
740 		vxlan.val.tunnel_id = id.vlan_id;
741 		memcpy(&id.vni[1], mask->vni, 3);
742 		vxlan.mask.tunnel_id = id.vlan_id;
743 		/* Remove unwanted bits from values. */
744 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
745 	}
746 	flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
747 }
748 
749 /**
750  * Convert the @p item into a Verbs specification. This function assumes that
751  * the input is valid and that there is space to insert the requested item
752  * into the flow.
753  *
754  * @param[in, out] dev_flow
755  *   Pointer to dev_flow structure.
756  * @param[in] item
757  *   Item specification.
758  * @param[in] item_flags
759  *   Parsed item flags.
760  */
761 static void
762 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
763 				    const struct rte_flow_item *item,
764 				    uint64_t item_flags __rte_unused)
765 {
766 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
767 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
768 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
769 	struct ibv_flow_spec_tunnel vxlan_gpe = {
770 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
771 		.size = size,
772 	};
773 	union vni {
774 		uint32_t vlan_id;
775 		uint8_t vni[4];
776 	} id = { .vlan_id = 0, };
777 
778 	if (!mask)
779 		mask = &rte_flow_item_vxlan_gpe_mask;
780 	if (spec) {
781 		memcpy(&id.vni[1], spec->vni, 3);
782 		vxlan_gpe.val.tunnel_id = id.vlan_id;
783 		memcpy(&id.vni[1], mask->vni, 3);
784 		vxlan_gpe.mask.tunnel_id = id.vlan_id;
785 		/* Remove unwanted bits from values. */
786 		vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
787 	}
788 	flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
789 }
790 
791 /**
792  * Update the protocol in Verbs IPv4/IPv6 spec.
793  *
794  * @param[in, out] attr
795  *   Pointer to Verbs attributes structure.
796  * @param[in] search
797  *   Specification type to search in order to update the IP protocol.
798  * @param[in] protocol
799  *   Protocol value to set if none is present in the specification.
800  */
801 static void
802 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
803 				       enum ibv_flow_spec_type search,
804 				       uint8_t protocol)
805 {
806 	unsigned int i;
807 	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
808 		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
809 
810 	if (!attr)
811 		return;
812 	for (i = 0; i != attr->num_of_specs; ++i) {
813 		if (hdr->type == search) {
814 			union {
815 				struct ibv_flow_spec_ipv4_ext *ipv4;
816 				struct ibv_flow_spec_ipv6 *ipv6;
817 			} ip;
818 
819 			switch (search) {
820 			case IBV_FLOW_SPEC_IPV4_EXT:
821 				ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
822 				if (!ip.ipv4->val.proto) {
823 					ip.ipv4->val.proto = protocol;
824 					ip.ipv4->mask.proto = 0xff;
825 				}
826 				break;
827 			case IBV_FLOW_SPEC_IPV6:
828 				ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
829 				if (!ip.ipv6->val.next_hdr) {
830 					ip.ipv6->val.next_hdr = protocol;
831 					ip.ipv6->mask.next_hdr = 0xff;
832 				}
833 				break;
834 			default:
835 				break;
836 			}
837 			break;
838 		}
839 		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
840 	}
841 }
842 
843 /**
844  * Convert the @p item into a Verbs specification. This function assumes that
845  * the input is valid and that there is space to insert the requested item
846  * into the flow.
847  *
848  * @param[in, out] dev_flow
849  *   Pointer to dev_flow structure.
850  * @param[in] item
851  *   Item specification.
852  * @param[in] item_flags
853  *   Parsed item flags.
854  */
static void
flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
			      const struct rte_flow_item *item __rte_unused,
			      uint64_t item_flags)
{
	struct mlx5_flow_verbs_workspace *verbs = &dev_flow->verbs;
#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
	/* Without MPLS support rdma-core has no dedicated GRE spec; fall
	 * back to a generic tunnel spec (spec/mask ignored).
	 */
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel tunnel = {
		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
#else
	const struct rte_flow_item_gre *spec = item->spec;
	const struct rte_flow_item_gre *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_gre);
	struct ibv_flow_spec_gre tunnel = {
		.type = IBV_FLOW_SPEC_GRE,
		.size = size,
	};

	if (!mask)
		mask = &rte_flow_item_gre_mask;
	if (spec) {
		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
		tunnel.val.protocol = spec->protocol;
		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
		tunnel.mask.protocol = mask->protocol;
		/* Remove unwanted bits from values. */
		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
		tunnel.val.protocol &= tunnel.mask.protocol;
		tunnel.val.key &= tunnel.mask.key;
	}
#endif
	/* Force the already-emitted outer L3 spec to match IPPROTO_GRE
	 * when the pattern left the IP protocol unspecified.
	 */
	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
		flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
						       IBV_FLOW_SPEC_IPV4_EXT,
						       IPPROTO_GRE);
	else
		flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
						       IBV_FLOW_SPEC_IPV6,
						       IPPROTO_GRE);
	flow_verbs_spec_add(verbs, &tunnel, size);
}
899 
/**
 * Convert the @p item into a Verbs specification. This function assumes that
 * the input is valid and that there is space to insert the requested item
 * into the flow.
 *
 * @param[in, out] dev_flow
 *   Pointer to dev_flow structure.
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Parsed item flags.
 */
static void
flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
			       const struct rte_flow_item *item __rte_unused,
			       uint64_t item_flags __rte_unused)
{
#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
	const struct rte_flow_item_mpls *spec = item->spec;
	const struct rte_flow_item_mpls *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_mpls);
	struct ibv_flow_spec_mpls mpls = {
		.type = IBV_FLOW_SPEC_MPLS,
		.size = size,
	};

	if (!mask)
		mask = &rte_flow_item_mpls_mask;
	if (spec) {
		/* The MPLS item holds a raw 32-bit label stack entry;
		 * copy it byte-for-byte into the Verbs label field.
		 */
		memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
		memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
		/* Remove unwanted bits from values.  */
		mpls.val.label &= mpls.mask.label;
	}
	flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
#endif
	/* Without MPLS support in rdma-core this item is silently ignored. */
}
937 
938 /**
939  * Convert the @p action into a Verbs specification. This function assumes that
940  * the input is valid and that there is space to insert the requested action
941  * into the flow.
942  *
943  * @param[in] dev_flow
944  *   Pointer to mlx5_flow.
945  * @param[in] action
946  *   Action configuration.
947  */
948 static void
949 flow_verbs_translate_action_drop
950 	(struct mlx5_flow *dev_flow,
951 	 const struct rte_flow_action *action __rte_unused)
952 {
953 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
954 	struct ibv_flow_spec_action_drop drop = {
955 			.type = IBV_FLOW_SPEC_ACTION_DROP,
956 			.size = size,
957 	};
958 
959 	flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
960 }
961 
962 /**
963  * Convert the @p action into a Verbs specification. This function assumes that
964  * the input is valid and that there is space to insert the requested action
965  * into the flow.
966  *
967  * @param[in] rss_desc
968  *   Pointer to mlx5_flow_rss_desc.
969  * @param[in] action
970  *   Action configuration.
971  */
972 static void
973 flow_verbs_translate_action_queue(struct mlx5_flow_rss_desc *rss_desc,
974 				  const struct rte_flow_action *action)
975 {
976 	const struct rte_flow_action_queue *queue = action->conf;
977 
978 	rss_desc->queue[0] = queue->index;
979 	rss_desc->queue_num = 1;
980 }
981 
982 /**
983  * Convert the @p action into a Verbs specification. This function assumes that
984  * the input is valid and that there is space to insert the requested action
985  * into the flow.
986  *
987  * @param[in] rss_desc
988  *   Pointer to mlx5_flow_rss_desc.
989  * @param[in] action
990  *   Action configuration.
991  */
992 static void
993 flow_verbs_translate_action_rss(struct mlx5_flow_rss_desc *rss_desc,
994 				const struct rte_flow_action *action)
995 {
996 	const struct rte_flow_action_rss *rss = action->conf;
997 	const uint8_t *rss_key;
998 
999 	memcpy(rss_desc->queue, rss->queue, rss->queue_num * sizeof(uint16_t));
1000 	rss_desc->queue_num = rss->queue_num;
1001 	/* NULL RSS key indicates default RSS key. */
1002 	rss_key = !rss->key ? rss_hash_default_key : rss->key;
1003 	memcpy(rss_desc->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
1004 	/*
1005 	 * rss->level and rss.types should be set in advance when expanding
1006 	 * items for RSS.
1007 	 */
1008 }
1009 
1010 /**
1011  * Convert the @p action into a Verbs specification. This function assumes that
1012  * the input is valid and that there is space to insert the requested action
1013  * into the flow.
1014  *
1015  * @param[in] dev_flow
1016  *   Pointer to mlx5_flow.
1017  * @param[in] action
1018  *   Action configuration.
1019  */
1020 static void
1021 flow_verbs_translate_action_flag
1022 	(struct mlx5_flow *dev_flow,
1023 	 const struct rte_flow_action *action __rte_unused)
1024 {
1025 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1026 	struct ibv_flow_spec_action_tag tag = {
1027 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1028 		.size = size,
1029 		.tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1030 	};
1031 
1032 	flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1033 }
1034 
1035 /**
1036  * Convert the @p action into a Verbs specification. This function assumes that
1037  * the input is valid and that there is space to insert the requested action
1038  * into the flow.
1039  *
1040  * @param[in] dev_flow
1041  *   Pointer to mlx5_flow.
1042  * @param[in] action
1043  *   Action configuration.
1044  */
1045 static void
1046 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
1047 				 const struct rte_flow_action *action)
1048 {
1049 	const struct rte_flow_action_mark *mark = action->conf;
1050 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1051 	struct ibv_flow_spec_action_tag tag = {
1052 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1053 		.size = size,
1054 		.tag_id = mlx5_flow_mark_set(mark->id),
1055 	};
1056 
1057 	flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1058 }
1059 
/**
 * Convert the @p action into a Verbs specification. This function assumes that
 * the input is valid and that there is space to insert the requested action
 * into the flow.
 *
 * @param[in] dev_flow
 *   Pointer to mlx5_flow.
 * @param[in] action
 *   Action configuration.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 On success else a negative errno value is returned and rte_errno is set.
 */
static int
flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
				  const struct rte_flow_action *action,
				  struct rte_eth_dev *dev,
				  struct rte_flow_error *error)
{
	const struct rte_flow_action_count *count = action->conf;
	struct rte_flow *flow = dev_flow->flow;
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	struct mlx5_flow_counter_pool *pool;
	struct mlx5_flow_counter *cnt = NULL;
	struct mlx5_flow_counter_ext *cnt_ext;
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.size = size,
	};
#endif

	/* Allocate (or look up a shared) counter on first use for this flow. */
	if (!flow->counter) {
		flow->counter = flow_verbs_counter_new(dev, count->shared,
						       count->id);
		if (!flow->counter)
			return rte_flow_error_set(error, rte_errno,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  action,
						  "cannot get counter"
						  " context.");
	}
	/*
	 * The Verbs counter spec field differs between library versions:
	 * v42 takes a counter set handle, v45 takes the counters object.
	 * Without either, no spec is emitted and the action is a no-op.
	 */
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
	cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
	counter.counter_set_handle = cnt_ext->cs->handle;
	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
	counter.counters = cnt_ext->cs;
	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
#endif
	return 0;
}
1120 
/**
 * Internal validation function. For validating both actions and items.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[in] external
 *   This flow rule is created by request external to PMD.
 * @param[in] hairpin
 *   Number of hairpin TX actions, 0 means classic flow.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_verbs_validate(struct rte_eth_dev *dev,
		    const struct rte_flow_attr *attr,
		    const struct rte_flow_item items[],
		    const struct rte_flow_action actions[],
		    bool external __rte_unused,
		    int hairpin __rte_unused,
		    struct rte_flow_error *error)
{
	int ret;
	uint64_t action_flags = 0;
	uint64_t item_flags = 0;
	uint64_t last_item = 0;
	/* L4 protocol expected after the preceding L3 item (0xff = any). */
	uint8_t next_protocol = 0xff;
	/* EtherType masked from the preceding L2 item (0 = unspecified). */
	uint16_t ether_type = 0;

	if (items == NULL)
		return -1;
	ret = mlx5_flow_validate_attributes(dev, attr, error);
	if (ret < 0)
		return ret;
	/*
	 * Walk the pattern: validate each item against the layers seen so
	 * far, and thread ether_type/next_protocol to the next item.
	 */
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
		/* NOTE(review): shadows the outer 'ret' within this loop. */
		int ret = 0;

		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			ret = mlx5_flow_validate_item_eth(items, item_flags,
							  error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
					     MLX5_FLOW_LAYER_OUTER_L2;
			/* Track the EtherType only when fully specified. */
			if (items->mask != NULL && items->spec != NULL) {
				ether_type =
					((const struct rte_flow_item_eth *)
					 items->spec)->type;
				ether_type &=
					((const struct rte_flow_item_eth *)
					 items->mask)->type;
				ether_type = rte_be_to_cpu_16(ether_type);
			} else {
				ether_type = 0;
			}
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			ret = mlx5_flow_validate_item_vlan(items, item_flags,
							   dev, error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
					      MLX5_FLOW_LAYER_INNER_VLAN) :
					     (MLX5_FLOW_LAYER_OUTER_L2 |
					      MLX5_FLOW_LAYER_OUTER_VLAN);
			/* VLAN supersedes the EtherType of the L2 item. */
			if (items->mask != NULL && items->spec != NULL) {
				ether_type =
					((const struct rte_flow_item_vlan *)
					 items->spec)->inner_type;
				ether_type &=
					((const struct rte_flow_item_vlan *)
					 items->mask)->inner_type;
				ether_type = rte_be_to_cpu_16(ether_type);
			} else {
				ether_type = 0;
			}
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
							   last_item,
							   ether_type, NULL,
							   error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
					     MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			if (items->mask != NULL &&
			    ((const struct rte_flow_item_ipv4 *)
			     items->mask)->hdr.next_proto_id) {
				next_protocol =
					((const struct rte_flow_item_ipv4 *)
					 (items->spec))->hdr.next_proto_id;
				next_protocol &=
					((const struct rte_flow_item_ipv4 *)
					 (items->mask))->hdr.next_proto_id;
			} else {
				/* Reset for inner layer. */
				next_protocol = 0xff;
			}
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
							   last_item,
							   ether_type, NULL,
							   error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
					     MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			if (items->mask != NULL &&
			    ((const struct rte_flow_item_ipv6 *)
			     items->mask)->hdr.proto) {
				next_protocol =
					((const struct rte_flow_item_ipv6 *)
					 items->spec)->hdr.proto;
				next_protocol &=
					((const struct rte_flow_item_ipv6 *)
					 items->mask)->hdr.proto;
			} else {
				/* Reset for inner layer. */
				next_protocol = 0xff;
			}
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			ret = mlx5_flow_validate_item_udp(items, item_flags,
							  next_protocol,
							  error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
					     MLX5_FLOW_LAYER_OUTER_L4_UDP;
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			ret = mlx5_flow_validate_item_tcp
						(items, item_flags,
						 next_protocol,
						 &rte_flow_item_tcp_mask,
						 error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
					     MLX5_FLOW_LAYER_OUTER_L4_TCP;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN:
			ret = mlx5_flow_validate_item_vxlan(items, item_flags,
							    error);
			if (ret < 0)
				return ret;
			last_item = MLX5_FLOW_LAYER_VXLAN;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
			ret = mlx5_flow_validate_item_vxlan_gpe(items,
								item_flags,
								dev, error);
			if (ret < 0)
				return ret;
			last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
			break;
		case RTE_FLOW_ITEM_TYPE_GRE:
			ret = mlx5_flow_validate_item_gre(items, item_flags,
							  next_protocol, error);
			if (ret < 0)
				return ret;
			last_item = MLX5_FLOW_LAYER_GRE;
			break;
		case RTE_FLOW_ITEM_TYPE_MPLS:
			ret = mlx5_flow_validate_item_mpls(dev, items,
							   item_flags,
							   last_item, error);
			if (ret < 0)
				return ret;
			last_item = MLX5_FLOW_LAYER_MPLS;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "item not supported");
		}
		item_flags |= last_item;
	}
	/* Validate each action against the flags accumulated so far. */
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_FLAG:
			ret = mlx5_flow_validate_action_flag(action_flags,
							     attr,
							     error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_FLAG;
			break;
		case RTE_FLOW_ACTION_TYPE_MARK:
			ret = mlx5_flow_validate_action_mark(actions,
							     action_flags,
							     attr,
							     error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_MARK;
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			ret = mlx5_flow_validate_action_drop(action_flags,
							     attr,
							     error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_DROP;
			break;
		case RTE_FLOW_ACTION_TYPE_QUEUE:
			ret = mlx5_flow_validate_action_queue(actions,
							      action_flags, dev,
							      attr,
							      error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_QUEUE;
			break;
		case RTE_FLOW_ACTION_TYPE_RSS:
			ret = mlx5_flow_validate_action_rss(actions,
							    action_flags, dev,
							    attr, item_flags,
							    error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_RSS;
			break;
		case RTE_FLOW_ACTION_TYPE_COUNT:
			ret = mlx5_flow_validate_action_count(dev, attr, error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_COUNT;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
	}
	/*
	 * Validate the drop action mutual exclusion with other actions.
	 * Drop action is mutually-exclusive with any other action, except for
	 * Count action.
	 */
	if ((action_flags & MLX5_FLOW_ACTION_DROP) &&
	    (action_flags & ~(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_COUNT)))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "Drop action is mutually-exclusive "
					  "with any other action, except for "
					  "Count action");
	/* Every flow must end with a fate action (drop/queue/rss). */
	if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
					  "no fate action is found");
	return 0;
}
1391 
1392 /**
1393  * Calculate the required bytes that are needed for the action part of the verbs
1394  * flow.
1395  *
1396  * @param[in] actions
1397  *   Pointer to the list of actions.
1398  *
1399  * @return
1400  *   The size of the memory needed for all actions.
1401  */
1402 static int
1403 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1404 {
1405 	int size = 0;
1406 
1407 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1408 		switch (actions->type) {
1409 		case RTE_FLOW_ACTION_TYPE_VOID:
1410 			break;
1411 		case RTE_FLOW_ACTION_TYPE_FLAG:
1412 			size += sizeof(struct ibv_flow_spec_action_tag);
1413 			break;
1414 		case RTE_FLOW_ACTION_TYPE_MARK:
1415 			size += sizeof(struct ibv_flow_spec_action_tag);
1416 			break;
1417 		case RTE_FLOW_ACTION_TYPE_DROP:
1418 			size += sizeof(struct ibv_flow_spec_action_drop);
1419 			break;
1420 		case RTE_FLOW_ACTION_TYPE_QUEUE:
1421 			break;
1422 		case RTE_FLOW_ACTION_TYPE_RSS:
1423 			break;
1424 		case RTE_FLOW_ACTION_TYPE_COUNT:
1425 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1426 	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1427 			size += sizeof(struct ibv_flow_spec_counter_action);
1428 #endif
1429 			break;
1430 		default:
1431 			break;
1432 		}
1433 	}
1434 	return size;
1435 }
1436 
1437 /**
1438  * Calculate the required bytes that are needed for the item part of the verbs
1439  * flow.
1440  *
1441  * @param[in] items
1442  *   Pointer to the list of items.
1443  *
1444  * @return
1445  *   The size of the memory needed for all items.
1446  */
1447 static int
1448 flow_verbs_get_items_size(const struct rte_flow_item items[])
1449 {
1450 	int size = 0;
1451 
1452 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1453 		switch (items->type) {
1454 		case RTE_FLOW_ITEM_TYPE_VOID:
1455 			break;
1456 		case RTE_FLOW_ITEM_TYPE_ETH:
1457 			size += sizeof(struct ibv_flow_spec_eth);
1458 			break;
1459 		case RTE_FLOW_ITEM_TYPE_VLAN:
1460 			size += sizeof(struct ibv_flow_spec_eth);
1461 			break;
1462 		case RTE_FLOW_ITEM_TYPE_IPV4:
1463 			size += sizeof(struct ibv_flow_spec_ipv4_ext);
1464 			break;
1465 		case RTE_FLOW_ITEM_TYPE_IPV6:
1466 			size += sizeof(struct ibv_flow_spec_ipv6);
1467 			break;
1468 		case RTE_FLOW_ITEM_TYPE_UDP:
1469 			size += sizeof(struct ibv_flow_spec_tcp_udp);
1470 			break;
1471 		case RTE_FLOW_ITEM_TYPE_TCP:
1472 			size += sizeof(struct ibv_flow_spec_tcp_udp);
1473 			break;
1474 		case RTE_FLOW_ITEM_TYPE_VXLAN:
1475 			size += sizeof(struct ibv_flow_spec_tunnel);
1476 			break;
1477 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1478 			size += sizeof(struct ibv_flow_spec_tunnel);
1479 			break;
1480 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1481 		case RTE_FLOW_ITEM_TYPE_GRE:
1482 			size += sizeof(struct ibv_flow_spec_gre);
1483 			break;
1484 		case RTE_FLOW_ITEM_TYPE_MPLS:
1485 			size += sizeof(struct ibv_flow_spec_mpls);
1486 			break;
1487 #else
1488 		case RTE_FLOW_ITEM_TYPE_GRE:
1489 			size += sizeof(struct ibv_flow_spec_tunnel);
1490 			break;
1491 #endif
1492 		default:
1493 			break;
1494 		}
1495 	}
1496 	return size;
1497 }
1498 
1499 /**
1500  * Internal preparation function. Allocate mlx5_flow with the required size.
1501  * The required size is calculate based on the actions and items. This function
1502  * also returns the detected actions and items for later use.
1503  *
1504  * @param[in] dev
1505  *   Pointer to Ethernet device.
1506  * @param[in] attr
1507  *   Pointer to the flow attributes.
1508  * @param[in] items
1509  *   Pointer to the list of items.
1510  * @param[in] actions
1511  *   Pointer to the list of actions.
1512  * @param[out] error
1513  *   Pointer to the error structure.
1514  *
1515  * @return
1516  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1517  *   is set.
1518  */
1519 static struct mlx5_flow *
1520 flow_verbs_prepare(struct rte_eth_dev *dev,
1521 		   const struct rte_flow_attr *attr __rte_unused,
1522 		   const struct rte_flow_item items[],
1523 		   const struct rte_flow_action actions[],
1524 		   struct rte_flow_error *error)
1525 {
1526 	size_t size = 0;
1527 	uint32_t handle_idx = 0;
1528 	struct mlx5_flow *dev_flow;
1529 	struct mlx5_flow_handle *dev_handle;
1530 	struct mlx5_priv *priv = dev->data->dev_private;
1531 
1532 	size += flow_verbs_get_actions_size(actions);
1533 	size += flow_verbs_get_items_size(items);
1534 	if (size > MLX5_VERBS_MAX_SPEC_ACT_SIZE) {
1535 		rte_flow_error_set(error, E2BIG,
1536 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1537 				   "Verbs spec/action size too large");
1538 		return NULL;
1539 	}
1540 	/* In case of corrupting the memory. */
1541 	if (priv->flow_idx >= MLX5_NUM_MAX_DEV_FLOWS) {
1542 		rte_flow_error_set(error, ENOSPC,
1543 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1544 				   "not free temporary device flow");
1545 		return NULL;
1546 	}
1547 	dev_handle = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1548 				   &handle_idx);
1549 	if (!dev_handle) {
1550 		rte_flow_error_set(error, ENOMEM,
1551 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1552 				   "not enough memory to create flow handle");
1553 		return NULL;
1554 	}
1555 	/* No multi-thread supporting. */
1556 	dev_flow = &((struct mlx5_flow *)priv->inter_flows)[priv->flow_idx++];
1557 	dev_flow->handle = dev_handle;
1558 	dev_flow->handle_idx = handle_idx;
1559 	/* Memcpy is used, only size needs to be cleared to 0. */
1560 	dev_flow->verbs.size = 0;
1561 	dev_flow->verbs.attr.num_of_specs = 0;
1562 	dev_flow->ingress = attr->ingress;
1563 	dev_flow->hash_fields = 0;
1564 	/* Need to set transfer attribute: not supported in Verbs mode. */
1565 	return dev_flow;
1566 }
1567 
/**
 * Fill the flow with verb spec.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] dev_flow
 *   Pointer to the mlx5 flow.
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, else a negative errno value otherwise and rte_errno is set.
 */
static int
flow_verbs_translate(struct rte_eth_dev *dev,
		     struct mlx5_flow *dev_flow,
		     const struct rte_flow_attr *attr,
		     const struct rte_flow_item items[],
		     const struct rte_flow_action actions[],
		     struct rte_flow_error *error)
{
	uint64_t item_flags = 0;
	uint64_t action_flags = 0;
	uint64_t priority = attr->priority;
	uint32_t subpriority = 0;
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
					      priv->rss_desc)
					      [!!priv->flow_nested_idx];

	/* MLX5_FLOW_PRIO_RSVD means "use the lowest configured priority". */
	if (priority == MLX5_FLOW_PRIO_RSVD)
		priority = priv->config.flow_prio - 1;
	/* Translate actions first: they fill specs and the fate/mark state. */
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		int ret;

		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_FLAG:
			flow_verbs_translate_action_flag(dev_flow, actions);
			action_flags |= MLX5_FLOW_ACTION_FLAG;
			dev_flow->handle->mark = 1;
			break;
		case RTE_FLOW_ACTION_TYPE_MARK:
			flow_verbs_translate_action_mark(dev_flow, actions);
			action_flags |= MLX5_FLOW_ACTION_MARK;
			dev_flow->handle->mark = 1;
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			flow_verbs_translate_action_drop(dev_flow, actions);
			action_flags |= MLX5_FLOW_ACTION_DROP;
			dev_flow->handle->fate_action = MLX5_FLOW_FATE_DROP;
			break;
		case RTE_FLOW_ACTION_TYPE_QUEUE:
			flow_verbs_translate_action_queue(rss_desc, actions);
			action_flags |= MLX5_FLOW_ACTION_QUEUE;
			dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
			break;
		case RTE_FLOW_ACTION_TYPE_RSS:
			flow_verbs_translate_action_rss(rss_desc, actions);
			action_flags |= MLX5_FLOW_ACTION_RSS;
			dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
			break;
		case RTE_FLOW_ACTION_TYPE_COUNT:
			ret = flow_verbs_translate_action_count(dev_flow,
								actions,
								dev, error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_COUNT;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
	}
	dev_flow->act_flags = action_flags;
	/*
	 * Translate pattern items: each updates the Verbs specs, the RSS
	 * hash fields and the subpriority (deepest layer wins).
	 */
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);

		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			flow_verbs_translate_item_eth(dev_flow, items,
						      item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
					       MLX5_FLOW_LAYER_OUTER_L2;
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			flow_verbs_translate_item_vlan(dev_flow, items,
						       item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
						MLX5_FLOW_LAYER_INNER_VLAN) :
					       (MLX5_FLOW_LAYER_OUTER_L2 |
						MLX5_FLOW_LAYER_OUTER_VLAN);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			flow_verbs_translate_item_ipv4(dev_flow, items,
						       item_flags);
			subpriority = MLX5_PRIORITY_MAP_L3;
			dev_flow->hash_fields |=
				mlx5_flow_hashfields_adjust
					(rss_desc, tunnel,
					 MLX5_IPV4_LAYER_TYPES,
					 MLX5_IPV4_IBV_RX_HASH);
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
					       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			flow_verbs_translate_item_ipv6(dev_flow, items,
						       item_flags);
			subpriority = MLX5_PRIORITY_MAP_L3;
			dev_flow->hash_fields |=
				mlx5_flow_hashfields_adjust
					(rss_desc, tunnel,
					 MLX5_IPV6_LAYER_TYPES,
					 MLX5_IPV6_IBV_RX_HASH);
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
					       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			flow_verbs_translate_item_tcp(dev_flow, items,
						      item_flags);
			subpriority = MLX5_PRIORITY_MAP_L4;
			dev_flow->hash_fields |=
				mlx5_flow_hashfields_adjust
					(rss_desc, tunnel, ETH_RSS_TCP,
					 (IBV_RX_HASH_SRC_PORT_TCP |
					  IBV_RX_HASH_DST_PORT_TCP));
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
					       MLX5_FLOW_LAYER_OUTER_L4_TCP;
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			flow_verbs_translate_item_udp(dev_flow, items,
						      item_flags);
			subpriority = MLX5_PRIORITY_MAP_L4;
			dev_flow->hash_fields |=
				mlx5_flow_hashfields_adjust
					(rss_desc, tunnel, ETH_RSS_UDP,
					 (IBV_RX_HASH_SRC_PORT_UDP |
					  IBV_RX_HASH_DST_PORT_UDP));
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
					       MLX5_FLOW_LAYER_OUTER_L4_UDP;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN:
			flow_verbs_translate_item_vxlan(dev_flow, items,
							item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= MLX5_FLOW_LAYER_VXLAN;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
			flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
							    item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
			break;
		case RTE_FLOW_ITEM_TYPE_GRE:
			flow_verbs_translate_item_gre(dev_flow, items,
						      item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= MLX5_FLOW_LAYER_GRE;
			break;
		case RTE_FLOW_ITEM_TYPE_MPLS:
			flow_verbs_translate_item_mpls(dev_flow, items,
						       item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= MLX5_FLOW_LAYER_MPLS;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL,
						  "item not supported");
		}
	}
	dev_flow->handle->layers = item_flags;
	/* Other members of attr will be ignored. */
	dev_flow->verbs.attr.priority =
		mlx5_flow_adjust_priority(dev, priority, subpriority);
	dev_flow->verbs.attr.port = (uint8_t)priv->dev_port;
	return 0;
}
1761 
/**
 * Remove the flow from the NIC but keeps it in memory.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in, out] flow
 *   Pointer to flow structure.
 */
static void
flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_flow_handle *handle;
	uint32_t handle_idx;

	if (!flow)
		return;
	/* Tear down hardware state of every device handle of this flow. */
	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
		       handle_idx, handle, next) {
		if (handle->drv_flow) {
			claim_zero(mlx5_glue->destroy_flow(handle->drv_flow));
			handle->drv_flow = NULL;
		}
		/* hrxq is union, don't touch it only the flag is set. */
		if (handle->rix_hrxq) {
			/* Release the drop or regular hash Rx queue. */
			if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
				mlx5_hrxq_drop_release(dev);
				handle->rix_hrxq = 0;
			} else if (handle->fate_action ==
				   MLX5_FLOW_FATE_QUEUE) {
				mlx5_hrxq_release(dev, handle->rix_hrxq);
				handle->rix_hrxq = 0;
			}
		}
		/* Drop the VF VLAN workaround interface if one was created. */
		if (handle->vf_vlan.tag && handle->vf_vlan.created)
			mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
	}
}
1800 
1801 /**
1802  * Remove the flow from the NIC and the memory.
1803  *
1804  * @param[in] dev
1805  *   Pointer to the Ethernet device structure.
1806  * @param[in, out] flow
1807  *   Pointer to flow structure.
1808  */
1809 static void
1810 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1811 {
1812 	struct mlx5_priv *priv = dev->data->dev_private;
1813 	struct mlx5_flow_handle *handle;
1814 
1815 	if (!flow)
1816 		return;
1817 	flow_verbs_remove(dev, flow);
1818 	while (flow->dev_handles) {
1819 		uint32_t tmp_idx = flow->dev_handles;
1820 
1821 		handle = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1822 				   tmp_idx);
1823 		if (!handle)
1824 			return;
1825 		flow->dev_handles = handle->next.next;
1826 		mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1827 			   tmp_idx);
1828 	}
1829 	if (flow->counter) {
1830 		flow_verbs_counter_release(dev, flow->counter);
1831 		flow->counter = 0;
1832 	}
1833 }
1834 
/**
 * Apply the flow to the NIC.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
		 struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_flow_handle *handle;
	struct mlx5_flow *dev_flow;
	struct mlx5_hrxq *hrxq;
	uint32_t dev_handles;
	int err;
	int idx;

	/* Walk the temporary device flows created for this rte_flow. */
	for (idx = priv->flow_idx - 1; idx >= priv->flow_nested_idx; idx--) {
		dev_flow = &((struct mlx5_flow *)priv->inter_flows)[idx];
		handle = dev_flow->handle;
		if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
			hrxq = mlx5_hrxq_drop_new(dev);
			if (!hrxq) {
				rte_flow_error_set
					(error, errno,
					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					 "cannot get drop hash queue");
				goto error;
			}
		} else {
			uint32_t hrxq_idx;
			struct mlx5_flow_rss_desc *rss_desc =
				&((struct mlx5_flow_rss_desc *)priv->rss_desc)
				[!!priv->flow_nested_idx];

			MLX5_ASSERT(rss_desc->queue_num);
			/* Reuse an existing hash Rx queue when possible. */
			hrxq_idx = mlx5_hrxq_get(dev, rss_desc->key,
					     MLX5_RSS_HASH_KEY_LEN,
					     dev_flow->hash_fields,
					     rss_desc->queue,
					     rss_desc->queue_num);
			if (!hrxq_idx)
				hrxq_idx = mlx5_hrxq_new(dev, rss_desc->key,
						MLX5_RSS_HASH_KEY_LEN,
						dev_flow->hash_fields,
						rss_desc->queue,
						rss_desc->queue_num,
						!!(handle->layers &
						MLX5_FLOW_LAYER_TUNNEL));
			hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
					 hrxq_idx);
			if (!hrxq) {
				rte_flow_error_set
					(error, rte_errno,
					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					 "cannot get hash queue");
				goto error;
			}
			handle->rix_hrxq = hrxq_idx;
		}
		MLX5_ASSERT(hrxq);
		/* Install the translated Verbs specs into the hardware. */
		handle->drv_flow = mlx5_glue->create_flow
					(hrxq->qp, &dev_flow->verbs.attr);
		if (!handle->drv_flow) {
			rte_flow_error_set(error, errno,
					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					   NULL,
					   "hardware refuses to create flow");
			goto error;
		}
		if (priv->vmwa_context &&
		    handle->vf_vlan.tag && !handle->vf_vlan.created) {
			/*
			 * The rule contains the VLAN pattern.
			 * For VF we are going to create VLAN
			 * interface to make hypervisor set correct
			 * e-Switch vport context.
			 */
			mlx5_vlan_vmwa_acquire(dev, &handle->vf_vlan);
		}
	}
	return 0;
error:
	err = rte_errno; /* Save rte_errno before cleanup. */
	/* Unwind: release every resource acquired for this flow so far. */
	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
		       dev_handles, handle, next) {
		/* hrxq is union, don't touch it only the flag is set. */
		if (handle->rix_hrxq) {
			if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
				mlx5_hrxq_drop_release(dev);
				handle->rix_hrxq = 0;
			} else if (handle->fate_action ==
				   MLX5_FLOW_FATE_QUEUE) {
				mlx5_hrxq_release(dev, handle->rix_hrxq);
				handle->rix_hrxq = 0;
			}
		}
		if (handle->vf_vlan.tag && handle->vf_vlan.created)
			mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
	}
	rte_errno = err; /* Restore rte_errno. */
	return -rte_errno;
}
1946 
1947 /**
1948  * Query a flow.
1949  *
1950  * @see rte_flow_query()
1951  * @see rte_flow_ops
1952  */
1953 static int
1954 flow_verbs_query(struct rte_eth_dev *dev,
1955 		 struct rte_flow *flow,
1956 		 const struct rte_flow_action *actions,
1957 		 void *data,
1958 		 struct rte_flow_error *error)
1959 {
1960 	int ret = -EINVAL;
1961 
1962 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1963 		switch (actions->type) {
1964 		case RTE_FLOW_ACTION_TYPE_VOID:
1965 			break;
1966 		case RTE_FLOW_ACTION_TYPE_COUNT:
1967 			ret = flow_verbs_counter_query(dev, flow, data, error);
1968 			break;
1969 		default:
1970 			return rte_flow_error_set(error, ENOTSUP,
1971 						  RTE_FLOW_ERROR_TYPE_ACTION,
1972 						  actions,
1973 						  "action not supported");
1974 		}
1975 	}
1976 	return ret;
1977 }
1978 
/* Verbs engine flow operations, registered with the generic mlx5 flow layer. */
const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
	.validate = flow_verbs_validate,
	.prepare = flow_verbs_prepare,
	.translate = flow_verbs_translate,
	.apply = flow_verbs_apply,
	.remove = flow_verbs_remove,
	.destroy = flow_verbs_destroy,
	.query = flow_verbs_query,
};
1988