xref: /dpdk/drivers/net/mlx5/mlx5_flow_verbs.c (revision f5057be340e44f3edc0fe90fa875eb89a4c49b4f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4 
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10 
11 #include <rte_common.h>
12 #include <rte_ether.h>
13 #include <rte_ethdev_driver.h>
14 #include <rte_flow.h>
15 #include <rte_flow_driver.h>
16 #include <rte_malloc.h>
17 #include <rte_ip.h>
18 
19 #include <mlx5_glue.h>
20 #include <mlx5_prm.h>
21 #include <mlx5_malloc.h>
22 
23 #include "mlx5_defs.h"
24 #include "mlx5.h"
25 #include "mlx5_flow.h"
26 #include "mlx5_rxtx.h"
27 
/*
 * Verbs spec type modifier for an item: IBV_FLOW_SPEC_INNER when any tunnel
 * layer bit is present in item_flags, 0 otherwise.
 */
#define VERBS_SPEC_INNER(item_flags) \
	((((item_flags) & MLX5_FLOW_LAYER_TUNNEL) != 0) ? IBV_FLOW_SPEC_INNER : 0)
30 
31 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
32 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
33 	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
34 };
35 
36 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
37 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
38 	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
39 	{ 9, 10, 11 }, { 12, 13, 14 },
40 };
41 
42 /**
43  * Discover the maximum number of priority available.
44  *
45  * @param[in] dev
46  *   Pointer to the Ethernet device structure.
47  *
48  * @return
49  *   number of supported flow priority on success, a negative errno
50  *   value otherwise and rte_errno is set.
51  */
52 int
53 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
54 {
55 	struct mlx5_priv *priv = dev->data->dev_private;
56 	struct {
57 		struct ibv_flow_attr attr;
58 		struct ibv_flow_spec_eth eth;
59 		struct ibv_flow_spec_action_drop drop;
60 	} flow_attr = {
61 		.attr = {
62 			.num_of_specs = 2,
63 			.port = (uint8_t)priv->dev_port,
64 		},
65 		.eth = {
66 			.type = IBV_FLOW_SPEC_ETH,
67 			.size = sizeof(struct ibv_flow_spec_eth),
68 		},
69 		.drop = {
70 			.size = sizeof(struct ibv_flow_spec_action_drop),
71 			.type = IBV_FLOW_SPEC_ACTION_DROP,
72 		},
73 	};
74 	struct ibv_flow *flow;
75 	struct mlx5_hrxq *drop = mlx5_drop_action_create(dev);
76 	uint16_t vprio[] = { 8, 16 };
77 	int i;
78 	int priority = 0;
79 
80 	if (!drop) {
81 		rte_errno = ENOTSUP;
82 		return -rte_errno;
83 	}
84 	for (i = 0; i != RTE_DIM(vprio); i++) {
85 		flow_attr.attr.priority = vprio[i] - 1;
86 		flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
87 		if (!flow)
88 			break;
89 		claim_zero(mlx5_glue->destroy_flow(flow));
90 		priority = vprio[i];
91 	}
92 	mlx5_drop_action_destroy(dev);
93 	switch (priority) {
94 	case 8:
95 		priority = RTE_DIM(priority_map_3);
96 		break;
97 	case 16:
98 		priority = RTE_DIM(priority_map_5);
99 		break;
100 	default:
101 		rte_errno = ENOTSUP;
102 		DRV_LOG(ERR,
103 			"port %u verbs maximum priority: %d expected 8/16",
104 			dev->data->port_id, priority);
105 		return -rte_errno;
106 	}
107 	DRV_LOG(INFO, "port %u flow maximum priority: %d",
108 		dev->data->port_id, priority);
109 	return priority;
110 }
111 
112 /**
113  * Adjust flow priority based on the highest layer and the request priority.
114  *
115  * @param[in] dev
116  *   Pointer to the Ethernet device structure.
117  * @param[in] priority
118  *   The rule base priority.
119  * @param[in] subpriority
120  *   The priority based on the items.
121  *
122  * @return
123  *   The new priority.
124  */
125 uint32_t
126 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
127 				   uint32_t subpriority)
128 {
129 	uint32_t res = 0;
130 	struct mlx5_priv *priv = dev->data->dev_private;
131 
132 	switch (priv->config.flow_prio) {
133 	case RTE_DIM(priority_map_3):
134 		res = priority_map_3[priority][subpriority];
135 		break;
136 	case RTE_DIM(priority_map_5):
137 		res = priority_map_5[priority][subpriority];
138 		break;
139 	}
140 	return  res;
141 }
142 
143 /**
144  * Get Verbs flow counter by index.
145  *
146  * @param[in] dev
147  *   Pointer to the Ethernet device structure.
148  * @param[in] idx
149  *   mlx5 flow counter index in the container.
150  * @param[out] ppool
151  *   mlx5 flow counter pool in the container,
152  *
153  * @return
154  *   A pointer to the counter, NULL otherwise.
155  */
156 static struct mlx5_flow_counter *
157 flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev,
158 			      uint32_t idx,
159 			      struct mlx5_flow_counter_pool **ppool)
160 {
161 	struct mlx5_priv *priv = dev->data->dev_private;
162 	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
163 	struct mlx5_flow_counter_pool *pool;
164 
165 	idx--;
166 	pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
167 	MLX5_ASSERT(pool);
168 	if (ppool)
169 		*ppool = pool;
170 	return MLX5_POOL_GET_CNT(pool, idx % MLX5_COUNTERS_PER_POOL);
171 }
172 
173 /**
174  * Create Verbs flow counter with Verbs library.
175  *
176  * @param[in] dev
177  *   Pointer to the Ethernet device structure.
178  * @param[in, out] counter
179  *   mlx5 flow counter object, contains the counter id,
180  *   handle of created Verbs flow counter is returned
181  *   in cs field (if counters are supported).
182  *
183  * @return
184  *   0 On success else a negative errno value is returned
185  *   and rte_errno is set.
186  */
187 static int
188 flow_verbs_counter_create(struct rte_eth_dev *dev,
189 			  struct mlx5_flow_counter_ext *counter)
190 {
191 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
192 	struct mlx5_priv *priv = dev->data->dev_private;
193 	struct ibv_context *ctx = priv->sh->ctx;
194 	struct ibv_counter_set_init_attr init = {
195 			 .counter_set_id = counter->id};
196 
197 	counter->cs = mlx5_glue->create_counter_set(ctx, &init);
198 	if (!counter->cs) {
199 		rte_errno = ENOTSUP;
200 		return -ENOTSUP;
201 	}
202 	return 0;
203 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
204 	struct mlx5_priv *priv = dev->data->dev_private;
205 	struct ibv_context *ctx = priv->sh->ctx;
206 	struct ibv_counters_init_attr init = {0};
207 	struct ibv_counter_attach_attr attach;
208 	int ret;
209 
210 	memset(&attach, 0, sizeof(attach));
211 	counter->cs = mlx5_glue->create_counters(ctx, &init);
212 	if (!counter->cs) {
213 		rte_errno = ENOTSUP;
214 		return -ENOTSUP;
215 	}
216 	attach.counter_desc = IBV_COUNTER_PACKETS;
217 	attach.index = 0;
218 	ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
219 	if (!ret) {
220 		attach.counter_desc = IBV_COUNTER_BYTES;
221 		attach.index = 1;
222 		ret = mlx5_glue->attach_counters
223 					(counter->cs, &attach, NULL);
224 	}
225 	if (ret) {
226 		claim_zero(mlx5_glue->destroy_counters(counter->cs));
227 		counter->cs = NULL;
228 		rte_errno = ret;
229 		return -ret;
230 	}
231 	return 0;
232 #else
233 	(void)dev;
234 	(void)counter;
235 	rte_errno = ENOTSUP;
236 	return -ENOTSUP;
237 #endif
238 }
239 
240 /**
241  * Get a flow counter.
242  *
243  * @param[in] dev
244  *   Pointer to the Ethernet device structure.
245  * @param[in] shared
246  *   Indicate if this counter is shared with other flows.
247  * @param[in] id
248  *   Counter identifier.
249  *
250  * @return
251  *   Index to the counter, 0 otherwise and rte_errno is set.
252  */
253 static uint32_t
254 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
255 {
256 	struct mlx5_priv *priv = dev->data->dev_private;
257 	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
258 	struct mlx5_flow_counter_pool *pool = NULL;
259 	struct mlx5_flow_counter_ext *cnt_ext = NULL;
260 	struct mlx5_flow_counter *cnt = NULL;
261 	uint32_t n_valid = rte_atomic16_read(&cont->n_valid);
262 	uint32_t pool_idx;
263 	uint32_t i;
264 	int ret;
265 
266 	if (shared) {
267 		for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) {
268 			pool = cont->pools[pool_idx];
269 			for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
270 				cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i);
271 				if (cnt_ext->shared && cnt_ext->id == id) {
272 					cnt_ext->ref_cnt++;
273 					return MLX5_MAKE_CNT_IDX(pool_idx, i);
274 				}
275 			}
276 		}
277 	}
278 	for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) {
279 		pool = cont->pools[pool_idx];
280 		if (!pool)
281 			continue;
282 		cnt = TAILQ_FIRST(&pool->counters[0]);
283 		if (cnt)
284 			break;
285 	}
286 	if (!cnt) {
287 		struct mlx5_flow_counter_pool **pools;
288 		uint32_t size;
289 
290 		if (n_valid == cont->n) {
291 			/* Resize the container pool array. */
292 			size = sizeof(struct mlx5_flow_counter_pool *) *
293 				     (n_valid + MLX5_CNT_CONTAINER_RESIZE);
294 			pools = mlx5_malloc(MLX5_MEM_ZERO, size, 0,
295 					    SOCKET_ID_ANY);
296 			if (!pools)
297 				return 0;
298 			if (n_valid) {
299 				memcpy(pools, cont->pools,
300 				       sizeof(struct mlx5_flow_counter_pool *) *
301 				       n_valid);
302 				mlx5_free(cont->pools);
303 			}
304 			cont->pools = pools;
305 			cont->n += MLX5_CNT_CONTAINER_RESIZE;
306 		}
307 		/* Allocate memory for new pool*/
308 		size = sizeof(*pool) + (sizeof(*cnt_ext) + sizeof(*cnt)) *
309 		       MLX5_COUNTERS_PER_POOL;
310 		pool = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY);
311 		if (!pool)
312 			return 0;
313 		pool->type |= CNT_POOL_TYPE_EXT;
314 		for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
315 			cnt = MLX5_POOL_GET_CNT(pool, i);
316 			TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
317 		}
318 		cnt = MLX5_POOL_GET_CNT(pool, 0);
319 		cont->pools[n_valid] = pool;
320 		pool_idx = n_valid;
321 		rte_atomic16_add(&cont->n_valid, 1);
322 		TAILQ_INSERT_HEAD(&cont->pool_list, pool, next);
323 	}
324 	i = MLX5_CNT_ARRAY_IDX(pool, cnt);
325 	cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i);
326 	cnt_ext->id = id;
327 	cnt_ext->shared = shared;
328 	cnt_ext->ref_cnt = 1;
329 	cnt->hits = 0;
330 	cnt->bytes = 0;
331 	/* Create counter with Verbs. */
332 	ret = flow_verbs_counter_create(dev, cnt_ext);
333 	if (!ret) {
334 		TAILQ_REMOVE(&pool->counters[0], cnt, next);
335 		return MLX5_MAKE_CNT_IDX(pool_idx, i);
336 	}
337 	/* Some error occurred in Verbs library. */
338 	rte_errno = -ret;
339 	return 0;
340 }
341 
342 /**
343  * Release a flow counter.
344  *
345  * @param[in] dev
346  *   Pointer to the Ethernet device structure.
347  * @param[in] counter
348  *   Index to the counter handler.
349  */
350 static void
351 flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter)
352 {
353 	struct mlx5_flow_counter_pool *pool;
354 	struct mlx5_flow_counter *cnt;
355 	struct mlx5_flow_counter_ext *cnt_ext;
356 
357 	cnt = flow_verbs_counter_get_by_idx(dev, counter,
358 					    &pool);
359 	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
360 	if (--cnt_ext->ref_cnt == 0) {
361 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
362 		claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs));
363 		cnt_ext->cs = NULL;
364 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
365 		claim_zero(mlx5_glue->destroy_counters(cnt_ext->cs));
366 		cnt_ext->cs = NULL;
367 #endif
368 		TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
369 	}
370 }
371 
372 /**
373  * Query a flow counter via Verbs library call.
374  *
375  * @see rte_flow_query()
376  * @see rte_flow_ops
377  */
378 static int
379 flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
380 			 struct rte_flow *flow, void *data,
381 			 struct rte_flow_error *error)
382 {
383 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
384 	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
385 	if (flow->counter) {
386 		struct mlx5_flow_counter_pool *pool;
387 		struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx
388 						(dev, flow->counter, &pool);
389 		struct mlx5_flow_counter_ext *cnt_ext = MLX5_CNT_TO_CNT_EXT
390 						(pool, cnt);
391 		struct rte_flow_query_count *qc = data;
392 		uint64_t counters[2] = {0, 0};
393 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
394 		struct ibv_query_counter_set_attr query_cs_attr = {
395 			.cs = cnt_ext->cs,
396 			.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
397 		};
398 		struct ibv_counter_set_data query_out = {
399 			.out = counters,
400 			.outlen = 2 * sizeof(uint64_t),
401 		};
402 		int err = mlx5_glue->query_counter_set(&query_cs_attr,
403 						       &query_out);
404 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
405 		int err = mlx5_glue->query_counters
406 			       (cnt_ext->cs, counters,
407 				RTE_DIM(counters),
408 				IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
409 #endif
410 		if (err)
411 			return rte_flow_error_set
412 				(error, err,
413 				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
414 				 NULL,
415 				 "cannot read counter");
416 		qc->hits_set = 1;
417 		qc->bytes_set = 1;
418 		qc->hits = counters[0] - cnt->hits;
419 		qc->bytes = counters[1] - cnt->bytes;
420 		if (qc->reset) {
421 			cnt->hits = counters[0];
422 			cnt->bytes = counters[1];
423 		}
424 		return 0;
425 	}
426 	return rte_flow_error_set(error, EINVAL,
427 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
428 				  NULL,
429 				  "flow does not have counter");
430 #else
431 	(void)flow;
432 	(void)data;
433 	return rte_flow_error_set(error, ENOTSUP,
434 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
435 				  NULL,
436 				  "counters are not available");
437 #endif
438 }
439 
440 /**
441  * Add a verbs item specification into @p verbs.
442  *
443  * @param[out] verbs
444  *   Pointer to verbs structure.
445  * @param[in] src
446  *   Create specification.
447  * @param[in] size
448  *   Size in bytes of the specification to copy.
449  */
450 static void
451 flow_verbs_spec_add(struct mlx5_flow_verbs_workspace *verbs,
452 		    void *src, unsigned int size)
453 {
454 	void *dst;
455 
456 	if (!verbs)
457 		return;
458 	MLX5_ASSERT(verbs->specs);
459 	dst = (void *)(verbs->specs + verbs->size);
460 	memcpy(dst, src, size);
461 	++verbs->attr.num_of_specs;
462 	verbs->size += size;
463 }
464 
465 /**
466  * Convert the @p item into a Verbs specification. This function assumes that
467  * the input is valid and that there is space to insert the requested item
468  * into the flow.
469  *
470  * @param[in, out] dev_flow
471  *   Pointer to dev_flow structure.
472  * @param[in] item
473  *   Item specification.
474  * @param[in] item_flags
475  *   Parsed item flags.
476  */
477 static void
478 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
479 			      const struct rte_flow_item *item,
480 			      uint64_t item_flags)
481 {
482 	const struct rte_flow_item_eth *spec = item->spec;
483 	const struct rte_flow_item_eth *mask = item->mask;
484 	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
485 	struct ibv_flow_spec_eth eth = {
486 		.type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
487 		.size = size,
488 	};
489 
490 	if (!mask)
491 		mask = &rte_flow_item_eth_mask;
492 	if (spec) {
493 		unsigned int i;
494 
495 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes,
496 			RTE_ETHER_ADDR_LEN);
497 		memcpy(&eth.val.src_mac, spec->src.addr_bytes,
498 			RTE_ETHER_ADDR_LEN);
499 		eth.val.ether_type = spec->type;
500 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes,
501 			RTE_ETHER_ADDR_LEN);
502 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes,
503 			RTE_ETHER_ADDR_LEN);
504 		eth.mask.ether_type = mask->type;
505 		/* Remove unwanted bits from values. */
506 		for (i = 0; i < RTE_ETHER_ADDR_LEN; ++i) {
507 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
508 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
509 		}
510 		eth.val.ether_type &= eth.mask.ether_type;
511 	}
512 	flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
513 }
514 
515 /**
516  * Update the VLAN tag in the Verbs Ethernet specification.
517  * This function assumes that the input is valid and there is space to add
518  * the requested item.
519  *
520  * @param[in, out] attr
521  *   Pointer to Verbs attributes structure.
522  * @param[in] eth
523  *   Verbs structure containing the VLAN information to copy.
524  */
525 static void
526 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
527 			    struct ibv_flow_spec_eth *eth)
528 {
529 	unsigned int i;
530 	const enum ibv_flow_spec_type search = eth->type;
531 	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
532 		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
533 
534 	for (i = 0; i != attr->num_of_specs; ++i) {
535 		if (hdr->type == search) {
536 			struct ibv_flow_spec_eth *e =
537 				(struct ibv_flow_spec_eth *)hdr;
538 
539 			e->val.vlan_tag = eth->val.vlan_tag;
540 			e->mask.vlan_tag = eth->mask.vlan_tag;
541 			e->val.ether_type = eth->val.ether_type;
542 			e->mask.ether_type = eth->mask.ether_type;
543 			break;
544 		}
545 		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
546 	}
547 }
548 
549 /**
550  * Convert the @p item into a Verbs specification. This function assumes that
551  * the input is valid and that there is space to insert the requested item
552  * into the flow.
553  *
554  * @param[in, out] dev_flow
555  *   Pointer to dev_flow structure.
556  * @param[in] item
557  *   Item specification.
558  * @param[in] item_flags
559  *   Parsed item flags.
560  */
561 static void
562 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
563 			       const struct rte_flow_item *item,
564 			       uint64_t item_flags)
565 {
566 	const struct rte_flow_item_vlan *spec = item->spec;
567 	const struct rte_flow_item_vlan *mask = item->mask;
568 	unsigned int size = sizeof(struct ibv_flow_spec_eth);
569 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
570 	struct ibv_flow_spec_eth eth = {
571 		.type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
572 		.size = size,
573 	};
574 	const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
575 				      MLX5_FLOW_LAYER_OUTER_L2;
576 
577 	if (!mask)
578 		mask = &rte_flow_item_vlan_mask;
579 	if (spec) {
580 		eth.val.vlan_tag = spec->tci;
581 		eth.mask.vlan_tag = mask->tci;
582 		eth.val.vlan_tag &= eth.mask.vlan_tag;
583 		eth.val.ether_type = spec->inner_type;
584 		eth.mask.ether_type = mask->inner_type;
585 		eth.val.ether_type &= eth.mask.ether_type;
586 	}
587 	if (!(item_flags & l2m))
588 		flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
589 	else
590 		flow_verbs_item_vlan_update(&dev_flow->verbs.attr, &eth);
591 	if (!tunnel)
592 		dev_flow->handle->vf_vlan.tag =
593 			rte_be_to_cpu_16(spec->tci) & 0x0fff;
594 }
595 
596 /**
597  * Convert the @p item into a Verbs specification. This function assumes that
598  * the input is valid and that there is space to insert the requested item
599  * into the flow.
600  *
601  * @param[in, out] dev_flow
602  *   Pointer to dev_flow structure.
603  * @param[in] item
604  *   Item specification.
605  * @param[in] item_flags
606  *   Parsed item flags.
607  */
608 static void
609 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
610 			       const struct rte_flow_item *item,
611 			       uint64_t item_flags)
612 {
613 	const struct rte_flow_item_ipv4 *spec = item->spec;
614 	const struct rte_flow_item_ipv4 *mask = item->mask;
615 	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
616 	struct ibv_flow_spec_ipv4_ext ipv4 = {
617 		.type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
618 		.size = size,
619 	};
620 
621 	if (!mask)
622 		mask = &rte_flow_item_ipv4_mask;
623 	if (spec) {
624 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
625 			.src_ip = spec->hdr.src_addr,
626 			.dst_ip = spec->hdr.dst_addr,
627 			.proto = spec->hdr.next_proto_id,
628 			.tos = spec->hdr.type_of_service,
629 		};
630 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
631 			.src_ip = mask->hdr.src_addr,
632 			.dst_ip = mask->hdr.dst_addr,
633 			.proto = mask->hdr.next_proto_id,
634 			.tos = mask->hdr.type_of_service,
635 		};
636 		/* Remove unwanted bits from values. */
637 		ipv4.val.src_ip &= ipv4.mask.src_ip;
638 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
639 		ipv4.val.proto &= ipv4.mask.proto;
640 		ipv4.val.tos &= ipv4.mask.tos;
641 	}
642 	flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
643 }
644 
645 /**
646  * Convert the @p item into a Verbs specification. This function assumes that
647  * the input is valid and that there is space to insert the requested item
648  * into the flow.
649  *
650  * @param[in, out] dev_flow
651  *   Pointer to dev_flow structure.
652  * @param[in] item
653  *   Item specification.
654  * @param[in] item_flags
655  *   Parsed item flags.
656  */
657 static void
658 flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
659 			       const struct rte_flow_item *item,
660 			       uint64_t item_flags)
661 {
662 	const struct rte_flow_item_ipv6 *spec = item->spec;
663 	const struct rte_flow_item_ipv6 *mask = item->mask;
664 	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
665 	struct ibv_flow_spec_ipv6 ipv6 = {
666 		.type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
667 		.size = size,
668 	};
669 
670 	if (!mask)
671 		mask = &rte_flow_item_ipv6_mask;
672 	if (spec) {
673 		unsigned int i;
674 		uint32_t vtc_flow_val;
675 		uint32_t vtc_flow_mask;
676 
677 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
678 		       RTE_DIM(ipv6.val.src_ip));
679 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
680 		       RTE_DIM(ipv6.val.dst_ip));
681 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
682 		       RTE_DIM(ipv6.mask.src_ip));
683 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
684 		       RTE_DIM(ipv6.mask.dst_ip));
685 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
686 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
687 		ipv6.val.flow_label =
688 			rte_cpu_to_be_32((vtc_flow_val & RTE_IPV6_HDR_FL_MASK) >>
689 					 RTE_IPV6_HDR_FL_SHIFT);
690 		ipv6.val.traffic_class = (vtc_flow_val & RTE_IPV6_HDR_TC_MASK) >>
691 					 RTE_IPV6_HDR_TC_SHIFT;
692 		ipv6.val.next_hdr = spec->hdr.proto;
693 		ipv6.mask.flow_label =
694 			rte_cpu_to_be_32((vtc_flow_mask & RTE_IPV6_HDR_FL_MASK) >>
695 					 RTE_IPV6_HDR_FL_SHIFT);
696 		ipv6.mask.traffic_class = (vtc_flow_mask & RTE_IPV6_HDR_TC_MASK) >>
697 					  RTE_IPV6_HDR_TC_SHIFT;
698 		ipv6.mask.next_hdr = mask->hdr.proto;
699 		/* Remove unwanted bits from values. */
700 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
701 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
702 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
703 		}
704 		ipv6.val.flow_label &= ipv6.mask.flow_label;
705 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
706 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
707 	}
708 	flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
709 }
710 
711 /**
712  * Convert the @p item into a Verbs specification. This function assumes that
713  * the input is valid and that there is space to insert the requested item
714  * into the flow.
715  *
716  * @param[in, out] dev_flow
717  *   Pointer to dev_flow structure.
718  * @param[in] item
719  *   Item specification.
720  * @param[in] item_flags
721  *   Parsed item flags.
722  */
723 static void
724 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
725 			      const struct rte_flow_item *item,
726 			      uint64_t item_flags __rte_unused)
727 {
728 	const struct rte_flow_item_tcp *spec = item->spec;
729 	const struct rte_flow_item_tcp *mask = item->mask;
730 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
731 	struct ibv_flow_spec_tcp_udp tcp = {
732 		.type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
733 		.size = size,
734 	};
735 
736 	if (!mask)
737 		mask = &rte_flow_item_tcp_mask;
738 	if (spec) {
739 		tcp.val.dst_port = spec->hdr.dst_port;
740 		tcp.val.src_port = spec->hdr.src_port;
741 		tcp.mask.dst_port = mask->hdr.dst_port;
742 		tcp.mask.src_port = mask->hdr.src_port;
743 		/* Remove unwanted bits from values. */
744 		tcp.val.src_port &= tcp.mask.src_port;
745 		tcp.val.dst_port &= tcp.mask.dst_port;
746 	}
747 	flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
748 }
749 
750 /**
751  * Convert the @p item into a Verbs specification. This function assumes that
752  * the input is valid and that there is space to insert the requested item
753  * into the flow.
754  *
755  * @param[in, out] dev_flow
756  *   Pointer to dev_flow structure.
757  * @param[in] item
758  *   Item specification.
759  * @param[in] item_flags
760  *   Parsed item flags.
761  */
762 static void
763 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
764 			      const struct rte_flow_item *item,
765 			      uint64_t item_flags __rte_unused)
766 {
767 	const struct rte_flow_item_udp *spec = item->spec;
768 	const struct rte_flow_item_udp *mask = item->mask;
769 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
770 	struct ibv_flow_spec_tcp_udp udp = {
771 		.type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
772 		.size = size,
773 	};
774 
775 	if (!mask)
776 		mask = &rte_flow_item_udp_mask;
777 	if (spec) {
778 		udp.val.dst_port = spec->hdr.dst_port;
779 		udp.val.src_port = spec->hdr.src_port;
780 		udp.mask.dst_port = mask->hdr.dst_port;
781 		udp.mask.src_port = mask->hdr.src_port;
782 		/* Remove unwanted bits from values. */
783 		udp.val.src_port &= udp.mask.src_port;
784 		udp.val.dst_port &= udp.mask.dst_port;
785 	}
786 	item++;
787 	while (item->type == RTE_FLOW_ITEM_TYPE_VOID)
788 		item++;
789 	if (!(udp.val.dst_port & udp.mask.dst_port)) {
790 		switch ((item)->type) {
791 		case RTE_FLOW_ITEM_TYPE_VXLAN:
792 			udp.val.dst_port = htons(MLX5_UDP_PORT_VXLAN);
793 			udp.mask.dst_port = 0xffff;
794 			break;
795 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
796 			udp.val.dst_port = htons(MLX5_UDP_PORT_VXLAN_GPE);
797 			udp.mask.dst_port = 0xffff;
798 			break;
799 		case RTE_FLOW_ITEM_TYPE_MPLS:
800 			udp.val.dst_port = htons(MLX5_UDP_PORT_MPLS);
801 			udp.mask.dst_port = 0xffff;
802 			break;
803 		default:
804 			break;
805 		}
806 	}
807 
808 	flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
809 }
810 
811 /**
812  * Convert the @p item into a Verbs specification. This function assumes that
813  * the input is valid and that there is space to insert the requested item
814  * into the flow.
815  *
816  * @param[in, out] dev_flow
817  *   Pointer to dev_flow structure.
818  * @param[in] item
819  *   Item specification.
820  * @param[in] item_flags
821  *   Parsed item flags.
822  */
823 static void
824 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
825 				const struct rte_flow_item *item,
826 				uint64_t item_flags __rte_unused)
827 {
828 	const struct rte_flow_item_vxlan *spec = item->spec;
829 	const struct rte_flow_item_vxlan *mask = item->mask;
830 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
831 	struct ibv_flow_spec_tunnel vxlan = {
832 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
833 		.size = size,
834 	};
835 	union vni {
836 		uint32_t vlan_id;
837 		uint8_t vni[4];
838 	} id = { .vlan_id = 0, };
839 
840 	if (!mask)
841 		mask = &rte_flow_item_vxlan_mask;
842 	if (spec) {
843 		memcpy(&id.vni[1], spec->vni, 3);
844 		vxlan.val.tunnel_id = id.vlan_id;
845 		memcpy(&id.vni[1], mask->vni, 3);
846 		vxlan.mask.tunnel_id = id.vlan_id;
847 		/* Remove unwanted bits from values. */
848 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
849 	}
850 	flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
851 }
852 
853 /**
854  * Convert the @p item into a Verbs specification. This function assumes that
855  * the input is valid and that there is space to insert the requested item
856  * into the flow.
857  *
858  * @param[in, out] dev_flow
859  *   Pointer to dev_flow structure.
860  * @param[in] item
861  *   Item specification.
862  * @param[in] item_flags
863  *   Parsed item flags.
864  */
865 static void
866 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
867 				    const struct rte_flow_item *item,
868 				    uint64_t item_flags __rte_unused)
869 {
870 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
871 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
872 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
873 	struct ibv_flow_spec_tunnel vxlan_gpe = {
874 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
875 		.size = size,
876 	};
877 	union vni {
878 		uint32_t vlan_id;
879 		uint8_t vni[4];
880 	} id = { .vlan_id = 0, };
881 
882 	if (!mask)
883 		mask = &rte_flow_item_vxlan_gpe_mask;
884 	if (spec) {
885 		memcpy(&id.vni[1], spec->vni, 3);
886 		vxlan_gpe.val.tunnel_id = id.vlan_id;
887 		memcpy(&id.vni[1], mask->vni, 3);
888 		vxlan_gpe.mask.tunnel_id = id.vlan_id;
889 		/* Remove unwanted bits from values. */
890 		vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
891 	}
892 	flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
893 }
894 
895 /**
896  * Update the protocol in Verbs IPv4/IPv6 spec.
897  *
898  * @param[in, out] attr
899  *   Pointer to Verbs attributes structure.
900  * @param[in] search
901  *   Specification type to search in order to update the IP protocol.
902  * @param[in] protocol
903  *   Protocol value to set if none is present in the specification.
904  */
905 static void
906 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
907 				       enum ibv_flow_spec_type search,
908 				       uint8_t protocol)
909 {
910 	unsigned int i;
911 	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
912 		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
913 
914 	if (!attr)
915 		return;
916 	for (i = 0; i != attr->num_of_specs; ++i) {
917 		if (hdr->type == search) {
918 			union {
919 				struct ibv_flow_spec_ipv4_ext *ipv4;
920 				struct ibv_flow_spec_ipv6 *ipv6;
921 			} ip;
922 
923 			switch (search) {
924 			case IBV_FLOW_SPEC_IPV4_EXT:
925 				ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
926 				if (!ip.ipv4->val.proto) {
927 					ip.ipv4->val.proto = protocol;
928 					ip.ipv4->mask.proto = 0xff;
929 				}
930 				break;
931 			case IBV_FLOW_SPEC_IPV6:
932 				ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
933 				if (!ip.ipv6->val.next_hdr) {
934 					ip.ipv6->val.next_hdr = protocol;
935 					ip.ipv6->mask.next_hdr = 0xff;
936 				}
937 				break;
938 			default:
939 				break;
940 			}
941 			break;
942 		}
943 		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
944 	}
945 }
946 
947 /**
948  * Convert the @p item into a Verbs specification. This function assumes that
949  * the input is valid and that there is space to insert the requested item
950  * into the flow.
951  *
952  * @param[in, out] dev_flow
953  *   Pointer to dev_flow structure.
954  * @param[in] item
955  *   Item specification.
956  * @param[in] item_flags
957  *   Parsed item flags.
958  */
959 static void
960 flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
961 			      const struct rte_flow_item *item __rte_unused,
962 			      uint64_t item_flags)
963 {
964 	struct mlx5_flow_verbs_workspace *verbs = &dev_flow->verbs;
965 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
966 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
967 	struct ibv_flow_spec_tunnel tunnel = {
968 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
969 		.size = size,
970 	};
971 #else
972 	const struct rte_flow_item_gre *spec = item->spec;
973 	const struct rte_flow_item_gre *mask = item->mask;
974 	unsigned int size = sizeof(struct ibv_flow_spec_gre);
975 	struct ibv_flow_spec_gre tunnel = {
976 		.type = IBV_FLOW_SPEC_GRE,
977 		.size = size,
978 	};
979 
980 	if (!mask)
981 		mask = &rte_flow_item_gre_mask;
982 	if (spec) {
983 		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
984 		tunnel.val.protocol = spec->protocol;
985 		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
986 		tunnel.mask.protocol = mask->protocol;
987 		/* Remove unwanted bits from values. */
988 		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
989 		tunnel.val.protocol &= tunnel.mask.protocol;
990 		tunnel.val.key &= tunnel.mask.key;
991 	}
992 #endif
993 	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
994 		flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
995 						       IBV_FLOW_SPEC_IPV4_EXT,
996 						       IPPROTO_GRE);
997 	else
998 		flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
999 						       IBV_FLOW_SPEC_IPV6,
1000 						       IPPROTO_GRE);
1001 	flow_verbs_spec_add(verbs, &tunnel, size);
1002 }
1003 
1004 /**
1005  * Convert the @p action into a Verbs specification. This function assumes that
1006  * the input is valid and that there is space to insert the requested action
1007  * into the flow. This function also return the action that was added.
1008  *
1009  * @param[in, out] dev_flow
1010  *   Pointer to dev_flow structure.
1011  * @param[in] item
1012  *   Item specification.
1013  * @param[in] item_flags
1014  *   Parsed item flags.
1015  */
1016 static void
1017 flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
1018 			       const struct rte_flow_item *item __rte_unused,
1019 			       uint64_t item_flags __rte_unused)
1020 {
1021 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1022 	const struct rte_flow_item_mpls *spec = item->spec;
1023 	const struct rte_flow_item_mpls *mask = item->mask;
1024 	unsigned int size = sizeof(struct ibv_flow_spec_mpls);
1025 	struct ibv_flow_spec_mpls mpls = {
1026 		.type = IBV_FLOW_SPEC_MPLS,
1027 		.size = size,
1028 	};
1029 
1030 	if (!mask)
1031 		mask = &rte_flow_item_mpls_mask;
1032 	if (spec) {
1033 		memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
1034 		memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
1035 		/* Remove unwanted bits from values.  */
1036 		mpls.val.label &= mpls.mask.label;
1037 	}
1038 	flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
1039 #endif
1040 }
1041 
1042 /**
1043  * Convert the @p action into a Verbs specification. This function assumes that
1044  * the input is valid and that there is space to insert the requested action
1045  * into the flow.
1046  *
1047  * @param[in] dev_flow
1048  *   Pointer to mlx5_flow.
1049  * @param[in] action
1050  *   Action configuration.
1051  */
1052 static void
1053 flow_verbs_translate_action_drop
1054 	(struct mlx5_flow *dev_flow,
1055 	 const struct rte_flow_action *action __rte_unused)
1056 {
1057 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1058 	struct ibv_flow_spec_action_drop drop = {
1059 			.type = IBV_FLOW_SPEC_ACTION_DROP,
1060 			.size = size,
1061 	};
1062 
1063 	flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
1064 }
1065 
1066 /**
1067  * Convert the @p action into a Verbs specification. This function assumes that
1068  * the input is valid and that there is space to insert the requested action
1069  * into the flow.
1070  *
1071  * @param[in] rss_desc
1072  *   Pointer to mlx5_flow_rss_desc.
1073  * @param[in] action
1074  *   Action configuration.
1075  */
1076 static void
1077 flow_verbs_translate_action_queue(struct mlx5_flow_rss_desc *rss_desc,
1078 				  const struct rte_flow_action *action)
1079 {
1080 	const struct rte_flow_action_queue *queue = action->conf;
1081 
1082 	rss_desc->queue[0] = queue->index;
1083 	rss_desc->queue_num = 1;
1084 }
1085 
1086 /**
1087  * Convert the @p action into a Verbs specification. This function assumes that
1088  * the input is valid and that there is space to insert the requested action
1089  * into the flow.
1090  *
1091  * @param[in] rss_desc
1092  *   Pointer to mlx5_flow_rss_desc.
1093  * @param[in] action
1094  *   Action configuration.
1095  */
1096 static void
1097 flow_verbs_translate_action_rss(struct mlx5_flow_rss_desc *rss_desc,
1098 				const struct rte_flow_action *action)
1099 {
1100 	const struct rte_flow_action_rss *rss = action->conf;
1101 	const uint8_t *rss_key;
1102 
1103 	memcpy(rss_desc->queue, rss->queue, rss->queue_num * sizeof(uint16_t));
1104 	rss_desc->queue_num = rss->queue_num;
1105 	/* NULL RSS key indicates default RSS key. */
1106 	rss_key = !rss->key ? rss_hash_default_key : rss->key;
1107 	memcpy(rss_desc->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
1108 	/*
1109 	 * rss->level and rss.types should be set in advance when expanding
1110 	 * items for RSS.
1111 	 */
1112 }
1113 
1114 /**
1115  * Convert the @p action into a Verbs specification. This function assumes that
1116  * the input is valid and that there is space to insert the requested action
1117  * into the flow.
1118  *
1119  * @param[in] dev_flow
1120  *   Pointer to mlx5_flow.
1121  * @param[in] action
1122  *   Action configuration.
1123  */
1124 static void
1125 flow_verbs_translate_action_flag
1126 	(struct mlx5_flow *dev_flow,
1127 	 const struct rte_flow_action *action __rte_unused)
1128 {
1129 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1130 	struct ibv_flow_spec_action_tag tag = {
1131 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1132 		.size = size,
1133 		.tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1134 	};
1135 
1136 	flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1137 }
1138 
1139 /**
1140  * Convert the @p action into a Verbs specification. This function assumes that
1141  * the input is valid and that there is space to insert the requested action
1142  * into the flow.
1143  *
1144  * @param[in] dev_flow
1145  *   Pointer to mlx5_flow.
1146  * @param[in] action
1147  *   Action configuration.
1148  */
1149 static void
1150 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
1151 				 const struct rte_flow_action *action)
1152 {
1153 	const struct rte_flow_action_mark *mark = action->conf;
1154 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1155 	struct ibv_flow_spec_action_tag tag = {
1156 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1157 		.size = size,
1158 		.tag_id = mlx5_flow_mark_set(mark->id),
1159 	};
1160 
1161 	flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1162 }
1163 
1164 /**
1165  * Convert the @p action into a Verbs specification. This function assumes that
1166  * the input is valid and that there is space to insert the requested action
1167  * into the flow.
1168  *
1169  * @param[in] dev
1170  *   Pointer to the Ethernet device structure.
1171  * @param[in] action
1172  *   Action configuration.
1173  * @param[in] dev_flow
1174  *   Pointer to mlx5_flow.
1175  * @param[out] error
1176  *   Pointer to error structure.
1177  *
1178  * @return
1179  *   0 On success else a negative errno value is returned and rte_errno is set.
1180  */
1181 static int
1182 flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
1183 				  const struct rte_flow_action *action,
1184 				  struct rte_eth_dev *dev,
1185 				  struct rte_flow_error *error)
1186 {
1187 	const struct rte_flow_action_count *count = action->conf;
1188 	struct rte_flow *flow = dev_flow->flow;
1189 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1190 	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1191 	struct mlx5_flow_counter_pool *pool;
1192 	struct mlx5_flow_counter *cnt = NULL;
1193 	struct mlx5_flow_counter_ext *cnt_ext;
1194 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1195 	struct ibv_flow_spec_counter_action counter = {
1196 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
1197 		.size = size,
1198 	};
1199 #endif
1200 
1201 	if (!flow->counter) {
1202 		flow->counter = flow_verbs_counter_new(dev, count->shared,
1203 						       count->id);
1204 		if (!flow->counter)
1205 			return rte_flow_error_set(error, rte_errno,
1206 						  RTE_FLOW_ERROR_TYPE_ACTION,
1207 						  action,
1208 						  "cannot get counter"
1209 						  " context.");
1210 	}
1211 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
1212 	cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
1213 	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
1214 	counter.counter_set_handle = cnt_ext->cs->handle;
1215 	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
1216 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1217 	cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
1218 	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
1219 	counter.counters = cnt_ext->cs;
1220 	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
1221 #endif
1222 	return 0;
1223 }
1224 
1225 /**
1226  * Internal validation function. For validating both actions and items.
1227  *
1228  * @param[in] dev
1229  *   Pointer to the Ethernet device structure.
1230  * @param[in] attr
1231  *   Pointer to the flow attributes.
1232  * @param[in] items
1233  *   Pointer to the list of items.
1234  * @param[in] actions
1235  *   Pointer to the list of actions.
1236  * @param[in] external
1237  *   This flow rule is created by request external to PMD.
1238  * @param[in] hairpin
1239  *   Number of hairpin TX actions, 0 means classic flow.
1240  * @param[out] error
1241  *   Pointer to the error structure.
1242  *
1243  * @return
1244  *   0 on success, a negative errno value otherwise and rte_errno is set.
1245  */
1246 static int
1247 flow_verbs_validate(struct rte_eth_dev *dev,
1248 		    const struct rte_flow_attr *attr,
1249 		    const struct rte_flow_item items[],
1250 		    const struct rte_flow_action actions[],
1251 		    bool external __rte_unused,
1252 		    int hairpin __rte_unused,
1253 		    struct rte_flow_error *error)
1254 {
1255 	int ret;
1256 	uint64_t action_flags = 0;
1257 	uint64_t item_flags = 0;
1258 	uint64_t last_item = 0;
1259 	uint8_t next_protocol = 0xff;
1260 	uint16_t ether_type = 0;
1261 
1262 	if (items == NULL)
1263 		return -1;
1264 	ret = mlx5_flow_validate_attributes(dev, attr, error);
1265 	if (ret < 0)
1266 		return ret;
1267 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1268 		int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1269 		int ret = 0;
1270 
1271 		switch (items->type) {
1272 		case RTE_FLOW_ITEM_TYPE_VOID:
1273 			break;
1274 		case RTE_FLOW_ITEM_TYPE_ETH:
1275 			ret = mlx5_flow_validate_item_eth(items, item_flags,
1276 							  error);
1277 			if (ret < 0)
1278 				return ret;
1279 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1280 					     MLX5_FLOW_LAYER_OUTER_L2;
1281 			if (items->mask != NULL && items->spec != NULL) {
1282 				ether_type =
1283 					((const struct rte_flow_item_eth *)
1284 					 items->spec)->type;
1285 				ether_type &=
1286 					((const struct rte_flow_item_eth *)
1287 					 items->mask)->type;
1288 				ether_type = rte_be_to_cpu_16(ether_type);
1289 			} else {
1290 				ether_type = 0;
1291 			}
1292 			break;
1293 		case RTE_FLOW_ITEM_TYPE_VLAN:
1294 			ret = mlx5_flow_validate_item_vlan(items, item_flags,
1295 							   dev, error);
1296 			if (ret < 0)
1297 				return ret;
1298 			last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1299 					      MLX5_FLOW_LAYER_INNER_VLAN) :
1300 					     (MLX5_FLOW_LAYER_OUTER_L2 |
1301 					      MLX5_FLOW_LAYER_OUTER_VLAN);
1302 			if (items->mask != NULL && items->spec != NULL) {
1303 				ether_type =
1304 					((const struct rte_flow_item_vlan *)
1305 					 items->spec)->inner_type;
1306 				ether_type &=
1307 					((const struct rte_flow_item_vlan *)
1308 					 items->mask)->inner_type;
1309 				ether_type = rte_be_to_cpu_16(ether_type);
1310 			} else {
1311 				ether_type = 0;
1312 			}
1313 			break;
1314 		case RTE_FLOW_ITEM_TYPE_IPV4:
1315 			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1316 							   last_item,
1317 							   ether_type, NULL,
1318 							   error);
1319 			if (ret < 0)
1320 				return ret;
1321 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1322 					     MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1323 			if (items->mask != NULL &&
1324 			    ((const struct rte_flow_item_ipv4 *)
1325 			     items->mask)->hdr.next_proto_id) {
1326 				next_protocol =
1327 					((const struct rte_flow_item_ipv4 *)
1328 					 (items->spec))->hdr.next_proto_id;
1329 				next_protocol &=
1330 					((const struct rte_flow_item_ipv4 *)
1331 					 (items->mask))->hdr.next_proto_id;
1332 			} else {
1333 				/* Reset for inner layer. */
1334 				next_protocol = 0xff;
1335 			}
1336 			break;
1337 		case RTE_FLOW_ITEM_TYPE_IPV6:
1338 			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1339 							   last_item,
1340 							   ether_type, NULL,
1341 							   error);
1342 			if (ret < 0)
1343 				return ret;
1344 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1345 					     MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1346 			if (items->mask != NULL &&
1347 			    ((const struct rte_flow_item_ipv6 *)
1348 			     items->mask)->hdr.proto) {
1349 				next_protocol =
1350 					((const struct rte_flow_item_ipv6 *)
1351 					 items->spec)->hdr.proto;
1352 				next_protocol &=
1353 					((const struct rte_flow_item_ipv6 *)
1354 					 items->mask)->hdr.proto;
1355 			} else {
1356 				/* Reset for inner layer. */
1357 				next_protocol = 0xff;
1358 			}
1359 			break;
1360 		case RTE_FLOW_ITEM_TYPE_UDP:
1361 			ret = mlx5_flow_validate_item_udp(items, item_flags,
1362 							  next_protocol,
1363 							  error);
1364 			if (ret < 0)
1365 				return ret;
1366 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1367 					     MLX5_FLOW_LAYER_OUTER_L4_UDP;
1368 			break;
1369 		case RTE_FLOW_ITEM_TYPE_TCP:
1370 			ret = mlx5_flow_validate_item_tcp
1371 						(items, item_flags,
1372 						 next_protocol,
1373 						 &rte_flow_item_tcp_mask,
1374 						 error);
1375 			if (ret < 0)
1376 				return ret;
1377 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1378 					     MLX5_FLOW_LAYER_OUTER_L4_TCP;
1379 			break;
1380 		case RTE_FLOW_ITEM_TYPE_VXLAN:
1381 			ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1382 							    error);
1383 			if (ret < 0)
1384 				return ret;
1385 			last_item = MLX5_FLOW_LAYER_VXLAN;
1386 			break;
1387 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1388 			ret = mlx5_flow_validate_item_vxlan_gpe(items,
1389 								item_flags,
1390 								dev, error);
1391 			if (ret < 0)
1392 				return ret;
1393 			last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
1394 			break;
1395 		case RTE_FLOW_ITEM_TYPE_GRE:
1396 			ret = mlx5_flow_validate_item_gre(items, item_flags,
1397 							  next_protocol, error);
1398 			if (ret < 0)
1399 				return ret;
1400 			last_item = MLX5_FLOW_LAYER_GRE;
1401 			break;
1402 		case RTE_FLOW_ITEM_TYPE_MPLS:
1403 			ret = mlx5_flow_validate_item_mpls(dev, items,
1404 							   item_flags,
1405 							   last_item, error);
1406 			if (ret < 0)
1407 				return ret;
1408 			last_item = MLX5_FLOW_LAYER_MPLS;
1409 			break;
1410 		default:
1411 			return rte_flow_error_set(error, ENOTSUP,
1412 						  RTE_FLOW_ERROR_TYPE_ITEM,
1413 						  NULL, "item not supported");
1414 		}
1415 		item_flags |= last_item;
1416 	}
1417 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1418 		switch (actions->type) {
1419 		case RTE_FLOW_ACTION_TYPE_VOID:
1420 			break;
1421 		case RTE_FLOW_ACTION_TYPE_FLAG:
1422 			ret = mlx5_flow_validate_action_flag(action_flags,
1423 							     attr,
1424 							     error);
1425 			if (ret < 0)
1426 				return ret;
1427 			action_flags |= MLX5_FLOW_ACTION_FLAG;
1428 			break;
1429 		case RTE_FLOW_ACTION_TYPE_MARK:
1430 			ret = mlx5_flow_validate_action_mark(actions,
1431 							     action_flags,
1432 							     attr,
1433 							     error);
1434 			if (ret < 0)
1435 				return ret;
1436 			action_flags |= MLX5_FLOW_ACTION_MARK;
1437 			break;
1438 		case RTE_FLOW_ACTION_TYPE_DROP:
1439 			ret = mlx5_flow_validate_action_drop(action_flags,
1440 							     attr,
1441 							     error);
1442 			if (ret < 0)
1443 				return ret;
1444 			action_flags |= MLX5_FLOW_ACTION_DROP;
1445 			break;
1446 		case RTE_FLOW_ACTION_TYPE_QUEUE:
1447 			ret = mlx5_flow_validate_action_queue(actions,
1448 							      action_flags, dev,
1449 							      attr,
1450 							      error);
1451 			if (ret < 0)
1452 				return ret;
1453 			action_flags |= MLX5_FLOW_ACTION_QUEUE;
1454 			break;
1455 		case RTE_FLOW_ACTION_TYPE_RSS:
1456 			ret = mlx5_flow_validate_action_rss(actions,
1457 							    action_flags, dev,
1458 							    attr, item_flags,
1459 							    error);
1460 			if (ret < 0)
1461 				return ret;
1462 			action_flags |= MLX5_FLOW_ACTION_RSS;
1463 			break;
1464 		case RTE_FLOW_ACTION_TYPE_COUNT:
1465 			ret = mlx5_flow_validate_action_count(dev, attr, error);
1466 			if (ret < 0)
1467 				return ret;
1468 			action_flags |= MLX5_FLOW_ACTION_COUNT;
1469 			break;
1470 		default:
1471 			return rte_flow_error_set(error, ENOTSUP,
1472 						  RTE_FLOW_ERROR_TYPE_ACTION,
1473 						  actions,
1474 						  "action not supported");
1475 		}
1476 	}
1477 	/*
1478 	 * Validate the drop action mutual exclusion with other actions.
1479 	 * Drop action is mutually-exclusive with any other action, except for
1480 	 * Count action.
1481 	 */
1482 	if ((action_flags & MLX5_FLOW_ACTION_DROP) &&
1483 	    (action_flags & ~(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_COUNT)))
1484 		return rte_flow_error_set(error, EINVAL,
1485 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1486 					  "Drop action is mutually-exclusive "
1487 					  "with any other action, except for "
1488 					  "Count action");
1489 	if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1490 		return rte_flow_error_set(error, EINVAL,
1491 					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
1492 					  "no fate action is found");
1493 	return 0;
1494 }
1495 
1496 /**
1497  * Calculate the required bytes that are needed for the action part of the verbs
1498  * flow.
1499  *
1500  * @param[in] actions
1501  *   Pointer to the list of actions.
1502  *
1503  * @return
1504  *   The size of the memory needed for all actions.
1505  */
1506 static int
1507 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1508 {
1509 	int size = 0;
1510 
1511 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1512 		switch (actions->type) {
1513 		case RTE_FLOW_ACTION_TYPE_VOID:
1514 			break;
1515 		case RTE_FLOW_ACTION_TYPE_FLAG:
1516 			size += sizeof(struct ibv_flow_spec_action_tag);
1517 			break;
1518 		case RTE_FLOW_ACTION_TYPE_MARK:
1519 			size += sizeof(struct ibv_flow_spec_action_tag);
1520 			break;
1521 		case RTE_FLOW_ACTION_TYPE_DROP:
1522 			size += sizeof(struct ibv_flow_spec_action_drop);
1523 			break;
1524 		case RTE_FLOW_ACTION_TYPE_QUEUE:
1525 			break;
1526 		case RTE_FLOW_ACTION_TYPE_RSS:
1527 			break;
1528 		case RTE_FLOW_ACTION_TYPE_COUNT:
1529 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1530 	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1531 			size += sizeof(struct ibv_flow_spec_counter_action);
1532 #endif
1533 			break;
1534 		default:
1535 			break;
1536 		}
1537 	}
1538 	return size;
1539 }
1540 
1541 /**
1542  * Calculate the required bytes that are needed for the item part of the verbs
1543  * flow.
1544  *
1545  * @param[in] items
1546  *   Pointer to the list of items.
1547  *
1548  * @return
1549  *   The size of the memory needed for all items.
1550  */
1551 static int
1552 flow_verbs_get_items_size(const struct rte_flow_item items[])
1553 {
1554 	int size = 0;
1555 
1556 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1557 		switch (items->type) {
1558 		case RTE_FLOW_ITEM_TYPE_VOID:
1559 			break;
1560 		case RTE_FLOW_ITEM_TYPE_ETH:
1561 			size += sizeof(struct ibv_flow_spec_eth);
1562 			break;
1563 		case RTE_FLOW_ITEM_TYPE_VLAN:
1564 			size += sizeof(struct ibv_flow_spec_eth);
1565 			break;
1566 		case RTE_FLOW_ITEM_TYPE_IPV4:
1567 			size += sizeof(struct ibv_flow_spec_ipv4_ext);
1568 			break;
1569 		case RTE_FLOW_ITEM_TYPE_IPV6:
1570 			size += sizeof(struct ibv_flow_spec_ipv6);
1571 			break;
1572 		case RTE_FLOW_ITEM_TYPE_UDP:
1573 			size += sizeof(struct ibv_flow_spec_tcp_udp);
1574 			break;
1575 		case RTE_FLOW_ITEM_TYPE_TCP:
1576 			size += sizeof(struct ibv_flow_spec_tcp_udp);
1577 			break;
1578 		case RTE_FLOW_ITEM_TYPE_VXLAN:
1579 			size += sizeof(struct ibv_flow_spec_tunnel);
1580 			break;
1581 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1582 			size += sizeof(struct ibv_flow_spec_tunnel);
1583 			break;
1584 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1585 		case RTE_FLOW_ITEM_TYPE_GRE:
1586 			size += sizeof(struct ibv_flow_spec_gre);
1587 			break;
1588 		case RTE_FLOW_ITEM_TYPE_MPLS:
1589 			size += sizeof(struct ibv_flow_spec_mpls);
1590 			break;
1591 #else
1592 		case RTE_FLOW_ITEM_TYPE_GRE:
1593 			size += sizeof(struct ibv_flow_spec_tunnel);
1594 			break;
1595 #endif
1596 		default:
1597 			break;
1598 		}
1599 	}
1600 	return size;
1601 }
1602 
1603 /**
1604  * Internal preparation function. Allocate mlx5_flow with the required size.
1605  * The required size is calculate based on the actions and items. This function
1606  * also returns the detected actions and items for later use.
1607  *
1608  * @param[in] dev
1609  *   Pointer to Ethernet device.
1610  * @param[in] attr
1611  *   Pointer to the flow attributes.
1612  * @param[in] items
1613  *   Pointer to the list of items.
1614  * @param[in] actions
1615  *   Pointer to the list of actions.
1616  * @param[out] error
1617  *   Pointer to the error structure.
1618  *
1619  * @return
1620  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1621  *   is set.
1622  */
1623 static struct mlx5_flow *
1624 flow_verbs_prepare(struct rte_eth_dev *dev,
1625 		   const struct rte_flow_attr *attr __rte_unused,
1626 		   const struct rte_flow_item items[],
1627 		   const struct rte_flow_action actions[],
1628 		   struct rte_flow_error *error)
1629 {
1630 	size_t size = 0;
1631 	uint32_t handle_idx = 0;
1632 	struct mlx5_flow *dev_flow;
1633 	struct mlx5_flow_handle *dev_handle;
1634 	struct mlx5_priv *priv = dev->data->dev_private;
1635 
1636 	size += flow_verbs_get_actions_size(actions);
1637 	size += flow_verbs_get_items_size(items);
1638 	if (size > MLX5_VERBS_MAX_SPEC_ACT_SIZE) {
1639 		rte_flow_error_set(error, E2BIG,
1640 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1641 				   "Verbs spec/action size too large");
1642 		return NULL;
1643 	}
1644 	/* In case of corrupting the memory. */
1645 	if (priv->flow_idx >= MLX5_NUM_MAX_DEV_FLOWS) {
1646 		rte_flow_error_set(error, ENOSPC,
1647 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1648 				   "not free temporary device flow");
1649 		return NULL;
1650 	}
1651 	dev_handle = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1652 				   &handle_idx);
1653 	if (!dev_handle) {
1654 		rte_flow_error_set(error, ENOMEM,
1655 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1656 				   "not enough memory to create flow handle");
1657 		return NULL;
1658 	}
1659 	/* No multi-thread supporting. */
1660 	dev_flow = &((struct mlx5_flow *)priv->inter_flows)[priv->flow_idx++];
1661 	dev_flow->handle = dev_handle;
1662 	dev_flow->handle_idx = handle_idx;
1663 	/* Memcpy is used, only size needs to be cleared to 0. */
1664 	dev_flow->verbs.size = 0;
1665 	dev_flow->verbs.attr.num_of_specs = 0;
1666 	dev_flow->ingress = attr->ingress;
1667 	dev_flow->hash_fields = 0;
1668 	/* Need to set transfer attribute: not supported in Verbs mode. */
1669 	return dev_flow;
1670 }
1671 
1672 /**
1673  * Fill the flow with verb spec.
1674  *
1675  * @param[in] dev
1676  *   Pointer to Ethernet device.
1677  * @param[in, out] dev_flow
1678  *   Pointer to the mlx5 flow.
1679  * @param[in] attr
1680  *   Pointer to the flow attributes.
1681  * @param[in] items
1682  *   Pointer to the list of items.
1683  * @param[in] actions
1684  *   Pointer to the list of actions.
1685  * @param[out] error
1686  *   Pointer to the error structure.
1687  *
1688  * @return
1689  *   0 on success, else a negative errno value otherwise and rte_errno is set.
1690  */
1691 static int
1692 flow_verbs_translate(struct rte_eth_dev *dev,
1693 		     struct mlx5_flow *dev_flow,
1694 		     const struct rte_flow_attr *attr,
1695 		     const struct rte_flow_item items[],
1696 		     const struct rte_flow_action actions[],
1697 		     struct rte_flow_error *error)
1698 {
1699 	uint64_t item_flags = 0;
1700 	uint64_t action_flags = 0;
1701 	uint64_t priority = attr->priority;
1702 	uint32_t subpriority = 0;
1703 	struct mlx5_priv *priv = dev->data->dev_private;
1704 	struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
1705 					      priv->rss_desc)
1706 					      [!!priv->flow_nested_idx];
1707 
1708 	if (priority == MLX5_FLOW_PRIO_RSVD)
1709 		priority = priv->config.flow_prio - 1;
1710 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1711 		int ret;
1712 
1713 		switch (actions->type) {
1714 		case RTE_FLOW_ACTION_TYPE_VOID:
1715 			break;
1716 		case RTE_FLOW_ACTION_TYPE_FLAG:
1717 			flow_verbs_translate_action_flag(dev_flow, actions);
1718 			action_flags |= MLX5_FLOW_ACTION_FLAG;
1719 			dev_flow->handle->mark = 1;
1720 			break;
1721 		case RTE_FLOW_ACTION_TYPE_MARK:
1722 			flow_verbs_translate_action_mark(dev_flow, actions);
1723 			action_flags |= MLX5_FLOW_ACTION_MARK;
1724 			dev_flow->handle->mark = 1;
1725 			break;
1726 		case RTE_FLOW_ACTION_TYPE_DROP:
1727 			flow_verbs_translate_action_drop(dev_flow, actions);
1728 			action_flags |= MLX5_FLOW_ACTION_DROP;
1729 			dev_flow->handle->fate_action = MLX5_FLOW_FATE_DROP;
1730 			break;
1731 		case RTE_FLOW_ACTION_TYPE_QUEUE:
1732 			flow_verbs_translate_action_queue(rss_desc, actions);
1733 			action_flags |= MLX5_FLOW_ACTION_QUEUE;
1734 			dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
1735 			break;
1736 		case RTE_FLOW_ACTION_TYPE_RSS:
1737 			flow_verbs_translate_action_rss(rss_desc, actions);
1738 			action_flags |= MLX5_FLOW_ACTION_RSS;
1739 			dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
1740 			break;
1741 		case RTE_FLOW_ACTION_TYPE_COUNT:
1742 			ret = flow_verbs_translate_action_count(dev_flow,
1743 								actions,
1744 								dev, error);
1745 			if (ret < 0)
1746 				return ret;
1747 			action_flags |= MLX5_FLOW_ACTION_COUNT;
1748 			break;
1749 		default:
1750 			return rte_flow_error_set(error, ENOTSUP,
1751 						  RTE_FLOW_ERROR_TYPE_ACTION,
1752 						  actions,
1753 						  "action not supported");
1754 		}
1755 	}
1756 	dev_flow->act_flags = action_flags;
1757 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1758 		int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1759 
1760 		switch (items->type) {
1761 		case RTE_FLOW_ITEM_TYPE_VOID:
1762 			break;
1763 		case RTE_FLOW_ITEM_TYPE_ETH:
1764 			flow_verbs_translate_item_eth(dev_flow, items,
1765 						      item_flags);
1766 			subpriority = MLX5_PRIORITY_MAP_L2;
1767 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1768 					       MLX5_FLOW_LAYER_OUTER_L2;
1769 			break;
1770 		case RTE_FLOW_ITEM_TYPE_VLAN:
1771 			flow_verbs_translate_item_vlan(dev_flow, items,
1772 						       item_flags);
1773 			subpriority = MLX5_PRIORITY_MAP_L2;
1774 			item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1775 						MLX5_FLOW_LAYER_INNER_VLAN) :
1776 					       (MLX5_FLOW_LAYER_OUTER_L2 |
1777 						MLX5_FLOW_LAYER_OUTER_VLAN);
1778 			break;
1779 		case RTE_FLOW_ITEM_TYPE_IPV4:
1780 			flow_verbs_translate_item_ipv4(dev_flow, items,
1781 						       item_flags);
1782 			subpriority = MLX5_PRIORITY_MAP_L3;
1783 			dev_flow->hash_fields |=
1784 				mlx5_flow_hashfields_adjust
1785 					(rss_desc, tunnel,
1786 					 MLX5_IPV4_LAYER_TYPES,
1787 					 MLX5_IPV4_IBV_RX_HASH);
1788 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1789 					       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1790 			break;
1791 		case RTE_FLOW_ITEM_TYPE_IPV6:
1792 			flow_verbs_translate_item_ipv6(dev_flow, items,
1793 						       item_flags);
1794 			subpriority = MLX5_PRIORITY_MAP_L3;
1795 			dev_flow->hash_fields |=
1796 				mlx5_flow_hashfields_adjust
1797 					(rss_desc, tunnel,
1798 					 MLX5_IPV6_LAYER_TYPES,
1799 					 MLX5_IPV6_IBV_RX_HASH);
1800 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1801 					       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1802 			break;
1803 		case RTE_FLOW_ITEM_TYPE_TCP:
1804 			flow_verbs_translate_item_tcp(dev_flow, items,
1805 						      item_flags);
1806 			subpriority = MLX5_PRIORITY_MAP_L4;
1807 			dev_flow->hash_fields |=
1808 				mlx5_flow_hashfields_adjust
1809 					(rss_desc, tunnel, ETH_RSS_TCP,
1810 					 (IBV_RX_HASH_SRC_PORT_TCP |
1811 					  IBV_RX_HASH_DST_PORT_TCP));
1812 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1813 					       MLX5_FLOW_LAYER_OUTER_L4_TCP;
1814 			break;
1815 		case RTE_FLOW_ITEM_TYPE_UDP:
1816 			flow_verbs_translate_item_udp(dev_flow, items,
1817 						      item_flags);
1818 			subpriority = MLX5_PRIORITY_MAP_L4;
1819 			dev_flow->hash_fields |=
1820 				mlx5_flow_hashfields_adjust
1821 					(rss_desc, tunnel, ETH_RSS_UDP,
1822 					 (IBV_RX_HASH_SRC_PORT_UDP |
1823 					  IBV_RX_HASH_DST_PORT_UDP));
1824 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1825 					       MLX5_FLOW_LAYER_OUTER_L4_UDP;
1826 			break;
1827 		case RTE_FLOW_ITEM_TYPE_VXLAN:
1828 			flow_verbs_translate_item_vxlan(dev_flow, items,
1829 							item_flags);
1830 			subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
1831 			item_flags |= MLX5_FLOW_LAYER_VXLAN;
1832 			break;
1833 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1834 			flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
1835 							    item_flags);
1836 			subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
1837 			item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1838 			break;
1839 		case RTE_FLOW_ITEM_TYPE_GRE:
1840 			flow_verbs_translate_item_gre(dev_flow, items,
1841 						      item_flags);
1842 			subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
1843 			item_flags |= MLX5_FLOW_LAYER_GRE;
1844 			break;
1845 		case RTE_FLOW_ITEM_TYPE_MPLS:
1846 			flow_verbs_translate_item_mpls(dev_flow, items,
1847 						       item_flags);
1848 			subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
1849 			item_flags |= MLX5_FLOW_LAYER_MPLS;
1850 			break;
1851 		default:
1852 			return rte_flow_error_set(error, ENOTSUP,
1853 						  RTE_FLOW_ERROR_TYPE_ITEM,
1854 						  NULL,
1855 						  "item not supported");
1856 		}
1857 	}
1858 	dev_flow->handle->layers = item_flags;
1859 	/* Other members of attr will be ignored. */
1860 	dev_flow->verbs.attr.priority =
1861 		mlx5_flow_adjust_priority(dev, priority, subpriority);
1862 	dev_flow->verbs.attr.port = (uint8_t)priv->dev_port;
1863 	return 0;
1864 }
1865 
1866 /**
1867  * Remove the flow from the NIC but keeps it in memory.
1868  *
1869  * @param[in] dev
1870  *   Pointer to the Ethernet device structure.
1871  * @param[in, out] flow
1872  *   Pointer to flow structure.
1873  */
1874 static void
1875 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1876 {
1877 	struct mlx5_priv *priv = dev->data->dev_private;
1878 	struct mlx5_flow_handle *handle;
1879 	uint32_t handle_idx;
1880 
1881 	if (!flow)
1882 		return;
1883 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1884 		       handle_idx, handle, next) {
1885 		if (handle->drv_flow) {
1886 			claim_zero(mlx5_glue->destroy_flow(handle->drv_flow));
1887 			handle->drv_flow = NULL;
1888 		}
1889 		/* hrxq is union, don't touch it only the flag is set. */
1890 		if (handle->rix_hrxq) {
1891 			if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
1892 				mlx5_drop_action_destroy(dev);
1893 				handle->rix_hrxq = 0;
1894 			} else if (handle->fate_action ==
1895 				   MLX5_FLOW_FATE_QUEUE) {
1896 				mlx5_hrxq_release(dev, handle->rix_hrxq);
1897 				handle->rix_hrxq = 0;
1898 			}
1899 		}
1900 		if (handle->vf_vlan.tag && handle->vf_vlan.created)
1901 			mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
1902 	}
1903 }
1904 
1905 /**
1906  * Remove the flow from the NIC and the memory.
1907  *
1908  * @param[in] dev
1909  *   Pointer to the Ethernet device structure.
1910  * @param[in, out] flow
1911  *   Pointer to flow structure.
1912  */
1913 static void
1914 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1915 {
1916 	struct mlx5_priv *priv = dev->data->dev_private;
1917 	struct mlx5_flow_handle *handle;
1918 
1919 	if (!flow)
1920 		return;
1921 	flow_verbs_remove(dev, flow);
1922 	while (flow->dev_handles) {
1923 		uint32_t tmp_idx = flow->dev_handles;
1924 
1925 		handle = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1926 				   tmp_idx);
1927 		if (!handle)
1928 			return;
1929 		flow->dev_handles = handle->next.next;
1930 		mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1931 			   tmp_idx);
1932 	}
1933 	if (flow->counter) {
1934 		flow_verbs_counter_release(dev, flow->counter);
1935 		flow->counter = 0;
1936 	}
1937 }
1938 
1939 /**
1940  * Apply the flow to the NIC.
1941  *
1942  * @param[in] dev
1943  *   Pointer to the Ethernet device structure.
1944  * @param[in, out] flow
1945  *   Pointer to flow structure.
1946  * @param[out] error
1947  *   Pointer to error structure.
1948  *
1949  * @return
1950  *   0 on success, a negative errno value otherwise and rte_errno is set.
1951  */
1952 static int
1953 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1954 		 struct rte_flow_error *error)
1955 {
1956 	struct mlx5_priv *priv = dev->data->dev_private;
1957 	struct mlx5_flow_handle *handle;
1958 	struct mlx5_flow *dev_flow;
1959 	struct mlx5_hrxq *hrxq;
1960 	uint32_t dev_handles;
1961 	int err;
1962 	int idx;
1963 
1964 	for (idx = priv->flow_idx - 1; idx >= priv->flow_nested_idx; idx--) {
1965 		dev_flow = &((struct mlx5_flow *)priv->inter_flows)[idx];
1966 		handle = dev_flow->handle;
1967 		if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
1968 			hrxq = mlx5_drop_action_create(dev);
1969 			if (!hrxq) {
1970 				rte_flow_error_set
1971 					(error, errno,
1972 					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1973 					 "cannot get drop hash queue");
1974 				goto error;
1975 			}
1976 		} else {
1977 			uint32_t hrxq_idx;
1978 			struct mlx5_flow_rss_desc *rss_desc =
1979 				&((struct mlx5_flow_rss_desc *)priv->rss_desc)
1980 				[!!priv->flow_nested_idx];
1981 
1982 			MLX5_ASSERT(rss_desc->queue_num);
1983 			hrxq_idx = mlx5_hrxq_get(dev, rss_desc->key,
1984 						 MLX5_RSS_HASH_KEY_LEN,
1985 						 dev_flow->hash_fields,
1986 						 rss_desc->queue,
1987 						 rss_desc->queue_num);
1988 			if (!hrxq_idx)
1989 				hrxq_idx = mlx5_hrxq_new
1990 						(dev, rss_desc->key,
1991 						 MLX5_RSS_HASH_KEY_LEN,
1992 						 dev_flow->hash_fields,
1993 						 rss_desc->queue,
1994 						 rss_desc->queue_num,
1995 						 !!(handle->layers &
1996 						 MLX5_FLOW_LAYER_TUNNEL));
1997 			hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1998 					      hrxq_idx);
1999 			if (!hrxq) {
2000 				rte_flow_error_set
2001 					(error, rte_errno,
2002 					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2003 					 "cannot get hash queue");
2004 				goto error;
2005 			}
2006 			handle->rix_hrxq = hrxq_idx;
2007 		}
2008 		MLX5_ASSERT(hrxq);
2009 		handle->drv_flow = mlx5_glue->create_flow
2010 					(hrxq->qp, &dev_flow->verbs.attr);
2011 		if (!handle->drv_flow) {
2012 			rte_flow_error_set(error, errno,
2013 					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2014 					   NULL,
2015 					   "hardware refuses to create flow");
2016 			goto error;
2017 		}
2018 		if (priv->vmwa_context &&
2019 		    handle->vf_vlan.tag && !handle->vf_vlan.created) {
2020 			/*
2021 			 * The rule contains the VLAN pattern.
2022 			 * For VF we are going to create VLAN
2023 			 * interface to make hypervisor set correct
2024 			 * e-Switch vport context.
2025 			 */
2026 			mlx5_vlan_vmwa_acquire(dev, &handle->vf_vlan);
2027 		}
2028 	}
2029 	return 0;
2030 error:
2031 	err = rte_errno; /* Save rte_errno before cleanup. */
2032 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
2033 		       dev_handles, handle, next) {
2034 		/* hrxq is union, don't touch it only the flag is set. */
2035 		if (handle->rix_hrxq) {
2036 			if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
2037 				mlx5_drop_action_destroy(dev);
2038 				handle->rix_hrxq = 0;
2039 			} else if (handle->fate_action ==
2040 				   MLX5_FLOW_FATE_QUEUE) {
2041 				mlx5_hrxq_release(dev, handle->rix_hrxq);
2042 				handle->rix_hrxq = 0;
2043 			}
2044 		}
2045 		if (handle->vf_vlan.tag && handle->vf_vlan.created)
2046 			mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
2047 	}
2048 	rte_errno = err; /* Restore rte_errno. */
2049 	return -rte_errno;
2050 }
2051 
2052 /**
2053  * Query a flow.
2054  *
2055  * @see rte_flow_query()
2056  * @see rte_flow_ops
2057  */
2058 static int
2059 flow_verbs_query(struct rte_eth_dev *dev,
2060 		 struct rte_flow *flow,
2061 		 const struct rte_flow_action *actions,
2062 		 void *data,
2063 		 struct rte_flow_error *error)
2064 {
2065 	int ret = -EINVAL;
2066 
2067 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2068 		switch (actions->type) {
2069 		case RTE_FLOW_ACTION_TYPE_VOID:
2070 			break;
2071 		case RTE_FLOW_ACTION_TYPE_COUNT:
2072 			ret = flow_verbs_counter_query(dev, flow, data, error);
2073 			break;
2074 		default:
2075 			return rte_flow_error_set(error, ENOTSUP,
2076 						  RTE_FLOW_ERROR_TYPE_ACTION,
2077 						  actions,
2078 						  "action not supported");
2079 		}
2080 	}
2081 	return ret;
2082 }
2083 
2084 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
2085 	.validate = flow_verbs_validate,
2086 	.prepare = flow_verbs_prepare,
2087 	.translate = flow_verbs_translate,
2088 	.apply = flow_verbs_apply,
2089 	.remove = flow_verbs_remove,
2090 	.destroy = flow_verbs_destroy,
2091 	.query = flow_verbs_query,
2092 };
2093