xref: /dpdk/drivers/net/mlx5/mlx5_flow_verbs.c (revision 200bc52e5aa0d72e70464c9cd22b55cf536ed13c)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4 
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10 
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20 
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28 
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_flow.h"
32 #include "mlx5_glue.h"
33 #include "mlx5_prm.h"
34 #include "mlx5_rxtx.h"
35 
36 #define VERBS_SPEC_INNER(item_flags) \
37 	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
38 
/**
 * Create Verbs flow counter with Verbs library.
 *
 * Three compile-time variants exist depending on which counter API the
 * Verbs library provides:
 *  - HAVE_IBV_DEVICE_COUNTERS_SET_V42: a counter set is created with
 *    create_counter_set() using the mlx5 counter id.
 *  - HAVE_IBV_DEVICE_COUNTERS_SET_V45: a generic counters object is
 *    created and packet/byte counters are attached at indices 0 and 1.
 *  - Neither: counters are unsupported, the call always fails.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in, out] counter
 *   mlx5 flow counter object, contains the counter id,
 *   handle of created Verbs flow counter is returned
 *   in cs field (if counters are supported).
 *
 * @return
 *   0 On success else a negative errno value is returned
 *   and rte_errno is set.
 */
static int
flow_verbs_counter_create(struct rte_eth_dev *dev,
			  struct mlx5_flow_counter *counter)
{
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->ctx;
	struct ibv_counter_set_init_attr init = {
			 .counter_set_id = counter->id};

	counter->cs = mlx5_glue->create_counter_set(ctx, &init);
	if (!counter->cs) {
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	return 0;
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->ctx;
	struct ibv_counters_init_attr init = {0};
	struct ibv_counter_attach_attr attach;
	int ret;

	memset(&attach, 0, sizeof(attach));
	counter->cs = mlx5_glue->create_counters(ctx, &init);
	if (!counter->cs) {
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	/* Index 0 carries the packet count, index 1 the byte count. */
	attach.counter_desc = IBV_COUNTER_PACKETS;
	attach.index = 0;
	ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
	if (!ret) {
		attach.counter_desc = IBV_COUNTER_BYTES;
		attach.index = 1;
		ret = mlx5_glue->attach_counters
					(counter->cs, &attach, NULL);
	}
	if (ret) {
		/* Attach failed: destroy the half-initialized object. */
		claim_zero(mlx5_glue->destroy_counters(counter->cs));
		counter->cs = NULL;
		rte_errno = ret;
		return -ret;
	}
	return 0;
#else
	(void)dev;
	(void)counter;
	rte_errno = ENOTSUP;
	return -ENOTSUP;
#endif
}
105 
106 /**
107  * Get a flow counter.
108  *
109  * @param[in] dev
110  *   Pointer to the Ethernet device structure.
111  * @param[in] shared
112  *   Indicate if this counter is shared with other flows.
113  * @param[in] id
114  *   Counter identifier.
115  *
116  * @return
117  *   A pointer to the counter, NULL otherwise and rte_errno is set.
118  */
119 static struct mlx5_flow_counter *
120 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
121 {
122 	struct mlx5_priv *priv = dev->data->dev_private;
123 	struct mlx5_flow_counter *cnt;
124 	int ret;
125 
126 	if (shared) {
127 		LIST_FOREACH(cnt, &priv->flow_counters, next) {
128 			if (cnt->shared && cnt->id == id) {
129 				cnt->ref_cnt++;
130 				return cnt;
131 			}
132 		}
133 	}
134 	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
135 	if (!cnt) {
136 		rte_errno = ENOMEM;
137 		return NULL;
138 	}
139 	cnt->id = id;
140 	cnt->shared = shared;
141 	cnt->ref_cnt = 1;
142 	cnt->hits = 0;
143 	cnt->bytes = 0;
144 	/* Create counter with Verbs. */
145 	ret = flow_verbs_counter_create(dev, cnt);
146 	if (!ret) {
147 		LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
148 		return cnt;
149 	}
150 	/* Some error occurred in Verbs library. */
151 	rte_free(cnt);
152 	rte_errno = -ret;
153 	return NULL;
154 }
155 
/**
 * Release a flow counter.
 *
 * Decrements the reference count; once it reaches zero the Verbs
 * counter object is destroyed (API variant selected at compile time),
 * the counter is unlinked from the per-device list and freed.
 *
 * @param[in] counter
 *   Pointer to the counter handler.
 */
static void
flow_verbs_counter_release(struct mlx5_flow_counter *counter)
{
	if (--counter->ref_cnt == 0) {
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
		claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
		claim_zero(mlx5_glue->destroy_counters(counter->cs));
#endif
		LIST_REMOVE(counter, next);
		rte_free(counter);
	}
}
175 
/**
 * Query a flow counter via Verbs library call.
 *
 * Reads the packet and byte totals accumulated by the Verbs counter
 * attached to @p flow and reports them as deltas relative to the last
 * reset.  Works with both the V42 (counter set) and V45 (generic
 * counters) Verbs APIs; fails with ENOTSUP when neither is compiled in.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
static int
flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
			 struct rte_flow *flow, void *data,
			 struct rte_flow_error *error)
{
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
		struct rte_flow_query_count *qc = data;
		/* counters[0] holds packets, counters[1] holds bytes. */
		uint64_t counters[2] = {0, 0};
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
		struct ibv_query_counter_set_attr query_cs_attr = {
			.cs = flow->counter->cs,
			.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
		};
		struct ibv_counter_set_data query_out = {
			.out = counters,
			.outlen = 2 * sizeof(uint64_t),
		};
		int err = mlx5_glue->query_counter_set(&query_cs_attr,
						       &query_out);
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
		int err = mlx5_glue->query_counters
			       (flow->counter->cs, counters,
				RTE_DIM(counters),
				IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
#endif
		if (err)
			return rte_flow_error_set
				(error, err,
				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				 NULL,
				 "cannot read counter");
		qc->hits_set = 1;
		qc->bytes_set = 1;
		/* Report deltas against the values at the last reset. */
		qc->hits = counters[0] - flow->counter->hits;
		qc->bytes = counters[1] - flow->counter->bytes;
		/* On reset, remember totals so future queries restart at 0. */
		if (qc->reset) {
			flow->counter->hits = counters[0];
			flow->counter->bytes = counters[1];
		}
		return 0;
	}
	return rte_flow_error_set(error, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL,
				  "flow does not have counter");
#else
	(void)flow;
	(void)data;
	return rte_flow_error_set(error, ENOTSUP,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL,
				  "counters are not available");
#endif
}
238 
239 /**
240  * Add a verbs item specification into @p verbs.
241  *
242  * @param[out] verbs
243  *   Pointer to verbs structure.
244  * @param[in] src
245  *   Create specification.
246  * @param[in] size
247  *   Size in bytes of the specification to copy.
248  */
249 static void
250 flow_verbs_spec_add(struct mlx5_flow_verbs *verbs, void *src, unsigned int size)
251 {
252 	void *dst;
253 
254 	if (!verbs)
255 		return;
256 	assert(verbs->specs);
257 	dst = (void *)(verbs->specs + verbs->size);
258 	memcpy(dst, src, size);
259 	++verbs->attr->num_of_specs;
260 	verbs->size += size;
261 }
262 
263 /**
264  * Convert the @p item into a Verbs specification. This function assumes that
265  * the input is valid and that there is space to insert the requested item
266  * into the flow.
267  *
268  * @param[in, out] dev_flow
269  *   Pointer to dev_flow structure.
270  * @param[in] item
271  *   Item specification.
272  * @param[in] item_flags
273  *   Parsed item flags.
274  */
275 static void
276 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
277 			      const struct rte_flow_item *item,
278 			      uint64_t item_flags)
279 {
280 	const struct rte_flow_item_eth *spec = item->spec;
281 	const struct rte_flow_item_eth *mask = item->mask;
282 	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
283 	struct ibv_flow_spec_eth eth = {
284 		.type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
285 		.size = size,
286 	};
287 
288 	if (!mask)
289 		mask = &rte_flow_item_eth_mask;
290 	if (spec) {
291 		unsigned int i;
292 
293 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes,
294 			RTE_ETHER_ADDR_LEN);
295 		memcpy(&eth.val.src_mac, spec->src.addr_bytes,
296 			RTE_ETHER_ADDR_LEN);
297 		eth.val.ether_type = spec->type;
298 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes,
299 			RTE_ETHER_ADDR_LEN);
300 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes,
301 			RTE_ETHER_ADDR_LEN);
302 		eth.mask.ether_type = mask->type;
303 		/* Remove unwanted bits from values. */
304 		for (i = 0; i < RTE_ETHER_ADDR_LEN; ++i) {
305 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
306 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
307 		}
308 		eth.val.ether_type &= eth.mask.ether_type;
309 	}
310 	flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
311 }
312 
313 /**
314  * Update the VLAN tag in the Verbs Ethernet specification.
315  * This function assumes that the input is valid and there is space to add
316  * the requested item.
317  *
318  * @param[in, out] attr
319  *   Pointer to Verbs attributes structure.
320  * @param[in] eth
321  *   Verbs structure containing the VLAN information to copy.
322  */
323 static void
324 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
325 			    struct ibv_flow_spec_eth *eth)
326 {
327 	unsigned int i;
328 	const enum ibv_flow_spec_type search = eth->type;
329 	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
330 		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
331 
332 	for (i = 0; i != attr->num_of_specs; ++i) {
333 		if (hdr->type == search) {
334 			struct ibv_flow_spec_eth *e =
335 				(struct ibv_flow_spec_eth *)hdr;
336 
337 			e->val.vlan_tag = eth->val.vlan_tag;
338 			e->mask.vlan_tag = eth->mask.vlan_tag;
339 			e->val.ether_type = eth->val.ether_type;
340 			e->mask.ether_type = eth->mask.ether_type;
341 			break;
342 		}
343 		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
344 	}
345 }
346 
347 /**
348  * Convert the @p item into a Verbs specification. This function assumes that
349  * the input is valid and that there is space to insert the requested item
350  * into the flow.
351  *
352  * @param[in, out] dev_flow
353  *   Pointer to dev_flow structure.
354  * @param[in] item
355  *   Item specification.
356  * @param[in] item_flags
357  *   Parsed item flags.
358  */
359 static void
360 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
361 			       const struct rte_flow_item *item,
362 			       uint64_t item_flags)
363 {
364 	const struct rte_flow_item_vlan *spec = item->spec;
365 	const struct rte_flow_item_vlan *mask = item->mask;
366 	unsigned int size = sizeof(struct ibv_flow_spec_eth);
367 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
368 	struct ibv_flow_spec_eth eth = {
369 		.type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
370 		.size = size,
371 	};
372 	const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
373 				      MLX5_FLOW_LAYER_OUTER_L2;
374 
375 	if (!mask)
376 		mask = &rte_flow_item_vlan_mask;
377 	if (spec) {
378 		eth.val.vlan_tag = spec->tci;
379 		eth.mask.vlan_tag = mask->tci;
380 		eth.val.vlan_tag &= eth.mask.vlan_tag;
381 		eth.val.ether_type = spec->inner_type;
382 		eth.mask.ether_type = mask->inner_type;
383 		eth.val.ether_type &= eth.mask.ether_type;
384 	}
385 	if (!(item_flags & l2m))
386 		flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
387 	else
388 		flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
389 }
390 
391 /**
392  * Convert the @p item into a Verbs specification. This function assumes that
393  * the input is valid and that there is space to insert the requested item
394  * into the flow.
395  *
396  * @param[in, out] dev_flow
397  *   Pointer to dev_flow structure.
398  * @param[in] item
399  *   Item specification.
400  * @param[in] item_flags
401  *   Parsed item flags.
402  */
403 static void
404 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
405 			       const struct rte_flow_item *item,
406 			       uint64_t item_flags)
407 {
408 	const struct rte_flow_item_ipv4 *spec = item->spec;
409 	const struct rte_flow_item_ipv4 *mask = item->mask;
410 	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
411 	struct ibv_flow_spec_ipv4_ext ipv4 = {
412 		.type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
413 		.size = size,
414 	};
415 
416 	if (!mask)
417 		mask = &rte_flow_item_ipv4_mask;
418 	if (spec) {
419 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
420 			.src_ip = spec->hdr.src_addr,
421 			.dst_ip = spec->hdr.dst_addr,
422 			.proto = spec->hdr.next_proto_id,
423 			.tos = spec->hdr.type_of_service,
424 		};
425 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
426 			.src_ip = mask->hdr.src_addr,
427 			.dst_ip = mask->hdr.dst_addr,
428 			.proto = mask->hdr.next_proto_id,
429 			.tos = mask->hdr.type_of_service,
430 		};
431 		/* Remove unwanted bits from values. */
432 		ipv4.val.src_ip &= ipv4.mask.src_ip;
433 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
434 		ipv4.val.proto &= ipv4.mask.proto;
435 		ipv4.val.tos &= ipv4.mask.tos;
436 	}
437 	flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
438 }
439 
/**
 * Convert the @p item into a Verbs specification. This function assumes that
 * the input is valid and that there is space to insert the requested item
 * into the flow.
 *
 * The flow label and traffic class are extracted from the combined
 * vtc_flow field of the IPv6 header and placed into the separate Verbs
 * fields.
 *
 * @param[in, out] dev_flow
 *   Pointer to dev_flow structure.
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Parsed item flags.
 */
static void
flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
			       const struct rte_flow_item *item,
			       uint64_t item_flags)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
		.size = size,
	};

	if (!mask)
		mask = &rte_flow_item_ipv6_mask;
	if (spec) {
		unsigned int i;
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;

		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		/* Split vtc_flow into flow label and traffic class. */
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & RTE_IPV6_HDR_FL_MASK) >>
					 RTE_IPV6_HDR_FL_SHIFT);
		ipv6.val.traffic_class = (vtc_flow_val & RTE_IPV6_HDR_TC_MASK) >>
					 RTE_IPV6_HDR_TC_SHIFT;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & RTE_IPV6_HDR_FL_MASK) >>
					 RTE_IPV6_HDR_FL_SHIFT);
		ipv6.mask.traffic_class = (vtc_flow_mask & RTE_IPV6_HDR_TC_MASK) >>
					  RTE_IPV6_HDR_TC_SHIFT;
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	}
	flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
}
508 
509 /**
510  * Convert the @p item into a Verbs specification. This function assumes that
511  * the input is valid and that there is space to insert the requested item
512  * into the flow.
513  *
514  * @param[in, out] dev_flow
515  *   Pointer to dev_flow structure.
516  * @param[in] item
517  *   Item specification.
518  * @param[in] item_flags
519  *   Parsed item flags.
520  */
521 static void
522 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
523 			      const struct rte_flow_item *item,
524 			      uint64_t item_flags __rte_unused)
525 {
526 	const struct rte_flow_item_tcp *spec = item->spec;
527 	const struct rte_flow_item_tcp *mask = item->mask;
528 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
529 	struct ibv_flow_spec_tcp_udp tcp = {
530 		.type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
531 		.size = size,
532 	};
533 
534 	if (!mask)
535 		mask = &rte_flow_item_tcp_mask;
536 	if (spec) {
537 		tcp.val.dst_port = spec->hdr.dst_port;
538 		tcp.val.src_port = spec->hdr.src_port;
539 		tcp.mask.dst_port = mask->hdr.dst_port;
540 		tcp.mask.src_port = mask->hdr.src_port;
541 		/* Remove unwanted bits from values. */
542 		tcp.val.src_port &= tcp.mask.src_port;
543 		tcp.val.dst_port &= tcp.mask.dst_port;
544 	}
545 	flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
546 }
547 
548 /**
549  * Convert the @p item into a Verbs specification. This function assumes that
550  * the input is valid and that there is space to insert the requested item
551  * into the flow.
552  *
553  * @param[in, out] dev_flow
554  *   Pointer to dev_flow structure.
555  * @param[in] item
556  *   Item specification.
557  * @param[in] item_flags
558  *   Parsed item flags.
559  */
560 static void
561 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
562 			      const struct rte_flow_item *item,
563 			      uint64_t item_flags __rte_unused)
564 {
565 	const struct rte_flow_item_udp *spec = item->spec;
566 	const struct rte_flow_item_udp *mask = item->mask;
567 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
568 	struct ibv_flow_spec_tcp_udp udp = {
569 		.type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
570 		.size = size,
571 	};
572 
573 	if (!mask)
574 		mask = &rte_flow_item_udp_mask;
575 	if (spec) {
576 		udp.val.dst_port = spec->hdr.dst_port;
577 		udp.val.src_port = spec->hdr.src_port;
578 		udp.mask.dst_port = mask->hdr.dst_port;
579 		udp.mask.src_port = mask->hdr.src_port;
580 		/* Remove unwanted bits from values. */
581 		udp.val.src_port &= udp.mask.src_port;
582 		udp.val.dst_port &= udp.mask.dst_port;
583 	}
584 	flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
585 }
586 
587 /**
588  * Convert the @p item into a Verbs specification. This function assumes that
589  * the input is valid and that there is space to insert the requested item
590  * into the flow.
591  *
592  * @param[in, out] dev_flow
593  *   Pointer to dev_flow structure.
594  * @param[in] item
595  *   Item specification.
596  * @param[in] item_flags
597  *   Parsed item flags.
598  */
599 static void
600 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
601 				const struct rte_flow_item *item,
602 				uint64_t item_flags __rte_unused)
603 {
604 	const struct rte_flow_item_vxlan *spec = item->spec;
605 	const struct rte_flow_item_vxlan *mask = item->mask;
606 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
607 	struct ibv_flow_spec_tunnel vxlan = {
608 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
609 		.size = size,
610 	};
611 	union vni {
612 		uint32_t vlan_id;
613 		uint8_t vni[4];
614 	} id = { .vlan_id = 0, };
615 
616 	if (!mask)
617 		mask = &rte_flow_item_vxlan_mask;
618 	if (spec) {
619 		memcpy(&id.vni[1], spec->vni, 3);
620 		vxlan.val.tunnel_id = id.vlan_id;
621 		memcpy(&id.vni[1], mask->vni, 3);
622 		vxlan.mask.tunnel_id = id.vlan_id;
623 		/* Remove unwanted bits from values. */
624 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
625 	}
626 	flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
627 }
628 
629 /**
630  * Convert the @p item into a Verbs specification. This function assumes that
631  * the input is valid and that there is space to insert the requested item
632  * into the flow.
633  *
634  * @param[in, out] dev_flow
635  *   Pointer to dev_flow structure.
636  * @param[in] item
637  *   Item specification.
638  * @param[in] item_flags
639  *   Parsed item flags.
640  */
641 static void
642 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
643 				    const struct rte_flow_item *item,
644 				    uint64_t item_flags __rte_unused)
645 {
646 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
647 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
648 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
649 	struct ibv_flow_spec_tunnel vxlan_gpe = {
650 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
651 		.size = size,
652 	};
653 	union vni {
654 		uint32_t vlan_id;
655 		uint8_t vni[4];
656 	} id = { .vlan_id = 0, };
657 
658 	if (!mask)
659 		mask = &rte_flow_item_vxlan_gpe_mask;
660 	if (spec) {
661 		memcpy(&id.vni[1], spec->vni, 3);
662 		vxlan_gpe.val.tunnel_id = id.vlan_id;
663 		memcpy(&id.vni[1], mask->vni, 3);
664 		vxlan_gpe.mask.tunnel_id = id.vlan_id;
665 		/* Remove unwanted bits from values. */
666 		vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
667 	}
668 	flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
669 }
670 
671 /**
672  * Update the protocol in Verbs IPv4/IPv6 spec.
673  *
674  * @param[in, out] attr
675  *   Pointer to Verbs attributes structure.
676  * @param[in] search
677  *   Specification type to search in order to update the IP protocol.
678  * @param[in] protocol
679  *   Protocol value to set if none is present in the specification.
680  */
681 static void
682 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
683 				       enum ibv_flow_spec_type search,
684 				       uint8_t protocol)
685 {
686 	unsigned int i;
687 	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
688 		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
689 
690 	if (!attr)
691 		return;
692 	for (i = 0; i != attr->num_of_specs; ++i) {
693 		if (hdr->type == search) {
694 			union {
695 				struct ibv_flow_spec_ipv4_ext *ipv4;
696 				struct ibv_flow_spec_ipv6 *ipv6;
697 			} ip;
698 
699 			switch (search) {
700 			case IBV_FLOW_SPEC_IPV4_EXT:
701 				ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
702 				if (!ip.ipv4->val.proto) {
703 					ip.ipv4->val.proto = protocol;
704 					ip.ipv4->mask.proto = 0xff;
705 				}
706 				break;
707 			case IBV_FLOW_SPEC_IPV6:
708 				ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
709 				if (!ip.ipv6->val.next_hdr) {
710 					ip.ipv6->val.next_hdr = protocol;
711 					ip.ipv6->mask.next_hdr = 0xff;
712 				}
713 				break;
714 			default:
715 				break;
716 			}
717 			break;
718 		}
719 		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
720 	}
721 }
722 
/**
 * Convert the @p item into a Verbs specification. This function assumes that
 * the input is valid and that there is space to insert the requested item
 * into the flow.
 *
 * Without MPLS support in the Verbs library, GRE is matched only as a
 * generic VXLAN-type tunnel; with it, the GRE header fields
 * (c_rsvd0_ver, protocol, key) can be matched precisely.  In both
 * cases the IP protocol of the preceding L3 spec is forced to GRE if
 * the application did not set one.
 *
 * @param[in, out] dev_flow
 *   Pointer to dev_flow structure.
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Parsed item flags.
 */
static void
flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
			      const struct rte_flow_item *item __rte_unused,
			      uint64_t item_flags)
{
	struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel tunnel = {
		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
#else
	const struct rte_flow_item_gre *spec = item->spec;
	const struct rte_flow_item_gre *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_gre);
	struct ibv_flow_spec_gre tunnel = {
		.type = IBV_FLOW_SPEC_GRE,
		.size = size,
	};

	if (!mask)
		mask = &rte_flow_item_gre_mask;
	if (spec) {
		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
		tunnel.val.protocol = spec->protocol;
		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
		tunnel.mask.protocol = mask->protocol;
		/* Remove unwanted bits from values. */
		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
		tunnel.val.protocol &= tunnel.mask.protocol;
		tunnel.val.key &= tunnel.mask.key;
	}
#endif
	/* Default the outer L3 protocol to GRE if left unspecified. */
	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
		flow_verbs_item_gre_ip_protocol_update(verbs->attr,
						       IBV_FLOW_SPEC_IPV4_EXT,
						       IPPROTO_GRE);
	else
		flow_verbs_item_gre_ip_protocol_update(verbs->attr,
						       IBV_FLOW_SPEC_IPV6,
						       IPPROTO_GRE);
	flow_verbs_spec_add(verbs, &tunnel, size);
}
779 
/**
 * Convert the @p item into a Verbs specification. This function assumes that
 * the input is valid and that there is space to insert the requested item
 * into the flow.
 *
 * Only effective when the Verbs library provides MPLS support
 * (HAVE_IBV_DEVICE_MPLS_SUPPORT); otherwise this is a no-op.
 *
 * @param[in, out] dev_flow
 *   Pointer to dev_flow structure.
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Parsed item flags.
 */
static void
flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
			       const struct rte_flow_item *item __rte_unused,
			       uint64_t item_flags __rte_unused)
{
#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
	const struct rte_flow_item_mpls *spec = item->spec;
	const struct rte_flow_item_mpls *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_mpls);
	struct ibv_flow_spec_mpls mpls = {
		.type = IBV_FLOW_SPEC_MPLS,
		.size = size,
	};

	if (!mask)
		mask = &rte_flow_item_mpls_mask;
	if (spec) {
		memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
		memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
		/* Remove unwanted bits from values.  */
		mpls.val.label &= mpls.mask.label;
	}
	flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
#endif
}
817 
818 /**
819  * Convert the @p action into a Verbs specification. This function assumes that
820  * the input is valid and that there is space to insert the requested action
821  * into the flow.
822  *
823  * @param[in] dev_flow
824  *   Pointer to mlx5_flow.
825  * @param[in] action
826  *   Action configuration.
827  */
828 static void
829 flow_verbs_translate_action_drop
830 	(struct mlx5_flow *dev_flow,
831 	 const struct rte_flow_action *action __rte_unused)
832 {
833 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
834 	struct ibv_flow_spec_action_drop drop = {
835 			.type = IBV_FLOW_SPEC_ACTION_DROP,
836 			.size = size,
837 	};
838 
839 	flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
840 }
841 
842 /**
843  * Convert the @p action into a Verbs specification. This function assumes that
844  * the input is valid and that there is space to insert the requested action
845  * into the flow.
846  *
847  * @param[in] dev_flow
848  *   Pointer to mlx5_flow.
849  * @param[in] action
850  *   Action configuration.
851  */
852 static void
853 flow_verbs_translate_action_queue(struct mlx5_flow *dev_flow,
854 				  const struct rte_flow_action *action)
855 {
856 	const struct rte_flow_action_queue *queue = action->conf;
857 	struct rte_flow *flow = dev_flow->flow;
858 
859 	if (flow->queue)
860 		(*flow->queue)[0] = queue->index;
861 	flow->rss.queue_num = 1;
862 }
863 
864 /**
865  * Convert the @p action into a Verbs specification. This function assumes that
866  * the input is valid and that there is space to insert the requested action
867  * into the flow.
868  *
869  * @param[in] action
870  *   Action configuration.
871  * @param[in, out] action_flags
872  *   Pointer to the detected actions.
873  * @param[in] dev_flow
874  *   Pointer to mlx5_flow.
875  */
876 static void
877 flow_verbs_translate_action_rss(struct mlx5_flow *dev_flow,
878 				const struct rte_flow_action *action)
879 {
880 	const struct rte_flow_action_rss *rss = action->conf;
881 	const uint8_t *rss_key;
882 	struct rte_flow *flow = dev_flow->flow;
883 
884 	if (flow->queue)
885 		memcpy((*flow->queue), rss->queue,
886 		       rss->queue_num * sizeof(uint16_t));
887 	flow->rss.queue_num = rss->queue_num;
888 	/* NULL RSS key indicates default RSS key. */
889 	rss_key = !rss->key ? rss_hash_default_key : rss->key;
890 	memcpy(flow->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
891 	/* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
892 	flow->rss.types = !rss->types ? ETH_RSS_IP : rss->types;
893 	flow->rss.level = rss->level;
894 }
895 
896 /**
897  * Convert the @p action into a Verbs specification. This function assumes that
898  * the input is valid and that there is space to insert the requested action
899  * into the flow.
900  *
901  * @param[in] dev_flow
902  *   Pointer to mlx5_flow.
903  * @param[in] action
904  *   Action configuration.
905  */
906 static void
907 flow_verbs_translate_action_flag
908 	(struct mlx5_flow *dev_flow,
909 	 const struct rte_flow_action *action __rte_unused)
910 {
911 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
912 	struct ibv_flow_spec_action_tag tag = {
913 		.type = IBV_FLOW_SPEC_ACTION_TAG,
914 		.size = size,
915 		.tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
916 	};
917 
918 	flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
919 }
920 
921 /**
922  * Convert the @p action into a Verbs specification. This function assumes that
923  * the input is valid and that there is space to insert the requested action
924  * into the flow.
925  *
926  * @param[in] dev_flow
927  *   Pointer to mlx5_flow.
928  * @param[in] action
929  *   Action configuration.
930  */
931 static void
932 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
933 				 const struct rte_flow_action *action)
934 {
935 	const struct rte_flow_action_mark *mark = action->conf;
936 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
937 	struct ibv_flow_spec_action_tag tag = {
938 		.type = IBV_FLOW_SPEC_ACTION_TAG,
939 		.size = size,
940 		.tag_id = mlx5_flow_mark_set(mark->id),
941 	};
942 
943 	flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
944 }
945 
/**
 * Convert the @p action into a Verbs specification. This function assumes that
 * the input is valid and that there is space to insert the requested action
 * into the flow.
 *
 * A counter is allocated (or an existing shared one reused) for the
 * flow, then, when the Verbs library supports counters (V42 or V45
 * API), a counter action spec referencing it is appended.
 *
 * @param[in] dev_flow
 *   Pointer to mlx5_flow.
 * @param[in] action
 *   Action configuration.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 On success else a negative errno value is returned and rte_errno is set.
 */
static int
flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
				  const struct rte_flow_action *action,
				  struct rte_eth_dev *dev,
				  struct rte_flow_error *error)
{
	const struct rte_flow_action_count *count = action->conf;
	struct rte_flow *flow = dev_flow->flow;
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.size = size,
	};
#endif

	if (!flow->counter) {
		flow->counter = flow_verbs_counter_new(dev, count->shared,
						       count->id);
		if (!flow->counter)
			return rte_flow_error_set(error, rte_errno,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  action,
						  "cannot get counter"
						  " context.");
	}
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
	counter.counter_set_handle = flow->counter->cs->handle;
	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	counter.counters = flow->counter->cs;
	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
#endif
	return 0;
}
999 
1000 /**
1001  * Internal validation function. For validating both actions and items.
1002  *
1003  * @param[in] dev
1004  *   Pointer to the Ethernet device structure.
1005  * @param[in] attr
1006  *   Pointer to the flow attributes.
1007  * @param[in] items
1008  *   Pointer to the list of items.
1009  * @param[in] actions
1010  *   Pointer to the list of actions.
1011  * @param[out] error
1012  *   Pointer to the error structure.
1013  *
1014  * @return
1015  *   0 on success, a negative errno value otherwise and rte_errno is set.
1016  */
static int
flow_verbs_validate(struct rte_eth_dev *dev,
		    const struct rte_flow_attr *attr,
		    const struct rte_flow_item items[],
		    const struct rte_flow_action actions[],
		    struct rte_flow_error *error)
{
	int ret;
	uint64_t action_flags = 0;
	uint64_t item_flags = 0;
	uint64_t last_item = 0;
	/* 0xff means "no L3 constraint yet" for the next L4/tunnel item. */
	uint8_t next_protocol = 0xff;

	if (items == NULL)
		return -1;
	ret = mlx5_flow_validate_attributes(dev, attr, error);
	if (ret < 0)
		return ret;
	/* First pass: validate each pattern item against the layers already
	 * seen and accumulate the layer flags for later action validation.
	 */
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
		int ret = 0;

		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			ret = mlx5_flow_validate_item_eth(items, item_flags,
							  error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
					     MLX5_FLOW_LAYER_OUTER_L2;
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			ret = mlx5_flow_validate_item_vlan(items, item_flags,
							   error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
					      MLX5_FLOW_LAYER_INNER_VLAN) :
					     (MLX5_FLOW_LAYER_OUTER_L2 |
					      MLX5_FLOW_LAYER_OUTER_VLAN);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
							   NULL, error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
					     MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			/* Remember the masked next-protocol field so the
			 * following L4/tunnel item can be checked against it.
			 */
			if (items->mask != NULL &&
			    ((const struct rte_flow_item_ipv4 *)
			     items->mask)->hdr.next_proto_id) {
				next_protocol =
					((const struct rte_flow_item_ipv4 *)
					 (items->spec))->hdr.next_proto_id;
				next_protocol &=
					((const struct rte_flow_item_ipv4 *)
					 (items->mask))->hdr.next_proto_id;
			} else {
				/* Reset for inner layer. */
				next_protocol = 0xff;
			}
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
							   NULL, error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
					     MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			if (items->mask != NULL &&
			    ((const struct rte_flow_item_ipv6 *)
			     items->mask)->hdr.proto) {
				next_protocol =
					((const struct rte_flow_item_ipv6 *)
					 items->spec)->hdr.proto;
				next_protocol &=
					((const struct rte_flow_item_ipv6 *)
					 items->mask)->hdr.proto;
			} else {
				/* Reset for inner layer. */
				next_protocol = 0xff;
			}
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			ret = mlx5_flow_validate_item_udp(items, item_flags,
							  next_protocol,
							  error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
					     MLX5_FLOW_LAYER_OUTER_L4_UDP;
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			ret = mlx5_flow_validate_item_tcp
						(items, item_flags,
						 next_protocol,
						 &rte_flow_item_tcp_mask,
						 error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
					     MLX5_FLOW_LAYER_OUTER_L4_TCP;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN:
			ret = mlx5_flow_validate_item_vxlan(items, item_flags,
							    error);
			if (ret < 0)
				return ret;
			last_item = MLX5_FLOW_LAYER_VXLAN;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
			ret = mlx5_flow_validate_item_vxlan_gpe(items,
								item_flags,
								dev, error);
			if (ret < 0)
				return ret;
			last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
			break;
		case RTE_FLOW_ITEM_TYPE_GRE:
			ret = mlx5_flow_validate_item_gre(items, item_flags,
							  next_protocol, error);
			if (ret < 0)
				return ret;
			last_item = MLX5_FLOW_LAYER_GRE;
			break;
		case RTE_FLOW_ITEM_TYPE_MPLS:
			ret = mlx5_flow_validate_item_mpls(dev, items,
							   item_flags,
							   last_item, error);
			if (ret < 0)
				return ret;
			last_item = MLX5_FLOW_LAYER_MPLS;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "item not supported");
		}
		/* Commit the item's layer bit only after it validated. */
		item_flags |= last_item;
	}
	/* Second pass: validate the action list against the attributes and
	 * the layer flags collected above.
	 */
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_FLAG:
			ret = mlx5_flow_validate_action_flag(action_flags,
							     attr,
							     error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_FLAG;
			break;
		case RTE_FLOW_ACTION_TYPE_MARK:
			ret = mlx5_flow_validate_action_mark(actions,
							     action_flags,
							     attr,
							     error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_MARK;
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			ret = mlx5_flow_validate_action_drop(action_flags,
							     attr,
							     error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_DROP;
			break;
		case RTE_FLOW_ACTION_TYPE_QUEUE:
			ret = mlx5_flow_validate_action_queue(actions,
							      action_flags, dev,
							      attr,
							      error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_QUEUE;
			break;
		case RTE_FLOW_ACTION_TYPE_RSS:
			ret = mlx5_flow_validate_action_rss(actions,
							    action_flags, dev,
							    attr, item_flags,
							    error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_RSS;
			break;
		case RTE_FLOW_ACTION_TYPE_COUNT:
			ret = mlx5_flow_validate_action_count(dev, attr, error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_COUNT;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
	}
	/* Every flow must decide the fate of matched packets
	 * (drop, queue or RSS).
	 */
	if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
					  "no fate action is found");
	return 0;
}
1225 
1226 /**
1227  * Calculate the required bytes that are needed for the action part of the verbs
1228  * flow.
1229  *
1230  * @param[in] actions
1231  *   Pointer to the list of actions.
1232  *
1233  * @return
1234  *   The size of the memory needed for all actions.
1235  */
1236 static int
1237 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1238 {
1239 	int size = 0;
1240 
1241 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1242 		switch (actions->type) {
1243 		case RTE_FLOW_ACTION_TYPE_VOID:
1244 			break;
1245 		case RTE_FLOW_ACTION_TYPE_FLAG:
1246 			size += sizeof(struct ibv_flow_spec_action_tag);
1247 			break;
1248 		case RTE_FLOW_ACTION_TYPE_MARK:
1249 			size += sizeof(struct ibv_flow_spec_action_tag);
1250 			break;
1251 		case RTE_FLOW_ACTION_TYPE_DROP:
1252 			size += sizeof(struct ibv_flow_spec_action_drop);
1253 			break;
1254 		case RTE_FLOW_ACTION_TYPE_QUEUE:
1255 			break;
1256 		case RTE_FLOW_ACTION_TYPE_RSS:
1257 			break;
1258 		case RTE_FLOW_ACTION_TYPE_COUNT:
1259 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1260 	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1261 			size += sizeof(struct ibv_flow_spec_counter_action);
1262 #endif
1263 			break;
1264 		default:
1265 			break;
1266 		}
1267 	}
1268 	return size;
1269 }
1270 
1271 /**
1272  * Calculate the required bytes that are needed for the item part of the verbs
1273  * flow.
1274  *
1275  * @param[in] items
1276  *   Pointer to the list of items.
1277  *
1278  * @return
1279  *   The size of the memory needed for all items.
1280  */
1281 static int
1282 flow_verbs_get_items_size(const struct rte_flow_item items[])
1283 {
1284 	int size = 0;
1285 
1286 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1287 		switch (items->type) {
1288 		case RTE_FLOW_ITEM_TYPE_VOID:
1289 			break;
1290 		case RTE_FLOW_ITEM_TYPE_ETH:
1291 			size += sizeof(struct ibv_flow_spec_eth);
1292 			break;
1293 		case RTE_FLOW_ITEM_TYPE_VLAN:
1294 			size += sizeof(struct ibv_flow_spec_eth);
1295 			break;
1296 		case RTE_FLOW_ITEM_TYPE_IPV4:
1297 			size += sizeof(struct ibv_flow_spec_ipv4_ext);
1298 			break;
1299 		case RTE_FLOW_ITEM_TYPE_IPV6:
1300 			size += sizeof(struct ibv_flow_spec_ipv6);
1301 			break;
1302 		case RTE_FLOW_ITEM_TYPE_UDP:
1303 			size += sizeof(struct ibv_flow_spec_tcp_udp);
1304 			break;
1305 		case RTE_FLOW_ITEM_TYPE_TCP:
1306 			size += sizeof(struct ibv_flow_spec_tcp_udp);
1307 			break;
1308 		case RTE_FLOW_ITEM_TYPE_VXLAN:
1309 			size += sizeof(struct ibv_flow_spec_tunnel);
1310 			break;
1311 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1312 			size += sizeof(struct ibv_flow_spec_tunnel);
1313 			break;
1314 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1315 		case RTE_FLOW_ITEM_TYPE_GRE:
1316 			size += sizeof(struct ibv_flow_spec_gre);
1317 			break;
1318 		case RTE_FLOW_ITEM_TYPE_MPLS:
1319 			size += sizeof(struct ibv_flow_spec_mpls);
1320 			break;
1321 #else
1322 		case RTE_FLOW_ITEM_TYPE_GRE:
1323 			size += sizeof(struct ibv_flow_spec_tunnel);
1324 			break;
1325 #endif
1326 		default:
1327 			break;
1328 		}
1329 	}
1330 	return size;
1331 }
1332 
1333 /**
1334  * Internal preparation function. Allocate mlx5_flow with the required size.
1335  * The required size is calculate based on the actions and items. This function
1336  * also returns the detected actions and items for later use.
1337  *
1338  * @param[in] attr
1339  *   Pointer to the flow attributes.
1340  * @param[in] items
1341  *   Pointer to the list of items.
1342  * @param[in] actions
1343  *   Pointer to the list of actions.
1344  * @param[out] error
1345  *   Pointer to the error structure.
1346  *
1347  * @return
1348  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1349  *   is set.
1350  */
1351 static struct mlx5_flow *
1352 flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
1353 		   const struct rte_flow_item items[],
1354 		   const struct rte_flow_action actions[],
1355 		   struct rte_flow_error *error)
1356 {
1357 	uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
1358 	struct mlx5_flow *flow;
1359 
1360 	size += flow_verbs_get_actions_size(actions);
1361 	size += flow_verbs_get_items_size(items);
1362 	flow = rte_calloc(__func__, 1, size, 0);
1363 	if (!flow) {
1364 		rte_flow_error_set(error, ENOMEM,
1365 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1366 				   "not enough memory to create flow");
1367 		return NULL;
1368 	}
1369 	flow->verbs.attr = (void *)(flow + 1);
1370 	flow->verbs.specs =
1371 		(uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
1372 	return flow;
1373 }
1374 
1375 /**
1376  * Fill the flow with verb spec.
1377  *
1378  * @param[in] dev
1379  *   Pointer to Ethernet device.
1380  * @param[in, out] dev_flow
1381  *   Pointer to the mlx5 flow.
1382  * @param[in] attr
1383  *   Pointer to the flow attributes.
1384  * @param[in] items
1385  *   Pointer to the list of items.
1386  * @param[in] actions
1387  *   Pointer to the list of actions.
1388  * @param[out] error
1389  *   Pointer to the error structure.
1390  *
1391  * @return
1392  *   0 on success, else a negative errno value otherwise and rte_errno is set.
1393  */
static int
flow_verbs_translate(struct rte_eth_dev *dev,
		     struct mlx5_flow *dev_flow,
		     const struct rte_flow_attr *attr,
		     const struct rte_flow_item items[],
		     const struct rte_flow_action actions[],
		     struct rte_flow_error *error)
{
	struct rte_flow *flow = dev_flow->flow;
	uint64_t item_flags = 0;
	uint64_t action_flags = 0;
	uint64_t priority = attr->priority;
	/* Sub-priority derived from the deepest matched layer (L2/L3/L4). */
	uint32_t subpriority = 0;
	struct mlx5_priv *priv = dev->data->dev_private;

	/* MLX5_FLOW_PRIO_RSVD means "use the lowest configured priority". */
	if (priority == MLX5_FLOW_PRIO_RSVD)
		priority = priv->config.flow_prio - 1;
	/* Translate all actions into Verbs specs first. */
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		int ret;

		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_FLAG:
			flow_verbs_translate_action_flag(dev_flow, actions);
			action_flags |= MLX5_FLOW_ACTION_FLAG;
			break;
		case RTE_FLOW_ACTION_TYPE_MARK:
			flow_verbs_translate_action_mark(dev_flow, actions);
			action_flags |= MLX5_FLOW_ACTION_MARK;
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			flow_verbs_translate_action_drop(dev_flow, actions);
			action_flags |= MLX5_FLOW_ACTION_DROP;
			break;
		case RTE_FLOW_ACTION_TYPE_QUEUE:
			flow_verbs_translate_action_queue(dev_flow, actions);
			action_flags |= MLX5_FLOW_ACTION_QUEUE;
			break;
		case RTE_FLOW_ACTION_TYPE_RSS:
			flow_verbs_translate_action_rss(dev_flow, actions);
			action_flags |= MLX5_FLOW_ACTION_RSS;
			break;
		case RTE_FLOW_ACTION_TYPE_COUNT:
			/* COUNT is the only action translation that can fail
			 * (counter allocation).
			 */
			ret = flow_verbs_translate_action_count(dev_flow,
								actions,
								dev, error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_COUNT;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
	}
	flow->actions = action_flags;
	/* Then translate every pattern item, tracking the layer flags and
	 * accumulating the RSS hash fields as L3/L4 layers appear.
	 */
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);

		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			flow_verbs_translate_item_eth(dev_flow, items,
						      item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
					       MLX5_FLOW_LAYER_OUTER_L2;
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			flow_verbs_translate_item_vlan(dev_flow, items,
						       item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
						MLX5_FLOW_LAYER_INNER_VLAN) :
					       (MLX5_FLOW_LAYER_OUTER_L2 |
						MLX5_FLOW_LAYER_OUTER_VLAN);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			flow_verbs_translate_item_ipv4(dev_flow, items,
						       item_flags);
			subpriority = MLX5_PRIORITY_MAP_L3;
			dev_flow->verbs.hash_fields |=
				mlx5_flow_hashfields_adjust
					(dev_flow, tunnel,
					 MLX5_IPV4_LAYER_TYPES,
					 MLX5_IPV4_IBV_RX_HASH);
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
					       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			flow_verbs_translate_item_ipv6(dev_flow, items,
						       item_flags);
			subpriority = MLX5_PRIORITY_MAP_L3;
			dev_flow->verbs.hash_fields |=
				mlx5_flow_hashfields_adjust
					(dev_flow, tunnel,
					 MLX5_IPV6_LAYER_TYPES,
					 MLX5_IPV6_IBV_RX_HASH);
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
					       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			flow_verbs_translate_item_tcp(dev_flow, items,
						      item_flags);
			subpriority = MLX5_PRIORITY_MAP_L4;
			dev_flow->verbs.hash_fields |=
				mlx5_flow_hashfields_adjust
					(dev_flow, tunnel, ETH_RSS_TCP,
					 (IBV_RX_HASH_SRC_PORT_TCP |
					  IBV_RX_HASH_DST_PORT_TCP));
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
					       MLX5_FLOW_LAYER_OUTER_L4_TCP;
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			flow_verbs_translate_item_udp(dev_flow, items,
						      item_flags);
			subpriority = MLX5_PRIORITY_MAP_L4;
			dev_flow->verbs.hash_fields |=
				mlx5_flow_hashfields_adjust
					(dev_flow, tunnel, ETH_RSS_UDP,
					 (IBV_RX_HASH_SRC_PORT_UDP |
					  IBV_RX_HASH_DST_PORT_UDP));
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
					       MLX5_FLOW_LAYER_OUTER_L4_UDP;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN:
			flow_verbs_translate_item_vxlan(dev_flow, items,
							item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= MLX5_FLOW_LAYER_VXLAN;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
			flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
							    item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
			break;
		case RTE_FLOW_ITEM_TYPE_GRE:
			flow_verbs_translate_item_gre(dev_flow, items,
						      item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= MLX5_FLOW_LAYER_GRE;
			break;
		case RTE_FLOW_ITEM_TYPE_MPLS:
			flow_verbs_translate_item_mpls(dev_flow, items,
						       item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= MLX5_FLOW_LAYER_MPLS;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL,
						  "item not supported");
		}
	}
	dev_flow->layers = item_flags;
	/* Final device priority combines the flow priority with the
	 * layer-depth sub-priority.
	 */
	dev_flow->verbs.attr->priority =
		mlx5_flow_adjust_priority(dev, priority, subpriority);
	dev_flow->verbs.attr->port = (uint8_t)priv->ibv_port;
	return 0;
}
1560 
1561 /**
1562  * Remove the flow from the NIC but keeps it in memory.
1563  *
1564  * @param[in] dev
1565  *   Pointer to the Ethernet device structure.
1566  * @param[in, out] flow
1567  *   Pointer to flow structure.
1568  */
1569 static void
1570 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1571 {
1572 	struct mlx5_flow_verbs *verbs;
1573 	struct mlx5_flow *dev_flow;
1574 
1575 	if (!flow)
1576 		return;
1577 	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1578 		verbs = &dev_flow->verbs;
1579 		if (verbs->flow) {
1580 			claim_zero(mlx5_glue->destroy_flow(verbs->flow));
1581 			verbs->flow = NULL;
1582 		}
1583 		if (verbs->hrxq) {
1584 			if (flow->actions & MLX5_FLOW_ACTION_DROP)
1585 				mlx5_hrxq_drop_release(dev);
1586 			else
1587 				mlx5_hrxq_release(dev, verbs->hrxq);
1588 			verbs->hrxq = NULL;
1589 		}
1590 	}
1591 }
1592 
1593 /**
1594  * Remove the flow from the NIC and the memory.
1595  *
1596  * @param[in] dev
1597  *   Pointer to the Ethernet device structure.
1598  * @param[in, out] flow
1599  *   Pointer to flow structure.
1600  */
1601 static void
1602 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1603 {
1604 	struct mlx5_flow *dev_flow;
1605 
1606 	if (!flow)
1607 		return;
1608 	flow_verbs_remove(dev, flow);
1609 	while (!LIST_EMPTY(&flow->dev_flows)) {
1610 		dev_flow = LIST_FIRST(&flow->dev_flows);
1611 		LIST_REMOVE(dev_flow, next);
1612 		rte_free(dev_flow);
1613 	}
1614 	if (flow->counter) {
1615 		flow_verbs_counter_release(flow->counter);
1616 		flow->counter = NULL;
1617 	}
1618 }
1619 
1620 /**
1621  * Apply the flow to the NIC.
1622  *
1623  * @param[in] dev
1624  *   Pointer to the Ethernet device structure.
1625  * @param[in, out] flow
1626  *   Pointer to flow structure.
1627  * @param[out] error
1628  *   Pointer to error structure.
1629  *
1630  * @return
1631  *   0 on success, a negative errno value otherwise and rte_errno is set.
1632  */
static int
flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
		 struct rte_flow_error *error)
{
	struct mlx5_flow_verbs *verbs;
	struct mlx5_flow *dev_flow;
	int err;

	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
		verbs = &dev_flow->verbs;
		if (flow->actions & MLX5_FLOW_ACTION_DROP) {
			/* Drop flows attach to the dedicated drop queue. */
			verbs->hrxq = mlx5_hrxq_drop_new(dev);
			if (!verbs->hrxq) {
				rte_flow_error_set
					(error, errno,
					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					 "cannot get drop hash queue");
				goto error;
			}
		} else {
			struct mlx5_hrxq *hrxq;

			/* Reuse an existing hash RX queue when one matches
			 * the key/fields/queues; otherwise create a new one.
			 */
			hrxq = mlx5_hrxq_get(dev, flow->key,
					     MLX5_RSS_HASH_KEY_LEN,
					     verbs->hash_fields,
					     (*flow->queue),
					     flow->rss.queue_num);
			if (!hrxq)
				hrxq = mlx5_hrxq_new(dev, flow->key,
						     MLX5_RSS_HASH_KEY_LEN,
						     verbs->hash_fields,
						     (*flow->queue),
						     flow->rss.queue_num,
						     !!(dev_flow->layers &
						      MLX5_FLOW_LAYER_TUNNEL));
			if (!hrxq) {
				rte_flow_error_set
					(error, rte_errno,
					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					 "cannot get hash queue");
				goto error;
			}
			verbs->hrxq = hrxq;
		}
		/* Install the Verbs flow rule on the selected queue pair. */
		verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
						     verbs->attr);
		if (!verbs->flow) {
			rte_flow_error_set(error, errno,
					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					   NULL,
					   "hardware refuses to create flow");
			goto error;
		}
	}
	return 0;
error:
	/* Roll back every queue reference taken so far. */
	err = rte_errno; /* Save rte_errno before cleanup. */
	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
		verbs = &dev_flow->verbs;
		if (verbs->hrxq) {
			if (flow->actions & MLX5_FLOW_ACTION_DROP)
				mlx5_hrxq_drop_release(dev);
			else
				mlx5_hrxq_release(dev, verbs->hrxq);
			verbs->hrxq = NULL;
		}
	}
	rte_errno = err; /* Restore rte_errno. */
	return -rte_errno;
}
1703 
1704 /**
1705  * Query a flow.
1706  *
1707  * @see rte_flow_query()
1708  * @see rte_flow_ops
1709  */
1710 static int
1711 flow_verbs_query(struct rte_eth_dev *dev,
1712 		 struct rte_flow *flow,
1713 		 const struct rte_flow_action *actions,
1714 		 void *data,
1715 		 struct rte_flow_error *error)
1716 {
1717 	int ret = -EINVAL;
1718 
1719 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1720 		switch (actions->type) {
1721 		case RTE_FLOW_ACTION_TYPE_VOID:
1722 			break;
1723 		case RTE_FLOW_ACTION_TYPE_COUNT:
1724 			ret = flow_verbs_counter_query(dev, flow, data, error);
1725 			break;
1726 		default:
1727 			return rte_flow_error_set(error, ENOTSUP,
1728 						  RTE_FLOW_ERROR_TYPE_ACTION,
1729 						  actions,
1730 						  "action not supported");
1731 		}
1732 	}
1733 	return ret;
1734 }
1735 
/* Verbs flow engine callbacks exposed to the generic mlx5 flow layer. */
const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
	.validate = flow_verbs_validate,
	.prepare = flow_verbs_prepare,
	.translate = flow_verbs_translate,
	.apply = flow_verbs_apply,
	.remove = flow_verbs_remove,
	.destroy = flow_verbs_destroy,
	.query = flow_verbs_query,
};
1745