xref: /dpdk/drivers/net/mlx5/mlx5_flow_verbs.c (revision 1edccebcccdbe600dc0a3a418fae68336648a87e)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4 
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10 
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20 
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_eth_ctrl.h>
24 #include <rte_ethdev_driver.h>
25 #include <rte_flow.h>
26 #include <rte_flow_driver.h>
27 #include <rte_malloc.h>
28 #include <rte_ip.h>
29 
30 #include "mlx5.h"
31 #include "mlx5_defs.h"
32 #include "mlx5_prm.h"
33 #include "mlx5_glue.h"
34 #include "mlx5_flow.h"
35 
36 /**
37  * Get a flow counter.
38  *
39  * @param[in] dev
40  *   Pointer to the Ethernet device structure.
41  * @param[in] shared
42  *   Indicate if this counter is shared with other flows.
43  * @param[in] id
44  *   Counter identifier.
45  *
46  * @return
47  *   A pointer to the counter, NULL otherwise and rte_errno is set.
48  */
49 static struct mlx5_flow_counter *
50 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
51 {
52 	struct priv *priv = dev->data->dev_private;
53 	struct mlx5_flow_counter *cnt;
54 
55 	LIST_FOREACH(cnt, &priv->flow_counters, next) {
56 		if (!cnt->shared || cnt->shared != shared)
57 			continue;
58 		if (cnt->id != id)
59 			continue;
60 		cnt->ref_cnt++;
61 		return cnt;
62 	}
63 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
64 
65 	struct mlx5_flow_counter tmpl = {
66 		.shared = shared,
67 		.id = id,
68 		.cs = mlx5_glue->create_counter_set
69 			(priv->ctx,
70 			 &(struct ibv_counter_set_init_attr){
71 				 .counter_set_id = id,
72 			 }),
73 		.hits = 0,
74 		.bytes = 0,
75 	};
76 
77 	if (!tmpl.cs) {
78 		rte_errno = errno;
79 		return NULL;
80 	}
81 	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
82 	if (!cnt) {
83 		rte_errno = ENOMEM;
84 		return NULL;
85 	}
86 	*cnt = tmpl;
87 	LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
88 	return cnt;
89 #endif
90 	rte_errno = ENOTSUP;
91 	return NULL;
92 }
93 
94 /**
95  * Release a flow counter.
96  *
97  * @param[in] counter
98  *   Pointer to the counter handler.
99  */
100 static void
101 flow_verbs_counter_release(struct mlx5_flow_counter *counter)
102 {
103 	if (--counter->ref_cnt == 0) {
104 		claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
105 		LIST_REMOVE(counter, next);
106 		rte_free(counter);
107 	}
108 }
109 
110 /**
111  * Add a verbs item specification into @p flow.
112  *
113  * @param[in, out] flow
114  *   Pointer to flow structure.
115  * @param[in] src
116  *   Create specification.
117  * @param[in] size
118  *   Size in bytes of the specification to copy.
119  */
120 static void
121 flow_verbs_spec_add(struct mlx5_flow *flow, void *src, unsigned int size)
122 {
123 	struct mlx5_flow_verbs *verbs = &flow->verbs;
124 
125 	if (verbs->specs) {
126 		void *dst;
127 
128 		dst = (void *)(verbs->specs + verbs->size);
129 		memcpy(dst, src, size);
130 		++verbs->attr->num_of_specs;
131 	}
132 	verbs->size += size;
133 }
134 
135 /**
136  * Convert the @p item into a Verbs specification. This function assumes that
137  * the input is valid and that there is space to insert the requested item
138  * into the flow.
139  *
140  * @param[in] item
141  *   Item specification.
142  * @param[in] item_flags
143  *   Bit field with all detected items.
144  * @param[in, out] dev_flow
145  *   Pointer to dev_flow structure.
146  */
147 static void
148 flow_verbs_translate_item_eth(const struct rte_flow_item *item,
149 			      uint64_t *item_flags,
150 			      struct mlx5_flow *dev_flow)
151 {
152 	const struct rte_flow_item_eth *spec = item->spec;
153 	const struct rte_flow_item_eth *mask = item->mask;
154 	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
155 	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
156 	struct ibv_flow_spec_eth eth = {
157 		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
158 		.size = size,
159 	};
160 
161 	if (!mask)
162 		mask = &rte_flow_item_eth_mask;
163 	if (spec) {
164 		unsigned int i;
165 
166 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
167 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
168 		eth.val.ether_type = spec->type;
169 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
170 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
171 		eth.mask.ether_type = mask->type;
172 		/* Remove unwanted bits from values. */
173 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
174 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
175 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
176 		}
177 		eth.val.ether_type &= eth.mask.ether_type;
178 		dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
179 	}
180 	flow_verbs_spec_add(dev_flow, &eth, size);
181 	*item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
182 				MLX5_FLOW_LAYER_OUTER_L2;
183 }
184 
185 /**
186  * Update the VLAN tag in the Verbs Ethernet specification.
187  * This function assumes that the input is valid and there is space to add
188  * the requested item.
189  *
190  * @param[in, out] attr
191  *   Pointer to Verbs attributes structure.
192  * @param[in] eth
193  *   Verbs structure containing the VLAN information to copy.
194  */
195 static void
196 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
197 			    struct ibv_flow_spec_eth *eth)
198 {
199 	unsigned int i;
200 	const enum ibv_flow_spec_type search = eth->type;
201 	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
202 		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
203 
204 	for (i = 0; i != attr->num_of_specs; ++i) {
205 		if (hdr->type == search) {
206 			struct ibv_flow_spec_eth *e =
207 				(struct ibv_flow_spec_eth *)hdr;
208 
209 			e->val.vlan_tag = eth->val.vlan_tag;
210 			e->mask.vlan_tag = eth->mask.vlan_tag;
211 			e->val.ether_type = eth->val.ether_type;
212 			e->mask.ether_type = eth->mask.ether_type;
213 			break;
214 		}
215 		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
216 	}
217 }
218 
219 /**
220  * Convert the @p item into a Verbs specification. This function assumes that
221  * the input is valid and that there is space to insert the requested item
222  * into the flow.
223  *
224  * @param[in] item
225  *   Item specification.
226  * @param[in, out] item_flags
227  *   Bit mask that holds all detected items.
228  * @param[in, out] dev_flow
229  *   Pointer to dev_flow structure.
230  */
231 static void
232 flow_verbs_translate_item_vlan(const struct rte_flow_item *item,
233 			       uint64_t *item_flags,
234 			       struct mlx5_flow *dev_flow)
235 {
236 	const struct rte_flow_item_vlan *spec = item->spec;
237 	const struct rte_flow_item_vlan *mask = item->mask;
238 	unsigned int size = sizeof(struct ibv_flow_spec_eth);
239 	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
240 	struct ibv_flow_spec_eth eth = {
241 		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
242 		.size = size,
243 	};
244 	const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
245 				      MLX5_FLOW_LAYER_OUTER_L2;
246 
247 	if (!mask)
248 		mask = &rte_flow_item_vlan_mask;
249 	if (spec) {
250 		eth.val.vlan_tag = spec->tci;
251 		eth.mask.vlan_tag = mask->tci;
252 		eth.val.vlan_tag &= eth.mask.vlan_tag;
253 		eth.val.ether_type = spec->inner_type;
254 		eth.mask.ether_type = mask->inner_type;
255 		eth.val.ether_type &= eth.mask.ether_type;
256 	}
257 	if (!(*item_flags & l2m)) {
258 		dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
259 		flow_verbs_spec_add(dev_flow, &eth, size);
260 	} else {
261 		flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
262 		size = 0; /* Only an update is done in eth specification. */
263 	}
264 	*item_flags |= tunnel ?
265 		       (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
266 		       (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
267 }
268 
269 /**
270  * Convert the @p item into a Verbs specification. This function assumes that
271  * the input is valid and that there is space to insert the requested item
272  * into the flow.
273  *
274  * @param[in] item
275  *   Item specification.
276  * @param[in, out] item_flags
277  *   Bit mask that marks all detected items.
278  * @param[in, out] dev_flow
279  *   Pointer to sepacific flow structure.
280  */
281 static void
282 flow_verbs_translate_item_ipv4(const struct rte_flow_item *item,
283 			       uint64_t *item_flags,
284 			       struct mlx5_flow *dev_flow)
285 {
286 	const struct rte_flow_item_ipv4 *spec = item->spec;
287 	const struct rte_flow_item_ipv4 *mask = item->mask;
288 	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
289 	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
290 	struct ibv_flow_spec_ipv4_ext ipv4 = {
291 		.type = IBV_FLOW_SPEC_IPV4_EXT |
292 			(tunnel ? IBV_FLOW_SPEC_INNER : 0),
293 		.size = size,
294 	};
295 
296 	if (!mask)
297 		mask = &rte_flow_item_ipv4_mask;
298 	*item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
299 				MLX5_FLOW_LAYER_OUTER_L3_IPV4;
300 	if (spec) {
301 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
302 			.src_ip = spec->hdr.src_addr,
303 			.dst_ip = spec->hdr.dst_addr,
304 			.proto = spec->hdr.next_proto_id,
305 			.tos = spec->hdr.type_of_service,
306 		};
307 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
308 			.src_ip = mask->hdr.src_addr,
309 			.dst_ip = mask->hdr.dst_addr,
310 			.proto = mask->hdr.next_proto_id,
311 			.tos = mask->hdr.type_of_service,
312 		};
313 		/* Remove unwanted bits from values. */
314 		ipv4.val.src_ip &= ipv4.mask.src_ip;
315 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
316 		ipv4.val.proto &= ipv4.mask.proto;
317 		ipv4.val.tos &= ipv4.mask.tos;
318 	}
319 	dev_flow->verbs.hash_fields |=
320 		mlx5_flow_hashfields_adjust(dev_flow, tunnel,
321 					    MLX5_IPV4_LAYER_TYPES,
322 					    MLX5_IPV4_IBV_RX_HASH);
323 	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
324 	flow_verbs_spec_add(dev_flow, &ipv4, size);
325 }
326 
327 /**
328  * Convert the @p item into a Verbs specification. This function assumes that
329  * the input is valid and that there is space to insert the requested item
330  * into the flow.
331  *
332  * @param[in] item
333  *   Item specification.
334  * @param[in, out] item_flags
335  *   Bit mask that marks all detected items.
336  * @param[in, out] dev_flow
337  *   Pointer to sepacific flow structure.
338  */
339 static void
340 flow_verbs_translate_item_ipv6(const struct rte_flow_item *item,
341 			       uint64_t *item_flags,
342 			       struct mlx5_flow *dev_flow)
343 {
344 	const struct rte_flow_item_ipv6 *spec = item->spec;
345 	const struct rte_flow_item_ipv6 *mask = item->mask;
346 	const int tunnel = !!(dev_flow->flow->layers & MLX5_FLOW_LAYER_TUNNEL);
347 	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
348 	struct ibv_flow_spec_ipv6 ipv6 = {
349 		.type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
350 		.size = size,
351 	};
352 
353 	if (!mask)
354 		mask = &rte_flow_item_ipv6_mask;
355 	 *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
356 				 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
357 	if (spec) {
358 		unsigned int i;
359 		uint32_t vtc_flow_val;
360 		uint32_t vtc_flow_mask;
361 
362 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
363 		       RTE_DIM(ipv6.val.src_ip));
364 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
365 		       RTE_DIM(ipv6.val.dst_ip));
366 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
367 		       RTE_DIM(ipv6.mask.src_ip));
368 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
369 		       RTE_DIM(ipv6.mask.dst_ip));
370 		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
371 		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
372 		ipv6.val.flow_label =
373 			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
374 					 IPV6_HDR_FL_SHIFT);
375 		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
376 					 IPV6_HDR_TC_SHIFT;
377 		ipv6.val.next_hdr = spec->hdr.proto;
378 		ipv6.val.hop_limit = spec->hdr.hop_limits;
379 		ipv6.mask.flow_label =
380 			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
381 					 IPV6_HDR_FL_SHIFT);
382 		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
383 					  IPV6_HDR_TC_SHIFT;
384 		ipv6.mask.next_hdr = mask->hdr.proto;
385 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
386 		/* Remove unwanted bits from values. */
387 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
388 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
389 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
390 		}
391 		ipv6.val.flow_label &= ipv6.mask.flow_label;
392 		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
393 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
394 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
395 	}
396 	dev_flow->verbs.hash_fields |=
397 		mlx5_flow_hashfields_adjust(dev_flow, tunnel,
398 					    MLX5_IPV6_LAYER_TYPES,
399 					    MLX5_IPV6_IBV_RX_HASH);
400 	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
401 	flow_verbs_spec_add(dev_flow, &ipv6, size);
402 }
403 
404 /**
405  * Convert the @p item into a Verbs specification. This function assumes that
406  * the input is valid and that there is space to insert the requested item
407  * into the flow.
408  *
409  * @param[in] item
410  *   Item specification.
411  * @param[in, out] item_flags
412  *   Bit mask that marks all detected items.
413  * @param[in, out] dev_flow
414  *   Pointer to sepacific flow structure.
415  */
416 static void
417 flow_verbs_translate_item_udp(const struct rte_flow_item *item,
418 			      uint64_t *item_flags,
419 			      struct mlx5_flow *dev_flow)
420 {
421 	const struct rte_flow_item_udp *spec = item->spec;
422 	const struct rte_flow_item_udp *mask = item->mask;
423 	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
424 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
425 	struct ibv_flow_spec_tcp_udp udp = {
426 		.type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
427 		.size = size,
428 	};
429 
430 	if (!mask)
431 		mask = &rte_flow_item_udp_mask;
432 	*item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
433 				MLX5_FLOW_LAYER_OUTER_L4_UDP;
434 	if (spec) {
435 		udp.val.dst_port = spec->hdr.dst_port;
436 		udp.val.src_port = spec->hdr.src_port;
437 		udp.mask.dst_port = mask->hdr.dst_port;
438 		udp.mask.src_port = mask->hdr.src_port;
439 		/* Remove unwanted bits from values. */
440 		udp.val.src_port &= udp.mask.src_port;
441 		udp.val.dst_port &= udp.mask.dst_port;
442 	}
443 	dev_flow->verbs.hash_fields |=
444 		mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_UDP,
445 					    (IBV_RX_HASH_SRC_PORT_UDP |
446 					     IBV_RX_HASH_DST_PORT_UDP));
447 	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
448 	flow_verbs_spec_add(dev_flow, &udp, size);
449 }
450 
451 /**
452  * Convert the @p item into a Verbs specification. This function assumes that
453  * the input is valid and that there is space to insert the requested item
454  * into the flow.
455  *
456  * @param[in] item
457  *   Item specification.
458  * @param[in, out] item_flags
459  *   Bit mask that marks all detected items.
460  * @param[in, out] dev_flow
461  *   Pointer to sepacific flow structure.
462  */
463 static void
464 flow_verbs_translate_item_tcp(const struct rte_flow_item *item,
465 			      uint64_t *item_flags,
466 			      struct mlx5_flow *dev_flow)
467 {
468 	const struct rte_flow_item_tcp *spec = item->spec;
469 	const struct rte_flow_item_tcp *mask = item->mask;
470 	const int tunnel = !!(dev_flow->flow->layers & MLX5_FLOW_LAYER_TUNNEL);
471 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
472 	struct ibv_flow_spec_tcp_udp tcp = {
473 		.type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
474 		.size = size,
475 	};
476 
477 	if (!mask)
478 		mask = &rte_flow_item_tcp_mask;
479 	*item_flags |=  tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
480 				 MLX5_FLOW_LAYER_OUTER_L4_TCP;
481 	if (spec) {
482 		tcp.val.dst_port = spec->hdr.dst_port;
483 		tcp.val.src_port = spec->hdr.src_port;
484 		tcp.mask.dst_port = mask->hdr.dst_port;
485 		tcp.mask.src_port = mask->hdr.src_port;
486 		/* Remove unwanted bits from values. */
487 		tcp.val.src_port &= tcp.mask.src_port;
488 		tcp.val.dst_port &= tcp.mask.dst_port;
489 	}
490 	dev_flow->verbs.hash_fields |=
491 		mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_TCP,
492 					    (IBV_RX_HASH_SRC_PORT_TCP |
493 					     IBV_RX_HASH_DST_PORT_TCP));
494 	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
495 	flow_verbs_spec_add(dev_flow, &tcp, size);
496 }
497 
498 /**
499  * Convert the @p item into a Verbs specification. This function assumes that
500  * the input is valid and that there is space to insert the requested item
501  * into the flow.
502  *
503  * @param[in] item
504  *   Item specification.
505  * @param[in, out] item_flags
506  *   Bit mask that marks all detected items.
507  * @param[in, out] dev_flow
508  *   Pointer to sepacific flow structure.
509  */
510 static void
511 flow_verbs_translate_item_vxlan(const struct rte_flow_item *item,
512 				uint64_t *item_flags,
513 				struct mlx5_flow *dev_flow)
514 {
515 	const struct rte_flow_item_vxlan *spec = item->spec;
516 	const struct rte_flow_item_vxlan *mask = item->mask;
517 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
518 	struct ibv_flow_spec_tunnel vxlan = {
519 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
520 		.size = size,
521 	};
522 	union vni {
523 		uint32_t vlan_id;
524 		uint8_t vni[4];
525 	} id = { .vlan_id = 0, };
526 
527 	if (!mask)
528 		mask = &rte_flow_item_vxlan_mask;
529 	if (spec) {
530 		memcpy(&id.vni[1], spec->vni, 3);
531 		vxlan.val.tunnel_id = id.vlan_id;
532 		memcpy(&id.vni[1], mask->vni, 3);
533 		vxlan.mask.tunnel_id = id.vlan_id;
534 		/* Remove unwanted bits from values. */
535 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
536 	}
537 	flow_verbs_spec_add(dev_flow, &vxlan, size);
538 	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
539 	*item_flags |= MLX5_FLOW_LAYER_VXLAN;
540 }
541 
542 /**
543  * Convert the @p item into a Verbs specification. This function assumes that
544  * the input is valid and that there is space to insert the requested item
545  * into the flow.
546  *
547  * @param[in] item
548  *   Item specification.
549  * @param[in, out] item_flags
550  *   Bit mask that marks all detected items.
551  * @param[in, out] dev_flow
552  *   Pointer to sepacific flow structure.
553  */
554 static void
555 flow_verbs_translate_item_vxlan_gpe(const struct rte_flow_item *item,
556 				    uint64_t *item_flags,
557 				    struct mlx5_flow *dev_flow)
558 {
559 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
560 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
561 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
562 	struct ibv_flow_spec_tunnel vxlan_gpe = {
563 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
564 		.size = size,
565 	};
566 	union vni {
567 		uint32_t vlan_id;
568 		uint8_t vni[4];
569 	} id = { .vlan_id = 0, };
570 
571 	if (!mask)
572 		mask = &rte_flow_item_vxlan_gpe_mask;
573 	if (spec) {
574 		memcpy(&id.vni[1], spec->vni, 3);
575 		vxlan_gpe.val.tunnel_id = id.vlan_id;
576 		memcpy(&id.vni[1], mask->vni, 3);
577 		vxlan_gpe.mask.tunnel_id = id.vlan_id;
578 		/* Remove unwanted bits from values. */
579 		vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
580 	}
581 	flow_verbs_spec_add(dev_flow, &vxlan_gpe, size);
582 	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
583 	*item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
584 }
585 
586 /**
587  * Update the protocol in Verbs IPv4/IPv6 spec.
588  *
589  * @param[in, out] attr
590  *   Pointer to Verbs attributes structure.
591  * @param[in] search
592  *   Specification type to search in order to update the IP protocol.
593  * @param[in] protocol
594  *   Protocol value to set if none is present in the specification.
595  */
596 static void
597 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
598 				       enum ibv_flow_spec_type search,
599 				       uint8_t protocol)
600 {
601 	unsigned int i;
602 	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
603 		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
604 
605 	if (!attr)
606 		return;
607 	for (i = 0; i != attr->num_of_specs; ++i) {
608 		if (hdr->type == search) {
609 			union {
610 				struct ibv_flow_spec_ipv4_ext *ipv4;
611 				struct ibv_flow_spec_ipv6 *ipv6;
612 			} ip;
613 
614 			switch (search) {
615 			case IBV_FLOW_SPEC_IPV4_EXT:
616 				ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
617 				if (!ip.ipv4->val.proto) {
618 					ip.ipv4->val.proto = protocol;
619 					ip.ipv4->mask.proto = 0xff;
620 				}
621 				break;
622 			case IBV_FLOW_SPEC_IPV6:
623 				ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
624 				if (!ip.ipv6->val.next_hdr) {
625 					ip.ipv6->val.next_hdr = protocol;
626 					ip.ipv6->mask.next_hdr = 0xff;
627 				}
628 				break;
629 			default:
630 				break;
631 			}
632 			break;
633 		}
634 		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
635 	}
636 }
637 
/**
 * Convert the @p item into a Verbs specification. This function assumes that
 * the input is valid and that there is space to insert the requested item
 * into the flow.
 *
 * Without MPLS support in rdma-core a GRE item can only be expressed as a
 * generic VXLAN tunnel spec; with it, a dedicated GRE spec matching the
 * C/K/S bits and protocol is emitted. In both cases the outer IP spec is
 * patched to carry IPPROTO_GRE when the user left it unset.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] item_flags
 *   Bit mask that marks all detected items.
 * @param[in, out] dev_flow
 *   Pointer to specific flow structure.
 */
static void
flow_verbs_translate_item_gre(const struct rte_flow_item *item __rte_unused,
			      uint64_t *item_flags,
			      struct mlx5_flow *dev_flow)
{
	struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
	/* No GRE spec available: fall back to a generic tunnel spec. */
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel tunnel = {
		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
#else
	const struct rte_flow_item_gre *spec = item->spec;
	const struct rte_flow_item_gre *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_gre);
	struct ibv_flow_spec_gre tunnel = {
		.type = IBV_FLOW_SPEC_GRE,
		.size = size,
	};

	if (!mask)
		mask = &rte_flow_item_gre_mask;
	if (spec) {
		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
		tunnel.val.protocol = spec->protocol;
		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
		tunnel.mask.protocol = mask->protocol;
		/* Remove unwanted bits from values. */
		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
		tunnel.val.protocol &= tunnel.mask.protocol;
		/*
		 * NOTE(review): val.key is never filled from the item above,
		 * so this masking is a no-op (both sides stay 0) — confirm
		 * whether GRE key matching was meant to be supported here.
		 */
		tunnel.val.key &= tunnel.mask.key;
	}
#endif
	/* Force the IP protocol to GRE when the user did not set it. */
	if (*item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
		flow_verbs_item_gre_ip_protocol_update(verbs->attr,
						       IBV_FLOW_SPEC_IPV4_EXT,
						       IPPROTO_GRE);
	else
		flow_verbs_item_gre_ip_protocol_update(verbs->attr,
						       IBV_FLOW_SPEC_IPV6,
						       IPPROTO_GRE);
	flow_verbs_spec_add(dev_flow, &tunnel, size);
	verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
	*item_flags |= MLX5_FLOW_LAYER_GRE;
}
696 
697 /**
698  * Convert the @p action into a Verbs specification. This function assumes that
699  * the input is valid and that there is space to insert the requested action
700  * into the flow. This function also return the action that was added.
701  *
702  * @param[in] item
703  *   Item specification.
704  * @param[in, out] item_flags
705  *   Bit mask that marks all detected items.
706  * @param[in, out] dev_flow
707  *   Pointer to sepacific flow structure.
708  */
709 static void
710 flow_verbs_translate_item_mpls(const struct rte_flow_item *item __rte_unused,
711 			       uint64_t *action_flags __rte_unused,
712 			       struct mlx5_flow *dev_flow __rte_unused)
713 {
714 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
715 	const struct rte_flow_item_mpls *spec = item->spec;
716 	const struct rte_flow_item_mpls *mask = item->mask;
717 	unsigned int size = sizeof(struct ibv_flow_spec_mpls);
718 	struct ibv_flow_spec_mpls mpls = {
719 		.type = IBV_FLOW_SPEC_MPLS,
720 		.size = size,
721 	};
722 
723 	if (!mask)
724 		mask = &rte_flow_item_mpls_mask;
725 	if (spec) {
726 		memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
727 		memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
728 		/* Remove unwanted bits from values.  */
729 		mpls.val.label &= mpls.mask.label;
730 	}
731 	flow_verbs_spec_add(dev_flow, &mpls, size);
732 	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
733 	*action_flags |= MLX5_FLOW_LAYER_MPLS;
734 #endif
735 }
736 
737 /**
738  * Convert the @p action into a Verbs specification. This function assumes that
739  * the input is valid and that there is space to insert the requested action
740  * into the flow. This function also return the action that was added.
741  *
742  * @param[in, out] action_flags
743  *   Pointer to the detected actions.
744  * @param[in] dev_flow
745  *   Pointer to mlx5_flow.
746  */
747 static void
748 flow_verbs_translate_action_drop(uint64_t *action_flags,
749 				 struct mlx5_flow *dev_flow)
750 {
751 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
752 	struct ibv_flow_spec_action_drop drop = {
753 			.type = IBV_FLOW_SPEC_ACTION_DROP,
754 			.size = size,
755 	};
756 
757 	flow_verbs_spec_add(dev_flow, &drop, size);
758 	*action_flags |= MLX5_FLOW_ACTION_DROP;
759 }
760 
761 /**
762  * Convert the @p action into a Verbs specification. This function assumes that
763  * the input is valid and that there is space to insert the requested action
764  * into the flow. This function also return the action that was added.
765  *
766  * @param[in] action
767  *   Action configuration.
768  * @param[in, out] action_flags
769  *   Pointer to the detected actions.
770  * @param[in] dev_flow
771  *   Pointer to mlx5_flow.
772  */
773 static void
774 flow_verbs_translate_action_queue(const struct rte_flow_action *action,
775 				  uint64_t *action_flags,
776 				  struct mlx5_flow *dev_flow)
777 {
778 	const struct rte_flow_action_queue *queue = action->conf;
779 	struct rte_flow *flow = dev_flow->flow;
780 
781 	if (flow->queue)
782 		(*flow->queue)[0] = queue->index;
783 	flow->rss.queue_num = 1;
784 	*action_flags |= MLX5_FLOW_ACTION_QUEUE;
785 }
786 
787 /**
788  * Convert the @p action into a Verbs specification. This function assumes that
789  * the input is valid and that there is space to insert the requested action
790  * into the flow. This function also return the action that was added.
791  *
792  * @param[in] action
793  *   Action configuration.
794  * @param[in, out] action_flags
795  *   Pointer to the detected actions.
796  * @param[in] dev_flow
797  *   Pointer to mlx5_flow.
798  */
799 static void
800 flow_verbs_translate_action_rss(const struct rte_flow_action *action,
801 				uint64_t *action_flags,
802 				struct mlx5_flow *dev_flow)
803 {
804 	const struct rte_flow_action_rss *rss = action->conf;
805 	struct rte_flow *flow = dev_flow->flow;
806 
807 	if (flow->queue)
808 		memcpy((*flow->queue), rss->queue,
809 		       rss->queue_num * sizeof(uint16_t));
810 	flow->rss.queue_num = rss->queue_num;
811 	memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
812 	flow->rss.types = rss->types;
813 	flow->rss.level = rss->level;
814 	*action_flags |= MLX5_FLOW_ACTION_RSS;
815 }
816 
817 /**
818  * Convert the @p action into a Verbs specification. This function assumes that
819  * the input is valid and that there is space to insert the requested action
820  * into the flow. This function also return the action that was added.
821  *
822  * @param[in] action
823  *   Action configuration.
824  * @param[in, out] action_flags
825  *   Pointer to the detected actions.
826  * @param[in] dev_flow
827  *   Pointer to mlx5_flow.
828  */
829 static void
830 flow_verbs_translate_action_flag
831 			(const struct rte_flow_action *action __rte_unused,
832 			 uint64_t *action_flags,
833 			 struct mlx5_flow *dev_flow)
834 {
835 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
836 	struct ibv_flow_spec_action_tag tag = {
837 		.type = IBV_FLOW_SPEC_ACTION_TAG,
838 		.size = size,
839 		.tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
840 	};
841 	*action_flags |= MLX5_FLOW_ACTION_MARK;
842 	flow_verbs_spec_add(dev_flow, &tag, size);
843 }
844 
845 /**
846  * Update verbs specification to modify the flag to mark.
847  *
848  * @param[in, out] verbs
849  *   Pointer to the mlx5_flow_verbs structure.
850  * @param[in] mark_id
851  *   Mark identifier to replace the flag.
852  */
853 static void
854 flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
855 {
856 	struct ibv_spec_header *hdr;
857 	int i;
858 
859 	if (!verbs)
860 		return;
861 	/* Update Verbs specification. */
862 	hdr = (struct ibv_spec_header *)verbs->specs;
863 	if (!hdr)
864 		return;
865 	for (i = 0; i != verbs->attr->num_of_specs; ++i) {
866 		if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
867 			struct ibv_flow_spec_action_tag *t =
868 				(struct ibv_flow_spec_action_tag *)hdr;
869 
870 			t->tag_id = mlx5_flow_mark_set(mark_id);
871 		}
872 		hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
873 	}
874 }
875 
876 /**
877  * Convert the @p action into a Verbs specification. This function assumes that
878  * the input is valid and that there is space to insert the requested action
879  * into the flow. This function also return the action that was added.
880  *
881  * @param[in] action
882  *   Action configuration.
883  * @param[in, out] action_flags
884  *   Pointer to the detected actions.
885  * @param[in] dev_flow
886  *   Pointer to mlx5_flow.
887  */
888 static void
889 flow_verbs_translate_action_mark(const struct rte_flow_action *action,
890 				 uint64_t *action_flags,
891 				 struct mlx5_flow *dev_flow)
892 {
893 	const struct rte_flow_action_mark *mark = action->conf;
894 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
895 	struct ibv_flow_spec_action_tag tag = {
896 		.type = IBV_FLOW_SPEC_ACTION_TAG,
897 		.size = size,
898 	};
899 	struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
900 
901 	if (*action_flags & MLX5_FLOW_ACTION_FLAG) {
902 		flow_verbs_mark_update(verbs, mark->id);
903 		size = 0;
904 	} else {
905 		tag.tag_id = mlx5_flow_mark_set(mark->id);
906 		flow_verbs_spec_add(dev_flow, &tag, size);
907 	}
908 	*action_flags |= MLX5_FLOW_ACTION_MARK;
909 }
910 
/**
 * Convert the @p action into a Verbs specification. This function assumes
 * that the input is valid and that there is space to insert the requested
 * action into the flow.
 *
 * Attaches a flow counter (creating or reusing one via
 * flow_verbs_counter_new()) and, when rdma-core supports counter sets,
 * emits the corresponding Verbs counter spec.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] action
 *   Action configuration.
 * @param[in, out] action_flags
 *   Pointer to the detected actions.
 * @param[in] dev_flow
 *   Pointer to mlx5_flow.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 On success else a negative errno value is returned and rte_errno is set.
 */
static int
flow_verbs_translate_action_count(struct rte_eth_dev *dev,
				  const struct rte_flow_action *action,
				  uint64_t *action_flags,
				  struct mlx5_flow *dev_flow,
				  struct rte_flow_error *error)
{
	const struct rte_flow_action_count *count = action->conf;
	struct rte_flow *flow = dev_flow->flow;
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.size = size,
	};
#endif

	/* Allocate (or look up a shared) counter once per rte_flow. */
	if (!flow->counter) {
		flow->counter = flow_verbs_counter_new(dev, count->shared,
						       count->id);
		if (!flow->counter)
			return rte_flow_error_set(error, rte_errno,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  action,
						  "cannot get counter"
						  " context.");
	}
	*action_flags |= MLX5_FLOW_ACTION_COUNT;
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	counter.counter_set_handle = flow->counter->cs->handle;
	flow_verbs_spec_add(dev_flow, &counter, size);
#endif
	return 0;
}
964 
965 /**
966  * Internal validation function. For validating both actions and items.
967  *
968  * @param[in] dev
969  *   Pointer to the Ethernet device structure.
970  * @param[in] attr
971  *   Pointer to the flow attributes.
972  * @param[in] items
973  *   Pointer to the list of items.
974  * @param[in] actions
975  *   Pointer to the list of actions.
976  * @param[out] error
977  *   Pointer to the error structure.
978  *
979  * @return
980  *   0 on success, a negative errno value otherwise and rte_errno is set.
981  */
982 static int
983 flow_verbs_validate(struct rte_eth_dev *dev,
984 		    const struct rte_flow_attr *attr,
985 		    const struct rte_flow_item items[],
986 		    const struct rte_flow_action actions[],
987 		    struct rte_flow_error *error)
988 {
989 	int ret;
990 	uint32_t action_flags = 0;
991 	uint32_t item_flags = 0;
992 	int tunnel = 0;
993 	uint8_t next_protocol = 0xff;
994 
995 	if (items == NULL)
996 		return -1;
997 	ret = mlx5_flow_validate_attributes(dev, attr, error);
998 	if (ret < 0)
999 		return ret;
1000 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1001 		int ret = 0;
1002 		switch (items->type) {
1003 		case RTE_FLOW_ITEM_TYPE_VOID:
1004 			break;
1005 		case RTE_FLOW_ITEM_TYPE_ETH:
1006 			ret = mlx5_flow_validate_item_eth(items, item_flags,
1007 							  error);
1008 			if (ret < 0)
1009 				return ret;
1010 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1011 					       MLX5_FLOW_LAYER_OUTER_L2;
1012 			break;
1013 		case RTE_FLOW_ITEM_TYPE_VLAN:
1014 			ret = mlx5_flow_validate_item_vlan(items, item_flags,
1015 							   error);
1016 			if (ret < 0)
1017 				return ret;
1018 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1019 					       MLX5_FLOW_LAYER_OUTER_VLAN;
1020 			break;
1021 		case RTE_FLOW_ITEM_TYPE_IPV4:
1022 			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1023 							   error);
1024 			if (ret < 0)
1025 				return ret;
1026 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1027 					       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1028 			if (items->mask != NULL &&
1029 			    ((const struct rte_flow_item_ipv4 *)
1030 			     items->mask)->hdr.next_proto_id)
1031 				next_protocol =
1032 					((const struct rte_flow_item_ipv4 *)
1033 					 (items->spec))->hdr.next_proto_id;
1034 			break;
1035 		case RTE_FLOW_ITEM_TYPE_IPV6:
1036 			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1037 							   error);
1038 			if (ret < 0)
1039 				return ret;
1040 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1041 					       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1042 			if (items->mask != NULL &&
1043 			    ((const struct rte_flow_item_ipv6 *)
1044 			     items->mask)->hdr.proto)
1045 				next_protocol =
1046 					((const struct rte_flow_item_ipv6 *)
1047 					 items->spec)->hdr.proto;
1048 			break;
1049 		case RTE_FLOW_ITEM_TYPE_UDP:
1050 			ret = mlx5_flow_validate_item_udp(items, item_flags,
1051 							  next_protocol,
1052 							  error);
1053 			if (ret < 0)
1054 				return ret;
1055 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1056 					       MLX5_FLOW_LAYER_OUTER_L4_UDP;
1057 			break;
1058 		case RTE_FLOW_ITEM_TYPE_TCP:
1059 			ret = mlx5_flow_validate_item_tcp
1060 						(items, item_flags,
1061 						 next_protocol,
1062 						 &rte_flow_item_tcp_mask,
1063 						 error);
1064 			if (ret < 0)
1065 				return ret;
1066 			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1067 					       MLX5_FLOW_LAYER_OUTER_L4_TCP;
1068 			break;
1069 		case RTE_FLOW_ITEM_TYPE_VXLAN:
1070 			ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1071 							    error);
1072 			if (ret < 0)
1073 				return ret;
1074 			item_flags |= MLX5_FLOW_LAYER_VXLAN;
1075 			break;
1076 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1077 			ret = mlx5_flow_validate_item_vxlan_gpe(items,
1078 								item_flags,
1079 								dev, error);
1080 			if (ret < 0)
1081 				return ret;
1082 			item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1083 			break;
1084 		case RTE_FLOW_ITEM_TYPE_GRE:
1085 			ret = mlx5_flow_validate_item_gre(items, item_flags,
1086 							  next_protocol, error);
1087 			if (ret < 0)
1088 				return ret;
1089 			item_flags |= MLX5_FLOW_LAYER_GRE;
1090 			break;
1091 		case RTE_FLOW_ITEM_TYPE_MPLS:
1092 			ret = mlx5_flow_validate_item_mpls(items, item_flags,
1093 							   next_protocol,
1094 							   error);
1095 			if (ret < 0)
1096 				return ret;
1097 			if (next_protocol != 0xff &&
1098 			    next_protocol != IPPROTO_MPLS)
1099 				return rte_flow_error_set
1100 					(error, EINVAL,
1101 					 RTE_FLOW_ERROR_TYPE_ITEM, items,
1102 					 "protocol filtering not compatible"
1103 					 " with MPLS layer");
1104 			item_flags |= MLX5_FLOW_LAYER_MPLS;
1105 			break;
1106 		default:
1107 			return rte_flow_error_set(error, ENOTSUP,
1108 						  RTE_FLOW_ERROR_TYPE_ITEM,
1109 						  NULL, "item not supported");
1110 		}
1111 	}
1112 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1113 		tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1114 		switch (actions->type) {
1115 		case RTE_FLOW_ACTION_TYPE_VOID:
1116 			break;
1117 		case RTE_FLOW_ACTION_TYPE_FLAG:
1118 			ret = mlx5_flow_validate_action_flag(action_flags,
1119 							     attr,
1120 							     error);
1121 			if (ret < 0)
1122 				return ret;
1123 			action_flags |= MLX5_FLOW_ACTION_FLAG;
1124 			break;
1125 		case RTE_FLOW_ACTION_TYPE_MARK:
1126 			ret = mlx5_flow_validate_action_mark(actions,
1127 							     action_flags,
1128 							     attr,
1129 							     error);
1130 			if (ret < 0)
1131 				return ret;
1132 			action_flags |= MLX5_FLOW_ACTION_MARK;
1133 			break;
1134 		case RTE_FLOW_ACTION_TYPE_DROP:
1135 			ret = mlx5_flow_validate_action_drop(action_flags,
1136 							     attr,
1137 							     error);
1138 			if (ret < 0)
1139 				return ret;
1140 			action_flags |= MLX5_FLOW_ACTION_DROP;
1141 			break;
1142 		case RTE_FLOW_ACTION_TYPE_QUEUE:
1143 			ret = mlx5_flow_validate_action_queue(actions,
1144 							      action_flags, dev,
1145 							      attr,
1146 							      error);
1147 			if (ret < 0)
1148 				return ret;
1149 			action_flags |= MLX5_FLOW_ACTION_QUEUE;
1150 			break;
1151 		case RTE_FLOW_ACTION_TYPE_RSS:
1152 			ret = mlx5_flow_validate_action_rss(actions,
1153 							    action_flags, dev,
1154 							    attr,
1155 							    error);
1156 			if (ret < 0)
1157 				return ret;
1158 			action_flags |= MLX5_FLOW_ACTION_RSS;
1159 			break;
1160 		case RTE_FLOW_ACTION_TYPE_COUNT:
1161 			ret = mlx5_flow_validate_action_count(dev, attr, error);
1162 			if (ret < 0)
1163 				return ret;
1164 			action_flags |= MLX5_FLOW_ACTION_COUNT;
1165 			break;
1166 		default:
1167 			return rte_flow_error_set(error, ENOTSUP,
1168 						  RTE_FLOW_ERROR_TYPE_ACTION,
1169 						  actions,
1170 						  "action not supported");
1171 		}
1172 	}
1173 	if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1174 		return rte_flow_error_set(error, EINVAL,
1175 					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
1176 					  "no fate action is found");
1177 	return 0;
1178 }
1179 
1180 /**
1181  * Calculate the required bytes that are needed for the action part of the verbs
1182  * flow, in addtion returns bit-fields with all the detected action, in order to
1183  * avoid another interation over the actions.
1184  *
1185  * @param[in] actions
1186  *   Pointer to the list of actions.
1187  * @param[out] action_flags
1188  *   Pointer to the detected actions.
1189  *
1190  * @return
1191  *   The size of the memory needed for all actions.
1192  */
1193 static int
1194 flow_verbs_get_actions_and_size(const struct rte_flow_action actions[],
1195 				uint64_t *action_flags)
1196 {
1197 	int size = 0;
1198 	uint64_t detected_actions = 0;
1199 
1200 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1201 		switch (actions->type) {
1202 		case RTE_FLOW_ACTION_TYPE_VOID:
1203 			break;
1204 		case RTE_FLOW_ACTION_TYPE_FLAG:
1205 			size += sizeof(struct ibv_flow_spec_action_tag);
1206 			detected_actions |= MLX5_FLOW_ACTION_FLAG;
1207 			break;
1208 		case RTE_FLOW_ACTION_TYPE_MARK:
1209 			size += sizeof(struct ibv_flow_spec_action_tag);
1210 			detected_actions |= MLX5_FLOW_ACTION_MARK;
1211 			break;
1212 		case RTE_FLOW_ACTION_TYPE_DROP:
1213 			size += sizeof(struct ibv_flow_spec_action_drop);
1214 			detected_actions |= MLX5_FLOW_ACTION_DROP;
1215 			break;
1216 		case RTE_FLOW_ACTION_TYPE_QUEUE:
1217 			detected_actions |= MLX5_FLOW_ACTION_QUEUE;
1218 			break;
1219 		case RTE_FLOW_ACTION_TYPE_RSS:
1220 			detected_actions |= MLX5_FLOW_ACTION_RSS;
1221 			break;
1222 		case RTE_FLOW_ACTION_TYPE_COUNT:
1223 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1224 			size += sizeof(struct ibv_flow_spec_counter_action);
1225 #endif
1226 			detected_actions |= MLX5_FLOW_ACTION_COUNT;
1227 			break;
1228 		default:
1229 			break;
1230 		}
1231 	}
1232 	*action_flags = detected_actions;
1233 	return size;
1234 }
1235 
1236 /**
1237  * Calculate the required bytes that are needed for the item part of the verbs
1238  * flow, in addtion returns bit-fields with all the detected action, in order to
1239  * avoid another interation over the actions.
1240  *
1241  * @param[in] actions
1242  *   Pointer to the list of items.
1243  * @param[in, out] item_flags
1244  *   Pointer to the detected items.
1245  *
1246  * @return
1247  *   The size of the memory needed for all items.
1248  */
1249 static int
1250 flow_verbs_get_items_and_size(const struct rte_flow_item items[],
1251 			      uint64_t *item_flags)
1252 {
1253 	int size = 0;
1254 	uint64_t detected_items = 0;
1255 	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
1256 
1257 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1258 		switch (items->type) {
1259 		case RTE_FLOW_ITEM_TYPE_VOID:
1260 			break;
1261 		case RTE_FLOW_ITEM_TYPE_ETH:
1262 			size += sizeof(struct ibv_flow_spec_eth);
1263 			detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1264 						   MLX5_FLOW_LAYER_OUTER_L2;
1265 			break;
1266 		case RTE_FLOW_ITEM_TYPE_VLAN:
1267 			size += sizeof(struct ibv_flow_spec_eth);
1268 			detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1269 						   MLX5_FLOW_LAYER_OUTER_VLAN;
1270 			break;
1271 		case RTE_FLOW_ITEM_TYPE_IPV4:
1272 			size += sizeof(struct ibv_flow_spec_ipv4_ext);
1273 			detected_items |= tunnel ?
1274 					  MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1275 					  MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1276 			break;
1277 		case RTE_FLOW_ITEM_TYPE_IPV6:
1278 			size += sizeof(struct ibv_flow_spec_ipv6);
1279 			detected_items |= tunnel ?
1280 					  MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1281 					  MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1282 			break;
1283 		case RTE_FLOW_ITEM_TYPE_UDP:
1284 			size += sizeof(struct ibv_flow_spec_tcp_udp);
1285 			detected_items |= tunnel ?
1286 					  MLX5_FLOW_LAYER_INNER_L4_UDP :
1287 					  MLX5_FLOW_LAYER_OUTER_L4_UDP;
1288 			break;
1289 		case RTE_FLOW_ITEM_TYPE_TCP:
1290 			size += sizeof(struct ibv_flow_spec_tcp_udp);
1291 			detected_items |= tunnel ?
1292 					  MLX5_FLOW_LAYER_INNER_L4_TCP :
1293 					  MLX5_FLOW_LAYER_OUTER_L4_TCP;
1294 			break;
1295 		case RTE_FLOW_ITEM_TYPE_VXLAN:
1296 			size += sizeof(struct ibv_flow_spec_tunnel);
1297 			detected_items |= MLX5_FLOW_LAYER_VXLAN;
1298 			break;
1299 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1300 			size += sizeof(struct ibv_flow_spec_tunnel);
1301 			detected_items |= MLX5_FLOW_LAYER_VXLAN_GPE;
1302 			break;
1303 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1304 		case RTE_FLOW_ITEM_TYPE_GRE:
1305 			size += sizeof(struct ibv_flow_spec_gre);
1306 			detected_items |= MLX5_FLOW_LAYER_GRE;
1307 			break;
1308 		case RTE_FLOW_ITEM_TYPE_MPLS:
1309 			size += sizeof(struct ibv_flow_spec_mpls);
1310 			detected_items |= MLX5_FLOW_LAYER_MPLS;
1311 			break;
1312 #else
1313 		case RTE_FLOW_ITEM_TYPE_GRE:
1314 			size += sizeof(struct ibv_flow_spec_tunnel);
1315 			detected_items |= MLX5_FLOW_LAYER_TUNNEL;
1316 			break;
1317 #endif
1318 		default:
1319 			break;
1320 		}
1321 	}
1322 	*item_flags = detected_items;
1323 	return size;
1324 }
1325 
1326 /**
1327  * Internal preparation function. Allocate mlx5_flow with the required size.
1328  * The required size is calculate based on the actions and items. This function
1329  * also returns the detected actions and items for later use.
1330  *
1331  * @param[in] attr
1332  *   Pointer to the flow attributes.
1333  * @param[in] items
1334  *   Pointer to the list of items.
1335  * @param[in] actions
1336  *   Pointer to the list of actions.
1337  * @param[out] item_flags
1338  *   Pointer to bit mask of all items detected.
1339  * @param[out] action_flags
1340  *   Pointer to bit mask of all actions detected.
1341  * @param[out] error
1342  *   Pointer to the error structure.
1343  *
1344  * @return
1345  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1346  *   is set.
1347  */
1348 static struct mlx5_flow *
1349 flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
1350 		   const struct rte_flow_item items[],
1351 		   const struct rte_flow_action actions[],
1352 		   uint64_t *item_flags,
1353 		   uint64_t *action_flags,
1354 		   struct rte_flow_error *error)
1355 {
1356 	uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
1357 	struct mlx5_flow *flow;
1358 
1359 	size += flow_verbs_get_actions_and_size(actions, action_flags);
1360 	size += flow_verbs_get_items_and_size(items, item_flags);
1361 	flow = rte_calloc(__func__, 1, size, 0);
1362 	if (!flow) {
1363 		rte_flow_error_set(error, ENOMEM,
1364 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1365 				   "not enough memory to create flow");
1366 		return NULL;
1367 	}
1368 	flow->verbs.attr = (void *)(flow + 1);
1369 	flow->verbs.specs =
1370 		(uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
1371 	return flow;
1372 }
1373 
/**
 * Fill the flow with verb spec.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] dev_flow
 *   Pointer to the mlx5 flow.
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, else a negative errno value otherwise and rte_errno is set.
 */
static int
flow_verbs_translate(struct rte_eth_dev *dev,
		     struct mlx5_flow *dev_flow,
		     const struct rte_flow_attr *attr,
		     const struct rte_flow_item items[],
		     const struct rte_flow_action actions[],
		     struct rte_flow_error *error)
{
	uint64_t action_flags = 0;
	uint64_t item_flags = 0;
	uint64_t priority = attr->priority;
	struct priv *priv = dev->data->dev_private;

	/* MLX5_FLOW_PRIO_RSVD means "use the lowest configured priority". */
	if (priority == MLX5_FLOW_PRIO_RSVD)
		priority = priv->config.flow_prio - 1;
	/* First pass: append one Verbs action spec per rte_flow action. */
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		int ret;
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_FLAG:
			flow_verbs_translate_action_flag(actions,
							 &action_flags,
							 dev_flow);
			break;
		case RTE_FLOW_ACTION_TYPE_MARK:
			flow_verbs_translate_action_mark(actions,
							 &action_flags,
							 dev_flow);
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			flow_verbs_translate_action_drop(&action_flags,
							 dev_flow);
			break;
		case RTE_FLOW_ACTION_TYPE_QUEUE:
			flow_verbs_translate_action_queue(actions,
							  &action_flags,
							  dev_flow);
			break;
		case RTE_FLOW_ACTION_TYPE_RSS:
			flow_verbs_translate_action_rss(actions,
							&action_flags,
							dev_flow);
			break;
		case RTE_FLOW_ACTION_TYPE_COUNT:
			/* COUNT is the only action translation that can fail
			 * (counter allocation). */
			ret = flow_verbs_translate_action_count(dev,
								actions,
								&action_flags,
								dev_flow,
								error);
			if (ret < 0)
				return ret;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
	}
	/* Record the detected actions on the parent rte_flow. */
	dev_flow->flow->actions |= action_flags;
	/* Second pass: append one Verbs match spec per rte_flow item.
	 * Each helper updates item_flags so later items can tell inner
	 * from outer layers. */
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			flow_verbs_translate_item_eth(items, &item_flags,
						      dev_flow);
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			flow_verbs_translate_item_vlan(items, &item_flags,
						       dev_flow);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			flow_verbs_translate_item_ipv4(items, &item_flags,
						       dev_flow);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			flow_verbs_translate_item_ipv6(items, &item_flags,
						       dev_flow);
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			flow_verbs_translate_item_udp(items, &item_flags,
						      dev_flow);
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			flow_verbs_translate_item_tcp(items, &item_flags,
						      dev_flow);
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN:
			flow_verbs_translate_item_vxlan(items, &item_flags,
							dev_flow);
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
			flow_verbs_translate_item_vxlan_gpe(items, &item_flags,
							    dev_flow);
			break;
		case RTE_FLOW_ITEM_TYPE_GRE:
			flow_verbs_translate_item_gre(items, &item_flags,
						      dev_flow);
			break;
		case RTE_FLOW_ITEM_TYPE_MPLS:
			flow_verbs_translate_item_mpls(items, &item_flags,
						       dev_flow);
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL,
						  "item not supported");
		}
	}
	/* Map the requested priority onto the device priority range. */
	dev_flow->verbs.attr->priority =
		mlx5_flow_adjust_priority(dev, priority,
					  dev_flow->verbs.attr->priority);
	return 0;
}
1510 
1511 /**
1512  * Remove the flow from the NIC but keeps it in memory.
1513  *
1514  * @param[in] dev
1515  *   Pointer to the Ethernet device structure.
1516  * @param[in, out] flow
1517  *   Pointer to flow structure.
1518  */
1519 static void
1520 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1521 {
1522 	struct mlx5_flow_verbs *verbs;
1523 	struct mlx5_flow *dev_flow;
1524 
1525 	if (!flow)
1526 		return;
1527 	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1528 		verbs = &dev_flow->verbs;
1529 		if (verbs->flow) {
1530 			claim_zero(mlx5_glue->destroy_flow(verbs->flow));
1531 			verbs->flow = NULL;
1532 		}
1533 		if (verbs->hrxq) {
1534 			if (flow->actions & MLX5_FLOW_ACTION_DROP)
1535 				mlx5_hrxq_drop_release(dev);
1536 			else
1537 				mlx5_hrxq_release(dev, verbs->hrxq);
1538 			verbs->hrxq = NULL;
1539 		}
1540 	}
1541 	if (flow->counter) {
1542 		flow_verbs_counter_release(flow->counter);
1543 		flow->counter = NULL;
1544 	}
1545 }
1546 
1547 /**
1548  * Remove the flow from the NIC and the memory.
1549  *
1550  * @param[in] dev
1551  *   Pointer to the Ethernet device structure.
1552  * @param[in, out] flow
1553  *   Pointer to flow structure.
1554  */
1555 static void
1556 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1557 {
1558 	struct mlx5_flow *dev_flow;
1559 
1560 	if (!flow)
1561 		return;
1562 	flow_verbs_remove(dev, flow);
1563 	while (!LIST_EMPTY(&flow->dev_flows)) {
1564 		dev_flow = LIST_FIRST(&flow->dev_flows);
1565 		LIST_REMOVE(dev_flow, next);
1566 		rte_free(dev_flow);
1567 	}
1568 }
1569 
/**
 * Apply the flow to the NIC.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
		 struct rte_flow_error *error)
{
	struct mlx5_flow_verbs *verbs;
	struct mlx5_flow *dev_flow;
	int err;

	/* For each device flow: resolve a hash Rx queue, then create the
	 * Verbs flow rule on it.  On any failure roll back everything
	 * acquired so far. */
	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
		verbs = &dev_flow->verbs;
		if (flow->actions & MLX5_FLOW_ACTION_DROP) {
			verbs->hrxq = mlx5_hrxq_drop_new(dev);
			if (!verbs->hrxq) {
				rte_flow_error_set
					(error, errno,
					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					 "cannot get drop hash queue");
				goto error;
			}
		} else {
			struct mlx5_hrxq *hrxq;

			/* Reuse an existing hrxq when one matches the RSS
			 * key/fields/queues, otherwise create a new one. */
			hrxq = mlx5_hrxq_get(dev, flow->key,
					     MLX5_RSS_HASH_KEY_LEN,
					     verbs->hash_fields,
					     (*flow->queue),
					     flow->rss.queue_num);
			if (!hrxq)
				hrxq = mlx5_hrxq_new(dev, flow->key,
						     MLX5_RSS_HASH_KEY_LEN,
						     verbs->hash_fields,
						     (*flow->queue),
						     flow->rss.queue_num,
						     !!(flow->layers &
						      MLX5_FLOW_LAYER_TUNNEL));
			if (!hrxq) {
				rte_flow_error_set
					(error, rte_errno,
					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					 "cannot get hash queue");
				goto error;
			}
			verbs->hrxq = hrxq;
		}
		verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
						     verbs->attr);
		if (!verbs->flow) {
			rte_flow_error_set(error, errno,
					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					   NULL,
					   "hardware refuses to create flow");
			goto error;
		}
	}
	return 0;
error:
	err = rte_errno; /* Save rte_errno before cleanup. */
	/* Release every hrxq acquired by the loop above; device flows that
	 * were not reached yet have verbs->hrxq == NULL and are skipped. */
	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
		verbs = &dev_flow->verbs;
		if (verbs->hrxq) {
			if (flow->actions & MLX5_FLOW_ACTION_DROP)
				mlx5_hrxq_drop_release(dev);
			else
				mlx5_hrxq_release(dev, verbs->hrxq);
			verbs->hrxq = NULL;
		}
	}
	rte_errno = err; /* Restore rte_errno. */
	return -rte_errno;
}
1653 
/* Verbs flow engine entry points registered with the generic mlx5 flow
 * layer (see mlx5_flow.c driver-ops dispatch). */
const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
	.validate = flow_verbs_validate,
	.prepare = flow_verbs_prepare,
	.translate = flow_verbs_translate,
	.apply = flow_verbs_apply,
	.remove = flow_verbs_remove,
	.destroy = flow_verbs_destroy,
};
1662