xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision b79e4c00af0e7cfb8601ab0208659d226b82bd10)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright 2016 6WIND S.A.
5  *   Copyright 2016 Mellanox.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/queue.h>
35 #include <string.h>
36 
37 /* Verbs header. */
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
39 #ifdef PEDANTIC
40 #pragma GCC diagnostic ignored "-Wpedantic"
41 #endif
42 #include <infiniband/verbs.h>
43 #ifdef PEDANTIC
44 #pragma GCC diagnostic error "-Wpedantic"
45 #endif
46 
47 #include <rte_ethdev.h>
48 #include <rte_flow.h>
49 #include <rte_flow_driver.h>
50 #include <rte_malloc.h>
51 
52 #include "mlx5.h"
53 #include "mlx5_prm.h"
54 
55 /* Number of Work Queues necessary for the DROP queue. */
56 #ifndef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
57 #define MLX5_DROP_WQ_N 4
58 #else
59 #define MLX5_DROP_WQ_N 1
60 #endif
61 
62 static int
63 mlx5_flow_create_eth(const struct rte_flow_item *item,
64 		     const void *default_mask,
65 		     void *data);
66 
67 static int
68 mlx5_flow_create_vlan(const struct rte_flow_item *item,
69 		      const void *default_mask,
70 		      void *data);
71 
72 static int
73 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
74 		      const void *default_mask,
75 		      void *data);
76 
77 static int
78 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
79 		      const void *default_mask,
80 		      void *data);
81 
82 static int
83 mlx5_flow_create_udp(const struct rte_flow_item *item,
84 		     const void *default_mask,
85 		     void *data);
86 
87 static int
88 mlx5_flow_create_tcp(const struct rte_flow_item *item,
89 		     const void *default_mask,
90 		     void *data);
91 
92 static int
93 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
94 		       const void *default_mask,
95 		       void *data);
96 
97 struct rte_flow {
98 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
99 	struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
100 	struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
101 	struct ibv_qp *qp; /**< Verbs queue pair. */
102 	struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
103 	struct ibv_exp_wq *wq; /**< Verbs work queue. */
104 	struct ibv_cq *cq; /**< Verbs completion queue. */
105 	uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
106 	uint32_t mark:1; /**< Set if the flow is marked. */
107 	uint32_t drop:1; /**< Drop queue. */
108 	uint64_t hash_fields; /**< Fields that participate in the hash. */
109 	struct rxq *rxqs[]; /**< Pointer to the queues array. */
110 };
111 
112 /** Static initializer for items. */
113 #define ITEMS(...) \
114 	(const enum rte_flow_item_type []){ \
115 		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
116 	}
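
/*
 * For example, ITEMS(RTE_FLOW_ITEM_TYPE_UDP, RTE_FLOW_ITEM_TYPE_TCP) expands
 * to a compound literal array terminated by RTE_FLOW_ITEM_TYPE_END.
 */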
117 
118 /** Structure to generate a simple graph of layers supported by the NIC. */
119 struct mlx5_flow_items {
120 	/** List of possible actions for these items. */
121 	const enum rte_flow_action_type *const actions;
122 	/** Bit-masks corresponding to the possibilities for the item. */
123 	const void *mask;
124 	/**
125 	 * Default bit-masks to use when item->mask is not provided. When
126 	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
127 	 * used instead.
128 	 */
129 	const void *default_mask;
130 	/** Bit-masks size in bytes. */
131 	const unsigned int mask_sz;
132 	/**
133 	 * Conversion function from rte_flow to NIC specific flow.
134 	 *
135 	 * @param item
136 	 *   rte_flow item to convert.
137 	 * @param default_mask
138 	 *   Default bit-masks to use when item->mask is not provided.
139 	 * @param data
140 	 *   Internal structure to store the conversion.
141 	 *
142 	 * @return
143 	 *   0 on success, negative value otherwise.
144 	 */
145 	int (*convert)(const struct rte_flow_item *item,
146 		       const void *default_mask,
147 		       void *data);
148 	/** Size in bytes of the destination structure. */
149 	const unsigned int dst_sz;
150 	/** List of possible following items. */
151 	const enum rte_flow_item_type *const items;
152 };
153 
154 /** Valid actions for this PMD. */
155 static const enum rte_flow_action_type valid_actions[] = {
156 	RTE_FLOW_ACTION_TYPE_DROP,
157 	RTE_FLOW_ACTION_TYPE_QUEUE,
158 	RTE_FLOW_ACTION_TYPE_MARK,
159 	RTE_FLOW_ACTION_TYPE_FLAG,
160 	RTE_FLOW_ACTION_TYPE_END,
161 };
162 
163 /** Graph of supported items and associated actions. */
164 static const struct mlx5_flow_items mlx5_flow_items[] = {
165 	[RTE_FLOW_ITEM_TYPE_END] = {
166 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
167 			       RTE_FLOW_ITEM_TYPE_VXLAN),
168 	},
169 	[RTE_FLOW_ITEM_TYPE_ETH] = {
170 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
171 			       RTE_FLOW_ITEM_TYPE_IPV4,
172 			       RTE_FLOW_ITEM_TYPE_IPV6),
173 		.actions = valid_actions,
174 		.mask = &(const struct rte_flow_item_eth){
175 			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
176 			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
177 			.type = -1,
178 		},
179 		.default_mask = &rte_flow_item_eth_mask,
180 		.mask_sz = sizeof(struct rte_flow_item_eth),
181 		.convert = mlx5_flow_create_eth,
182 		.dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
183 	},
184 	[RTE_FLOW_ITEM_TYPE_VLAN] = {
185 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
186 			       RTE_FLOW_ITEM_TYPE_IPV6),
187 		.actions = valid_actions,
188 		.mask = &(const struct rte_flow_item_vlan){
189 			.tci = -1,
190 		},
191 		.default_mask = &rte_flow_item_vlan_mask,
192 		.mask_sz = sizeof(struct rte_flow_item_vlan),
193 		.convert = mlx5_flow_create_vlan,
194 		.dst_sz = 0,
195 	},
196 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
197 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
198 			       RTE_FLOW_ITEM_TYPE_TCP),
199 		.actions = valid_actions,
200 		.mask = &(const struct rte_flow_item_ipv4){
201 			.hdr = {
202 				.src_addr = -1,
203 				.dst_addr = -1,
204 				.type_of_service = -1,
205 				.next_proto_id = -1,
206 			},
207 		},
208 		.default_mask = &rte_flow_item_ipv4_mask,
209 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
210 		.convert = mlx5_flow_create_ipv4,
211 		.dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
212 	},
213 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
214 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
215 			       RTE_FLOW_ITEM_TYPE_TCP),
216 		.actions = valid_actions,
217 		.mask = &(const struct rte_flow_item_ipv6){
218 			.hdr = {
219 				.src_addr = {
220 					0xff, 0xff, 0xff, 0xff,
221 					0xff, 0xff, 0xff, 0xff,
222 					0xff, 0xff, 0xff, 0xff,
223 					0xff, 0xff, 0xff, 0xff,
224 				},
225 				.dst_addr = {
226 					0xff, 0xff, 0xff, 0xff,
227 					0xff, 0xff, 0xff, 0xff,
228 					0xff, 0xff, 0xff, 0xff,
229 					0xff, 0xff, 0xff, 0xff,
230 				},
231 				.vtc_flow = -1,
232 				.proto = -1,
233 				.hop_limits = -1,
234 			},
235 		},
236 		.default_mask = &rte_flow_item_ipv6_mask,
237 		.mask_sz = sizeof(struct rte_flow_item_ipv6),
238 		.convert = mlx5_flow_create_ipv6,
239 		.dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6_ext),
240 	},
241 	[RTE_FLOW_ITEM_TYPE_UDP] = {
242 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
243 		.actions = valid_actions,
244 		.mask = &(const struct rte_flow_item_udp){
245 			.hdr = {
246 				.src_port = -1,
247 				.dst_port = -1,
248 			},
249 		},
250 		.default_mask = &rte_flow_item_udp_mask,
251 		.mask_sz = sizeof(struct rte_flow_item_udp),
252 		.convert = mlx5_flow_create_udp,
253 		.dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
254 	},
255 	[RTE_FLOW_ITEM_TYPE_TCP] = {
256 		.actions = valid_actions,
257 		.mask = &(const struct rte_flow_item_tcp){
258 			.hdr = {
259 				.src_port = -1,
260 				.dst_port = -1,
261 			},
262 		},
263 		.default_mask = &rte_flow_item_tcp_mask,
264 		.mask_sz = sizeof(struct rte_flow_item_tcp),
265 		.convert = mlx5_flow_create_tcp,
266 		.dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
267 	},
268 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
269 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
270 		.actions = valid_actions,
271 		.mask = &(const struct rte_flow_item_vxlan){
272 			.vni = "\xff\xff\xff",
273 		},
274 		.default_mask = &rte_flow_item_vxlan_mask,
275 		.mask_sz = sizeof(struct rte_flow_item_vxlan),
276 		.convert = mlx5_flow_create_vxlan,
277 		.dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
278 	},
279 };
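
/*
 * The graph above is walked by priv_flow_validate(): for instance
 * eth / ipv4 / udp / vxlan / eth / ipv4 / tcp is a valid pattern, while a
 * pattern starting with ipv4 is rejected because only eth and vxlan may
 * follow the implicit END entry point.
 */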
280 
281 /** Structure to pass to the conversion function. */
282 struct mlx5_flow {
283 	struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
284 	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
285 	uint32_t inner; /**< Set once VXLAN is encountered. */
286 	uint64_t hash_fields; /**< Fields that participate in the hash. */
287 };
288 
289 /** Structure for Drop queue. */
290 struct rte_flow_drop {
291 	struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
292 	struct ibv_qp *qp; /**< Verbs queue pair. */
293 	struct ibv_exp_wq *wqs[MLX5_DROP_WQ_N]; /**< Verbs work queues. */
294 	struct ibv_cq *cq; /**< Verbs completion queue. */
295 };
296 
297 struct mlx5_flow_action {
298 	uint32_t queue:1; /**< Target is a receive queue. */
299 	uint32_t drop:1; /**< Target is a drop queue. */
300 	uint32_t mark:1; /**< Mark is present in the flow. */
301 	uint32_t mark_id; /**< Mark identifier. */
302 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
303 	uint16_t queues_n; /**< Number of entries in queues[]. */
304 };
305 
306 /**
307  * Check support for a given item.
308  *
309  * @param item[in]
310  *   Item specification.
311  * @param mask[in]
312  *   Bit-masks covering supported fields to compare with spec, last and mask in
313  *   \item.
314  * @param size
315  *   Bit-mask size in bytes.
316  *
317  * @return
318  *   0 on success, non-zero otherwise.
319  */
320 static int
321 mlx5_flow_item_validate(const struct rte_flow_item *item,
322 			const uint8_t *mask, unsigned int size)
323 {
324 	int ret = 0;
325 
326 	if (!item->spec && (item->mask || item->last))
327 		return -1;
328 	if (item->spec && !item->mask) {
329 		unsigned int i;
330 		const uint8_t *spec = item->spec;
331 
332 		for (i = 0; i < size; ++i)
333 			if ((spec[i] | mask[i]) != mask[i])
334 				return -1;
335 	}
336 	if (item->last && !item->mask) {
337 		unsigned int i;
338 		const uint8_t *last = item->last;
339 
340 		for (i = 0; i < size; ++i)
341 			if ((last[i] | mask[i]) != mask[i])
342 				return -1;
343 	}
344 	if (item->mask) {
345 		unsigned int i;
346 		const uint8_t *item_mask = item->mask;
347 
348 		for (i = 0; i < size; ++i)
349 			if ((item_mask[i] | mask[i]) != mask[i])
350 				return -1;
351 	}
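	/*
	 * When both spec and last are provided, they must match once the mask
	 * is applied: true ranges are not supported and are rejected below.
	 */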
352 	if (item->spec && item->last) {
353 		uint8_t spec[size];
354 		uint8_t last[size];
355 		const uint8_t *apply = mask;
356 		unsigned int i;
357 
358 		if (item->mask)
359 			apply = item->mask;
360 		for (i = 0; i < size; ++i) {
361 			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
362 			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
363 		}
364 		ret = memcmp(spec, last, size);
365 	}
366 	return ret;
367 }
368 
369 /**
370  * Validate a flow supported by the NIC.
371  *
372  * @param priv
373  *   Pointer to private structure.
374  * @param[in] attr
375  *   Flow rule attributes.
376  * @param[in] pattern
377  *   Pattern specification (list terminated by the END pattern item).
378  * @param[in] actions
379  *   Associated actions (list terminated by the END action).
380  * @param[out] error
381  *   Perform verbose error reporting if not NULL.
382  * @param[in, out] flow
383  *   Flow structure to update.
384  * @param[in, out] action
385  *   Action structure to update.
386  *
387  * @return
388  *   0 on success, a negative errno value otherwise and rte_errno is set.
389  */
390 static int
391 priv_flow_validate(struct priv *priv,
392 		   const struct rte_flow_attr *attr,
393 		   const struct rte_flow_item items[],
394 		   const struct rte_flow_action actions[],
395 		   struct rte_flow_error *error,
396 		   struct mlx5_flow *flow,
397 		   struct mlx5_flow_action *action)
398 {
399 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
400 
401 	(void)priv;
402 	if (attr->group) {
403 		rte_flow_error_set(error, ENOTSUP,
404 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
405 				   NULL,
406 				   "groups are not supported");
407 		return -rte_errno;
408 	}
409 	if (attr->priority) {
410 		rte_flow_error_set(error, ENOTSUP,
411 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
412 				   NULL,
413 				   "priorities are not supported");
414 		return -rte_errno;
415 	}
416 	if (attr->egress) {
417 		rte_flow_error_set(error, ENOTSUP,
418 				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
419 				   NULL,
420 				   "egress is not supported");
421 		return -rte_errno;
422 	}
423 	if (!attr->ingress) {
424 		rte_flow_error_set(error, ENOTSUP,
425 				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
426 				   NULL,
427 				   "only ingress is supported");
428 		return -rte_errno;
429 	}
430 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
431 		const struct mlx5_flow_items *token = NULL;
432 		unsigned int i;
433 		int err;
434 
435 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
436 			continue;
437 		for (i = 0;
438 		     cur_item->items &&
439 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
440 		     ++i) {
441 			if (cur_item->items[i] == items->type) {
442 				token = &mlx5_flow_items[items->type];
443 				break;
444 			}
445 		}
446 		if (!token)
447 			goto exit_item_not_supported;
448 		cur_item = token;
449 		err = mlx5_flow_item_validate(items,
450 					      (const uint8_t *)cur_item->mask,
451 					      cur_item->mask_sz);
452 		if (err)
453 			goto exit_item_not_supported;
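		/*
		 * First pass (flow->ibv_attr == NULL) only accumulates the
		 * required specification size in flow->offset; the second pass
		 * additionally converts each item to its Verbs specification.
		 */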
454 		if (flow->ibv_attr && cur_item->convert) {
455 			err = cur_item->convert(items,
456 						(cur_item->default_mask ?
457 						 cur_item->default_mask :
458 						 cur_item->mask),
459 						flow);
460 			if (err)
461 				goto exit_item_not_supported;
462 		} else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
463 			if (flow->inner) {
464 				rte_flow_error_set(error, ENOTSUP,
465 						   RTE_FLOW_ERROR_TYPE_ITEM,
466 						   items,
467 						   "cannot recognize multiple"
468 						   " VXLAN encapsulations");
469 				return -rte_errno;
470 			}
471 			flow->inner = 1;
472 		}
473 		flow->offset += cur_item->dst_sz;
474 	}
475 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
476 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
477 			continue;
478 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
479 			action->drop = 1;
480 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
481 			const struct rte_flow_action_queue *queue =
482 				(const struct rte_flow_action_queue *)
483 				actions->conf;
484 			uint16_t n;
485 			uint16_t found = 0;
486 
487 			if (!queue || (queue->index >= priv->rxqs_n))
488 				goto exit_action_not_supported;
489 			for (n = 0; n < action->queues_n; ++n) {
490 				if (action->queues[n] == queue->index) {
491 					found = 1;
492 					break;
493 				}
494 			}
495 			if (action->queues_n > 1 && !found) {
496 				rte_flow_error_set(error, ENOTSUP,
497 					   RTE_FLOW_ERROR_TYPE_ACTION,
498 					   actions,
499 					   "queue action not in RSS queues");
500 				return -rte_errno;
501 			}
502 			if (!found) {
503 				action->queue = 1;
504 				action->queues_n = 1;
505 				action->queues[0] = queue->index;
506 			}
507 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
508 			const struct rte_flow_action_rss *rss =
509 				(const struct rte_flow_action_rss *)
510 				actions->conf;
511 			uint16_t n;
512 
513 			if (!rss || !rss->num) {
514 				rte_flow_error_set(error, EINVAL,
515 						   RTE_FLOW_ERROR_TYPE_ACTION,
516 						   actions,
517 						   "no valid queues");
518 				return -rte_errno;
519 			}
520 			if (action->queues_n == 1) {
521 				uint16_t found = 0;
522 
523 				assert(action->queues_n);
524 				for (n = 0; n < rss->num; ++n) {
525 					if (action->queues[0] ==
526 					    rss->queue[n]) {
527 						found = 1;
528 						break;
529 					}
530 				}
531 				if (!found) {
532 					rte_flow_error_set(error, ENOTSUP,
533 						   RTE_FLOW_ERROR_TYPE_ACTION,
534 						   actions,
535 						   "queue action not in RSS"
536 						   " queues");
537 					return -rte_errno;
538 				}
539 			}
540 			for (n = 0; n < rss->num; ++n) {
541 				if (rss->queue[n] >= priv->rxqs_n) {
542 					rte_flow_error_set(error, EINVAL,
543 						   RTE_FLOW_ERROR_TYPE_ACTION,
544 						   actions,
545 						   "queue id > number of"
546 						   " queues");
547 					return -rte_errno;
548 				}
549 			}
550 			action->queue = 1;
551 			for (n = 0; n < rss->num; ++n)
552 				action->queues[n] = rss->queue[n];
553 			action->queues_n = rss->num;
554 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
555 			const struct rte_flow_action_mark *mark =
556 				(const struct rte_flow_action_mark *)
557 				actions->conf;
558 
559 			if (!mark) {
560 				rte_flow_error_set(error, EINVAL,
561 						   RTE_FLOW_ERROR_TYPE_ACTION,
562 						   actions,
563 						   "mark must be defined");
564 				return -rte_errno;
565 			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
566 				rte_flow_error_set(error, ENOTSUP,
567 						   RTE_FLOW_ERROR_TYPE_ACTION,
568 						   actions,
569 						   "mark must be between 0"
570 						   " and 16777199");
571 				return -rte_errno;
572 			}
573 			action->mark = 1;
574 			action->mark_id = mark->id;
575 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
576 			action->mark = 1;
577 		} else {
578 			goto exit_action_not_supported;
579 		}
580 	}
581 	if (action->mark && !flow->ibv_attr && !action->drop)
582 		flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
583 #ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
584 	if (!flow->ibv_attr && action->drop)
585 		flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
586 #endif
587 	if (!action->queue && !action->drop) {
588 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
589 				   NULL, "no valid action");
590 		return -rte_errno;
591 	}
592 	return 0;
593 exit_item_not_supported:
594 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
595 			   items, "item not supported");
596 	return -rte_errno;
597 exit_action_not_supported:
598 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
599 			   actions, "action not supported");
600 	return -rte_errno;
601 }
602 
603 /**
604  * Validate a flow supported by the NIC.
605  *
606  * @see rte_flow_validate()
607  * @see rte_flow_ops
608  */
609 int
610 mlx5_flow_validate(struct rte_eth_dev *dev,
611 		   const struct rte_flow_attr *attr,
612 		   const struct rte_flow_item items[],
613 		   const struct rte_flow_action actions[],
614 		   struct rte_flow_error *error)
615 {
616 	struct priv *priv = dev->data->dev_private;
617 	int ret;
618 	struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
619 	struct mlx5_flow_action action = {
620 		.queue = 0,
621 		.drop = 0,
622 		.mark = 0,
623 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
624 		.queues_n = 0,
625 	};
626 
627 	priv_lock(priv);
628 	ret = priv_flow_validate(priv, attr, items, actions, error, &flow,
629 				 &action);
630 	priv_unlock(priv);
631 	return ret;
632 }
633 
634 /**
635  * Convert Ethernet item to Verbs specification.
636  *
637  * @param item[in]
638  *   Item specification.
639  * @param default_mask[in]
640  *   Default bit-masks to use when item->mask is not provided.
641  * @param data[in, out]
642  *   User structure.
643  */
644 static int
645 mlx5_flow_create_eth(const struct rte_flow_item *item,
646 		     const void *default_mask,
647 		     void *data)
648 {
649 	const struct rte_flow_item_eth *spec = item->spec;
650 	const struct rte_flow_item_eth *mask = item->mask;
651 	struct mlx5_flow *flow = (struct mlx5_flow *)data;
652 	struct ibv_exp_flow_spec_eth *eth;
653 	const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
654 	unsigned int i;
655 
656 	++flow->ibv_attr->num_of_specs;
657 	flow->ibv_attr->priority = 2;
658 	flow->hash_fields = 0;
659 	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
660 	*eth = (struct ibv_exp_flow_spec_eth) {
661 		.type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
662 		.size = eth_size,
663 	};
664 	if (!spec)
665 		return 0;
666 	if (!mask)
667 		mask = default_mask;
668 	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
669 	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
670 	eth->val.ether_type = spec->type;
671 	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
672 	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
673 	eth->mask.ether_type = mask->type;
674 	/* Remove unwanted bits from values. */
675 	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
676 		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
677 		eth->val.src_mac[i] &= eth->mask.src_mac[i];
678 	}
679 	eth->val.ether_type &= eth->mask.ether_type;
680 	return 0;
681 }
682 
683 /**
684  * Convert VLAN item to Verbs specification.
685  *
686  * @param item[in]
687  *   Item specification.
688  * @param default_mask[in]
689  *   Default bit-masks to use when item->mask is not provided.
690  * @param data[in, out]
691  *   User structure.
692  */
693 static int
694 mlx5_flow_create_vlan(const struct rte_flow_item *item,
695 		      const void *default_mask,
696 		      void *data)
697 {
698 	const struct rte_flow_item_vlan *spec = item->spec;
699 	const struct rte_flow_item_vlan *mask = item->mask;
700 	struct mlx5_flow *flow = (struct mlx5_flow *)data;
701 	struct ibv_exp_flow_spec_eth *eth;
702 	const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
703 
704 	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
705 	if (!spec)
706 		return 0;
707 	if (!mask)
708 		mask = default_mask;
709 	eth->val.vlan_tag = spec->tci;
710 	eth->mask.vlan_tag = mask->tci;
711 	eth->val.vlan_tag &= eth->mask.vlan_tag;
712 	return 0;
713 }
714 
715 /**
716  * Convert IPv4 item to Verbs specification.
717  *
718  * @param item[in]
719  *   Item specification.
720  * @param default_mask[in]
721  *   Default bit-masks to use when item->mask is not provided.
722  * @param data[in, out]
723  *   User structure.
724  */
725 static int
726 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
727 		      const void *default_mask,
728 		      void *data)
729 {
730 	const struct rte_flow_item_ipv4 *spec = item->spec;
731 	const struct rte_flow_item_ipv4 *mask = item->mask;
732 	struct mlx5_flow *flow = (struct mlx5_flow *)data;
733 	struct ibv_exp_flow_spec_ipv4_ext *ipv4;
734 	unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);
735 
736 	++flow->ibv_attr->num_of_specs;
737 	flow->ibv_attr->priority = 1;
738 	flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
739 			     IBV_EXP_RX_HASH_DST_IPV4);
740 	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
741 	*ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
742 		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
743 		.size = ipv4_size,
744 	};
745 	if (!spec)
746 		return 0;
747 	if (!mask)
748 		mask = default_mask;
749 	ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
750 		.src_ip = spec->hdr.src_addr,
751 		.dst_ip = spec->hdr.dst_addr,
752 		.proto = spec->hdr.next_proto_id,
753 		.tos = spec->hdr.type_of_service,
754 	};
755 	ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
756 		.src_ip = mask->hdr.src_addr,
757 		.dst_ip = mask->hdr.dst_addr,
758 		.proto = mask->hdr.next_proto_id,
759 		.tos = mask->hdr.type_of_service,
760 	};
761 	/* Remove unwanted bits from values. */
762 	ipv4->val.src_ip &= ipv4->mask.src_ip;
763 	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
764 	ipv4->val.proto &= ipv4->mask.proto;
765 	ipv4->val.tos &= ipv4->mask.tos;
766 	return 0;
767 }
768 
769 /**
770  * Convert IPv6 item to Verbs specification.
771  *
772  * @param item[in]
773  *   Item specification.
774  * @param default_mask[in]
775  *   Default bit-masks to use when item->mask is not provided.
776  * @param data[in, out]
777  *   User structure.
778  */
779 static int
780 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
781 		      const void *default_mask,
782 		      void *data)
783 {
784 	const struct rte_flow_item_ipv6 *spec = item->spec;
785 	const struct rte_flow_item_ipv6 *mask = item->mask;
786 	struct mlx5_flow *flow = (struct mlx5_flow *)data;
787 	struct ibv_exp_flow_spec_ipv6_ext *ipv6;
788 	unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);
789 	unsigned int i;
790 
791 	++flow->ibv_attr->num_of_specs;
792 	flow->ibv_attr->priority = 1;
793 	flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
794 			     IBV_EXP_RX_HASH_DST_IPV6);
795 	ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
796 	*ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
797 		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
798 		.size = ipv6_size,
799 	};
800 	if (!spec)
801 		return 0;
802 	if (!mask)
803 		mask = default_mask;
804 	memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
805 	       RTE_DIM(ipv6->val.src_ip));
806 	memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
807 	       RTE_DIM(ipv6->val.dst_ip));
808 	memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
809 	       RTE_DIM(ipv6->mask.src_ip));
810 	memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
811 	       RTE_DIM(ipv6->mask.dst_ip));
	ipv6->val.flow_label = spec->hdr.vtc_flow;
	ipv6->val.next_hdr = spec->hdr.proto;
	ipv6->val.hop_limit = spec->hdr.hop_limits;
812 	ipv6->mask.flow_label = mask->hdr.vtc_flow;
813 	ipv6->mask.next_hdr = mask->hdr.proto;
814 	ipv6->mask.hop_limit = mask->hdr.hop_limits;
815 	/* Remove unwanted bits from values. */
816 	for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
817 		ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
818 		ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
819 	}
820 	ipv6->val.flow_label &= ipv6->mask.flow_label;
821 	ipv6->val.next_hdr &= ipv6->mask.next_hdr;
822 	ipv6->val.hop_limit &= ipv6->mask.hop_limit;
823 	return 0;
824 }
825 
826 /**
827  * Convert UDP item to Verbs specification.
828  *
829  * @param item[in]
830  *   Item specification.
831  * @param default_mask[in]
832  *   Default bit-masks to use when item->mask is not provided.
833  * @param data[in, out]
834  *   User structure.
835  */
836 static int
837 mlx5_flow_create_udp(const struct rte_flow_item *item,
838 		     const void *default_mask,
839 		     void *data)
840 {
841 	const struct rte_flow_item_udp *spec = item->spec;
842 	const struct rte_flow_item_udp *mask = item->mask;
843 	struct mlx5_flow *flow = (struct mlx5_flow *)data;
844 	struct ibv_exp_flow_spec_tcp_udp *udp;
845 	unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
846 
847 	++flow->ibv_attr->num_of_specs;
848 	flow->ibv_attr->priority = 0;
849 	flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_UDP |
850 			      IBV_EXP_RX_HASH_DST_PORT_UDP);
851 	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
852 	*udp = (struct ibv_exp_flow_spec_tcp_udp) {
853 		.type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
854 		.size = udp_size,
855 	};
856 	if (!spec)
857 		return 0;
858 	if (!mask)
859 		mask = default_mask;
860 	udp->val.dst_port = spec->hdr.dst_port;
861 	udp->val.src_port = spec->hdr.src_port;
862 	udp->mask.dst_port = mask->hdr.dst_port;
863 	udp->mask.src_port = mask->hdr.src_port;
864 	/* Remove unwanted bits from values. */
865 	udp->val.src_port &= udp->mask.src_port;
866 	udp->val.dst_port &= udp->mask.dst_port;
867 	return 0;
868 }
869 
870 /**
871  * Convert TCP item to Verbs specification.
872  *
873  * @param item[in]
874  *   Item specification.
875  * @param default_mask[in]
876  *   Default bit-masks to use when item->mask is not provided.
877  * @param data[in, out]
878  *   User structure.
879  */
880 static int
881 mlx5_flow_create_tcp(const struct rte_flow_item *item,
882 		     const void *default_mask,
883 		     void *data)
884 {
885 	const struct rte_flow_item_tcp *spec = item->spec;
886 	const struct rte_flow_item_tcp *mask = item->mask;
887 	struct mlx5_flow *flow = (struct mlx5_flow *)data;
888 	struct ibv_exp_flow_spec_tcp_udp *tcp;
889 	unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
890 
891 	++flow->ibv_attr->num_of_specs;
892 	flow->ibv_attr->priority = 0;
893 	flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_TCP |
894 			      IBV_EXP_RX_HASH_DST_PORT_TCP);
895 	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
896 	*tcp = (struct ibv_exp_flow_spec_tcp_udp) {
897 		.type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
898 		.size = tcp_size,
899 	};
900 	if (!spec)
901 		return 0;
902 	if (!mask)
903 		mask = default_mask;
904 	tcp->val.dst_port = spec->hdr.dst_port;
905 	tcp->val.src_port = spec->hdr.src_port;
906 	tcp->mask.dst_port = mask->hdr.dst_port;
907 	tcp->mask.src_port = mask->hdr.src_port;
908 	/* Remove unwanted bits from values. */
909 	tcp->val.src_port &= tcp->mask.src_port;
910 	tcp->val.dst_port &= tcp->mask.dst_port;
911 	return 0;
912 }
913 
914 /**
915  * Convert VXLAN item to Verbs specification.
916  *
917  * @param item[in]
918  *   Item specification.
919  * @param default_mask[in]
920  *   Default bit-masks to use when item->mask is not provided.
921  * @param data[in, out]
922  *   User structure.
923  */
924 static int
925 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
926 		       const void *default_mask,
927 		       void *data)
928 {
929 	const struct rte_flow_item_vxlan *spec = item->spec;
930 	const struct rte_flow_item_vxlan *mask = item->mask;
931 	struct mlx5_flow *flow = (struct mlx5_flow *)data;
932 	struct ibv_exp_flow_spec_tunnel *vxlan;
933 	unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
934 	union vni {
935 		uint32_t vlan_id;
936 		uint8_t vni[4];
937 	} id;
938 
939 	++flow->ibv_attr->num_of_specs;
940 	flow->ibv_attr->priority = 0;
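	/*
	 * The 24-bit VNI is copied into bytes 1-3 of the 32-bit tunnel id
	 * below; byte 0 is kept cleared.
	 */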
941 	id.vni[0] = 0;
942 	vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
943 	*vxlan = (struct ibv_exp_flow_spec_tunnel) {
944 		.type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
945 		.size = size,
946 	};
947 	flow->inner = IBV_EXP_FLOW_SPEC_INNER;
948 	if (!spec)
949 		return 0;
950 	if (!mask)
951 		mask = default_mask;
952 	memcpy(&id.vni[1], spec->vni, 3);
953 	vxlan->val.tunnel_id = id.vlan_id;
954 	memcpy(&id.vni[1], mask->vni, 3);
955 	vxlan->mask.tunnel_id = id.vlan_id;
956 	/* Remove unwanted bits from values. */
957 	vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
958 	return 0;
959 }
960 
961 /**
962  * Convert mark/flag action to Verbs specification.
963  *
964  * @param flow
965  *   Pointer to MLX5 flow structure.
966  * @param mark_id
967  *   Mark identifier.
968  */
969 static int
970 mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
971 {
972 	struct ibv_exp_flow_spec_action_tag *tag;
973 	unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);
974 
975 	tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
976 	*tag = (struct ibv_exp_flow_spec_action_tag){
977 		.type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
978 		.size = size,
979 		.tag_id = mlx5_flow_mark_set(mark_id),
980 	};
981 	++flow->ibv_attr->num_of_specs;
982 	return 0;
983 }
984 
985 /**
986  * Complete flow rule creation with a drop queue.
987  *
988  * @param priv
989  *   Pointer to private structure.
990  * @param flow
991  *   MLX5 flow attributes (filled by priv_flow_validate()).
992  * @param[out] error
993  *   Perform verbose error reporting if not NULL.
994  *
995  * @return
996  *   A flow if the rule could be created, NULL otherwise.
997  */
998 static struct rte_flow *
999 priv_flow_create_action_queue_drop(struct priv *priv,
1000 				   struct mlx5_flow *flow,
1001 				   struct rte_flow_error *error)
1002 {
1003 	struct rte_flow *rte_flow;
1004 #ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
1005 	struct ibv_exp_flow_spec_action_drop *drop;
1006 	unsigned int size = sizeof(struct ibv_exp_flow_spec_action_drop);
1007 #endif
1008 
1009 	assert(priv->pd);
1010 	assert(priv->ctx);
1011 	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
1012 	if (!rte_flow) {
1013 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1014 				   NULL, "cannot allocate flow memory");
1015 		return NULL;
1016 	}
1017 	rte_flow->drop = 1;
1018 #ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
1019 	drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
1020 	*drop = (struct ibv_exp_flow_spec_action_drop){
1021 			.type = IBV_EXP_FLOW_SPEC_ACTION_DROP,
1022 			.size = size,
1023 	};
1024 	++flow->ibv_attr->num_of_specs;
1025 	flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
1026 #endif
1027 	rte_flow->ibv_attr = flow->ibv_attr;
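	/*
	 * While the port is stopped, only the software representation is
	 * kept; the Verbs flow is created later by priv_flow_start().
	 */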
1028 	if (!priv->started)
1029 		return rte_flow;
1030 	rte_flow->qp = priv->flow_drop_queue->qp;
1031 	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
1032 						 rte_flow->ibv_attr);
1033 	if (!rte_flow->ibv_flow) {
1034 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1035 				   NULL, "flow rule creation failure");
1036 		goto error;
1037 	}
1038 	return rte_flow;
1039 error:
1040 	assert(rte_flow);
1041 	rte_free(rte_flow);
1042 	return NULL;
1043 }
1044 
1045 /**
1046  * Complete flow rule creation.
1047  *
1048  * @param priv
1049  *   Pointer to private structure.
1050  * @param flow
1051  *   MLX5 flow attributes (filled by priv_flow_validate()).
1052  * @param action
1053  *   Target action structure.
1054  * @param[out] error
1055  *   Perform verbose error reporting if not NULL.
1056  *
1057  * @return
1058  *   A flow if the rule could be created, NULL otherwise.
1059  */
1060 static struct rte_flow *
1061 priv_flow_create_action_queue(struct priv *priv,
1062 			      struct mlx5_flow *flow,
1063 			      struct mlx5_flow_action *action,
1064 			      struct rte_flow_error *error)
1065 {
1066 	struct rte_flow *rte_flow;
1067 	unsigned int i;
1068 	unsigned int j;
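	/* The indirection table size must be a power of two. */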
1069 	const unsigned int wqs_n = 1 << log2above(action->queues_n);
1070 	struct ibv_exp_wq *wqs[wqs_n];
1071 
1072 	assert(priv->pd);
1073 	assert(priv->ctx);
1074 	assert(!action->drop);
1075 	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
1076 			      sizeof(*rte_flow->rxqs) * action->queues_n, 0);
1077 	if (!rte_flow) {
1078 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1079 				   NULL, "cannot allocate flow memory");
1080 		return NULL;
1081 	}
1082 	for (i = 0; i < action->queues_n; ++i) {
1083 		struct rxq_ctrl *rxq;
1084 
1085 		rxq = container_of((*priv->rxqs)[action->queues[i]],
1086 				   struct rxq_ctrl, rxq);
1087 		wqs[i] = rxq->wq;
1088 		rte_flow->rxqs[i] = &rxq->rxq;
1089 		++rte_flow->rxqs_n;
1090 		rxq->rxq.mark |= action->mark;
1091 	}
1092 	/* Pad indirection table to a power of two by replicating WQs. */
1093 	for (j = 0; i < wqs_n; ++i, ++j) {
1094 		wqs[i] = wqs[j];
1095 		if (j == action->queues_n)
1096 			j = 0;
1097 	}
1098 	rte_flow->mark = action->mark;
1099 	rte_flow->ibv_attr = flow->ibv_attr;
1100 	rte_flow->hash_fields = flow->hash_fields;
1101 	rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
1102 		priv->ctx,
1103 		&(struct ibv_exp_rwq_ind_table_init_attr){
1104 			.pd = priv->pd,
1105 			.log_ind_tbl_size = log2above(action->queues_n),
1106 			.ind_tbl = wqs,
1107 			.comp_mask = 0,
1108 		});
1109 	if (!rte_flow->ind_table) {
1110 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1111 				   NULL, "cannot allocate indirection table");
1112 		goto error;
1113 	}
1114 	rte_flow->qp = ibv_exp_create_qp(
1115 		priv->ctx,
1116 		&(struct ibv_exp_qp_init_attr){
1117 			.qp_type = IBV_QPT_RAW_PACKET,
1118 			.comp_mask =
1119 				IBV_EXP_QP_INIT_ATTR_PD |
1120 				IBV_EXP_QP_INIT_ATTR_PORT |
1121 				IBV_EXP_QP_INIT_ATTR_RX_HASH,
1122 			.pd = priv->pd,
1123 			.rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
1124 				.rx_hash_function =
1125 					IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
1126 				.rx_hash_key_len = rss_hash_default_key_len,
1127 				.rx_hash_key = rss_hash_default_key,
1128 				.rx_hash_fields_mask = rte_flow->hash_fields,
1129 				.rwq_ind_tbl = rte_flow->ind_table,
1130 			},
1131 			.port_num = priv->port,
1132 		});
1133 	if (!rte_flow->qp) {
1134 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1135 				   NULL, "cannot allocate QP");
1136 		goto error;
1137 	}
1138 	if (!priv->started)
1139 		return rte_flow;
1140 	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
1141 						 rte_flow->ibv_attr);
1142 	if (!rte_flow->ibv_flow) {
1143 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1144 				   NULL, "flow rule creation failure");
1145 		goto error;
1146 	}
1147 	return rte_flow;
1148 error:
1149 	assert(rte_flow);
1150 	if (rte_flow->qp)
1151 		ibv_destroy_qp(rte_flow->qp);
1152 	if (rte_flow->ind_table)
1153 		ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
1154 	rte_free(rte_flow);
1155 	return NULL;
1156 }
1157 
1158 /**
1159  * Convert a flow.
1160  *
1161  * @param priv
1162  *   Pointer to private structure.
1163  * @param[in] attr
1164  *   Flow rule attributes.
1165  * @param[in] pattern
1166  *   Pattern specification (list terminated by the END pattern item).
1167  * @param[in] actions
1168  *   Associated actions (list terminated by the END action).
1169  * @param[out] error
1170  *   Perform verbose error reporting if not NULL.
1171  *
1172  * @return
1173  *   A flow on success, NULL otherwise.
1174  */
1175 static struct rte_flow *
1176 priv_flow_create(struct priv *priv,
1177 		 const struct rte_flow_attr *attr,
1178 		 const struct rte_flow_item items[],
1179 		 const struct rte_flow_action actions[],
1180 		 struct rte_flow_error *error)
1181 {
1182 	struct rte_flow *rte_flow;
1183 	struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
1184 	struct mlx5_flow_action action = {
1185 		.queue = 0,
1186 		.drop = 0,
1187 		.mark = 0,
1188 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
1189 		.queues_n = 0,
1190 	};
1191 	int err;
1192 
1193 	err = priv_flow_validate(priv, attr, items, actions, error, &flow,
1194 				 &action);
1195 	if (err)
1196 		goto exit;
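	/*
	 * The first validation pass (without ibv_attr) only computed the
	 * total size of the Verbs specifications in flow.offset; allocate it,
	 * reset the offset and run a second pass to fill the attribute.
	 */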
1197 	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
1198 	flow.offset = sizeof(struct ibv_exp_flow_attr);
1199 	if (!flow.ibv_attr) {
1200 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1201 				   NULL, "cannot allocate ibv_attr memory");
1202 		goto exit;
1203 	}
1204 	*flow.ibv_attr = (struct ibv_exp_flow_attr){
1205 		.type = IBV_EXP_FLOW_ATTR_NORMAL,
1206 		.size = sizeof(struct ibv_exp_flow_attr),
1207 		.priority = attr->priority,
1208 		.num_of_specs = 0,
1209 		.port = 0,
1210 		.flags = 0,
1211 		.reserved = 0,
1212 	};
1213 	flow.inner = 0;
1214 	flow.hash_fields = 0;
1215 	claim_zero(priv_flow_validate(priv, attr, items, actions,
1216 				      error, &flow, &action));
1217 	if (action.mark && !action.drop) {
1218 		mlx5_flow_create_flag_mark(&flow, action.mark_id);
1219 		flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
1220 	}
1221 	if (action.drop)
1222 		rte_flow =
1223 			priv_flow_create_action_queue_drop(priv, &flow, error);
1224 	else
1225 		rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
1226 							 error);
1227 	if (!rte_flow)
1228 		goto exit;
1229 	return rte_flow;
1230 exit:
1231 	rte_free(flow.ibv_attr);
1232 	return NULL;
1233 }
1234 
1235 /**
1236  * Create a flow.
1237  *
1238  * @see rte_flow_create()
1239  * @see rte_flow_ops
1240  */
1241 struct rte_flow *
1242 mlx5_flow_create(struct rte_eth_dev *dev,
1243 		 const struct rte_flow_attr *attr,
1244 		 const struct rte_flow_item items[],
1245 		 const struct rte_flow_action actions[],
1246 		 struct rte_flow_error *error)
1247 {
1248 	struct priv *priv = dev->data->dev_private;
1249 	struct rte_flow *flow;
1250 
1251 	priv_lock(priv);
1252 	flow = priv_flow_create(priv, attr, items, actions, error);
1253 	if (flow) {
1254 		TAILQ_INSERT_TAIL(&priv->flows, flow, next);
1255 		DEBUG("Flow created %p", (void *)flow);
1256 	}
1257 	priv_unlock(priv);
1258 	return flow;
1259 }
1260 
1261 /**
1262  * Destroy a flow.
1263  *
1264  * @param priv
1265  *   Pointer to private structure.
1266  * @param[in] flow
1267  *   Flow to destroy.
1268  */
1269 static void
1270 priv_flow_destroy(struct priv *priv,
1271 		  struct rte_flow *flow)
1272 {
1273 	TAILQ_REMOVE(&priv->flows, flow, next);
1274 	if (flow->ibv_flow)
1275 		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1276 	if (flow->drop)
1277 		goto free;
1278 	if (flow->qp)
1279 		claim_zero(ibv_destroy_qp(flow->qp));
1280 	if (flow->ind_table)
1281 		claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
1282 	if (flow->drop && flow->wq)
1283 		claim_zero(ibv_exp_destroy_wq(flow->wq));
1284 	if (flow->drop && flow->cq)
1285 		claim_zero(ibv_destroy_cq(flow->cq));
1286 	if (flow->mark) {
1287 		struct rte_flow *tmp;
1288 		struct rxq *rxq;
1289 		uint32_t mark_n = 0;
1290 		uint32_t queue_n;
1291 
1292 		/*
1293 		 * To remove the mark from the queue, the queue must not be
1294 		 * present in any other marked flow (RSS or not).
1295 		 */
1296 		for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
1297 			rxq = flow->rxqs[queue_n];
1298 			for (tmp = TAILQ_FIRST(&priv->flows);
1299 			     tmp;
1300 			     tmp = TAILQ_NEXT(tmp, next)) {
1301 				uint32_t tqueue_n;
1302 
1303 				if (tmp->drop)
1304 					continue;
1305 				for (tqueue_n = 0;
1306 				     tqueue_n < tmp->rxqs_n;
1307 				     ++tqueue_n) {
1308 					struct rxq *trxq;
1309 
1310 					trxq = tmp->rxqs[tqueue_n];
1311 					if (rxq == trxq)
1312 						++mark_n;
1313 				}
1314 			}
1315 			rxq->mark = !!mark_n;
1316 		}
1317 	}
1318 free:
1319 	rte_free(flow->ibv_attr);
1320 	DEBUG("Flow destroyed %p", (void *)flow);
1321 	rte_free(flow);
1322 }
1323 
1324 /**
1325  * Destroy a flow.
1326  *
1327  * @see rte_flow_destroy()
1328  * @see rte_flow_ops
1329  */
1330 int
1331 mlx5_flow_destroy(struct rte_eth_dev *dev,
1332 		  struct rte_flow *flow,
1333 		  struct rte_flow_error *error)
1334 {
1335 	struct priv *priv = dev->data->dev_private;
1336 
1337 	(void)error;
1338 	priv_lock(priv);
1339 	priv_flow_destroy(priv, flow);
1340 	priv_unlock(priv);
1341 	return 0;
1342 }
1343 
1344 /**
1345  * Destroy all flows.
1346  *
1347  * @param priv
1348  *   Pointer to private structure.
1349  */
1350 static void
1351 priv_flow_flush(struct priv *priv)
1352 {
1353 	while (!TAILQ_EMPTY(&priv->flows)) {
1354 		struct rte_flow *flow;
1355 
1356 		flow = TAILQ_FIRST(&priv->flows);
1357 		priv_flow_destroy(priv, flow);
1358 	}
1359 }
1360 
1361 /**
1362  * Destroy all flows.
1363  *
1364  * @see rte_flow_flush()
1365  * @see rte_flow_ops
1366  */
1367 int
1368 mlx5_flow_flush(struct rte_eth_dev *dev,
1369 		struct rte_flow_error *error)
1370 {
1371 	struct priv *priv = dev->data->dev_private;
1372 
1373 	(void)error;
1374 	priv_lock(priv);
1375 	priv_flow_flush(priv);
1376 	priv_unlock(priv);
1377 	return 0;
1378 }
1379 
1380 /**
1381  * Create drop queue.
1382  *
1383  * @param priv
1384  *   Pointer to private structure.
1385  *
1386  * @return
1387  *   0 on success, -1 otherwise.
1388  */
1389 static int
1390 priv_flow_create_drop_queue(struct priv *priv)
1391 {
1392 	struct rte_flow_drop *fdq = NULL;
1393 	unsigned int i;
1394 
1395 	assert(priv->pd);
1396 	assert(priv->ctx);
1397 	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
1398 	if (!fdq) {
1399 		WARN("cannot allocate memory for drop queue");
1400 		return -1;
1401 	}
1402 	fdq->cq = ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
1403 			&(struct ibv_exp_cq_init_attr){
1404 			.comp_mask = 0,
1405 			});
1406 	if (!fdq->cq) {
1407 		WARN("cannot allocate CQ for drop queue");
1408 		goto error;
1409 	}
1410 	for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
1411 		fdq->wqs[i] = ibv_exp_create_wq(priv->ctx,
1412 				&(struct ibv_exp_wq_init_attr){
1413 				.wq_type = IBV_EXP_WQT_RQ,
1414 				.max_recv_wr = 1,
1415 				.max_recv_sge = 1,
1416 				.pd = priv->pd,
1417 				.cq = fdq->cq,
1418 				});
1419 		if (!fdq->wqs[i]) {
1420 			WARN("cannot allocate WQ for drop queue");
1421 			goto error;
1422 		}
1423 	}
1424 	fdq->ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
1425 			&(struct ibv_exp_rwq_ind_table_init_attr){
1426 			.pd = priv->pd,
1427 			.log_ind_tbl_size = 0,
1428 			.ind_tbl = fdq->wqs,
1429 			.comp_mask = 0,
1430 			});
1431 	if (!fdq->ind_table) {
1432 		WARN("cannot allocate indirection table for drop queue");
1433 		goto error;
1434 	}
1435 	fdq->qp = ibv_exp_create_qp(priv->ctx,
1436 		&(struct ibv_exp_qp_init_attr){
1437 			.qp_type = IBV_QPT_RAW_PACKET,
1438 			.comp_mask =
1439 				IBV_EXP_QP_INIT_ATTR_PD |
1440 				IBV_EXP_QP_INIT_ATTR_PORT |
1441 				IBV_EXP_QP_INIT_ATTR_RX_HASH,
1442 			.pd = priv->pd,
1443 			.rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
1444 				.rx_hash_function =
1445 					IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
1446 				.rx_hash_key_len = rss_hash_default_key_len,
1447 				.rx_hash_key = rss_hash_default_key,
1448 				.rx_hash_fields_mask = 0,
1449 				.rwq_ind_tbl = fdq->ind_table,
1450 				},
1451 			.port_num = priv->port,
1452 			});
1453 	if (!fdq->qp) {
1454 		WARN("cannot allocate QP for drop queue");
1455 		goto error;
1456 	}
1457 	priv->flow_drop_queue = fdq;
1458 	return 0;
1459 error:
1460 	if (fdq->qp)
1461 		claim_zero(ibv_destroy_qp(fdq->qp));
1462 	if (fdq->ind_table)
1463 		claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
1464 	for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
1465 		if (fdq->wqs[i])
1466 			claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
1467 	}
1468 	if (fdq->cq)
1469 		claim_zero(ibv_destroy_cq(fdq->cq));
1470 	if (fdq)
1471 		rte_free(fdq);
1472 	priv->flow_drop_queue = NULL;
1473 	return -1;
1474 }
1475 
1476 /**
1477  * Delete drop queue.
1478  *
1479  * @param priv
1480  *   Pointer to private structure.
1481  */
1482 static void
1483 priv_flow_delete_drop_queue(struct priv *priv)
1484 {
1485 	struct rte_flow_drop *fdq = priv->flow_drop_queue;
1486 	unsigned int i;
1487 
1488 	if (!fdq)
1489 		return;
1490 	if (fdq->qp)
1491 		claim_zero(ibv_destroy_qp(fdq->qp));
1492 	if (fdq->ind_table)
1493 		claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
1494 	for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
1495 		if (fdq->wqs[i])
1496 			claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
1497 	}
1498 	if (fdq->cq)
1499 		claim_zero(ibv_destroy_cq(fdq->cq));
1500 	rte_free(fdq);
1501 	priv->flow_drop_queue = NULL;
1502 }
1503 
1504 /**
1505  * Remove all flows.
1506  *
1507  * Called by dev_stop() to remove all flows.
1508  *
1509  * @param priv
1510  *   Pointer to private structure.
1511  */
1512 void
1513 priv_flow_stop(struct priv *priv)
1514 {
1515 	struct rte_flow *flow;
1516 
1517 	TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
1518 		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1519 		flow->ibv_flow = NULL;
1520 		if (flow->mark) {
1521 			unsigned int n;
1522 
1523 			for (n = 0; n < flow->rxqs_n; ++n)
1524 				flow->rxqs[n]->mark = 0;
1525 		}
1526 		DEBUG("Flow %p removed", (void *)flow);
1527 	}
1528 	priv_flow_delete_drop_queue(priv);
1529 }
1530 
1531 /**
1532  * Add all flows.
1533  *
1534  * @param priv
1535  *   Pointer to private structure.
1536  *
1537  * @return
1538  *   0 on success, an errno value otherwise and rte_errno is set.
1539  */
1540 int
1541 priv_flow_start(struct priv *priv)
1542 {
1543 	int ret;
1544 	struct rte_flow *flow;
1545 
1546 	ret = priv_flow_create_drop_queue(priv);
1547 	if (ret)
1548 		return -1;
1549 	TAILQ_FOREACH(flow, &priv->flows, next) {
1550 		struct ibv_qp *qp;
1551 
1552 		if (flow->drop)
1553 			qp = priv->flow_drop_queue->qp;
1554 		else
1555 			qp = flow->qp;
1556 		flow->ibv_flow = ibv_exp_create_flow(qp, flow->ibv_attr);
1557 		if (!flow->ibv_flow) {
1558 			DEBUG("Flow %p cannot be applied", (void *)flow);
1559 			rte_errno = EINVAL;
1560 			return rte_errno;
1561 		}
1562 		DEBUG("Flow %p applied", (void *)flow);
1563 		if (flow->mark) {
1564 			unsigned int n;
1565 
1566 			for (n = 0; n < flow->rxqs_n; ++n)
1567 				flow->rxqs[n]->mark = 1;
1568 		}
1569 	}
1570 	return 0;
1571 }
1572 
1573 /**
1574  * Verify if the Rx queue is used in a flow.
1575  *
1576  * @param priv
1577  *   Pointer to private structure.
1578  * @param rxq
1579  *   Pointer to the queue to search.
1580  *
1581  * @return
1582  *   Nonzero if the queue is used by a flow.
1583  */
1584 int
1585 priv_flow_rxq_in_use(struct priv *priv, struct rxq *rxq)
1586 {
1587 	struct rte_flow *flow;
1588 
1589 	for (flow = TAILQ_FIRST(&priv->flows);
1590 	     flow;
1591 	     flow = TAILQ_NEXT(flow, next)) {
1592 		unsigned int n;
1593 
1594 		if (flow->drop)
1595 			continue;
1596 		for (n = 0; n < flow->rxqs_n; ++n) {
1597 			if (flow->rxqs[n] == rxq)
1598 				return 1;
1599 		}
1600 	}
1601 	return 0;
1602 }
1603 
1604 /**
1605  * Isolated mode.
1606  *
1607  * @see rte_flow_isolate()
1608  * @see rte_flow_ops
1609  */
1610 int
1611 mlx5_flow_isolate(struct rte_eth_dev *dev,
1612 		  int enable,
1613 		  struct rte_flow_error *error)
1614 {
1615 	struct priv *priv = dev->data->dev_private;
1616 
1617 	priv_lock(priv);
1618 	if (priv->started) {
1619 		rte_flow_error_set(error, EBUSY,
1620 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1621 				   NULL,
1622 				   "port must be stopped first");
1623 		priv_unlock(priv);
1624 		return -rte_errno;
1625 	}
1626 	priv->isolated = !!enable;
1627 	priv_unlock(priv);
1628 	return 0;
1629 }
1630