xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision 5b590fbe09b6163a55f279bc5d4b85dce39f1d49)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright 2016 6WIND S.A.
5  *   Copyright 2016 Mellanox.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/queue.h>
35 #include <string.h>
36 
37 /* Verbs header. */
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
39 #ifdef PEDANTIC
40 #pragma GCC diagnostic ignored "-Wpedantic"
41 #endif
42 #include <infiniband/verbs.h>
43 #ifdef PEDANTIC
44 #pragma GCC diagnostic error "-Wpedantic"
45 #endif
46 
47 #include <rte_ethdev.h>
48 #include <rte_flow.h>
49 #include <rte_flow_driver.h>
50 #include <rte_malloc.h>
51 
52 #include "mlx5.h"
53 #include "mlx5_prm.h"
54 
55 /* Define minimal priority for control plane flows. */
56 #define MLX5_CTRL_FLOW_PRIORITY 4
57 
58 /* Internet Protocol versions. */
59 #define MLX5_IPV4 4
60 #define MLX5_IPV6 6
61 
62 /* Dev ops structure defined in mlx5.c */
63 extern const struct eth_dev_ops mlx5_dev_ops;
64 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
65 
66 static int
67 mlx5_flow_create_eth(const struct rte_flow_item *item,
68 		     const void *default_mask,
69 		     void *data);
70 
71 static int
72 mlx5_flow_create_vlan(const struct rte_flow_item *item,
73 		      const void *default_mask,
74 		      void *data);
75 
76 static int
77 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
78 		      const void *default_mask,
79 		      void *data);
80 
81 static int
82 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
83 		      const void *default_mask,
84 		      void *data);
85 
86 static int
87 mlx5_flow_create_udp(const struct rte_flow_item *item,
88 		     const void *default_mask,
89 		     void *data);
90 
91 static int
92 mlx5_flow_create_tcp(const struct rte_flow_item *item,
93 		     const void *default_mask,
94 		     void *data);
95 
96 static int
97 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
98 		       const void *default_mask,
99 		       void *data);
100 
101 struct mlx5_flow_parse;
102 
103 static void
104 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
105 		      unsigned int size);
106 
107 static int
108 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
109 
110 /* Hash RX queue types. */
111 enum hash_rxq_type {
112 	HASH_RXQ_TCPV4,
113 	HASH_RXQ_UDPV4,
114 	HASH_RXQ_IPV4,
115 	HASH_RXQ_TCPV6,
116 	HASH_RXQ_UDPV6,
117 	HASH_RXQ_IPV6,
118 	HASH_RXQ_ETH,
119 };
120 
121 /* Initialization data for hash RX queue. */
122 struct hash_rxq_init {
123 	uint64_t hash_fields; /* Fields that participate in the hash. */
124 	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
125 	unsigned int flow_priority; /* Flow priority to use. */
126 	unsigned int ip_version; /* Internet protocol. */
127 };
128 
129 /* Initialization data for hash RX queues. */
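/*
 * flow_priority offsets the rule priority: L4 entries use 0, L3 entries use 1
 * and the L2 catch-all uses 2, so more specific layers take precedence.
 */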
130 const struct hash_rxq_init hash_rxq_init[] = {
131 	[HASH_RXQ_TCPV4] = {
132 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
133 				IBV_RX_HASH_DST_IPV4 |
134 				IBV_RX_HASH_SRC_PORT_TCP |
135 				IBV_RX_HASH_DST_PORT_TCP),
136 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
137 		.flow_priority = 0,
138 		.ip_version = MLX5_IPV4,
139 	},
140 	[HASH_RXQ_UDPV4] = {
141 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
142 				IBV_RX_HASH_DST_IPV4 |
143 				IBV_RX_HASH_SRC_PORT_UDP |
144 				IBV_RX_HASH_DST_PORT_UDP),
145 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
146 		.flow_priority = 0,
147 		.ip_version = MLX5_IPV4,
148 	},
149 	[HASH_RXQ_IPV4] = {
150 		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
151 				IBV_RX_HASH_DST_IPV4),
152 		.dpdk_rss_hf = (ETH_RSS_IPV4 |
153 				ETH_RSS_FRAG_IPV4),
154 		.flow_priority = 1,
155 		.ip_version = MLX5_IPV4,
156 	},
157 	[HASH_RXQ_TCPV6] = {
158 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
159 				IBV_RX_HASH_DST_IPV6 |
160 				IBV_RX_HASH_SRC_PORT_TCP |
161 				IBV_RX_HASH_DST_PORT_TCP),
162 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
163 		.flow_priority = 0,
164 		.ip_version = MLX5_IPV6,
165 	},
166 	[HASH_RXQ_UDPV6] = {
167 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
168 				IBV_RX_HASH_DST_IPV6 |
169 				IBV_RX_HASH_SRC_PORT_UDP |
170 				IBV_RX_HASH_DST_PORT_UDP),
171 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
172 		.flow_priority = 0,
173 		.ip_version = MLX5_IPV6,
174 	},
175 	[HASH_RXQ_IPV6] = {
176 		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
177 				IBV_RX_HASH_DST_IPV6),
178 		.dpdk_rss_hf = (ETH_RSS_IPV6 |
179 				ETH_RSS_FRAG_IPV6),
180 		.flow_priority = 1,
181 		.ip_version = MLX5_IPV6,
182 	},
183 	[HASH_RXQ_ETH] = {
184 		.hash_fields = 0,
185 		.dpdk_rss_hf = 0,
186 		.flow_priority = 2,
187 	},
188 };
189 
190 /* Number of entries in hash_rxq_init[]. */
191 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
192 
193 /** Structure for Drop queue. */
194 struct mlx5_hrxq_drop {
195 	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
196 	struct ibv_qp *qp; /**< Verbs queue pair. */
197 	struct ibv_wq *wq; /**< Verbs work queue. */
198 	struct ibv_cq *cq; /**< Verbs completion queue. */
199 };
200 
201 /* Flow structures. */
202 struct mlx5_flow {
203 	uint64_t hash_fields; /**< Fields that participate in the hash. */
204 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
205 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
206 	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
207 };
208 
209 /* Drop flow structures. */
210 struct mlx5_flow_drop {
211 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
212 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
213 };
214 
215 struct rte_flow {
216 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
217 	uint32_t mark:1; /**< Set if the flow is marked. */
218 	uint32_t drop:1; /**< Set if the flow targets the drop queue. */
219 	uint16_t queues_n; /**< Number of entries in queues[]. */
220 	uint16_t (*queues)[]; /**< Queue indexes to use. */
221 	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
222 	uint8_t rss_key[40]; /**< Copy of the RSS key. */
223 	union {
224 		struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
225 		/**< Flow with Rx queue. */
226 		struct mlx5_flow_drop drxq; /**< Flow with drop Rx queue. */
227 	};
228 };
229 
230 /** Static initializer for items. */
231 #define ITEMS(...) \
232 	(const enum rte_flow_item_type []){ \
233 		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
234 	}
235 
236 /** Structure to generate a simple graph of layers supported by the NIC. */
237 struct mlx5_flow_items {
238 	/** List of possible actions for these items. */
239 	const enum rte_flow_action_type *const actions;
240 	/** Bit-masks corresponding to the possibilities for the item. */
241 	const void *mask;
242 	/**
243 	 * Default bit-masks to use when item->mask is not provided. When
244 	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
245 	 * used instead.
246 	 */
247 	const void *default_mask;
248 	/** Bit-mask size in bytes. */
249 	const unsigned int mask_sz;
250 	/**
251 	 * Conversion function from rte_flow to NIC specific flow.
252 	 *
253 	 * @param item
254 	 *   rte_flow item to convert.
255 	 * @param default_mask
256 	 *   Default bit-masks to use when item->mask is not provided.
257 	 * @param data
258 	 *   Internal structure to store the conversion.
259 	 *
260 	 * @return
261 	 *   0 on success, negative value otherwise.
262 	 */
263 	int (*convert)(const struct rte_flow_item *item,
264 		       const void *default_mask,
265 		       void *data);
266 	/** Size in bytes of the destination structure. */
267 	const unsigned int dst_sz;
268 	/** List of possible following items. */
269 	const enum rte_flow_item_type *const items;
270 };
271 
272 /** Valid actions for this PMD. */
273 static const enum rte_flow_action_type valid_actions[] = {
274 	RTE_FLOW_ACTION_TYPE_DROP,
275 	RTE_FLOW_ACTION_TYPE_QUEUE,
276 	RTE_FLOW_ACTION_TYPE_MARK,
277 	RTE_FLOW_ACTION_TYPE_FLAG,
278 	RTE_FLOW_ACTION_TYPE_END,
279 };
280 
281 /** Graph of supported items and associated actions. */
282 static const struct mlx5_flow_items mlx5_flow_items[] = {
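	/*
	 * The END entry is the graph root: its .items field lists the item
	 * types a pattern may start with.
	 */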
283 	[RTE_FLOW_ITEM_TYPE_END] = {
284 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
285 			       RTE_FLOW_ITEM_TYPE_VXLAN),
286 	},
287 	[RTE_FLOW_ITEM_TYPE_ETH] = {
288 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
289 			       RTE_FLOW_ITEM_TYPE_IPV4,
290 			       RTE_FLOW_ITEM_TYPE_IPV6),
291 		.actions = valid_actions,
292 		.mask = &(const struct rte_flow_item_eth){
293 			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
294 			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
295 			.type = -1,
296 		},
297 		.default_mask = &rte_flow_item_eth_mask,
298 		.mask_sz = sizeof(struct rte_flow_item_eth),
299 		.convert = mlx5_flow_create_eth,
300 		.dst_sz = sizeof(struct ibv_flow_spec_eth),
301 	},
302 	[RTE_FLOW_ITEM_TYPE_VLAN] = {
303 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
304 			       RTE_FLOW_ITEM_TYPE_IPV6),
305 		.actions = valid_actions,
306 		.mask = &(const struct rte_flow_item_vlan){
307 			.tci = -1,
308 		},
309 		.default_mask = &rte_flow_item_vlan_mask,
310 		.mask_sz = sizeof(struct rte_flow_item_vlan),
311 		.convert = mlx5_flow_create_vlan,
312 		.dst_sz = 0,
313 	},
314 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
315 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
316 			       RTE_FLOW_ITEM_TYPE_TCP),
317 		.actions = valid_actions,
318 		.mask = &(const struct rte_flow_item_ipv4){
319 			.hdr = {
320 				.src_addr = -1,
321 				.dst_addr = -1,
322 				.type_of_service = -1,
323 				.next_proto_id = -1,
324 			},
325 		},
326 		.default_mask = &rte_flow_item_ipv4_mask,
327 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
328 		.convert = mlx5_flow_create_ipv4,
329 		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
330 	},
331 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
332 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
333 			       RTE_FLOW_ITEM_TYPE_TCP),
334 		.actions = valid_actions,
335 		.mask = &(const struct rte_flow_item_ipv6){
336 			.hdr = {
337 				.src_addr = {
338 					0xff, 0xff, 0xff, 0xff,
339 					0xff, 0xff, 0xff, 0xff,
340 					0xff, 0xff, 0xff, 0xff,
341 					0xff, 0xff, 0xff, 0xff,
342 				},
343 				.dst_addr = {
344 					0xff, 0xff, 0xff, 0xff,
345 					0xff, 0xff, 0xff, 0xff,
346 					0xff, 0xff, 0xff, 0xff,
347 					0xff, 0xff, 0xff, 0xff,
348 				},
349 				.vtc_flow = -1,
350 				.proto = -1,
351 				.hop_limits = -1,
352 			},
353 		},
354 		.default_mask = &rte_flow_item_ipv6_mask,
355 		.mask_sz = sizeof(struct rte_flow_item_ipv6),
356 		.convert = mlx5_flow_create_ipv6,
357 		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
358 	},
359 	[RTE_FLOW_ITEM_TYPE_UDP] = {
360 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
361 		.actions = valid_actions,
362 		.mask = &(const struct rte_flow_item_udp){
363 			.hdr = {
364 				.src_port = -1,
365 				.dst_port = -1,
366 			},
367 		},
368 		.default_mask = &rte_flow_item_udp_mask,
369 		.mask_sz = sizeof(struct rte_flow_item_udp),
370 		.convert = mlx5_flow_create_udp,
371 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
372 	},
373 	[RTE_FLOW_ITEM_TYPE_TCP] = {
374 		.actions = valid_actions,
375 		.mask = &(const struct rte_flow_item_tcp){
376 			.hdr = {
377 				.src_port = -1,
378 				.dst_port = -1,
379 			},
380 		},
381 		.default_mask = &rte_flow_item_tcp_mask,
382 		.mask_sz = sizeof(struct rte_flow_item_tcp),
383 		.convert = mlx5_flow_create_tcp,
384 		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
385 	},
386 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
387 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
388 		.actions = valid_actions,
389 		.mask = &(const struct rte_flow_item_vxlan){
390 			.vni = "\xff\xff\xff",
391 		},
392 		.default_mask = &rte_flow_item_vxlan_mask,
393 		.mask_sz = sizeof(struct rte_flow_item_vxlan),
394 		.convert = mlx5_flow_create_vxlan,
395 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
396 	},
397 };
398 
399 /** Structure to pass to the conversion function. */
400 struct mlx5_flow_parse {
401 	uint32_t inner; /**< Set once VXLAN is encountered. */
402 	uint32_t create:1;
403 	/**< Whether resources should remain after validation. */
404 	uint32_t drop:1; /**< Target is a drop queue. */
405 	uint32_t mark:1; /**< Mark is present in the flow. */
406 	uint32_t mark_id; /**< Mark identifier. */
407 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
408 	uint16_t queues_n; /**< Number of entries in queues[]. */
409 	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
410 	uint8_t rss_key[40]; /**< Copy of the RSS key. */
411 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
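	/*
	 * A flow targets either hash Rx queues or the drop queue, never both,
	 * hence the shared storage below.
	 */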
412 	union {
413 		struct {
414 			struct ibv_flow_attr *ibv_attr;
415 			/**< Pointer to Verbs attributes. */
416 			unsigned int offset;
417 			/**< Current position or total size of the attribute. */
418 		} queue[RTE_DIM(hash_rxq_init)];
419 		struct {
420 			struct ibv_flow_attr *ibv_attr;
421 			/**< Pointer to Verbs attributes. */
422 			unsigned int offset;
423 			/**< Current position or total size of the attribute. */
424 		} drop_q;
425 	};
426 };
427 
428 static const struct rte_flow_ops mlx5_flow_ops = {
429 	.validate = mlx5_flow_validate,
430 	.create = mlx5_flow_create,
431 	.destroy = mlx5_flow_destroy,
432 	.flush = mlx5_flow_flush,
433 	.query = NULL,
434 	.isolate = mlx5_flow_isolate,
435 };
436 
437 /* Convert FDIR request to Generic flow. */
438 struct mlx5_fdir {
439 	struct rte_flow_attr attr;
440 	struct rte_flow_action actions[2];
441 	struct rte_flow_item items[4];
442 	struct rte_flow_item_eth l2;
443 	union {
444 		struct rte_flow_item_ipv4 ipv4;
445 		struct rte_flow_item_ipv6 ipv6;
446 	} l3;
447 	union {
448 		struct rte_flow_item_udp udp;
449 		struct rte_flow_item_tcp tcp;
450 	} l4;
451 	struct rte_flow_action_queue queue;
452 };
453 
454 /* Verbs specification header. */
455 struct ibv_spec_header {
456 	enum ibv_flow_spec_type type;
457 	uint16_t size;
458 };
459 
460 /**
461  * Check support for a given item.
462  *
463  * @param item[in]
464  *   Item specification.
465  * @param mask[in]
466  *   Bit-masks covering supported fields to compare with spec, last and mask in
467  *   \item.
468  * @param size
469  *   Bit-mask size in bytes.
470  *
471  * @return
472  *   0 on success, a non-zero value otherwise.
473  */
474 static int
475 mlx5_flow_item_validate(const struct rte_flow_item *item,
476 			const uint8_t *mask, unsigned int size)
477 {
478 	int ret = 0;
479 
480 	if (!item->spec && (item->mask || item->last))
481 		return -1;
482 	if (item->spec && !item->mask) {
483 		unsigned int i;
484 		const uint8_t *spec = item->spec;
485 
486 		for (i = 0; i < size; ++i)
487 			if ((spec[i] | mask[i]) != mask[i])
488 				return -1;
489 	}
490 	if (item->last && !item->mask) {
491 		unsigned int i;
492 		const uint8_t *spec = item->last;
493 
494 		for (i = 0; i < size; ++i)
495 			if ((spec[i] | mask[i]) != mask[i])
496 				return -1;
497 	}
498 	if (item->mask) {
499 		unsigned int i;
500 		const uint8_t *spec = item->mask;
501 
502 		for (i = 0; i < size; ++i)
503 			if ((spec[i] | mask[i]) != mask[i])
504 				return -1;
505 	}
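	/*
	 * A spec/last range is only accepted when both bounds are identical
	 * once the mask is applied.
	 */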
506 	if (item->spec && item->last) {
507 		uint8_t spec[size];
508 		uint8_t last[size];
509 		const uint8_t *apply = mask;
510 		unsigned int i;
511 
512 		if (item->mask)
513 			apply = item->mask;
514 		for (i = 0; i < size; ++i) {
515 			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
516 			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
517 		}
518 		ret = memcmp(spec, last, size);
519 	}
520 	return ret;
521 }
522 
523 /**
524  * Copy the RSS configuration from the user-provided one.
525  *
526  * @param priv
527  *   Pointer to private structure.
528  * @param parser
529  *   Internal parser structure.
530  * @param rss_conf
531  *   User RSS configuration to save.
532  *
533  * @return
534  *   0 on success, errno value on failure.
535  */
536 static int
537 priv_flow_convert_rss_conf(struct priv *priv,
538 			   struct mlx5_flow_parse *parser,
539 			   const struct rte_eth_rss_conf *rss_conf)
540 {
541 	const struct rte_eth_rss_conf *rss =
542 		rss_conf ? rss_conf : &priv->rss_conf;
543 
544 	if (rss->rss_key_len > 40)
545 		return EINVAL;
546 	parser->rss_conf.rss_key_len = rss->rss_key_len;
547 	parser->rss_conf.rss_hf = rss->rss_hf;
548 	memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
549 	parser->rss_conf.rss_key = parser->rss_key;
550 	return 0;
551 }
552 
553 /**
554  * Validate flow rule attributes.
555  *
556  * @param priv
557  *   Pointer to private structure.
558  * @param[in] attr
559  *   Flow rule attributes.
560  * @param[out] error
561  *   Perform verbose error reporting if not NULL.
562  * @param[in, out] parser
563  *   Internal parser structure.
564  *
565  * @return
566  *   0 on success, a negative errno value otherwise and rte_errno is set.
567  */
568 static int
569 priv_flow_convert_attributes(struct priv *priv,
570 			     const struct rte_flow_attr *attr,
571 			     struct rte_flow_error *error,
572 			     struct mlx5_flow_parse *parser)
573 {
574 	(void)priv;
575 	(void)parser;
576 	if (attr->group) {
577 		rte_flow_error_set(error, ENOTSUP,
578 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
579 				   NULL,
580 				   "groups are not supported");
581 		return -rte_errno;
582 	}
583 	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
584 		rte_flow_error_set(error, ENOTSUP,
585 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
586 				   NULL,
587 				   "priorities are not supported");
588 		return -rte_errno;
589 	}
590 	if (attr->egress) {
591 		rte_flow_error_set(error, ENOTSUP,
592 				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
593 				   NULL,
594 				   "egress is not supported");
595 		return -rte_errno;
596 	}
597 	if (!attr->ingress) {
598 		rte_flow_error_set(error, ENOTSUP,
599 				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
600 				   NULL,
601 				   "only ingress is supported");
602 		return -rte_errno;
603 	}
604 	return 0;
605 }
606 
607 /**
608  * Extract the requested actions into the parser.
609  *
610  * @param priv
611  *   Pointer to private structure.
612  * @param[in] actions
613  *   Associated actions (list terminated by the END action).
614  * @param[out] error
615  *   Perform verbose error reporting if not NULL.
616  * @param[in, out] parser
617  *   Internal parser structure.
618  *
619  * @return
620  *   0 on success, a negative errno value otherwise and rte_errno is set.
621  */
622 static int
623 priv_flow_convert_actions(struct priv *priv,
624 			  const struct rte_flow_action actions[],
625 			  struct rte_flow_error *error,
626 			  struct mlx5_flow_parse *parser)
627 {
628 	/*
629 	 * Add the default RSS configuration, which Verbs needs to create the QP
630 	 * even when no RSS action is requested.
631 	 */
632 	priv_flow_convert_rss_conf(priv, parser,
633 				   (const struct rte_eth_rss_conf *)
634 				   &priv->rss_conf);
635 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
636 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
637 			continue;
638 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
639 			parser->drop = 1;
640 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
641 			const struct rte_flow_action_queue *queue =
642 				(const struct rte_flow_action_queue *)
643 				actions->conf;
644 			uint16_t n;
645 			uint16_t found = 0;
646 
647 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
648 				goto exit_action_not_supported;
649 			for (n = 0; n < parser->queues_n; ++n) {
650 				if (parser->queues[n] == queue->index) {
651 					found = 1;
652 					break;
653 				}
654 			}
655 			if (parser->queues_n > 1 && !found) {
656 				rte_flow_error_set(error, ENOTSUP,
657 					   RTE_FLOW_ERROR_TYPE_ACTION,
658 					   actions,
659 					   "queue action not in RSS queues");
660 				return -rte_errno;
661 			}
662 			if (!found) {
663 				parser->queues_n = 1;
664 				parser->queues[0] = queue->index;
665 			}
666 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
667 			const struct rte_flow_action_rss *rss =
668 				(const struct rte_flow_action_rss *)
669 				actions->conf;
670 			uint16_t n;
671 
672 			if (!rss || !rss->num) {
673 				rte_flow_error_set(error, EINVAL,
674 						   RTE_FLOW_ERROR_TYPE_ACTION,
675 						   actions,
676 						   "no valid queues");
677 				return -rte_errno;
678 			}
679 			if (parser->queues_n == 1) {
680 				uint16_t found = 0;
681 
682 				assert(parser->queues_n);
683 				for (n = 0; n < rss->num; ++n) {
684 					if (parser->queues[0] ==
685 					    rss->queue[n]) {
686 						found = 1;
687 						break;
688 					}
689 				}
690 				if (!found) {
691 					rte_flow_error_set(error, ENOTSUP,
692 						   RTE_FLOW_ERROR_TYPE_ACTION,
693 						   actions,
694 						   "queue action not in RSS"
695 						   " queues");
696 					return -rte_errno;
697 				}
698 			}
699 			for (n = 0; n < rss->num; ++n) {
700 				if (rss->queue[n] >= priv->rxqs_n) {
701 					rte_flow_error_set(error, EINVAL,
702 						   RTE_FLOW_ERROR_TYPE_ACTION,
703 						   actions,
704 						   "queue id > number of"
705 						   " queues");
706 					return -rte_errno;
707 				}
708 			}
709 			for (n = 0; n < rss->num; ++n)
710 				parser->queues[n] = rss->queue[n];
711 			parser->queues_n = rss->num;
712 			if (priv_flow_convert_rss_conf(priv, parser,
713 						       rss->rss_conf)) {
714 				rte_flow_error_set(error, EINVAL,
715 						   RTE_FLOW_ERROR_TYPE_ACTION,
716 						   actions,
717 						   "wrong RSS configuration");
718 				return -rte_errno;
719 			}
720 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
721 			const struct rte_flow_action_mark *mark =
722 				(const struct rte_flow_action_mark *)
723 				actions->conf;
724 
725 			if (!mark) {
726 				rte_flow_error_set(error, EINVAL,
727 						   RTE_FLOW_ERROR_TYPE_ACTION,
728 						   actions,
729 						   "mark must be defined");
730 				return -rte_errno;
731 			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
732 				rte_flow_error_set(error, ENOTSUP,
733 						   RTE_FLOW_ERROR_TYPE_ACTION,
734 						   actions,
735 						   "mark must be between 0"
736 						   " and 16777199");
737 				return -rte_errno;
738 			}
739 			parser->mark = 1;
740 			parser->mark_id = mark->id;
741 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
742 			parser->mark = 1;
743 		} else {
744 			goto exit_action_not_supported;
745 		}
746 	}
747 	if (!parser->queues_n && !parser->drop) {
748 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
749 				   NULL, "no valid action");
750 		return -rte_errno;
751 	}
752 	return 0;
753 exit_action_not_supported:
754 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
755 			   actions, "action not supported");
756 	return -rte_errno;
757 }
758 
759 /**
760  * Validate items.
761  *
762  * @param priv
763  *   Pointer to private structure.
764  * @param[in] items
765  *   Pattern specification (list terminated by the END pattern item).
766  * @param[out] error
767  *   Perform verbose error reporting if not NULL.
768  * @param[in, out] parser
769  *   Internal parser structure.
770  *
771  * @return
772  *   0 on success, a negative errno value otherwise and rte_errno is set.
773  */
774 static int
775 priv_flow_convert_items_validate(struct priv *priv,
776 				 const struct rte_flow_item items[],
777 				 struct rte_flow_error *error,
778 				 struct mlx5_flow_parse *parser)
779 {
780 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
781 	unsigned int i;
782 
783 	(void)priv;
784 	/* Initialise the offsets to start after the verbs attribute. */
785 	if (parser->drop) {
786 		parser->drop_q.offset = sizeof(struct ibv_flow_attr);
787 	} else {
788 		for (i = 0; i != hash_rxq_init_n; ++i)
789 			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
790 	}
791 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
792 		const struct mlx5_flow_items *token = NULL;
793 		unsigned int n;
794 		int err;
795 
796 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
797 			continue;
798 		for (i = 0;
799 		     cur_item->items &&
800 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
801 		     ++i) {
802 			if (cur_item->items[i] == items->type) {
803 				token = &mlx5_flow_items[items->type];
804 				break;
805 			}
806 		}
807 		if (!token)
808 			goto exit_item_not_supported;
809 		cur_item = token;
810 		err = mlx5_flow_item_validate(items,
811 					      (const uint8_t *)cur_item->mask,
812 					      cur_item->mask_sz);
813 		if (err)
814 			goto exit_item_not_supported;
815 		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
816 			if (parser->inner) {
817 				rte_flow_error_set(error, ENOTSUP,
818 						   RTE_FLOW_ERROR_TYPE_ITEM,
819 						   items,
820 						   "cannot recognize multiple"
821 						   " VXLAN encapsulations");
822 				return -rte_errno;
823 			}
824 			parser->inner = 1;
825 		}
826 		if (parser->drop) {
827 			parser->drop_q.offset += cur_item->dst_sz;
828 		} else if (parser->queues_n == 1) {
829 			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
830 		} else {
831 			for (n = 0; n != hash_rxq_init_n; ++n)
832 				parser->queue[n].offset += cur_item->dst_sz;
833 		}
834 	}
835 	if (parser->mark) {
836 		for (i = 0; i != hash_rxq_init_n; ++i)
837 			parser->queue[i].offset +=
838 				sizeof(struct ibv_flow_spec_action_tag);
839 	}
840 	return 0;
841 exit_item_not_supported:
842 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
843 			   items, "item not supported");
844 	return -rte_errno;
845 }
846 
847 /**
848  * Allocate memory space to store verbs flow attributes.
849  *
850  * @param priv
851  *   Pointer to private structure.
852  * @param[in] priority
853  *   Flow priority.
854  * @param[in] size
855  *   Number of bytes to allocate.
856  * @param[out] error
857  *   Perform verbose error reporting if not NULL.
858  *
859  * @return
860  *   A verbs flow attribute on success, NULL otherwise.
861  */
862 static struct ibv_flow_attr*
863 priv_flow_convert_allocate(struct priv *priv,
864 			   unsigned int priority,
865 			   unsigned int size,
866 			   struct rte_flow_error *error)
867 {
868 	struct ibv_flow_attr *ibv_attr;
869 
870 	(void)priv;
871 	ibv_attr = rte_calloc(__func__, 1, size, 0);
872 	if (!ibv_attr) {
873 		rte_flow_error_set(error, ENOMEM,
874 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
875 				   NULL,
876 				   "cannot allocate verbs spec attributes.");
877 		return NULL;
878 	}
879 	ibv_attr->priority = priority;
880 	return ibv_attr;
881 }
882 
883 /**
884  * Finalise verbs flow attributes.
885  *
886  * @param priv
887  *   Pointer to private structure.
888  * @param[in, out] parser
889  *   Internal parser structure.
890  */
891 static void
892 priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
893 {
894 	const unsigned int ipv4 =
895 		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
896 	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
897 	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
898 	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
899 	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
900 	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
901 	unsigned int i;
902 
903 	(void)priv;
904 	if (parser->layer == HASH_RXQ_ETH) {
905 		goto fill;
906 	} else {
907 		/*
908 		 * The Ethernet layer becomes useless as the pattern defines
909 		 * deeper layers.
910 		 */
911 		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
912 		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
913 	}
914 	/* Remove the opposite kind of layer, e.g. IPv6 if the pattern is IPv4. */
915 	for (i = ohmin; i != (ohmax + 1); ++i) {
916 		if (!parser->queue[i].ibv_attr)
917 			continue;
918 		rte_free(parser->queue[i].ibv_attr);
919 		parser->queue[i].ibv_attr = NULL;
920 	}
921 	/* Remove flows made impossible by the RSS configuration. */
922 	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
923 	    parser->rss_conf.rss_hf) {
924 		/* Remove any other flow. */
925 		for (i = hmin; i != (hmax + 1); ++i) {
926 			if ((i == parser->layer) ||
927 			     (!parser->queue[i].ibv_attr))
928 				continue;
929 			rte_free(parser->queue[i].ibv_attr);
930 			parser->queue[i].ibv_attr = NULL;
931 		}
932 	} else if (!parser->queue[ip].ibv_attr) {
933 		/* No RSS possible with the current configuration. */
934 		parser->queues_n = 1;
935 		return;
936 	}
937 fill:
938 	/*
939 	 * Fill missing layers in verbs specifications, or compute the correct
940 	 * offset to allocate the memory space for the attributes and
941 	 * specifications.
942 	 */
943 	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
944 		union {
945 			struct ibv_flow_spec_ipv4_ext ipv4;
946 			struct ibv_flow_spec_ipv6 ipv6;
947 			struct ibv_flow_spec_tcp_udp udp_tcp;
948 		} specs;
949 		void *dst;
950 		uint16_t size;
951 
952 		if (i == parser->layer)
953 			continue;
954 		if (parser->layer == HASH_RXQ_ETH) {
955 			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
956 				size = sizeof(struct ibv_flow_spec_ipv4_ext);
957 				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
958 					.type = IBV_FLOW_SPEC_IPV4_EXT |
959 						parser->inner,
960 					.size = size,
961 				};
962 			} else {
963 				size = sizeof(struct ibv_flow_spec_ipv6);
964 				specs.ipv6 = (struct ibv_flow_spec_ipv6){
965 					.type = IBV_FLOW_SPEC_IPV6 |
966 						parser->inner,
967 					.size = size,
968 				};
969 			}
970 			if (parser->queue[i].ibv_attr) {
971 				dst = (void *)((uintptr_t)
972 					       parser->queue[i].ibv_attr +
973 					       parser->queue[i].offset);
974 				memcpy(dst, &specs, size);
975 				++parser->queue[i].ibv_attr->num_of_specs;
976 			}
977 			parser->queue[i].offset += size;
978 		}
979 		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
980 		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
981 			size = sizeof(struct ibv_flow_spec_tcp_udp);
982 			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
983 				.type = ((i == HASH_RXQ_UDPV4 ||
984 					  i == HASH_RXQ_UDPV6) ?
985 					 IBV_FLOW_SPEC_UDP :
986 					 IBV_FLOW_SPEC_TCP) |
987 					parser->inner,
988 				.size = size,
989 			};
990 			if (parser->queue[i].ibv_attr) {
991 				dst = (void *)((uintptr_t)
992 					       parser->queue[i].ibv_attr +
993 					       parser->queue[i].offset);
994 				memcpy(dst, &specs, size);
995 				++parser->queue[i].ibv_attr->num_of_specs;
996 			}
997 			parser->queue[i].offset += size;
998 		}
999 	}
1000 }
1001 
1002 /**
1003  * Validate and convert a flow supported by the NIC.
1004  *
1005  * @param priv
1006  *   Pointer to private structure.
1007  * @param[in] attr
1008  *   Flow rule attributes.
1009  * @param[in] pattern
1010  *   Pattern specification (list terminated by the END pattern item).
1011  * @param[in] actions
1012  *   Associated actions (list terminated by the END action).
1013  * @param[out] error
1014  *   Perform verbose error reporting if not NULL.
1015  * @param[in, out] parser
1016  *   Internal parser structure.
1017  *
1018  * @return
1019  *   0 on success, a negative errno value otherwise and rte_errno is set.
1020  */
1021 static int
1022 priv_flow_convert(struct priv *priv,
1023 		  const struct rte_flow_attr *attr,
1024 		  const struct rte_flow_item items[],
1025 		  const struct rte_flow_action actions[],
1026 		  struct rte_flow_error *error,
1027 		  struct mlx5_flow_parse *parser)
1028 {
1029 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1030 	unsigned int i;
1031 	int ret;
1032 
1033 	/* First step. Validate the attributes, items and actions. */
1034 	*parser = (struct mlx5_flow_parse){
1035 		.create = parser->create,
1036 		.layer = HASH_RXQ_ETH,
1037 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
1038 	};
1039 	ret = priv_flow_convert_attributes(priv, attr, error, parser);
1040 	if (ret)
1041 		return ret;
1042 	ret = priv_flow_convert_actions(priv, actions, error, parser);
1043 	if (ret)
1044 		return ret;
1045 	ret = priv_flow_convert_items_validate(priv, items, error, parser);
1046 	if (ret)
1047 		return ret;
1048 	priv_flow_convert_finalise(priv, parser);
1049 	/*
1050 	 * Second step.
1051 	 * Allocate the memory space to store verbs specifications.
1052 	 */
1053 	if (parser->drop) {
1054 		parser->drop_q.ibv_attr =
1055 			priv_flow_convert_allocate(priv, attr->priority,
1056 						   parser->drop_q.offset,
1057 						   error);
1058 		if (!parser->drop_q.ibv_attr)
1059 			return ENOMEM;
1060 		parser->drop_q.offset = sizeof(struct ibv_flow_attr);
1061 	} else if (parser->queues_n == 1) {
1062 		unsigned int priority =
1063 			attr->priority +
1064 			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1065 		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1066 
1067 		parser->queue[HASH_RXQ_ETH].ibv_attr =
1068 			priv_flow_convert_allocate(priv, priority,
1069 						   offset, error);
1070 		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1071 			return ENOMEM;
1072 		parser->queue[HASH_RXQ_ETH].offset =
1073 			sizeof(struct ibv_flow_attr);
1074 	} else {
1075 		for (i = 0; i != hash_rxq_init_n; ++i) {
1076 			unsigned int priority =
1077 				attr->priority +
1078 				hash_rxq_init[i].flow_priority;
1079 			unsigned int offset;
1080 
1081 			if (!(parser->rss_conf.rss_hf &
1082 			      hash_rxq_init[i].dpdk_rss_hf) &&
1083 			    (i != HASH_RXQ_ETH))
1084 				continue;
1085 			offset = parser->queue[i].offset;
1086 			parser->queue[i].ibv_attr =
1087 				priv_flow_convert_allocate(priv, priority,
1088 							   offset, error);
1089 			if (!parser->queue[i].ibv_attr)
1090 				goto exit_enomem;
1091 			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1092 		}
1093 	}
1094 	/* Third step. Convert each item and fill the specifications. */
1095 	parser->inner = 0;
1096 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1097 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1098 			continue;
1099 		cur_item = &mlx5_flow_items[items->type];
1100 		ret = cur_item->convert(items,
1101 					(cur_item->default_mask ?
1102 					 cur_item->default_mask :
1103 					 cur_item->mask),
1104 					parser);
1105 		if (ret) {
1106 			rte_flow_error_set(error, ENOTSUP,
1107 					   RTE_FLOW_ERROR_TYPE_ITEM,
1108 					   items, "item not supported");
1109 			goto exit_free;
1110 		}
1111 	}
1112 	if (parser->mark)
1113 		mlx5_flow_create_flag_mark(parser, parser->mark_id);
1114 	/*
1115 	 * Last step. Complete the missing specifications to match the RSS
1116 	 * configuration.
1117 	 */
1118 	if (parser->queues_n > 1)
1119 		priv_flow_convert_finalise(priv, parser);
1120 exit_free:
1121 	/* Only validation was requested, all resources must be released. */
1122 	if (!parser->create) {
1123 		if (parser->drop) {
1124 			rte_free(parser->drop_q.ibv_attr);
1125 			parser->drop_q.ibv_attr = NULL;
1126 		}
1127 		for (i = 0; i != hash_rxq_init_n; ++i) {
1128 			if (parser->queue[i].ibv_attr) {
1129 				rte_free(parser->queue[i].ibv_attr);
1130 				parser->queue[i].ibv_attr = NULL;
1131 			}
1132 		}
1133 	}
1134 	return ret;
1135 exit_enomem:
1136 	for (i = 0; i != hash_rxq_init_n; ++i) {
1137 		if (parser->queue[i].ibv_attr) {
1138 			rte_free(parser->queue[i].ibv_attr);
1139 			parser->queue[i].ibv_attr = NULL;
1140 		}
1141 	}
1142 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1143 			   NULL, "cannot allocate verbs spec attributes.");
1144 	return ret;
1145 }
1146 
1147 /**
1148  * Copy the created specification into each Verbs flow attribute.
1149  *
1150  * @param parser
1151  *   Internal parser structure.
1152  * @param src
1153  *   Created specification.
1154  * @param size
1155  *   Size in bytes of the specification to copy.
1156  */
1157 static void
1158 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1159 		      unsigned int size)
1160 {
1161 	unsigned int i;
1162 	void *dst;
1163 
1164 	if (parser->drop) {
1165 		dst = (void *)((uintptr_t)parser->drop_q.ibv_attr +
1166 				parser->drop_q.offset);
1167 		memcpy(dst, src, size);
1168 		++parser->drop_q.ibv_attr->num_of_specs;
1169 		parser->drop_q.offset += size;
1170 		return;
1171 	}
1172 	for (i = 0; i != hash_rxq_init_n; ++i) {
1173 		if (!parser->queue[i].ibv_attr)
1174 			continue;
1175 		/* The specification must share the same L3 type, or have none. */
1176 		if (parser->layer == HASH_RXQ_ETH ||
1177 		    (hash_rxq_init[parser->layer].ip_version ==
1178 		     hash_rxq_init[i].ip_version) ||
1179 		    (hash_rxq_init[i].ip_version == 0)) {
1180 			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1181 					parser->queue[i].offset);
1182 			memcpy(dst, src, size);
1183 			++parser->queue[i].ibv_attr->num_of_specs;
1184 			parser->queue[i].offset += size;
1185 		}
1186 	}
1187 }
1188 
1189 /**
1190  * Convert Ethernet item to Verbs specification.
1191  *
1192  * @param item[in]
1193  *   Item specification.
1194  * @param default_mask[in]
1195  *   Default bit-masks to use when item->mask is not provided.
1196  * @param data[in, out]
1197  *   User structure.
1198  */
1199 static int
1200 mlx5_flow_create_eth(const struct rte_flow_item *item,
1201 		     const void *default_mask,
1202 		     void *data)
1203 {
1204 	const struct rte_flow_item_eth *spec = item->spec;
1205 	const struct rte_flow_item_eth *mask = item->mask;
1206 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1207 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
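	/*
	 * parser->inner is zero for outer headers and becomes
	 * IBV_FLOW_SPEC_INNER once a VXLAN item has been parsed.
	 */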
1208 	struct ibv_flow_spec_eth eth = {
1209 		.type = parser->inner | IBV_FLOW_SPEC_ETH,
1210 		.size = eth_size,
1211 	};
1212 
1213 	parser->layer = HASH_RXQ_ETH;
1214 	if (spec) {
1215 		unsigned int i;
1216 
1217 		if (!mask)
1218 			mask = default_mask;
1219 		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1220 		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1221 		eth.val.ether_type = spec->type;
1222 		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1223 		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1224 		eth.mask.ether_type = mask->type;
1225 		/* Remove unwanted bits from values. */
1226 		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1227 			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1228 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
1229 		}
1230 		eth.val.ether_type &= eth.mask.ether_type;
1231 	}
1232 	mlx5_flow_create_copy(parser, &eth, eth_size);
1233 	return 0;
1234 }
1235 
1236 /**
1237  * Convert VLAN item to Verbs specification.
1238  *
1239  * @param item[in]
1240  *   Item specification.
1241  * @param default_mask[in]
1242  *   Default bit-masks to use when item->mask is not provided.
1243  * @param data[in, out]
1244  *   User structure.
1245  */
1246 static int
1247 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1248 		      const void *default_mask,
1249 		      void *data)
1250 {
1251 	const struct rte_flow_item_vlan *spec = item->spec;
1252 	const struct rte_flow_item_vlan *mask = item->mask;
1253 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1254 	struct ibv_flow_spec_eth *eth;
1255 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1256 
1257 	if (spec) {
1258 		unsigned int i;
1259 		if (!mask)
1260 			mask = default_mask;
1261 
1262 		if (parser->drop) {
1263 			eth = (void *)((uintptr_t)parser->drop_q.ibv_attr +
1264 				       parser->drop_q.offset - eth_size);
1265 			eth->val.vlan_tag = spec->tci;
1266 			eth->mask.vlan_tag = mask->tci;
1267 			eth->val.vlan_tag &= eth->mask.vlan_tag;
1268 			return 0;
1269 		}
1270 		for (i = 0; i != hash_rxq_init_n; ++i) {
1271 			if (!parser->queue[i].ibv_attr)
1272 				continue;
1273 
1274 			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1275 				       parser->queue[i].offset - eth_size);
1276 			eth->val.vlan_tag = spec->tci;
1277 			eth->mask.vlan_tag = mask->tci;
1278 			eth->val.vlan_tag &= eth->mask.vlan_tag;
1279 		}
1280 	}
1281 	return 0;
1282 }
1283 
1284 /**
1285  * Convert IPv4 item to Verbs specification.
1286  *
1287  * @param item[in]
1288  *   Item specification.
1289  * @param default_mask[in]
1290  *   Default bit-masks to use when item->mask is not provided.
1291  * @param data[in, out]
1292  *   User structure.
1293  */
1294 static int
1295 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1296 		      const void *default_mask,
1297 		      void *data)
1298 {
1299 	const struct rte_flow_item_ipv4 *spec = item->spec;
1300 	const struct rte_flow_item_ipv4 *mask = item->mask;
1301 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1302 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1303 	struct ibv_flow_spec_ipv4_ext ipv4 = {
1304 		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1305 		.size = ipv4_size,
1306 	};
1307 
1308 	parser->layer = HASH_RXQ_IPV4;
1309 	if (spec) {
1310 		if (!mask)
1311 			mask = default_mask;
1312 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1313 			.src_ip = spec->hdr.src_addr,
1314 			.dst_ip = spec->hdr.dst_addr,
1315 			.proto = spec->hdr.next_proto_id,
1316 			.tos = spec->hdr.type_of_service,
1317 		};
1318 		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1319 			.src_ip = mask->hdr.src_addr,
1320 			.dst_ip = mask->hdr.dst_addr,
1321 			.proto = mask->hdr.next_proto_id,
1322 			.tos = mask->hdr.type_of_service,
1323 		};
1324 		/* Remove unwanted bits from values. */
1325 		ipv4.val.src_ip &= ipv4.mask.src_ip;
1326 		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1327 		ipv4.val.proto &= ipv4.mask.proto;
1328 		ipv4.val.tos &= ipv4.mask.tos;
1329 	}
1330 	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1331 	return 0;
1332 }
1333 
1334 /**
1335  * Convert IPv6 item to Verbs specification.
1336  *
1337  * @param item[in]
1338  *   Item specification.
1339  * @param default_mask[in]
1340  *   Default bit-masks to use when item->mask is not provided.
1341  * @param data[in, out]
1342  *   User structure.
1343  */
1344 static int
1345 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1346 		      const void *default_mask,
1347 		      void *data)
1348 {
1349 	const struct rte_flow_item_ipv6 *spec = item->spec;
1350 	const struct rte_flow_item_ipv6 *mask = item->mask;
1351 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1352 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1353 	struct ibv_flow_spec_ipv6 ipv6 = {
1354 		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
1355 		.size = ipv6_size,
1356 	};
1357 
1358 	parser->layer = HASH_RXQ_IPV6;
1359 	if (spec) {
1360 		unsigned int i;
1361 
1362 		if (!mask)
1363 			mask = default_mask;
1364 		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1365 		       RTE_DIM(ipv6.val.src_ip));
1366 		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1367 		       RTE_DIM(ipv6.val.dst_ip));
1368 		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1369 		       RTE_DIM(ipv6.mask.src_ip));
1370 		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1371 		       RTE_DIM(ipv6.mask.dst_ip));
1372 		ipv6.mask.flow_label = mask->hdr.vtc_flow;
1373 		ipv6.mask.next_hdr = mask->hdr.proto;
1374 		ipv6.mask.hop_limit = mask->hdr.hop_limits;
1375 		/* Remove unwanted bits from values. */
1376 		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1377 			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1378 			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1379 		}
1380 		ipv6.val.flow_label &= ipv6.mask.flow_label;
1381 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1382 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1383 	}
1384 	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1385 	return 0;
1386 }
1387 
1388 /**
1389  * Convert UDP item to Verbs specification.
1390  *
1391  * @param item[in]
1392  *   Item specification.
1393  * @param default_mask[in]
1394  *   Default bit-masks to use when item->mask is not provided.
1395  * @param data[in, out]
1396  *   User structure.
1397  */
1398 static int
1399 mlx5_flow_create_udp(const struct rte_flow_item *item,
1400 		     const void *default_mask,
1401 		     void *data)
1402 {
1403 	const struct rte_flow_item_udp *spec = item->spec;
1404 	const struct rte_flow_item_udp *mask = item->mask;
1405 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1406 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1407 	struct ibv_flow_spec_tcp_udp udp = {
1408 		.type = parser->inner | IBV_FLOW_SPEC_UDP,
1409 		.size = udp_size,
1410 	};
1411 
1412 	if (parser->layer == HASH_RXQ_IPV4)
1413 		parser->layer = HASH_RXQ_UDPV4;
1414 	else
1415 		parser->layer = HASH_RXQ_UDPV6;
1416 	if (spec) {
1417 		if (!mask)
1418 			mask = default_mask;
1419 		udp.val.dst_port = spec->hdr.dst_port;
1420 		udp.val.src_port = spec->hdr.src_port;
1421 		udp.mask.dst_port = mask->hdr.dst_port;
1422 		udp.mask.src_port = mask->hdr.src_port;
1423 		/* Remove unwanted bits from values. */
1424 		udp.val.src_port &= udp.mask.src_port;
1425 		udp.val.dst_port &= udp.mask.dst_port;
1426 	}
1427 	mlx5_flow_create_copy(parser, &udp, udp_size);
1428 	return 0;
1429 }
1430 
1431 /**
1432  * Convert TCP item to Verbs specification.
1433  *
1434  * @param item[in]
1435  *   Item specification.
1436  * @param default_mask[in]
1437  *   Default bit-masks to use when item->mask is not provided.
1438  * @param data[in, out]
1439  *   User structure.
1440  */
1441 static int
1442 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1443 		     const void *default_mask,
1444 		     void *data)
1445 {
1446 	const struct rte_flow_item_tcp *spec = item->spec;
1447 	const struct rte_flow_item_tcp *mask = item->mask;
1448 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1449 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1450 	struct ibv_flow_spec_tcp_udp tcp = {
1451 		.type = parser->inner | IBV_FLOW_SPEC_TCP,
1452 		.size = tcp_size,
1453 	};
1454 
1455 	if (parser->layer == HASH_RXQ_IPV4)
1456 		parser->layer = HASH_RXQ_TCPV4;
1457 	else
1458 		parser->layer = HASH_RXQ_TCPV6;
1459 	if (spec) {
1460 		if (!mask)
1461 			mask = default_mask;
1462 		tcp.val.dst_port = spec->hdr.dst_port;
1463 		tcp.val.src_port = spec->hdr.src_port;
1464 		tcp.mask.dst_port = mask->hdr.dst_port;
1465 		tcp.mask.src_port = mask->hdr.src_port;
1466 		/* Remove unwanted bits from values. */
1467 		tcp.val.src_port &= tcp.mask.src_port;
1468 		tcp.val.dst_port &= tcp.mask.dst_port;
1469 	}
1470 	mlx5_flow_create_copy(parser, &tcp, tcp_size);
1471 	return 0;
1472 }
1473 
1474 /**
1475  * Convert VXLAN item to Verbs specification.
1476  *
1477  * @param item[in]
1478  *   Item specification.
1479  * @param default_mask[in]
1480  *   Default bit-masks to use when item->mask is not provided.
1481  * @param data[in, out]
1482  *   User structure.
1483  */
1484 static int
1485 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1486 		       const void *default_mask,
1487 		       void *data)
1488 {
1489 	const struct rte_flow_item_vxlan *spec = item->spec;
1490 	const struct rte_flow_item_vxlan *mask = item->mask;
1491 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1492 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1493 	struct ibv_flow_spec_tunnel vxlan = {
1494 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1495 		.size = size,
1496 	};
1497 	union vni {
1498 		uint32_t vlan_id;
1499 		uint8_t vni[4];
1500 	} id;
1501 
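	/*
	 * Build the 32-bit tunnel_id: the 24-bit VNI fills bytes 1-3 while
	 * byte 0 stays zeroed.
	 */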
1502 	id.vni[0] = 0;
1503 	parser->inner = IBV_FLOW_SPEC_INNER;
1504 	if (spec) {
1505 		if (!mask)
1506 			mask = default_mask;
1507 		memcpy(&id.vni[1], spec->vni, 3);
1508 		vxlan.val.tunnel_id = id.vlan_id;
1509 		memcpy(&id.vni[1], mask->vni, 3);
1510 		vxlan.mask.tunnel_id = id.vlan_id;
1511 		/* Remove unwanted bits from values. */
1512 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1513 	}
1514 	mlx5_flow_create_copy(parser, &vxlan, size);
1515 	return 0;
1516 }
1517 
1518 /**
1519  * Convert mark/flag action to Verbs specification.
1520  *
1521  * @param parser
1522  *   Internal parser structure.
1523  * @param mark_id
1524  *   Mark identifier.
1525  */
1526 static int
1527 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1528 {
1529 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1530 	struct ibv_flow_spec_action_tag tag = {
1531 		.type = IBV_FLOW_SPEC_ACTION_TAG,
1532 		.size = size,
1533 		.tag_id = mlx5_flow_mark_set(mark_id),
1534 	};
1535 
1536 	assert(parser->mark);
1537 	mlx5_flow_create_copy(parser, &tag, size);
1538 	return 0;
1539 }
1540 
1541 /**
1542  * Complete flow rule creation with a drop queue.
1543  *
1544  * @param priv
1545  *   Pointer to private structure.
1546  * @param parser
1547  *   Internal parser structure.
1548  * @param flow
1549  *   Pointer to the rte_flow.
1550  * @param[out] error
1551  *   Perform verbose error reporting if not NULL.
1552  *
1553  * @return
1554  *   0 on success, errno value on failure.
1555  */
1556 static int
1557 priv_flow_create_action_queue_drop(struct priv *priv,
1558 				   struct mlx5_flow_parse *parser,
1559 				   struct rte_flow *flow,
1560 				   struct rte_flow_error *error)
1561 {
1562 	struct ibv_flow_spec_action_drop *drop;
1563 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1564 	int err = 0;
1565 
1566 	assert(priv->pd);
1567 	assert(priv->ctx);
1568 	flow->drop = 1;
1569 	drop = (void *)((uintptr_t)parser->drop_q.ibv_attr +
1570 			parser->drop_q.offset);
1571 	*drop = (struct ibv_flow_spec_action_drop){
1572 			.type = IBV_FLOW_SPEC_ACTION_DROP,
1573 			.size = size,
1574 	};
1575 	++parser->drop_q.ibv_attr->num_of_specs;
1576 	parser->drop_q.offset += size;
1577 	if (!priv->dev->data->dev_started)
1578 		return 0;
1579 	flow->drxq.ibv_attr = parser->drop_q.ibv_attr;
1580 	parser->drop_q.ibv_attr = NULL;
1581 	flow->drxq.ibv_flow = ibv_create_flow(priv->flow_drop_queue->qp,
1582 					      flow->drxq.ibv_attr);
1583 	if (!flow->drxq.ibv_flow) {
1584 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1585 				   NULL, "flow rule creation failure");
1586 		err = ENOMEM;
1587 		goto error;
1588 	}
1589 	return 0;
1590 error:
1591 	assert(flow);
1592 	if (flow->drxq.ibv_flow) {
1593 		claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
1594 		flow->drxq.ibv_flow = NULL;
1595 	}
1596 	if (flow->drxq.ibv_attr) {
1597 		rte_free(flow->drxq.ibv_attr);
1598 		flow->drxq.ibv_attr = NULL;
1599 	}
1600 	return err;
1601 }
1602 
1603 /**
1604  * Create hash Rx queues when RSS is enabled.
1605  *
1606  * @param priv
1607  *   Pointer to private structure.
1608  * @param parser
1609  *   Internal parser structure.
1610  * @param flow
1611  *   Pointer to the rte_flow.
1612  * @param[out] error
1613  *   Perform verbose error reporting if not NULL.
1614  *
1615  * @return
1616  *   0 on success, an errno value otherwise and rte_errno is set.
1617  */
1618 static int
1619 priv_flow_create_action_queue_rss(struct priv *priv,
1620 				  struct mlx5_flow_parse *parser,
1621 				  struct rte_flow *flow,
1622 				  struct rte_flow_error *error)
1623 {
1624 	unsigned int i;
1625 
1626 	for (i = 0; i != hash_rxq_init_n; ++i) {
1627 		uint64_t hash_fields;
1628 
1629 		if (!parser->queue[i].ibv_attr)
1630 			continue;
1631 		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1632 		parser->queue[i].ibv_attr = NULL;
1633 		hash_fields = hash_rxq_init[i].hash_fields;
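		/*
		 * Without hash fields (plain L2 flow), a single queue is
		 * enough since RSS cannot spread the traffic.
		 */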
1634 		flow->frxq[i].hrxq =
1635 			mlx5_priv_hrxq_get(priv,
1636 					   parser->rss_conf.rss_key,
1637 					   parser->rss_conf.rss_key_len,
1638 					   hash_fields,
1639 					   parser->queues,
1640 					   hash_fields ? parser->queues_n : 1);
1641 		if (flow->frxq[i].hrxq)
1642 			continue;
1643 		flow->frxq[i].hrxq =
1644 			mlx5_priv_hrxq_new(priv,
1645 					   parser->rss_conf.rss_key,
1646 					   parser->rss_conf.rss_key_len,
1647 					   hash_fields,
1648 					   parser->queues,
1649 					   hash_fields ? parser->queues_n : 1);
1650 		if (!flow->frxq[i].hrxq) {
1651 			rte_flow_error_set(error, ENOMEM,
1652 					   RTE_FLOW_ERROR_TYPE_HANDLE,
1653 					   NULL, "cannot create hash rxq");
1654 			return ENOMEM;
1655 		}
1656 	}
1657 	return 0;
1658 }
1659 
1660 /**
1661  * Complete flow rule creation.
1662  *
1663  * @param priv
1664  *   Pointer to private structure.
1665  * @param parser
1666  *   Internal parser structure.
1667  * @param flow
1668  *   Pointer to the rte_flow.
1669  * @param[out] error
1670  *   Perform verbose error reporting if not NULL.
1671  *
1672  * @return
1673  *   0 on success, an errno value otherwise and rte_errno is set.
1674  */
1675 static int
1676 priv_flow_create_action_queue(struct priv *priv,
1677 			      struct mlx5_flow_parse *parser,
1678 			      struct rte_flow *flow,
1679 			      struct rte_flow_error *error)
1680 {
1681 	int err = 0;
1682 	unsigned int i;
1683 
1684 	assert(priv->pd);
1685 	assert(priv->ctx);
1686 	assert(!parser->drop);
1687 	err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
1688 	if (err)
1689 		goto error;
1690 	if (!priv->dev->data->dev_started)
1691 		return 0;
1692 	for (i = 0; i != hash_rxq_init_n; ++i) {
1693 		if (!flow->frxq[i].hrxq)
1694 			continue;
1695 		flow->frxq[i].ibv_flow =
1696 			ibv_create_flow(flow->frxq[i].hrxq->qp,
1697 					flow->frxq[i].ibv_attr);
1698 		if (!flow->frxq[i].ibv_flow) {
1699 			rte_flow_error_set(error, ENOMEM,
1700 					   RTE_FLOW_ERROR_TYPE_HANDLE,
1701 					   NULL, "flow rule creation failure");
1702 			err = ENOMEM;
1703 			goto error;
1704 		}
1705 		DEBUG("%p type %d QP %p ibv_flow %p",
1706 		      (void *)flow, i,
1707 		      (void *)flow->frxq[i].hrxq,
1708 		      (void *)flow->frxq[i].ibv_flow);
1709 	}
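	/*
	 * Enable mark reporting on every Rx queue used by this flow when a
	 * MARK or FLAG action is present.
	 */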
1710 	for (i = 0; i != parser->queues_n; ++i) {
1711 		struct mlx5_rxq_data *q =
1712 			(*priv->rxqs)[parser->queues[i]];
1713 
1714 		q->mark |= parser->mark;
1715 	}
1716 	return 0;
1717 error:
1718 	assert(flow);
1719 	for (i = 0; i != hash_rxq_init_n; ++i) {
1720 		if (flow->frxq[i].ibv_flow) {
1721 			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1722 
1723 			claim_zero(ibv_destroy_flow(ibv_flow));
1724 		}
1725 		if (flow->frxq[i].hrxq)
1726 			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
1727 		if (flow->frxq[i].ibv_attr)
1728 			rte_free(flow->frxq[i].ibv_attr);
1729 	}
1730 	return err;
1731 }
1732 
1733 /**
1734  * Convert a flow rule and create it.
1735  *
1736  * @param priv
1737  *   Pointer to private structure.
1738  * @param list
1739  *   Pointer to a TAILQ flow list.
1740  * @param[in] attr
1741  *   Flow rule attributes.
1742  * @param[in] pattern
1743  *   Pattern specification (list terminated by the END pattern item).
1744  * @param[in] actions
1745  *   Associated actions (list terminated by the END action).
1746  * @param[out] error
1747  *   Perform verbose error reporting if not NULL.
1748  *
1749  * @return
1750  *   A flow on success, NULL otherwise.
1751  */
1752 static struct rte_flow *
1753 priv_flow_create(struct priv *priv,
1754 		 struct mlx5_flows *list,
1755 		 const struct rte_flow_attr *attr,
1756 		 const struct rte_flow_item items[],
1757 		 const struct rte_flow_action actions[],
1758 		 struct rte_flow_error *error)
1759 {
1760 	struct mlx5_flow_parse parser = { .create = 1, };
1761 	struct rte_flow *flow = NULL;
1762 	unsigned int i;
1763 	int err;
1764 
1765 	err = priv_flow_convert(priv, attr, items, actions, error, &parser);
1766 	if (err)
1767 		goto exit;
1768 	flow = rte_calloc(__func__, 1,
1769 			  sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1770 			  0);
1771 	if (!flow) {
1772 		rte_flow_error_set(error, ENOMEM,
1773 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1774 				   NULL,
1775 				   "cannot allocate flow memory");
1776 		return NULL;
1777 	}
1778 	/* Copy the queue configuration. */
1779 	flow->queues = (uint16_t (*)[])(flow + 1);
1780 	memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1781 	flow->queues_n = parser.queues_n;
1782 	/* Copy RSS configuration. */
1783 	flow->rss_conf = parser.rss_conf;
1784 	flow->rss_conf.rss_key = flow->rss_key;
1785 	memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1786 	/* Finalize the flow. */
1787 	if (parser.drop)
1788 		err = priv_flow_create_action_queue_drop(priv, &parser, flow,
1789 							 error);
1790 	else
1791 		err = priv_flow_create_action_queue(priv, &parser, flow, error);
1792 	if (err)
1793 		goto exit;
1794 	TAILQ_INSERT_TAIL(list, flow, next);
1795 	DEBUG("Flow created %p", (void *)flow);
1796 	return flow;
1797 exit:
1798 	if (parser.drop) {
1799 		rte_free(parser.drop_q.ibv_attr);
1800 	} else {
1801 		for (i = 0; i != hash_rxq_init_n; ++i) {
1802 			if (parser.queue[i].ibv_attr)
1803 				rte_free(parser.queue[i].ibv_attr);
1804 		}
1805 	}
1806 	rte_free(flow);
1807 	return NULL;
1808 }
1809 
1810 /**
1811  * Validate a flow supported by the NIC.
1812  *
1813  * @see rte_flow_validate()
1814  * @see rte_flow_ops
1815  */
1816 int
1817 mlx5_flow_validate(struct rte_eth_dev *dev,
1818 		   const struct rte_flow_attr *attr,
1819 		   const struct rte_flow_item items[],
1820 		   const struct rte_flow_action actions[],
1821 		   struct rte_flow_error *error)
1822 {
1823 	struct priv *priv = dev->data->dev_private;
1824 	int ret;
1825 	struct mlx5_flow_parse parser = { .create = 0, };
1826 
1827 	priv_lock(priv);
1828 	ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
1829 	priv_unlock(priv);
1830 	return ret;
1831 }
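
/*
 * Illustrative sketch (not part of the driver): an application reaches the
 * function above through the generic rte_flow API. The port_id variable and
 * the queue index are assumptions made for the example only.
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error error;
 *
 *	if (rte_flow_validate(port_id, &attr, pattern, actions, &error))
 *		printf("rule not supported: %s\n", error.message);
 */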
1832 
1833 /**
1834  * Create a flow.
1835  *
1836  * @see rte_flow_create()
1837  * @see rte_flow_ops
1838  */
1839 struct rte_flow *
1840 mlx5_flow_create(struct rte_eth_dev *dev,
1841 		 const struct rte_flow_attr *attr,
1842 		 const struct rte_flow_item items[],
1843 		 const struct rte_flow_action actions[],
1844 		 struct rte_flow_error *error)
1845 {
1846 	struct priv *priv = dev->data->dev_private;
1847 	struct rte_flow *flow;
1848 
1849 	priv_lock(priv);
1850 	flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
1851 				error);
1852 	priv_unlock(priv);
1853 	return flow;
1854 }
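
/*
 * Illustrative sketch (not part of the driver): creating and destroying a
 * rule that steers IPv4 packets addressed to 192.0.2.1 to Rx queue 1. The
 * port_id value, address and queue index are example assumptions only.
 *
 *	struct rte_flow_item_ipv4 ip_spec = {
 *		.hdr = { .dst_addr = rte_cpu_to_be_32(0xc0000201) },
 *	};
 *	struct rte_flow_item_ipv4 ip_mask = {
 *		.hdr = { .dst_addr = rte_cpu_to_be_32(0xffffffff) },
 *	};
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{
 *			.type = RTE_FLOW_ITEM_TYPE_IPV4,
 *			.spec = &ip_spec,
 *			.mask = &ip_mask,
 *		},
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 1 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error error;
 *	struct rte_flow *f;
 *
 *	f = rte_flow_create(port_id, &attr, pattern, actions, &error);
 *	if (f)
 *		rte_flow_destroy(port_id, f, &error);
 */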
1855 
1856 /**
1857  * Destroy a flow.
1858  *
1859  * @param priv
1860  *   Pointer to private structure.
1861  * @param list
1862  *   Pointer to a TAILQ flow list.
1863  * @param[in] flow
1864  *   Flow to destroy.
1865  */
1866 static void
1867 priv_flow_destroy(struct priv *priv,
1868 		  struct mlx5_flows *list,
1869 		  struct rte_flow *flow)
1870 {
1871 	unsigned int i;
1872 
1873 	if (flow->drop || !flow->mark)
1874 		goto free;
1875 	for (i = 0; i != flow->queues_n; ++i) {
1876 		struct rte_flow *tmp;
1877 		int mark = 0;
1878 
1879 		/*
1880 		 * To remove the mark from the queue, the queue must not be
1881 		 * present in any other marked flow (RSS or not).
1882 		 */
1883 		TAILQ_FOREACH(tmp, list, next) {
1884 			unsigned int j;
1885 			uint16_t *tqs = NULL;
1886 			uint16_t tq_n = 0;
1887 
1888 			if (!tmp->mark)
1889 				continue;
1890 			for (j = 0; j != hash_rxq_init_n; ++j) {
1891 				if (!tmp->frxq[j].hrxq)
1892 					continue;
1893 				tqs = tmp->frxq[j].hrxq->ind_table->queues;
1894 				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
1895 			}
1896 			if (!tq_n)
1897 				continue;
1898 			for (j = 0; (j != tq_n) && !mark; j++)
1899 				if (tqs[j] == (*flow->queues)[i])
1900 					mark = 1;
1901 		}
1902 		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
1903 	}
1904 free:
1905 	if (flow->drop) {
1906 		if (flow->drxq.ibv_flow)
1907 			claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
1908 		rte_free(flow->drxq.ibv_attr);
1909 	} else {
1910 		for (i = 0; i != hash_rxq_init_n; ++i) {
1911 			struct mlx5_flow *frxq = &flow->frxq[i];
1912 
1913 			if (frxq->ibv_flow)
1914 				claim_zero(ibv_destroy_flow(frxq->ibv_flow));
1915 			if (frxq->hrxq)
1916 				mlx5_priv_hrxq_release(priv, frxq->hrxq);
1917 			if (frxq->ibv_attr)
1918 				rte_free(frxq->ibv_attr);
1919 		}
1920 	}
1921 	TAILQ_REMOVE(list, flow, next);
1922 	DEBUG("Flow destroyed %p", (void *)flow);
1923 	rte_free(flow);
1924 }
1925 
1926 /**
1927  * Destroy all flows.
1928  *
1929  * @param priv
1930  *   Pointer to private structure.
1931  * @param list
1932  *   Pointer to a TAILQ flow list.
1933  */
1934 void
1935 priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
1936 {
1937 	while (!TAILQ_EMPTY(list)) {
1938 		struct rte_flow *flow;
1939 
1940 		flow = TAILQ_FIRST(list);
1941 		priv_flow_destroy(priv, list, flow);
1942 	}
1943 }
1944 
1945 /**
1946  * Create drop queue.
1947  *
1948  * @param priv
1949  *   Pointer to private structure.
1950  *
1951  * @return
1952  *   0 on success, -1 on failure.
1953  */
1954 int
1955 priv_flow_create_drop_queue(struct priv *priv)
1956 {
1957 	struct mlx5_hrxq_drop *fdq = NULL;
1958 
1959 	assert(priv->pd);
1960 	assert(priv->ctx);
1961 	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
1962 	if (!fdq) {
1963 		WARN("cannot allocate memory for drop queue");
1964 		goto error;
1965 	}
1966 	fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
1967 	if (!fdq->cq) {
1968 		WARN("cannot allocate CQ for drop queue");
1969 		goto error;
1970 	}
1971 	fdq->wq = ibv_create_wq(priv->ctx,
1972 			&(struct ibv_wq_init_attr){
1973 			.wq_type = IBV_WQT_RQ,
1974 			.max_wr = 1,
1975 			.max_sge = 1,
1976 			.pd = priv->pd,
1977 			.cq = fdq->cq,
1978 			});
1979 	if (!fdq->wq) {
1980 		WARN("cannot allocate WQ for drop queue");
1981 		goto error;
1982 	}
1983 	fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
1984 			&(struct ibv_rwq_ind_table_init_attr){
1985 			.log_ind_tbl_size = 0,
1986 			.ind_tbl = &fdq->wq,
1987 			.comp_mask = 0,
1988 			});
1989 	if (!fdq->ind_table) {
1990 		WARN("cannot allocate indirection table for drop queue");
1991 		goto error;
1992 	}
1993 	fdq->qp = ibv_create_qp_ex(priv->ctx,
1994 		&(struct ibv_qp_init_attr_ex){
1995 			.qp_type = IBV_QPT_RAW_PACKET,
1996 			.comp_mask =
1997 				IBV_QP_INIT_ATTR_PD |
1998 				IBV_QP_INIT_ATTR_IND_TABLE |
1999 				IBV_QP_INIT_ATTR_RX_HASH,
2000 			.rx_hash_conf = (struct ibv_rx_hash_conf){
2001 				.rx_hash_function =
2002 					IBV_RX_HASH_FUNC_TOEPLITZ,
2003 				.rx_hash_key_len = rss_hash_default_key_len,
2004 				.rx_hash_key = rss_hash_default_key,
2005 				.rx_hash_fields_mask = 0,
2006 				},
2007 			.rwq_ind_tbl = fdq->ind_table,
2008 			.pd = priv->pd
2009 		});
2010 	if (!fdq->qp) {
2011 		WARN("cannot allocate QP for drop queue");
2012 		goto error;
2013 	}
2014 	priv->flow_drop_queue = fdq;
2015 	return 0;
2016 error:
2017 	if (fdq && fdq->qp)
2018 		claim_zero(ibv_destroy_qp(fdq->qp));
2019 	if (fdq && fdq->ind_table)
2020 		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2021 	if (fdq && fdq->wq)
2022 		claim_zero(ibv_destroy_wq(fdq->wq));
2023 	if (fdq && fdq->cq)
2024 		claim_zero(ibv_destroy_cq(fdq->cq));
2025 	if (fdq)
2026 		rte_free(fdq);
2027 	priv->flow_drop_queue = NULL;
2028 	return -1;
2029 }
2030 
2031 /**
2032  * Delete drop queue.
2033  *
2034  * @param priv
2035  *   Pointer to private structure.
2036  */
2037 void
2038 priv_flow_delete_drop_queue(struct priv *priv)
2039 {
2040 	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2041 
2042 	if (!fdq)
2043 		return;
2044 	if (fdq->qp)
2045 		claim_zero(ibv_destroy_qp(fdq->qp));
2046 	if (fdq->ind_table)
2047 		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2048 	if (fdq->wq)
2049 		claim_zero(ibv_destroy_wq(fdq->wq));
2050 	if (fdq->cq)
2051 		claim_zero(ibv_destroy_cq(fdq->cq));
2052 	rte_free(fdq);
2053 	priv->flow_drop_queue = NULL;
2054 }
2055 
2056 /**
2057  * Remove all flows from the NIC while keeping them in the list.
2058  *
2059  * @param priv
2060  *   Pointer to private structure.
2061  * @param list
2062  *   Pointer to a TAILQ flow list.
2063  */
2064 void
2065 priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
2066 {
2067 	struct rte_flow *flow;
2068 
2069 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2070 		unsigned int i;
2071 
2072 		if (flow->drop) {
2073 			if (!flow->drxq.ibv_flow)
2074 				continue;
2075 			claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
2076 			flow->drxq.ibv_flow = NULL;
2077 			/* Next flow. */
2078 			continue;
2079 		}
2080 		if (flow->mark) {
2081 			struct mlx5_ind_table_ibv *ind_tbl = NULL;
2082 
2083 			for (i = 0; i != hash_rxq_init_n; ++i) {
2084 				if (!flow->frxq[i].hrxq)
2085 					continue;
2086 				ind_tbl = flow->frxq[i].hrxq->ind_table;
2087 			}
2088 			assert(ind_tbl);
2089 			for (i = 0; i != ind_tbl->queues_n; ++i)
2090 				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2091 		}
2092 		for (i = 0; i != hash_rxq_init_n; ++i) {
2093 			if (!flow->frxq[i].ibv_flow)
2094 				continue;
2095 			claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
2096 			flow->frxq[i].ibv_flow = NULL;
2097 			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
2098 			flow->frxq[i].hrxq = NULL;
2099 		}
2100 		DEBUG("Flow %p removed", (void *)flow);
2101 	}
2102 }
2103 
2104 /**
2105  * Re-apply all flows in the list to the NIC.
2106  *
2107  * @param priv
2108  *   Pointer to private structure.
2109  * @param list
2110  *   Pointer to a TAILQ flow list.
2111  *
2112  * @return
2113  *   0 on success, an errno value otherwise and rte_errno is set.
2114  */
2115 int
2116 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
2117 {
2118 	struct rte_flow *flow;
2119 
2120 	TAILQ_FOREACH(flow, list, next) {
2121 		unsigned int i;
2122 
2123 		if (flow->drop) {
2124 			flow->drxq.ibv_flow =
2125 				ibv_create_flow(priv->flow_drop_queue->qp,
2126 						flow->drxq.ibv_attr);
2127 			if (!flow->drxq.ibv_flow) {
2128 				DEBUG("Flow %p cannot be applied",
2129 				      (void *)flow);
2130 				rte_errno = EINVAL;
2131 				return rte_errno;
2132 			}
2133 			DEBUG("Flow %p applied", (void *)flow);
2134 			/* Next flow. */
2135 			continue;
2136 		}
2137 		for (i = 0; i != hash_rxq_init_n; ++i) {
2138 			if (!flow->frxq[i].ibv_attr)
2139 				continue;
2140 			flow->frxq[i].hrxq =
2141 				mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
2142 						   flow->rss_conf.rss_key_len,
2143 						   hash_rxq_init[i].hash_fields,
2144 						   (*flow->queues),
2145 						   flow->queues_n);
2146 			if (flow->frxq[i].hrxq)
2147 				goto flow_create;
2148 			flow->frxq[i].hrxq =
2149 				mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
2150 						   flow->rss_conf.rss_key_len,
2151 						   hash_rxq_init[i].hash_fields,
2152 						   (*flow->queues),
2153 						   flow->queues_n);
2154 			if (!flow->frxq[i].hrxq) {
2155 				DEBUG("Flow %p cannot be applied",
2156 				      (void *)flow);
2157 				rte_errno = EINVAL;
2158 				return rte_errno;
2159 			}
2160 flow_create:
2161 			flow->frxq[i].ibv_flow =
2162 				ibv_create_flow(flow->frxq[i].hrxq->qp,
2163 						flow->frxq[i].ibv_attr);
2164 			if (!flow->frxq[i].ibv_flow) {
2165 				DEBUG("Flow %p cannot be applied",
2166 				      (void *)flow);
2167 				rte_errno = EINVAL;
2168 				return rte_errno;
2169 			}
2170 			DEBUG("Flow %p applied", (void *)flow);
2171 		}
2172 		if (!flow->mark)
2173 			continue;
2174 		for (i = 0; i != flow->queues_n; ++i)
2175 			(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2176 	}
2177 	return 0;
2178 }
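
/*
 * Rough sketch of how the port stop/start path is expected to drive the two
 * helpers above; the actual call sites live in the trigger code, this is
 * only an illustration:
 *
 *	priv_flow_stop(priv, &priv->flows);
 *	... Rx queues are torn down and set up again here ...
 *	err = priv_flow_start(priv, &priv->flows);
 *	if (err)
 *		... rte_errno is set, abort the start sequence ...
 */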
2179 
2180 /**
2181  * Verify the flow list is empty.
2182  *
2183  * @param priv
2184  *   Pointer to private structure.
2185  *
2186  * @return The number of flows not released.
2187  */
2188 int
2189 priv_flow_verify(struct priv *priv)
2190 {
2191 	struct rte_flow *flow;
2192 	int ret = 0;
2193 
2194 	TAILQ_FOREACH(flow, &priv->flows, next) {
2195 		DEBUG("%p: flow %p still referenced", (void *)priv,
2196 		      (void *)flow);
2197 		++ret;
2198 	}
2199 	return ret;
2200 }
2201 
2202 /**
2203  * Enable a control flow configured from the control plane.
2204  *
2205  * @param dev
2206  *   Pointer to Ethernet device.
2207  * @param eth_spec
2208  *   An Ethernet flow spec to apply.
2209  * @param eth_mask
2210  *   An Ethernet flow mask to apply.
2211  * @param vlan_spec
2212  *   A VLAN flow spec to apply.
2213  * @param vlan_mask
2214  *   A VLAN flow mask to apply.
2215  *
2216  * @return
2217  *   0 on success, an errno value otherwise.
2218  */
2219 int
2220 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2221 		    struct rte_flow_item_eth *eth_spec,
2222 		    struct rte_flow_item_eth *eth_mask,
2223 		    struct rte_flow_item_vlan *vlan_spec,
2224 		    struct rte_flow_item_vlan *vlan_mask)
2225 {
2226 	struct priv *priv = dev->data->dev_private;
2227 	const struct rte_flow_attr attr = {
2228 		.ingress = 1,
2229 		.priority = MLX5_CTRL_FLOW_PRIORITY,
2230 	};
2231 	struct rte_flow_item items[] = {
2232 		{
2233 			.type = RTE_FLOW_ITEM_TYPE_ETH,
2234 			.spec = eth_spec,
2235 			.last = NULL,
2236 			.mask = eth_mask,
2237 		},
2238 		{
2239 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2240 				RTE_FLOW_ITEM_TYPE_END,
2241 			.spec = vlan_spec,
2242 			.last = NULL,
2243 			.mask = vlan_mask,
2244 		},
2245 		{
2246 			.type = RTE_FLOW_ITEM_TYPE_END,
2247 		},
2248 	};
2249 	struct rte_flow_action actions[] = {
2250 		{
2251 			.type = RTE_FLOW_ACTION_TYPE_RSS,
2252 		},
2253 		{
2254 			.type = RTE_FLOW_ACTION_TYPE_END,
2255 		},
2256 	};
2257 	struct rte_flow *flow;
2258 	struct rte_flow_error error;
2259 	unsigned int i;
2260 	union {
2261 		struct rte_flow_action_rss rss;
2262 		struct {
2263 			const struct rte_eth_rss_conf *rss_conf;
2264 			uint16_t num;
2265 			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2266 		} local;
2267 	} action_rss;
2268 
2269 	if (!priv->reta_idx_n)
2270 		return EINVAL;
2271 	for (i = 0; i != priv->reta_idx_n; ++i)
2272 		action_rss.local.queue[i] = (*priv->reta_idx)[i];
2273 	action_rss.local.rss_conf = &priv->rss_conf;
2274 	action_rss.local.num = priv->reta_idx_n;
2275 	actions[0].conf = (const void *)&action_rss.rss;
2276 	flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
2277 				&error);
2278 	if (!flow)
2279 		return rte_errno;
2280 	return 0;
2281 }
2282 
2283 /**
2284  * Enable a control flow configured from the control plane.
2285  *
2286  * @param dev
2287  *   Pointer to Ethernet device.
2288  * @param eth_spec
2289  *   An Ethernet flow spec to apply.
2290  * @param eth_mask
2291  *   An Ethernet flow mask to apply.
2292  *
2293  * @return
2294  *   0 on success, an errno value otherwise.
2295  */
2296 int
2297 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2298 	       struct rte_flow_item_eth *eth_spec,
2299 	       struct rte_flow_item_eth *eth_mask)
2300 {
2301 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2302 }
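
/*
 * Illustrative sketch (not part of this file): the trigger path uses the
 * helper above to install control flows such as the broadcast rule, roughly
 * as follows (spec and mask may point to the same structure):
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *		... a nonzero errno value was returned, handle the error ...
 */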
2303 
2304 /**
2305  * Destroy a flow.
2306  *
2307  * @see rte_flow_destroy()
2308  * @see rte_flow_ops
2309  */
2310 int
2311 mlx5_flow_destroy(struct rte_eth_dev *dev,
2312 		  struct rte_flow *flow,
2313 		  struct rte_flow_error *error)
2314 {
2315 	struct priv *priv = dev->data->dev_private;
2316 
2317 	(void)error;
2318 	priv_lock(priv);
2319 	priv_flow_destroy(priv, &priv->flows, flow);
2320 	priv_unlock(priv);
2321 	return 0;
2322 }
2323 
2324 /**
2325  * Destroy all flows.
2326  *
2327  * @see rte_flow_flush()
2328  * @see rte_flow_ops
2329  */
2330 int
2331 mlx5_flow_flush(struct rte_eth_dev *dev,
2332 		struct rte_flow_error *error)
2333 {
2334 	struct priv *priv = dev->data->dev_private;
2335 
2336 	(void)error;
2337 	priv_lock(priv);
2338 	priv_flow_flush(priv, &priv->flows);
2339 	priv_unlock(priv);
2340 	return 0;
2341 }
2342 
2343 /**
2344  * Isolated mode.
2345  *
2346  * @see rte_flow_isolate()
2347  * @see rte_flow_ops
2348  */
2349 int
2350 mlx5_flow_isolate(struct rte_eth_dev *dev,
2351 		  int enable,
2352 		  struct rte_flow_error *error)
2353 {
2354 	struct priv *priv = dev->data->dev_private;
2355 
2356 	priv_lock(priv);
2357 	if (dev->data->dev_started) {
2358 		rte_flow_error_set(error, EBUSY,
2359 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2360 				   NULL,
2361 				   "port must be stopped first");
2362 		priv_unlock(priv);
2363 		return -rte_errno;
2364 	}
2365 	priv->isolated = !!enable;
2366 	if (enable)
2367 		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2368 	else
2369 		priv->dev->dev_ops = &mlx5_dev_ops;
2370 	priv_unlock(priv);
2371 	return 0;
2372 }
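
/*
 * Illustrative sketch (not part of the driver): since isolated mode can only
 * be switched while the port is stopped, an application typically enables it
 * right after rte_eth_dev_configure() and before rte_eth_dev_start():
 *
 *	struct rte_flow_error error;
 *
 *	if (rte_flow_isolate(port_id, 1, &error))
 *		printf("cannot enter isolated mode: %s\n", error.message);
 */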
2373 
2374 /**
2375  * Convert a flow director filter to a generic flow.
2376  *
2377  * @param priv
2378  *   Private structure.
2379  * @param fdir_filter
2380  *   Flow director filter to add.
2381  * @param attributes
2382  *   Generic flow parameters structure.
2383  *
2384  * @return
2385  *   0 on success, errno value on error.
2386  */
2387 static int
2388 priv_fdir_filter_convert(struct priv *priv,
2389 			 const struct rte_eth_fdir_filter *fdir_filter,
2390 			 struct mlx5_fdir *attributes)
2391 {
2392 	const struct rte_eth_fdir_input *input = &fdir_filter->input;
2393 
2394 	/* Validate queue number. */
2395 	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2396 		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
2397 		return EINVAL;
2398 	}
2399 	/* Validate the behavior. */
2400 	if (fdir_filter->action.behavior != RTE_ETH_FDIR_ACCEPT) {
2401 		ERROR("invalid behavior %d", fdir_filter->action.behavior);
2402 		return ENOTSUP;
2403 	}
2404 	attributes->attr.ingress = 1;
2405 	attributes->items[0] = (struct rte_flow_item) {
2406 		.type = RTE_FLOW_ITEM_TYPE_ETH,
2407 		.spec = &attributes->l2,
2408 	};
2409 	attributes->actions[0] = (struct rte_flow_action){
2410 		.type = RTE_FLOW_ACTION_TYPE_QUEUE,
2411 		.conf = &attributes->queue,
2412 	};
2413 	attributes->queue.index = fdir_filter->action.rx_queue;
2414 	switch (fdir_filter->input.flow_type) {
2415 	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2416 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2417 			.src_addr = input->flow.udp4_flow.ip.src_ip,
2418 			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
2419 			.time_to_live = input->flow.udp4_flow.ip.ttl,
2420 			.type_of_service = input->flow.udp4_flow.ip.tos,
2421 			.next_proto_id = input->flow.udp4_flow.ip.proto,
2422 		};
2423 		attributes->l4.udp.hdr = (struct udp_hdr){
2424 			.src_port = input->flow.udp4_flow.src_port,
2425 			.dst_port = input->flow.udp4_flow.dst_port,
2426 		};
2427 		attributes->items[1] = (struct rte_flow_item){
2428 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2429 			.spec = &attributes->l3,
2430 		};
2431 		attributes->items[2] = (struct rte_flow_item){
2432 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2433 			.spec = &attributes->l4,
2434 		};
2435 		break;
2436 	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2437 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2438 			.src_addr = input->flow.tcp4_flow.ip.src_ip,
2439 			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2440 			.time_to_live = input->flow.tcp4_flow.ip.ttl,
2441 			.type_of_service = input->flow.tcp4_flow.ip.tos,
2442 			.next_proto_id = input->flow.tcp4_flow.ip.proto,
2443 		};
2444 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2445 			.src_port = input->flow.tcp4_flow.src_port,
2446 			.dst_port = input->flow.tcp4_flow.dst_port,
2447 		};
2448 		attributes->items[1] = (struct rte_flow_item){
2449 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2450 			.spec = &attributes->l3,
2451 		};
2452 		attributes->items[2] = (struct rte_flow_item){
2453 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2454 			.spec = &attributes->l4,
2455 		};
2456 		break;
2457 	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2458 		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2459 			.src_addr = input->flow.ip4_flow.src_ip,
2460 			.dst_addr = input->flow.ip4_flow.dst_ip,
2461 			.time_to_live = input->flow.ip4_flow.ttl,
2462 			.type_of_service = input->flow.ip4_flow.tos,
2463 			.next_proto_id = input->flow.ip4_flow.proto,
2464 		};
2465 		attributes->items[1] = (struct rte_flow_item){
2466 			.type = RTE_FLOW_ITEM_TYPE_IPV4,
2467 			.spec = &attributes->l3,
2468 		};
2469 		break;
2470 	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2471 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2472 			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
2473 			.proto = input->flow.udp6_flow.ip.proto,
2474 		};
2475 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2476 		       input->flow.udp6_flow.ip.src_ip,
2477 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2478 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2479 		       input->flow.udp6_flow.ip.dst_ip,
2480 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2481 		attributes->l4.udp.hdr = (struct udp_hdr){
2482 			.src_port = input->flow.udp6_flow.src_port,
2483 			.dst_port = input->flow.udp6_flow.dst_port,
2484 		};
2485 		attributes->items[1] = (struct rte_flow_item){
2486 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2487 			.spec = &attributes->l3,
2488 		};
2489 		attributes->items[2] = (struct rte_flow_item){
2490 			.type = RTE_FLOW_ITEM_TYPE_UDP,
2491 			.spec = &attributes->l4,
2492 		};
2493 		break;
2494 	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2495 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2496 			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2497 			.proto = input->flow.tcp6_flow.ip.proto,
2498 		};
2499 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2500 		       input->flow.tcp6_flow.ip.src_ip,
2501 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2502 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2503 		       input->flow.tcp6_flow.ip.dst_ip,
2504 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2505 		attributes->l4.tcp.hdr = (struct tcp_hdr){
2506 			.src_port = input->flow.tcp6_flow.src_port,
2507 			.dst_port = input->flow.tcp6_flow.dst_port,
2508 		};
2509 		attributes->items[1] = (struct rte_flow_item){
2510 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2511 			.spec = &attributes->l3,
2512 		};
2513 		attributes->items[2] = (struct rte_flow_item){
2514 			.type = RTE_FLOW_ITEM_TYPE_TCP,
2515 			.spec = &attributes->l4,
2516 		};
2517 		break;
2518 	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2519 		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2520 			.hop_limits = input->flow.ipv6_flow.hop_limits,
2521 			.proto = input->flow.ipv6_flow.proto,
2522 		};
2523 		memcpy(attributes->l3.ipv6.hdr.src_addr,
2524 		       input->flow.ipv6_flow.src_ip,
2525 		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2526 		memcpy(attributes->l3.ipv6.hdr.dst_addr,
2527 		       input->flow.ipv6_flow.dst_ip,
2528 		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2529 		attributes->items[1] = (struct rte_flow_item){
2530 			.type = RTE_FLOW_ITEM_TYPE_IPV6,
2531 			.spec = &attributes->l3,
2532 		};
2533 		break;
2534 	default:
2535 		ERROR("invalid flow type %d",
2536 		      fdir_filter->input.flow_type);
2537 		return ENOTSUP;
2538 	}
2539 	return 0;
2540 }
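
/*
 * Illustrative sketch (not part of the driver): a flow director filter that
 * the function above would translate into an ETH/IPV4/UDP pattern with a
 * queue action. Addresses, ports and the queue index are example values.
 *
 *	struct rte_eth_fdir_filter filter = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip = {
 *					.src_ip = rte_cpu_to_be_32(0xc0000201),
 *					.dst_ip = rte_cpu_to_be_32(0xc0000202),
 *				},
 *				.src_port = rte_cpu_to_be_16(1234),
 *				.dst_port = rte_cpu_to_be_16(5678),
 *			},
 *		},
 *		.action = {
 *			.rx_queue = 3,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *	};
 */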
2541 
2542 /**
2543  * Add new flow director filter and store it in list.
2544  *
2545  * @param priv
2546  *   Private structure.
2547  * @param fdir_filter
2548  *   Flow director filter to add.
2549  *
2550  * @return
2551  *   0 on success, errno value on failure.
2552  */
2553 static int
2554 priv_fdir_filter_add(struct priv *priv,
2555 		     const struct rte_eth_fdir_filter *fdir_filter)
2556 {
2557 	struct mlx5_fdir attributes = {
2558 		.attr.group = 0,
2559 	};
2560 	struct mlx5_flow_parse parser = {
2561 		.layer = HASH_RXQ_ETH,
2562 	};
2563 	struct rte_flow_error error;
2564 	struct rte_flow *flow;
2565 	int ret;
2566 
2567 	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2568 	if (ret)
2569 		return -ret;
2570 	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2571 				attributes.actions, &error, &parser);
2572 	if (ret)
2573 		return -ret;
2574 	flow = priv_flow_create(priv,
2575 				&priv->flows,
2576 				&attributes.attr,
2577 				attributes.items,
2578 				attributes.actions,
2579 				&error);
2580 	if (flow) {
2581 		/* priv_flow_create() already added the flow to the list. */
2582 		DEBUG("FDIR created %p", (void *)flow);
2583 		return 0;
2584 	}
2585 	return ENOTSUP;
2586 }
2587 
2588 /**
2589  * Delete specific filter.
2590  *
2591  * @param priv
2592  *   Private structure.
2593  * @param fdir_filter
2594  *   Filter to be deleted.
2595  *
2596  * @return
2597  *   0 on success, errno value on failure.
2598  */
2599 static int
2600 priv_fdir_filter_delete(struct priv *priv,
2601 			const struct rte_eth_fdir_filter *fdir_filter)
2602 {
2603 	struct mlx5_fdir attributes;
2604 	struct mlx5_flow_parse parser = {
2605 		.create = 1,
2606 		.layer = HASH_RXQ_ETH,
2607 	};
2608 	struct rte_flow_error error;
2609 	struct rte_flow *flow;
2610 	unsigned int i;
2611 	int ret;
2612 
2613 	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2614 	if (ret)
2615 		return -ret;
2616 	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2617 				attributes.actions, &error, &parser);
2618 	if (ret)
2619 		goto exit;
2620 	TAILQ_FOREACH(flow, &priv->flows, next) {
2621 		struct ibv_flow_attr *attr;
2622 		struct ibv_spec_header *attr_h;
2623 		void *spec;
2624 		struct ibv_flow_attr *flow_attr;
2625 		struct ibv_spec_header *flow_h;
2626 		void *flow_spec;
2627 		unsigned int specs_n;
2628 
2629 		if (parser.drop)
2630 			attr = parser.drop_q.ibv_attr;
2631 		else
2632 			attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2633 		if (flow->drop)
2634 			flow_attr = flow->drxq.ibv_attr;
2635 		else
2636 			flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2637 		/* Compare first the attributes. */
2638 		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2639 			continue;
2640 		if (attr->num_of_specs == 0)
2641 			continue;
2642 		spec = (void *)((uintptr_t)attr +
2643 				sizeof(struct ibv_flow_attr));
2644 		flow_spec = (void *)((uintptr_t)flow_attr +
2645 				     sizeof(struct ibv_flow_attr));
2646 		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2647 		for (i = 0; i != specs_n; ++i) {
2648 			attr_h = spec;
2649 			flow_h = flow_spec;
2650 			if (memcmp(spec, flow_spec,
2651 				   RTE_MIN(attr_h->size, flow_h->size)))
2652 				break;
2653 			spec = (void *)((uintptr_t)spec + attr_h->size);
2654 			flow_spec = (void *)((uintptr_t)flow_spec +
2655 					     flow_h->size);
2656 		}
2657 		if (i == specs_n)
2658 			break; /* All the specs matched: this is the flow. */
2659 	}
2660 	if (flow)
2661 		priv_flow_destroy(priv, &priv->flows, flow);
2662 exit:
2663 	if (parser.drop) {
2664 		rte_free(parser.drop_q.ibv_attr);
2665 	} else {
2666 		for (i = 0; i != hash_rxq_init_n; ++i) {
2667 			if (parser.queue[i].ibv_attr)
2668 				rte_free(parser.queue[i].ibv_attr);
2669 		}
2670 	}
2671 	return -ret;
2672 }
2673 
2674 /**
2675  * Update queue for specific filter.
2676  *
2677  * @param priv
2678  *   Private structure.
2679  * @param fdir_filter
2680  *   Filter to be updated.
2681  *
2682  * @return
2683  *   0 on success, errno value on failure.
2684  */
2685 static int
2686 priv_fdir_filter_update(struct priv *priv,
2687 			const struct rte_eth_fdir_filter *fdir_filter)
2688 {
2689 	int ret;
2690 
2691 	ret = priv_fdir_filter_delete(priv, fdir_filter);
2692 	if (ret)
2693 		return ret;
2694 	ret = priv_fdir_filter_add(priv, fdir_filter);
2695 	return ret;
2696 }
2697 
2698 /**
2699  * Flush all filters.
2700  *
2701  * @param priv
2702  *   Private structure.
2703  */
2704 static void
2705 priv_fdir_filter_flush(struct priv *priv)
2706 {
2707 	priv_flow_flush(priv, &priv->flows);
2708 }
2709 
2710 /**
2711  * Get flow director information.
2712  *
2713  * @param priv
2714  *   Private structure.
2715  * @param[out] fdir_info
2716  *   Resulting flow director information.
2717  */
2718 static void
2719 priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
2720 {
2721 	struct rte_eth_fdir_masks *mask =
2722 		&priv->dev->data->dev_conf.fdir_conf.mask;
2723 
2724 	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
2725 	fdir_info->guarant_spc = 0;
2726 	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
2727 	fdir_info->max_flexpayload = 0;
2728 	fdir_info->flow_types_mask[0] = 0;
2729 	fdir_info->flex_payload_unit = 0;
2730 	fdir_info->max_flex_payload_segment_num = 0;
2731 	fdir_info->flex_payload_limit = 0;
2732 	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2733 }
2734 
2735 /**
2736  * Deal with flow director operations.
2737  *
2738  * @param priv
2739  *   Pointer to private structure.
2740  * @param filter_op
2741  *   Operation to perform.
2742  * @param arg
2743  *   Pointer to operation-specific structure.
2744  *
2745  * @return
2746  *   0 on success, errno value on failure.
2747  */
2748 static int
2749 priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
2750 {
2751 	enum rte_fdir_mode fdir_mode =
2752 		priv->dev->data->dev_conf.fdir_conf.mode;
2753 	int ret = 0;
2754 
2755 	if (filter_op == RTE_ETH_FILTER_NOP)
2756 		return 0;
2757 	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
2758 	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
2759 		ERROR("%p: flow director mode %d not supported",
2760 		      (void *)priv, fdir_mode);
2761 		return EINVAL;
2762 	}
2763 	switch (filter_op) {
2764 	case RTE_ETH_FILTER_ADD:
2765 		ret = priv_fdir_filter_add(priv, arg);
2766 		break;
2767 	case RTE_ETH_FILTER_UPDATE:
2768 		ret = priv_fdir_filter_update(priv, arg);
2769 		break;
2770 	case RTE_ETH_FILTER_DELETE:
2771 		ret = priv_fdir_filter_delete(priv, arg);
2772 		break;
2773 	case RTE_ETH_FILTER_FLUSH:
2774 		priv_fdir_filter_flush(priv);
2775 		break;
2776 	case RTE_ETH_FILTER_INFO:
2777 		priv_fdir_info_get(priv, arg);
2778 		break;
2779 	default:
2780 		DEBUG("%p: unknown operation %u", (void *)priv,
2781 		      filter_op);
2782 		ret = EINVAL;
2783 		break;
2784 	}
2785 	return ret;
2786 }
2787 
2788 /**
2789  * Manage filter operations.
2790  *
2791  * @param dev
2792  *   Pointer to Ethernet device structure.
2793  * @param filter_type
2794  *   Filter type.
2795  * @param filter_op
2796  *   Operation to perform.
2797  * @param arg
2798  *   Pointer to operation-specific structure.
2799  *
2800  * @return
2801  *   0 on success, negative errno value on failure.
2802  */
2803 int
2804 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
2805 		     enum rte_filter_type filter_type,
2806 		     enum rte_filter_op filter_op,
2807 		     void *arg)
2808 {
2809 	int ret = EINVAL;
2810 	struct priv *priv = dev->data->dev_private;
2811 
2812 	switch (filter_type) {
2813 	case RTE_ETH_FILTER_GENERIC:
2814 		if (filter_op != RTE_ETH_FILTER_GET)
2815 			return -EINVAL;
2816 		*(const void **)arg = &mlx5_flow_ops;
2817 		return 0;
2818 	case RTE_ETH_FILTER_FDIR:
2819 		priv_lock(priv);
2820 		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
2821 		priv_unlock(priv);
2822 		break;
2823 	default:
2824 		ERROR("%p: filter type (%d) not supported",
2825 		      (void *)dev, filter_type);
2826 		break;
2827 	}
2828 	return -ret;
2829 }
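
/*
 * Illustrative sketch (not part of the driver): flow director requests enter
 * through the filter_ctrl callback above, e.g. to add the filter shown
 * earlier (the fdir mode must be set to "perfect" in the device
 * configuration beforehand):
 *
 *	int ret;
 *
 *	ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				      RTE_ETH_FILTER_ADD, &filter);
 *	if (ret)
 *		printf("cannot add flow director filter: %s\n",
 *		       strerror(-ret));
 */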
2830