xref: /dpdk/drivers/net/tap/tap_flow.c (revision 43fd3624fdfe3a33904a9b64d94306dd3d4f2c13)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2017 6WIND S.A.
3  * Copyright 2017 Mellanox Technologies, Ltd
4  */
5 
6 #include <errno.h>
7 #include <string.h>
8 #include <unistd.h>
9 #include <sys/queue.h>
10 #include <sys/resource.h>
11 
12 #include <rte_byteorder.h>
13 #include <rte_jhash.h>
14 #include <rte_thash.h>
15 #include <rte_random.h>
16 #include <rte_malloc.h>
17 #include <rte_eth_tap.h>
18 #include <rte_uuid.h>
19 
20 #include <tap_flow.h>
21 #include <tap_tcmsgs.h>
22 #include <tap_rss.h>
23 
24 #ifdef HAVE_BPF_RSS
25 /* Workaround for warning in bpftool generated skeleton code */
26 __rte_diagnostic_push
27 __rte_diagnostic_ignored_wcast_qual
28 #include "tap_rss.skel.h"
29 __rte_diagnostic_pop
30 #endif
31 
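/*
 * Fixed TC filter handles for two of the implicit rules: the ISOLATE rule
 * keeps a static handle because its action is swapped between DROP and
 * PASSTHRU when isolation is toggled, and the remote promiscuous rule keeps
 * one so that adding it a second time fails with EEXIST instead of
 * installing a duplicate (see tap_flow_implicit_create()).
 */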
32 #define ISOLATE_HANDLE 1
33 #define REMOTE_PROMISCUOUS_HANDLE 2
34 
35 struct rte_flow {
36 	LIST_ENTRY(rte_flow) next; /* Pointer to the next rte_flow structure */
37 	struct rte_flow *remote_flow; /* associated remote flow */
38 	struct tap_nlmsg msg;
39 };
40 
41 struct convert_data {
42 	uint16_t eth_type;
43 	uint16_t ip_proto;
44 	uint8_t vlan;
45 	struct rte_flow *flow;
46 };
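/*
 * State shared by the tap_flow_create_*() conversion callbacks while a
 * pattern is parsed: eth_type/ip_proto record what an earlier item matched
 * so that later layers can be checked for consistency, vlan flags that a
 * VLAN item was already seen, and flow (when not NULL) is the rule whose
 * netlink message is being filled.
 */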
47 
48 struct remote_rule {
49 	struct rte_flow_attr attr;
50 	struct rte_flow_item items[2];
51 	struct rte_flow_action actions[2];
52 	int mirred;
53 };
54 
55 struct action_data {
56 	char id[16];
57 
58 	union {
59 		struct tc_gact gact;
60 		struct tc_mirred mirred;
61 		struct skbedit {
62 			struct tc_skbedit skbedit;
63 			uint16_t queue;
64 			uint32_t mark;
65 		} skbedit;
66 #ifdef HAVE_BPF_RSS
67 		struct bpf {
68 			struct tc_act_bpf bpf;
69 			uint32_t map_key;
70 			int bpf_fd;
71 			const char *annotation;
72 		} bpf;
73 #endif
74 	};
75 };
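/*
 * One TC action to be serialized by add_action(): id is the action kind
 * string sent as TCA_ACT_KIND ("gact", "mirred", "skbedit" or "bpf"), the
 * union holds the kind-specific parameters written under TCA_ACT_OPTIONS.
 */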
76 
77 static int tap_flow_create_eth(const struct rte_flow_item *item, struct convert_data *info);
78 static int tap_flow_create_vlan(const struct rte_flow_item *item, struct convert_data *info);
79 static int tap_flow_create_ipv4(const struct rte_flow_item *item, struct convert_data *info);
80 static int tap_flow_create_ipv6(const struct rte_flow_item *item, struct convert_data *info);
81 static int tap_flow_create_udp(const struct rte_flow_item *item, struct convert_data *info);
82 static int tap_flow_create_tcp(const struct rte_flow_item *item, struct convert_data *info);
83 static int
84 tap_flow_validate(struct rte_eth_dev *dev,
85 		  const struct rte_flow_attr *attr,
86 		  const struct rte_flow_item items[],
87 		  const struct rte_flow_action actions[],
88 		  struct rte_flow_error *error);
89 
90 static struct rte_flow *
91 tap_flow_create(struct rte_eth_dev *dev,
92 		const struct rte_flow_attr *attr,
93 		const struct rte_flow_item items[],
94 		const struct rte_flow_action actions[],
95 		struct rte_flow_error *error);
96 
97 static void
98 tap_flow_free(struct pmd_internals *pmd,
99 	struct rte_flow *flow);
100 
101 static int
102 tap_flow_destroy(struct rte_eth_dev *dev,
103 		 struct rte_flow *flow,
104 		 struct rte_flow_error *error);
105 
106 static int
107 tap_flow_isolate(struct rte_eth_dev *dev,
108 		 int set,
109 		 struct rte_flow_error *error);
110 
111 #ifdef HAVE_BPF_RSS
112 static int rss_enable(struct pmd_internals *pmd, struct rte_flow_error *error);
113 static int rss_add_actions(struct rte_flow *flow, struct pmd_internals *pmd,
114 			const struct rte_flow_action_rss *rss,
115 			struct rte_flow_error *error);
116 #endif
117 
118 static const struct rte_flow_ops tap_flow_ops = {
119 	.validate = tap_flow_validate,
120 	.create = tap_flow_create,
121 	.destroy = tap_flow_destroy,
122 	.flush = tap_flow_flush,
123 	.isolate = tap_flow_isolate,
124 };
125 
126 /* Static initializer for items. */
127 #define ITEMS(...) \
128 	(const enum rte_flow_item_type []){ \
129 		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
130 	}
131 
132 /* Structure to generate a simple graph of layers supported by the NIC. */
133 struct tap_flow_items {
134 	/* Bit-mask corresponding to what is supported for this item. */
135 	const void *mask;
136 	const unsigned int mask_sz; /* Bit-mask size in bytes. */
137 	/*
138 	 * Bit-mask corresponding to the default mask, if none is provided
139 	 * along with the item.
140 	 */
141 	const void *default_mask;
142 	/* Conversion function from rte_flow to netlink attributes. */
143 	int (*convert)(const struct rte_flow_item *item, struct convert_data *info);
144 
145 	/* List of possible following items.  */
146 	const enum rte_flow_item_type *const items;
147 };
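/*
 * The graph below is walked by priv_flow_process(): each pattern item of a
 * flow must appear in the .items list of the item preceding it, starting
 * from the RTE_FLOW_ITEM_TYPE_END entry (e.g. END -> ETH -> IPV4 -> UDP).
 */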
148 
149 /* Graph of supported items and associated actions. */
150 static const struct tap_flow_items tap_flow_items[] = {
151 	[RTE_FLOW_ITEM_TYPE_END] = {
152 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
153 	},
154 	[RTE_FLOW_ITEM_TYPE_ETH] = {
155 		.items = ITEMS(
156 			RTE_FLOW_ITEM_TYPE_VLAN,
157 			RTE_FLOW_ITEM_TYPE_IPV4,
158 			RTE_FLOW_ITEM_TYPE_IPV6),
159 		.mask = &(const struct rte_flow_item_eth){
160 			.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
161 			.hdr.src_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
162 			.hdr.ether_type = -1,
163 		},
164 		.mask_sz = sizeof(struct rte_flow_item_eth),
165 		.default_mask = &rte_flow_item_eth_mask,
166 		.convert = tap_flow_create_eth,
167 	},
168 	[RTE_FLOW_ITEM_TYPE_VLAN] = {
169 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
170 			       RTE_FLOW_ITEM_TYPE_IPV6),
171 		.mask = &(const struct rte_flow_item_vlan){
172 			/* DEI matching is not supported */
173 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
174 			.hdr.vlan_tci = 0xffef,
175 #else
176 			.hdr.vlan_tci = 0xefff,
177 #endif
178 			.hdr.eth_proto = -1,
179 		},
180 		.mask_sz = sizeof(struct rte_flow_item_vlan),
181 		.default_mask = &rte_flow_item_vlan_mask,
182 		.convert = tap_flow_create_vlan,
183 	},
184 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
185 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
186 			       RTE_FLOW_ITEM_TYPE_TCP),
187 		.mask = &(const struct rte_flow_item_ipv4){
188 			.hdr = {
189 				.src_addr = -1,
190 				.dst_addr = -1,
191 				.next_proto_id = -1,
192 			},
193 		},
194 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
195 		.default_mask = &rte_flow_item_ipv4_mask,
196 		.convert = tap_flow_create_ipv4,
197 	},
198 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
199 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
200 			       RTE_FLOW_ITEM_TYPE_TCP),
201 		.mask = &(const struct rte_flow_item_ipv6){
202 			.hdr = {
203 				.src_addr = RTE_IPV6_MASK_FULL,
204 				.dst_addr = RTE_IPV6_MASK_FULL,
205 				.proto = -1,
206 			},
207 		},
208 		.mask_sz = sizeof(struct rte_flow_item_ipv6),
209 		.default_mask = &rte_flow_item_ipv6_mask,
210 		.convert = tap_flow_create_ipv6,
211 	},
212 	[RTE_FLOW_ITEM_TYPE_UDP] = {
213 		.mask = &(const struct rte_flow_item_udp){
214 			.hdr = {
215 				.src_port = -1,
216 				.dst_port = -1,
217 			},
218 		},
219 		.mask_sz = sizeof(struct rte_flow_item_udp),
220 		.default_mask = &rte_flow_item_udp_mask,
221 		.convert = tap_flow_create_udp,
222 	},
223 	[RTE_FLOW_ITEM_TYPE_TCP] = {
224 		.mask = &(const struct rte_flow_item_tcp){
225 			.hdr = {
226 				.src_port = -1,
227 				.dst_port = -1,
228 			},
229 		},
230 		.mask_sz = sizeof(struct rte_flow_item_tcp),
231 		.default_mask = &rte_flow_item_tcp_mask,
232 		.convert = tap_flow_create_tcp,
233 	},
234 };
235 
236 /*
237  *                TC rules, by growing priority
238  *
239  *        Remote netdevice                  Tap netdevice
240  * +-------------+-------------+  +-------------+-------------+
241  * |   Ingress   |   Egress    |  |   Ingress   |   Egress    |
242  * |-------------|-------------|  |-------------|-------------|
243  * |             |  \       /  |  |             |  REMOTE TX  | prio 1
244  * |             |   \     /   |  |             |   \     /   | prio 2
245  * |  EXPLICIT   |    \   /    |  |  EXPLICIT   |    \   /    |   .
246  * |             |     \ /     |  |             |     \ /     |   .
247  * |    RULES    |      X      |  |    RULES    |      X      |   .
248  * |      .      |     / \     |  |      .      |     / \     |   .
249  * |      .      |    /   \    |  |      .      |    /   \    |   .
250  * |      .      |   /     \   |  |      .      |   /     \   |   .
251  * |      .      |  /       \  |  |      .      |  /       \  |   .
252  *
253  *      ....           ....           ....           ....
254  *
255  * |      .      |  \       /  |  |      .      |  \       /  |   .
256  * |      .      |   \     /   |  |      .      |   \     /   |   .
257  * |             |    \   /    |  |             |    \   /    |
258  * |  LOCAL_MAC  |     \ /     |  |    \   /    |     \ /     | last prio - 5
259  * |   PROMISC   |      X      |  |     \ /     |      X      | last prio - 4
260  * |   ALLMULTI  |     / \     |  |      X      |     / \     | last prio - 3
261  * |  BROADCAST  |    /   \    |  |     / \     |    /   \    | last prio - 2
262  * | BROADCASTV6 |   /     \   |  |    /   \    |   /     \   | last prio - 1
263  * |     xx      |  /       \  |  |   ISOLATE   |  /       \  | last prio
264  * +-------------+-------------+  +-------------+-------------+
265  *
266  * The implicit flow rules are stored in a list, the last two entries always
267  * being the ISOLATE and REMOTE_TX rules, e.g.:
268  *
269  * LOCAL_MAC -> BROADCAST -> BROADCASTV6 -> REMOTE_TX -> ISOLATE -> NULL
270  *
271  * This lets tap_flow_isolate() remove the implicit rules by popping the list
272  * head, as long as that rule applies to the remote netdevice. The
273  * implicit rule for TX redirection is not removed, as isolate concerns only
274  * incoming traffic.
275  */
276 
277 static struct remote_rule implicit_rte_flows[TAP_REMOTE_MAX_IDX] = {
278 	[TAP_REMOTE_LOCAL_MAC] = {
279 		.attr = {
280 			.group = MAX_GROUP,
281 			.priority = PRIORITY_MASK - TAP_REMOTE_LOCAL_MAC,
282 			.ingress = 1,
283 		},
284 		.items[0] = {
285 			.type = RTE_FLOW_ITEM_TYPE_ETH,
286 			.mask =  &(const struct rte_flow_item_eth){
287 				.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
288 			},
289 		},
290 		.items[1] = {
291 			.type = RTE_FLOW_ITEM_TYPE_END,
292 		},
293 		.mirred = TCA_EGRESS_REDIR,
294 	},
295 	[TAP_REMOTE_BROADCAST] = {
296 		.attr = {
297 			.group = MAX_GROUP,
298 			.priority = PRIORITY_MASK - TAP_REMOTE_BROADCAST,
299 			.ingress = 1,
300 		},
301 		.items[0] = {
302 			.type = RTE_FLOW_ITEM_TYPE_ETH,
303 			.mask =  &(const struct rte_flow_item_eth){
304 				.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
305 			},
306 			.spec = &(const struct rte_flow_item_eth){
307 				.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
308 			},
309 		},
310 		.items[1] = {
311 			.type = RTE_FLOW_ITEM_TYPE_END,
312 		},
313 		.mirred = TCA_EGRESS_MIRROR,
314 	},
315 	[TAP_REMOTE_BROADCASTV6] = {
316 		.attr = {
317 			.group = MAX_GROUP,
318 			.priority = PRIORITY_MASK - TAP_REMOTE_BROADCASTV6,
319 			.ingress = 1,
320 		},
321 		.items[0] = {
322 			.type = RTE_FLOW_ITEM_TYPE_ETH,
323 			.mask =  &(const struct rte_flow_item_eth){
324 				.hdr.dst_addr.addr_bytes = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 },
325 			},
326 			.spec = &(const struct rte_flow_item_eth){
327 				.hdr.dst_addr.addr_bytes = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 },
328 			},
329 		},
330 		.items[1] = {
331 			.type = RTE_FLOW_ITEM_TYPE_END,
332 		},
333 		.mirred = TCA_EGRESS_MIRROR,
334 	},
335 	[TAP_REMOTE_PROMISC] = {
336 		.attr = {
337 			.group = MAX_GROUP,
338 			.priority = PRIORITY_MASK - TAP_REMOTE_PROMISC,
339 			.ingress = 1,
340 		},
341 		.items[0] = {
342 			.type = RTE_FLOW_ITEM_TYPE_VOID,
343 		},
344 		.items[1] = {
345 			.type = RTE_FLOW_ITEM_TYPE_END,
346 		},
347 		.mirred = TCA_EGRESS_MIRROR,
348 	},
349 	[TAP_REMOTE_ALLMULTI] = {
350 		.attr = {
351 			.group = MAX_GROUP,
352 			.priority = PRIORITY_MASK - TAP_REMOTE_ALLMULTI,
353 			.ingress = 1,
354 		},
355 		.items[0] = {
356 			.type = RTE_FLOW_ITEM_TYPE_ETH,
357 			.mask =  &(const struct rte_flow_item_eth){
358 				.hdr.dst_addr.addr_bytes = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 },
359 			},
360 			.spec = &(const struct rte_flow_item_eth){
361 				.hdr.dst_addr.addr_bytes = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 },
362 			},
363 		},
364 		.items[1] = {
365 			.type = RTE_FLOW_ITEM_TYPE_END,
366 		},
367 		.mirred = TCA_EGRESS_MIRROR,
368 	},
369 	[TAP_REMOTE_TX] = {
370 		.attr = {
371 			.group = 0,
372 			.priority = TAP_REMOTE_TX,
373 			.egress = 1,
374 		},
375 		.items[0] = {
376 			.type = RTE_FLOW_ITEM_TYPE_VOID,
377 		},
378 		.items[1] = {
379 			.type = RTE_FLOW_ITEM_TYPE_END,
380 		},
381 		.mirred = TCA_EGRESS_MIRROR,
382 	},
383 	[TAP_ISOLATE] = {
384 		.attr = {
385 			.group = MAX_GROUP,
386 			.priority = PRIORITY_MASK - TAP_ISOLATE,
387 			.ingress = 1,
388 		},
389 		.items[0] = {
390 			.type = RTE_FLOW_ITEM_TYPE_VOID,
391 		},
392 		.items[1] = {
393 			.type = RTE_FLOW_ITEM_TYPE_END,
394 		},
395 	},
396 };
397 
398 /**
399  * Make as many checks as possible on an Ethernet item, and if a flow is
400  * provided, fill it appropriately with Ethernet info.
401  *
402  * @param[in] item
403  *   Item specification.
404  * @param[in, out] info
405  *   Additional data structure to tell next layers we've been here.
406  *
407  * @return
408  *   0 if checks are alright, -1 otherwise.
409  */
410 static int
411 tap_flow_create_eth(const struct rte_flow_item *item, struct convert_data *info)
412 {
413 	const struct rte_flow_item_eth *spec = item->spec;
414 	const struct rte_flow_item_eth *mask = item->mask;
415 	struct rte_flow *flow = info->flow;
416 	struct tap_nlmsg *msg;
417 
418 	/* use default mask if none provided */
419 	if (!mask)
420 		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_ETH].default_mask;
421 	/* TC does not support eth_type masking. Only accept if exact match. */
422 	if (mask->hdr.ether_type && mask->hdr.ether_type != 0xffff)
423 		return -1;
424 	if (!spec)
425 		return 0;
426 	/* store eth_type for consistency if ipv4/6 pattern item comes next */
427 	if (spec->hdr.ether_type & mask->hdr.ether_type)
428 		info->eth_type = spec->hdr.ether_type;
429 	if (!flow)
430 		return 0;
431 	msg = &flow->msg;
432 	if (!rte_is_zero_ether_addr(&mask->hdr.dst_addr)) {
433 		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_DST,
434 			RTE_ETHER_ADDR_LEN,
435 			   &spec->hdr.dst_addr.addr_bytes);
436 		tap_nlattr_add(&msg->nh,
437 			   TCA_FLOWER_KEY_ETH_DST_MASK, RTE_ETHER_ADDR_LEN,
438 			   &mask->hdr.dst_addr.addr_bytes);
439 	}
440 	if (!rte_is_zero_ether_addr(&mask->hdr.src_addr)) {
441 		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_SRC,
442 			RTE_ETHER_ADDR_LEN,
443 			&spec->hdr.src_addr.addr_bytes);
444 		tap_nlattr_add(&msg->nh,
445 			   TCA_FLOWER_KEY_ETH_SRC_MASK, RTE_ETHER_ADDR_LEN,
446 			   &mask->hdr.src_addr.addr_bytes);
447 	}
448 	return 0;
449 }
450 
451 /**
452  * Make as many checks as possible on a VLAN item, and if a flow is provided,
453  * fill it appropriately with VLAN info.
454  *
455  * @param[in] item
456  *   Item specification.
457  * @param[in, out] info
458  *   Additional data structure to tell next layers we've been here.
459  *
460  * @return
461  *   0 if checks are alright, -1 otherwise.
462  */
463 static int
464 tap_flow_create_vlan(const struct rte_flow_item *item, struct convert_data *info)
465 {
466 	const struct rte_flow_item_vlan *spec = item->spec;
467 	const struct rte_flow_item_vlan *mask = item->mask;
468 	struct rte_flow *flow = info->flow;
469 	struct tap_nlmsg *msg;
470 
471 	/* use default mask if none provided */
472 	if (!mask)
473 		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_VLAN].default_mask;
474 	/* Outer TPID cannot be matched. */
475 	if (info->eth_type)
476 		return -1;
477 	/* Double-tagging not supported. */
478 	if (info->vlan)
479 		return -1;
480 	info->vlan = 1;
481 	if (mask->hdr.eth_proto) {
482 		/* TC does not support partial eth_type masking */
483 		if (mask->hdr.eth_proto != RTE_BE16(0xffff))
484 			return -1;
485 		info->eth_type = spec->hdr.eth_proto;
486 	}
487 	if (!flow)
488 		return 0;
489 	msg = &flow->msg;
490 	msg->t.tcm_info = TC_H_MAKE(msg->t.tcm_info, htons(ETH_P_8021Q));
491 #define VLAN_PRIO(tci) ((tci) >> 13)
492 #define VLAN_ID(tci) ((tci) & 0xfff)
493 	if (!spec)
494 		return 0;
495 	if (spec->hdr.vlan_tci) {
496 		uint16_t tci = ntohs(spec->hdr.vlan_tci) & mask->hdr.vlan_tci;
497 		uint16_t prio = VLAN_PRIO(tci);
498 		uint16_t vid = VLAN_ID(tci);
499 
500 		if (prio)
501 			tap_nlattr_add8(&msg->nh,
502 					TCA_FLOWER_KEY_VLAN_PRIO, prio);
503 		if (vid)
504 			tap_nlattr_add16(&msg->nh,
505 					 TCA_FLOWER_KEY_VLAN_ID, vid);
506 	}
507 	return 0;
508 }
509 
510 /**
511  * Make as many checks as possible on an IPv4 item, and if a flow is provided,
512  * fill it appropriately with IPv4 info.
513  *
514  * @param[in] item
515  *   Item specification.
516  * @param[in, out] info
517  *   Additional data structure to tell next layers we've been here.
518  *
519  * @return
520  *   0 if checks are alright, -1 otherwise.
521  */
522 static int
523 tap_flow_create_ipv4(const struct rte_flow_item *item, struct convert_data *info)
524 {
525 	const struct rte_flow_item_ipv4 *spec = item->spec;
526 	const struct rte_flow_item_ipv4 *mask = item->mask;
527 	struct rte_flow *flow = info->flow;
528 	struct tap_nlmsg *msg;
529 
530 	/* use default mask if none provided */
531 	if (!mask)
532 		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_IPV4].default_mask;
533 	/* check that previous eth type is compatible with ipv4 */
534 	if (info->eth_type && info->eth_type != htons(ETH_P_IP))
535 		return -1;
536 	/* store ip_proto for consistency if udp/tcp pattern item comes next */
537 	if (spec)
538 		info->ip_proto = spec->hdr.next_proto_id;
539 	if (!flow)
540 		return 0;
541 	msg = &flow->msg;
542 	if (!info->eth_type)
543 		info->eth_type = htons(ETH_P_IP);
544 	if (!spec)
545 		return 0;
546 	if (mask->hdr.dst_addr) {
547 		tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST,
548 			     spec->hdr.dst_addr);
549 		tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST_MASK,
550 			     mask->hdr.dst_addr);
551 	}
552 	if (mask->hdr.src_addr) {
553 		tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC,
554 			     spec->hdr.src_addr);
555 		tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC_MASK,
556 			     mask->hdr.src_addr);
557 	}
558 	if (spec->hdr.next_proto_id)
559 		tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO,
560 			    spec->hdr.next_proto_id);
561 	return 0;
562 }
563 
564 /**
565  * Make as many checks as possible on an IPv6 item, and if a flow is provided,
566  * fill it appropriately with IPv6 info.
567  *
568  * @param[in] item
569  *   Item specification.
570  * @param[in, out] info
571  *   Additional data structure to tell next layers we've been here.
572  *
573  * @return
574  *   0 if checks are alright, -1 otherwise.
575  */
576 static int
577 tap_flow_create_ipv6(const struct rte_flow_item *item, struct convert_data *info)
578 {
579 	const struct rte_flow_item_ipv6 *spec = item->spec;
580 	const struct rte_flow_item_ipv6 *mask = item->mask;
581 	struct rte_flow *flow = info->flow;
582 	uint8_t empty_addr[16] = { 0 };
583 	struct tap_nlmsg *msg;
584 
585 	/* use default mask if none provided */
586 	if (!mask)
587 		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_IPV6].default_mask;
588 	/* check that previous eth type is compatible with ipv6 */
589 	if (info->eth_type && info->eth_type != htons(ETH_P_IPV6))
590 		return -1;
591 	/* store ip_proto for consistency if udp/tcp pattern item comes next */
592 	if (spec)
593 		info->ip_proto = spec->hdr.proto;
594 	if (!flow)
595 		return 0;
596 	msg = &flow->msg;
597 	if (!info->eth_type)
598 		info->eth_type = htons(ETH_P_IPV6);
599 	if (!spec)
600 		return 0;
601 	if (memcmp(&mask->hdr.dst_addr, empty_addr, 16)) {
602 		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST,
603 			   sizeof(spec->hdr.dst_addr), &spec->hdr.dst_addr);
604 		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST_MASK,
605 			   sizeof(mask->hdr.dst_addr), &mask->hdr.dst_addr);
606 	}
607 	if (memcmp(&mask->hdr.src_addr, empty_addr, 16)) {
608 		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC,
609 			   sizeof(spec->hdr.src_addr), &spec->hdr.src_addr);
610 		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
611 			   sizeof(mask->hdr.src_addr), &mask->hdr.src_addr);
612 	}
613 	if (spec->hdr.proto)
614 		tap_nlattr_add8(&msg->nh,
615 				TCA_FLOWER_KEY_IP_PROTO, spec->hdr.proto);
616 	return 0;
617 }
618 
619 /**
620  * Make as many checks as possible on a UDP item, and if a flow is provided,
621  * fill it appropriately with UDP info.
622  *
623  * @param[in] item
624  *   Item specification.
625  * @param[in, out] info
626  *   Additional data structure to tell next layers we've been here.
627  *
628  * @return
629  *   0 if checks are alright, -1 otherwise.
630  */
631 static int
632 tap_flow_create_udp(const struct rte_flow_item *item, struct convert_data *info)
633 {
634 	const struct rte_flow_item_udp *spec = item->spec;
635 	const struct rte_flow_item_udp *mask = item->mask;
636 	struct rte_flow *flow = info->flow;
637 	struct tap_nlmsg *msg;
638 
639 	/* use default mask if none provided */
640 	if (!mask)
641 		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_UDP].default_mask;
642 	/* check that previous ip_proto is compatible with udp */
643 	if (info->ip_proto && info->ip_proto != IPPROTO_UDP)
644 		return -1;
645 	/* TC does not support UDP port masking. Only accept if exact match. */
646 	if ((mask->hdr.src_port && mask->hdr.src_port != 0xffff) ||
647 	    (mask->hdr.dst_port && mask->hdr.dst_port != 0xffff))
648 		return -1;
649 	if (!flow)
650 		return 0;
651 	msg = &flow->msg;
652 	tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_UDP);
653 	if (!spec)
654 		return 0;
655 	if (mask->hdr.dst_port)
656 		tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_DST,
657 			     spec->hdr.dst_port);
658 	if (mask->hdr.src_port)
659 		tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_SRC,
660 			     spec->hdr.src_port);
661 	return 0;
662 }
663 
664 /**
665  * Make as many checks as possible on a TCP item, and if a flow is provided,
666  * fill it appropriately with TCP info.
667  *
668  * @param[in] item
669  *   Item specification.
670  * @param[in, out] info
671  *   Additional data structure to tell next layers we've been here.
672  *
673  * @return
674  *   0 if checks are alright, -1 otherwise.
675  */
676 static int
677 tap_flow_create_tcp(const struct rte_flow_item *item, struct convert_data *info)
678 {
679 	const struct rte_flow_item_tcp *spec = item->spec;
680 	const struct rte_flow_item_tcp *mask = item->mask;
681 	struct rte_flow *flow = info->flow;
682 	struct tap_nlmsg *msg;
683 
684 	/* use default mask if none provided */
685 	if (!mask)
686 		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_TCP].default_mask;
687 	/* check that previous ip_proto is compatible with tcp */
688 	if (info->ip_proto && info->ip_proto != IPPROTO_TCP)
689 		return -1;
690 	/* TC does not support TCP port masking. Only accept if exact match. */
691 	if ((mask->hdr.src_port && mask->hdr.src_port != 0xffff) ||
692 	    (mask->hdr.dst_port && mask->hdr.dst_port != 0xffff))
693 		return -1;
694 	if (!flow)
695 		return 0;
696 	msg = &flow->msg;
697 	tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_TCP);
698 	if (!spec)
699 		return 0;
700 	if (mask->hdr.dst_port)
701 		tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_DST,
702 			     spec->hdr.dst_port);
703 	if (mask->hdr.src_port)
704 		tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_SRC,
705 			     spec->hdr.src_port);
706 	return 0;
707 }
708 
709 /**
710  * Check support for a given item.
711  *
712  * @param[in] item
713  *   Item specification.
714  * @param size
715  *   Bit-Mask size in bytes.
716  * @param[in] supported_mask
717  *   Bit-mask covering supported fields to compare with spec, last and mask in
718  *   \item.
719  * @param[in] default_mask
720  *   Bit-mask default mask if none is provided in \item.
721  *
722  * @return
723  *   0 on success.
724  */
725 static int
726 tap_flow_item_validate(const struct rte_flow_item *item,
727 		       unsigned int size,
728 		       const uint8_t *supported_mask,
729 		       const uint8_t *default_mask)
730 {
731 	int ret = 0;
732 
733 	/* An empty layer is allowed, as long as all fields are NULL */
734 	if (!item->spec && (item->mask || item->last))
735 		return -1;
736 	/* Is the item spec compatible with what the NIC supports? */
737 	if (item->spec && !item->mask) {
738 		unsigned int i;
739 		const uint8_t *spec = item->spec;
740 
741 		for (i = 0; i < size; ++i)
742 			if ((spec[i] | supported_mask[i]) != supported_mask[i])
743 				return -1;
744 		/* Is the default mask compatible with what the NIC supports? */
745 		for (i = 0; i < size; i++)
746 			if ((default_mask[i] | supported_mask[i]) !=
747 			    supported_mask[i])
748 				return -1;
749 	}
750 	/* Is the item last compatible with what the NIC supports? */
751 	if (item->last && !item->mask) {
752 		unsigned int i;
753 		const uint8_t *spec = item->last;
754 
755 		for (i = 0; i < size; ++i)
756 			if ((spec[i] | supported_mask[i]) != supported_mask[i])
757 				return -1;
758 	}
759 	/* Is the item mask compatible with what the NIC supports? */
760 	if (item->mask) {
761 		unsigned int i;
762 		const uint8_t *spec = item->mask;
763 
764 		for (i = 0; i < size; ++i)
765 			if ((spec[i] | supported_mask[i]) != supported_mask[i])
766 				return -1;
767 	}
768 	/**
769 	 * Once masked, are item spec and item last equal?
770 	 * TC does not support ranges, so anything else is invalid.
771 	 */
772 	if (item->spec && item->last) {
773 		uint8_t spec[size];
774 		uint8_t last[size];
775 		const uint8_t *apply = default_mask;
776 		unsigned int i;
777 
778 		if (item->mask)
779 			apply = item->mask;
780 		for (i = 0; i < size; ++i) {
781 			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
782 			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
783 		}
784 		ret = memcmp(spec, last, size);
785 	}
786 	return ret;
787 }
788 
789 /**
790  * Add a TC action and its parameters to the flow's netlink message
791  * Handled actions: "gact", "mirred", "skbedit", "bpf"
792  *
793  * @param[in] flow
794  *   Pointer to rte flow containing the netlink message
795  *
796  * @param[in, out] act_index
797  *   Pointer to action sequence number in the TC command
798  *
799  * @param[in] adata
800  *  Pointer to struct holding the action parameters
801  *
802  * @return
803  *   -1 on failure, 0 on success
804  */
805 static int
806 add_action(struct rte_flow *flow, size_t *act_index, struct action_data *adata)
807 {
808 	struct tap_nlmsg *msg = &flow->msg;
809 
810 	if (tap_nlattr_nested_start(msg, (*act_index)++) < 0)
811 		return -1;
812 
813 	tap_nlattr_add(&msg->nh, TCA_ACT_KIND,
814 				strlen(adata->id) + 1, adata->id);
815 	if (tap_nlattr_nested_start(msg, TCA_ACT_OPTIONS) < 0)
816 		return -1;
817 	if (strcmp("gact", adata->id) == 0) {
818 		tap_nlattr_add(&msg->nh, TCA_GACT_PARMS, sizeof(adata->gact),
819 			   &adata->gact);
820 	} else if (strcmp("mirred", adata->id) == 0) {
821 		if (adata->mirred.eaction == TCA_EGRESS_MIRROR)
822 			adata->mirred.action = TC_ACT_PIPE;
823 		else /* REDIRECT */
824 			adata->mirred.action = TC_ACT_STOLEN;
825 		tap_nlattr_add(&msg->nh, TCA_MIRRED_PARMS,
826 			   sizeof(adata->mirred),
827 			   &adata->mirred);
828 	} else if (strcmp("skbedit", adata->id) == 0) {
829 		tap_nlattr_add(&msg->nh, TCA_SKBEDIT_PARMS,
830 			   sizeof(adata->skbedit.skbedit), &adata->skbedit.skbedit);
831 		if (adata->skbedit.mark)
832 			tap_nlattr_add32(&msg->nh, TCA_SKBEDIT_MARK, adata->skbedit.mark);
833 		else
834 			tap_nlattr_add16(&msg->nh, TCA_SKBEDIT_QUEUE_MAPPING, adata->skbedit.queue);
835 	} else if (strcmp("bpf", adata->id) == 0) {
836 #ifdef HAVE_BPF_RSS
837 		tap_nlattr_add32(&msg->nh, TCA_ACT_BPF_FD, adata->bpf.bpf_fd);
838 		tap_nlattr_add(&msg->nh, TCA_ACT_BPF_NAME,
839 			   strlen(adata->bpf.annotation) + 1,
840 			   adata->bpf.annotation);
841 		tap_nlattr_add(&msg->nh, TCA_ACT_BPF_PARMS,
842 			   sizeof(adata->bpf.bpf),
843 			   &adata->bpf.bpf);
844 #else
845 		TAP_LOG(ERR, "Internal error: bpf requested but not supported");
846 		return -1;
847 #endif
848 	} else {
849 		TAP_LOG(ERR, "Internal error: unknown action: %s", adata->id);
850 		return -1;
851 	}
852 	tap_nlattr_nested_finish(msg); /* nested TCA_ACT_OPTIONS */
853 	tap_nlattr_nested_finish(msg); /* nested act_index */
854 	return 0;
855 }
856 
857 /**
858  * Helper function to add a series of TC actions to the flow's netlink message
859  *
860  * @param[in] flow
861  *   Pointer to rte flow containing the netlink message
862  *
863  * @param[in] nb_actions
864  *   Number of actions in an array of action structs
865  *
866  * @param[in] data
867  *   Pointer to an array of action structs
868  *
869  * @param[in] classifier_action
870  *   The classifier on behalf of which the actions are configured
871  *
872  * @return
873  *   -1 on failure, 0 on success
874  */
875 static int
876 add_actions(struct rte_flow *flow, int nb_actions, struct action_data *data,
877 	    int classifier_action)
878 {
879 	struct tap_nlmsg *msg = &flow->msg;
880 	size_t act_index = 1;
881 	int i;
882 
883 	if (tap_nlattr_nested_start(msg, classifier_action) < 0)
884 		return -1;
885 	for (i = 0; i < nb_actions; i++)
886 		if (add_action(flow, &act_index, data + i) < 0)
887 			return -1;
888 	tap_nlattr_nested_finish(msg); /* nested TCA_FLOWER_ACT */
889 	return 0;
890 }
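/*
 * Netlink attribute layout produced by add_actions()/add_action() (sketch):
 *
 *   <classifier action attribute, e.g. TCA_FLOWER_ACT>
 *     1                              <- action index
 *       TCA_ACT_KIND    "gact" | "mirred" | "skbedit" | "bpf"
 *       TCA_ACT_OPTIONS
 *         TCA_*_PARMS   (+ kind-specific attributes)
 *     2
 *       ...
 */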
891 
892 /**
893  * Validate a flow supported by TC.
894  * If flow param is not NULL, then also fill the netlink message inside.
895  *
896  * @param pmd
897  *   Pointer to private structure.
898  * @param[in] attr
899  *   Flow rule attributes.
900  * @param[in] pattern
901  *   Pattern specification (list terminated by the END pattern item).
902  * @param[in] actions
903  *   Associated actions (list terminated by the END action).
904  * @param[out] error
905  *   Perform verbose error reporting if not NULL.
906  * @param[in, out] flow
907  *   Flow structure to update.
908  * @param[in] mirred
909  *   If set to TCA_EGRESS_REDIR, provided actions will be replaced with a
910  *   redirection to the tap netdevice, and the TC rule will be configured
911  *   on the remote netdevice in pmd.
912  *   If set to TCA_EGRESS_MIRROR, provided actions will be replaced with a
913  *   mirroring to the tap netdevice, and the TC rule will be configured
914  *   on the remote netdevice in pmd. Matching packets will thus be duplicated.
915  *   If set to 0, the standard behavior is to be used: set correct actions for
916  *   the TC rule, and apply it on the tap netdevice.
917  *
918  * @return
919  *   0 on success, a negative errno value otherwise and rte_errno is set.
920  */
921 static int
922 priv_flow_process(struct pmd_internals *pmd,
923 		  const struct rte_flow_attr *attr,
924 		  const struct rte_flow_item items[],
925 		  const struct rte_flow_action actions[],
926 		  struct rte_flow_error *error,
927 		  struct rte_flow *flow,
928 		  int mirred)
929 {
930 	const struct tap_flow_items *cur_item = tap_flow_items;
931 	struct convert_data data = {
932 		.eth_type = 0,
933 		.ip_proto = 0,
934 		.flow = flow,
935 	};
936 	int action = 0; /* Only one action authorized for now */
937 
938 	if (attr->transfer) {
939 		rte_flow_error_set(
940 			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
941 			NULL, "transfer is not supported");
942 		return -rte_errno;
943 	}
944 	if (attr->group > MAX_GROUP) {
945 		rte_flow_error_set(
946 			error, EINVAL, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
947 			NULL, "group value too big: cannot exceed 15");
948 		return -rte_errno;
949 	}
950 	if (attr->priority > MAX_PRIORITY) {
951 		rte_flow_error_set(
952 			error, EINVAL, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
953 			NULL, "priority value too big");
954 		return -rte_errno;
955 	} else if (flow) {
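		/*
		 * TC priority and protocol share tcm_info: the upper 16 bits
		 * carry the priority derived from the flow group and rte_flow
		 * priority, the lower 16 bits hold the protocol, initialized
		 * to ETH_P_ALL by the caller.
		 */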
956 		uint16_t group = attr->group << GROUP_SHIFT;
957 		uint16_t prio = group | (attr->priority +
958 				RSS_PRIORITY_OFFSET + PRIORITY_OFFSET);
959 		flow->msg.t.tcm_info = TC_H_MAKE(prio << 16,
960 						 flow->msg.t.tcm_info);
961 	}
962 	if (flow) {
963 		if (mirred) {
964 			/*
965 			 * If attr->ingress, the rule applies on remote ingress
966 			 * to match incoming packets
967 			 * If attr->egress, the rule applies on tap ingress (as
968 			 * seen from the kernel) to deal with packets going out
969 			 * from the DPDK app.
970 			 */
971 			flow->msg.t.tcm_parent = TC_H_MAKE(TC_H_INGRESS, 0);
972 		} else {
973 			/* Standard rule on tap egress (kernel standpoint). */
974 			flow->msg.t.tcm_parent =
975 				TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0);
976 		}
977 		/* use flower filter type */
978 		tap_nlattr_add(&flow->msg.nh, TCA_KIND, sizeof("flower"), "flower");
979 		if (tap_nlattr_nested_start(&flow->msg, TCA_OPTIONS) < 0) {
980 			rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_ACTION,
981 					   actions, "could not allocate netlink message");
982 			goto exit_return_error;
983 		}
984 	}
985 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
986 		const struct tap_flow_items *token = NULL;
987 		unsigned int i;
988 		int err = 0;
989 
990 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
991 			continue;
992 		for (i = 0;
993 		     cur_item->items &&
994 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
995 		     ++i) {
996 			if (cur_item->items[i] == items->type) {
997 				token = &tap_flow_items[items->type];
998 				break;
999 			}
1000 		}
1001 		if (!token)
1002 			goto exit_item_not_supported;
1003 		cur_item = token;
1004 		err = tap_flow_item_validate(
1005 			items, cur_item->mask_sz,
1006 			(const uint8_t *)cur_item->mask,
1007 			(const uint8_t *)cur_item->default_mask);
1008 		if (err)
1009 			goto exit_item_not_supported;
1010 		if (flow && cur_item->convert) {
1011 			err = cur_item->convert(items, &data);
1012 			if (err)
1013 				goto exit_item_not_supported;
1014 		}
1015 	}
1016 	if (flow) {
1017 		if (data.vlan) {
1018 			tap_nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE,
1019 				     htons(ETH_P_8021Q));
1020 			tap_nlattr_add16(&flow->msg.nh,
1021 				     TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1022 				     data.eth_type ?
1023 				     data.eth_type : htons(ETH_P_ALL));
1024 		} else if (data.eth_type) {
1025 			tap_nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE,
1026 				     data.eth_type);
1027 		}
1028 	}
1029 	if (mirred && flow) {
1030 		struct action_data adata = {
1031 			.id = "mirred",
1032 			.mirred = {
1033 				.eaction = mirred,
1034 			},
1035 		};
1036 
1037 		/*
1038 		 * If attr->egress && mirred, then this is a special
1039 		 * case where the rule must be applied on the tap, to
1040 		 * redirect packets coming from the DPDK App, out
1041 		 * through the remote netdevice.
1042 		 */
1043 		adata.mirred.ifindex = attr->ingress ? pmd->if_index :
1044 			pmd->remote_if_index;
1045 		if (mirred == TCA_EGRESS_MIRROR)
1046 			adata.mirred.action = TC_ACT_PIPE;
1047 		else
1048 			adata.mirred.action = TC_ACT_STOLEN;
1049 		if (add_actions(flow, 1, &adata, TCA_FLOWER_ACT) < 0)
1050 			goto exit_action_not_supported;
1051 		else
1052 			goto end;
1053 	}
1054 actions:
1055 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
1056 		int err = 0;
1057 
1058 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
1059 			continue;
1060 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
1061 			if (action)
1062 				goto exit_action_not_supported;
1063 			action = 1;
1064 			if (flow) {
1065 				struct action_data adata = {
1066 					.id = "gact",
1067 					.gact = {
1068 						.action = TC_ACT_SHOT,
1069 					},
1070 				};
1071 
1072 				err = add_actions(flow, 1, &adata,
1073 						  TCA_FLOWER_ACT);
1074 			}
1075 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_PASSTHRU) {
1076 			if (action)
1077 				goto exit_action_not_supported;
1078 			action = 1;
1079 			if (flow) {
1080 				struct action_data adata = {
1081 					.id = "gact",
1082 					.gact = {
1083 						/* continue */
1084 						.action = TC_ACT_UNSPEC,
1085 					},
1086 				};
1087 
1088 				err = add_actions(flow, 1, &adata, TCA_FLOWER_ACT);
1089 			}
1090 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
1091 			const struct rte_flow_action_queue *queue =
1092 				(const struct rte_flow_action_queue *)
1093 				actions->conf;
1094 
1095 			if (action)
1096 				goto exit_action_not_supported;
1097 			action = 1;
1098 			if (queue->index >= pmd->dev->data->nb_rx_queues) {
1099 				rte_flow_error_set(error, ERANGE,
1100 						   RTE_FLOW_ERROR_TYPE_ACTION, actions,
1101 						   "queue index out of range");
1102 				goto exit_return_error;
1103 			}
1104 			if (flow) {
1105 				struct action_data adata = {
1106 					.id = "skbedit",
1107 					.skbedit = {
1108 						.skbedit = {
1109 							.action = TC_ACT_PIPE,
1110 						},
1111 						.queue = queue->index,
1112 					},
1113 				};
1114 
1115 				err = add_actions(flow, 1, &adata,
1116 					TCA_FLOWER_ACT);
1117 			}
1118 #ifdef HAVE_BPF_RSS
1119 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
1120 			const struct rte_flow_action_rss *rss =
1121 				(const struct rte_flow_action_rss *)
1122 				actions->conf;
1123 
1124 			if (action++)
1125 				goto exit_action_not_supported;
1126 
1127 			if (pmd->rss == NULL) {
1128 				err = rss_enable(pmd, error);
1129 				if (err)
1130 					goto exit_return_error;
1131 			}
1132 			if (flow)
1133 				err = rss_add_actions(flow, pmd, rss, error);
1134 #endif
1135 		} else {
1136 			goto exit_action_not_supported;
1137 		}
1138 		if (err)
1139 			goto exit_return_error;
1140 	}
1141 	/* When fate is unknown, drop traffic. */
1142 	if (!action) {
1143 		static const struct rte_flow_action drop[] = {
1144 			{ .type = RTE_FLOW_ACTION_TYPE_DROP, },
1145 			{ .type = RTE_FLOW_ACTION_TYPE_END, },
1146 		};
1147 
1148 		actions = drop;
1149 		goto actions;
1150 	}
1151 end:
1152 	if (flow)
1153 		tap_nlattr_nested_finish(&flow->msg); /* nested TCA_OPTIONS */
1154 	return 0;
1155 exit_item_not_supported:
1156 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
1157 			   items, "item not supported");
1158 	return -rte_errno;
1159 exit_action_not_supported:
1160 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
1161 			   actions, "action not supported");
1162 exit_return_error:
1163 	return -rte_errno;
1164 }
1165 
1168 /**
1169  * Validate a flow.
1170  *
1171  * @see rte_flow_validate()
1172  * @see rte_flow_ops
1173  */
1174 static int
1175 tap_flow_validate(struct rte_eth_dev *dev,
1176 		  const struct rte_flow_attr *attr,
1177 		  const struct rte_flow_item items[],
1178 		  const struct rte_flow_action actions[],
1179 		  struct rte_flow_error *error)
1180 {
1181 	struct pmd_internals *pmd = dev->data->dev_private;
1182 
1183 	return priv_flow_process(pmd, attr, items, actions, error, NULL, 0);
1184 }
1185 
1186 /**
1187  * Set a unique handle in a flow.
1188  *
1189  * The kernel supports TC rules with equal priority, as long as they use the
1190  * same matching fields (e.g.: dst mac and ipv4) with different values (and
1191  * full mask to ensure no collision is possible).
1192  * In those rules, the handle (uint32_t) is the part that would identify
1193  * specifically each rule.
1194  *
1195  * Use jhash of the flow pointer to make a unique handle.
1196  *
1197  * @param[in, out] flow
1198  *   The flow that needs its handle set.
1199  */
1200 static void
1201 tap_flow_set_handle(struct rte_flow *flow)
1202 {
1203 	union {
1204 		struct rte_flow *flow;
1205 		uint32_t words[sizeof(flow) / sizeof(uint32_t)];
1206 	} tmp = {
1207 		.flow = flow,
1208 	};
1209 	uint32_t handle;
1210 	static uint64_t hash_seed;
1211 
1212 	if (hash_seed == 0)
1213 		hash_seed = rte_rand();
1214 
1215 	handle = rte_jhash_32b(tmp.words, sizeof(flow) / sizeof(uint32_t), hash_seed);
1216 
1217 	/* must be at least 1 to avoid letting the kernel choose one for us */
1218 	if (!handle)
1219 		handle = 1;
1220 	flow->msg.t.tcm_handle = handle;
1221 }
1222 
1223 /**
1224  * Free the flow opened file descriptors and allocated memory
1225  * Free the flow resources: its RSS BPF map entry (if any) and allocated memory
1226  * @param[in] flow
1227  *   Pointer to the flow to free
1228  *
1229  */
1230 static void
1231 tap_flow_free(struct pmd_internals *pmd __rte_unused, struct rte_flow *flow)
1232 {
1233 	if (!flow)
1234 		return;
1235 
1236 #ifdef HAVE_BPF_RSS
1237 	struct tap_rss *rss = pmd->rss;
1238 	if (rss)
1239 		bpf_map__delete_elem(rss->maps.rss_map,
1240 				     &flow->msg.t.tcm_handle, sizeof(uint32_t), 0);
1241 #endif
1242 	/* Free flow allocated memory */
1243 	rte_free(flow);
1244 }
1245 
1246 /**
1247  * Create a flow.
1248  *
1249  * @see rte_flow_create()
1250  * @see rte_flow_ops
1251  */
1252 static struct rte_flow *
1253 tap_flow_create(struct rte_eth_dev *dev,
1254 		const struct rte_flow_attr *attr,
1255 		const struct rte_flow_item items[],
1256 		const struct rte_flow_action actions[],
1257 		struct rte_flow_error *error)
1258 {
1259 	struct pmd_internals *pmd = dev->data->dev_private;
1260 	struct rte_flow *remote_flow = NULL;
1261 	struct rte_flow *flow = NULL;
1262 	struct tap_nlmsg *msg = NULL;
1263 	int err;
1264 
1265 	if (!pmd->if_index) {
1266 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
1267 				   NULL,
1268 				   "can't create rule, ifindex not found");
1269 		goto fail;
1270 	}
1271 	/*
1272 	 * No rules configured through standard rte_flow should be set on the
1273 	 * priorities used by implicit rules.
1274 	 */
1275 	if ((attr->group == MAX_GROUP) &&
1276 	    attr->priority > (MAX_PRIORITY - TAP_REMOTE_MAX_IDX)) {
1277 		rte_flow_error_set(
1278 			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1279 			NULL, "priority value too big");
1280 		goto fail;
1281 	}
1282 	flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0);
1283 	if (!flow) {
1284 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1285 				   NULL, "cannot allocate memory for rte_flow");
1286 		goto fail;
1287 	}
1288 	msg = &flow->msg;
1289 	tc_init_msg(msg, pmd->if_index, RTM_NEWTFILTER,
1290 		    NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
1291 	msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
1292 	tap_flow_set_handle(flow);
1293 	if (priv_flow_process(pmd, attr, items, actions, error, flow, 0))
1294 		goto fail;
1295 	err = tap_nl_send(pmd->nlsk_fd, &msg->nh);
1296 	if (err < 0) {
1297 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
1298 				   NULL, "couldn't send request to kernel");
1299 		goto fail;
1300 	}
1301 	err = tap_nl_recv_ack(pmd->nlsk_fd);
1302 	if (err < 0) {
1303 		TAP_LOG(ERR,
1304 			"Kernel refused TC filter rule creation (%d): %s",
1305 			errno, strerror(errno));
1306 		rte_flow_error_set(error, EEXIST, RTE_FLOW_ERROR_TYPE_HANDLE,
1307 				   NULL,
1308 				   "overlapping rules or Kernel too old for flower support");
1309 		goto fail;
1310 	}
1311 	LIST_INSERT_HEAD(&pmd->flows, flow, next);
1312 	/**
1313 	 * If a remote device is configured, a TC rule with identical items for
1314 	 * matching must be set on that device, with a single action: redirect
1315 	 * to the local pmd->if_index.
1316 	 */
1317 	if (pmd->remote_if_index) {
1318 		remote_flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0);
1319 		if (!remote_flow) {
1320 			rte_flow_error_set(
1321 				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
1322 				"cannot allocate memory for rte_flow");
1323 			goto fail;
1324 		}
1325 		msg = &remote_flow->msg;
1326 		/* set the rule if_index for the remote netdevice */
1327 		tc_init_msg(
1328 			msg, pmd->remote_if_index, RTM_NEWTFILTER,
1329 			NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
1330 		msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
1331 		tap_flow_set_handle(remote_flow);
1332 		if (priv_flow_process(pmd, attr, items, NULL,
1333 				      error, remote_flow, TCA_EGRESS_REDIR)) {
1334 			rte_flow_error_set(
1335 				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1336 				NULL, "rte flow rule validation failed");
1337 			goto fail;
1338 		}
1339 		err = tap_nl_send(pmd->nlsk_fd, &msg->nh);
1340 		if (err < 0) {
1341 			rte_flow_error_set(
1342 				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1343 				NULL, "Failure sending nl request");
1344 			goto fail;
1345 		}
1346 		err = tap_nl_recv_ack(pmd->nlsk_fd);
1347 		if (err < 0) {
1348 			TAP_LOG(ERR,
1349 				"Kernel refused TC filter rule creation (%d): %s",
1350 				errno, strerror(errno));
1351 			rte_flow_error_set(
1352 				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1353 				NULL,
1354 				"overlapping rules or Kernel too old for flower support");
1355 			goto fail;
1356 		}
1357 		flow->remote_flow = remote_flow;
1358 	}
1359 	return flow;
1360 fail:
1361 	rte_free(remote_flow);
1362 	if (flow)
1363 		tap_flow_free(pmd, flow);
1364 	return NULL;
1365 }
1366 
1367 /**
1368  * Destroy a flow using pointer to pmd_internal.
1369  *
1370  * @param[in, out] pmd
1371  *   Pointer to private structure.
1372  * @param[in] flow
1373  *   Pointer to the flow to destroy.
1374  * @param[in, out] error
1375  *   Pointer to the flow error handler
1376  *
1377  * @return 0 if the flow could be destroyed, -1 otherwise.
1378  */
1379 static int
1380 tap_flow_destroy_pmd(struct pmd_internals *pmd,
1381 		     struct rte_flow *flow,
1382 		     struct rte_flow_error *error)
1383 {
1384 	struct rte_flow *remote_flow = flow->remote_flow;
1385 	int ret = 0;
1386 
1387 	LIST_REMOVE(flow, next);
1388 	flow->msg.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1389 	flow->msg.nh.nlmsg_type = RTM_DELTFILTER;
1390 
1391 	ret = tap_nl_send(pmd->nlsk_fd, &flow->msg.nh);
1392 	if (ret < 0) {
1393 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
1394 				   NULL, "couldn't send request to kernel");
1395 		goto end;
1396 	}
1397 	ret = tap_nl_recv_ack(pmd->nlsk_fd);
1398 	/* If errno is ENOENT, the rule is already no longer in the kernel. */
1399 	if (ret < 0 && errno == ENOENT)
1400 		ret = 0;
1401 	if (ret < 0) {
1402 		TAP_LOG(ERR,
1403 			"Kernel refused TC filter rule deletion (%d): %s",
1404 			errno, strerror(errno));
1405 		rte_flow_error_set(
1406 			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
1407 			"couldn't receive kernel ack to our request");
1408 		goto end;
1409 	}
1410 
1411 	if (remote_flow) {
1412 		remote_flow->msg.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1413 		remote_flow->msg.nh.nlmsg_type = RTM_DELTFILTER;
1414 
1415 		ret = tap_nl_send(pmd->nlsk_fd, &remote_flow->msg.nh);
1416 		if (ret < 0) {
1417 			rte_flow_error_set(
1418 				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1419 				NULL, "Failure sending nl request");
1420 			goto end;
1421 		}
1422 		ret = tap_nl_recv_ack(pmd->nlsk_fd);
1423 		if (ret < 0 && errno == ENOENT)
1424 			ret = 0;
1425 		if (ret < 0) {
1426 			TAP_LOG(ERR,
1427 				"Kernel refused TC filter rule deletion (%d): %s",
1428 				errno, strerror(errno));
1429 			rte_flow_error_set(
1430 				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1431 				NULL, "Failure trying to receive nl ack");
1432 			goto end;
1433 		}
1434 	}
1435 end:
1436 	rte_free(remote_flow);
1437 	tap_flow_free(pmd, flow);
1438 	return ret;
1439 }
1440 
1441 /**
1442  * Destroy a flow.
1443  *
1444  * @see rte_flow_destroy()
1445  * @see rte_flow_ops
1446  */
1447 static int
1448 tap_flow_destroy(struct rte_eth_dev *dev,
1449 		 struct rte_flow *flow,
1450 		 struct rte_flow_error *error)
1451 {
1452 	struct pmd_internals *pmd = dev->data->dev_private;
1453 
1454 	return tap_flow_destroy_pmd(pmd, flow, error);
1455 }
1456 
1457 /**
1458  * Enable/disable flow isolation.
1459  *
1460  * @see rte_flow_isolate()
1461  * @see rte_flow_ops
1462  */
1463 static int
1464 tap_flow_isolate(struct rte_eth_dev *dev,
1465 		 int set,
1466 		 struct rte_flow_error *error __rte_unused)
1467 {
1468 	struct pmd_internals *pmd = dev->data->dev_private;
1469 	struct pmd_process_private *process_private = dev->process_private;
1470 
1471 	/* normalize 'set' variable to contain 0 or 1 values */
1472 	if (set)
1473 		set = 1;
1474 	/* if already in the right isolation mode - nothing to do */
1475 	if ((set ^ pmd->flow_isolate) == 0)
1476 		return 0;
1477 	/* mark the isolation mode for tap_flow_implicit_create() */
1478 	pmd->flow_isolate = set;
1479 	/*
1480 	 * If netdevice is there, setup appropriate flow rules immediately.
1481 	 * Otherwise it will be set when bringing up the netdevice (tun_alloc).
1482 	 */
1483 	if (process_private->fds[0] == -1)
1484 		return 0;
1485 
1486 	if (set) {
1487 		struct rte_flow *remote_flow;
1488 
1489 		while (1) {
1490 			remote_flow = LIST_FIRST(&pmd->implicit_flows);
1491 			if (!remote_flow)
1492 				break;
1493 			/*
1494 			 * Remove all implicit rules on the remote.
1495 			 * Keep the local rule to redirect packets on TX.
1496 			 * Keep also the last implicit local rule: ISOLATE.
1497 			 */
1498 			if (remote_flow->msg.t.tcm_ifindex == pmd->if_index)
1499 				break;
1500 			if (tap_flow_destroy_pmd(pmd, remote_flow, NULL) < 0)
1501 				goto error;
1502 		}
1503 		/* Switch the TC rule according to pmd->flow_isolate */
1504 		if (tap_flow_implicit_create(pmd, TAP_ISOLATE) == -1)
1505 			goto error;
1506 	} else {
1507 		/* Switch the TC rule according to pmd->flow_isolate */
1508 		if (tap_flow_implicit_create(pmd, TAP_ISOLATE) == -1)
1509 			goto error;
1510 		if (!pmd->remote_if_index)
1511 			return 0;
1512 		if (tap_flow_implicit_create(pmd, TAP_REMOTE_TX) < 0)
1513 			goto error;
1514 		if (tap_flow_implicit_create(pmd, TAP_REMOTE_LOCAL_MAC) < 0)
1515 			goto error;
1516 		if (tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCAST) < 0)
1517 			goto error;
1518 		if (tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCASTV6) < 0)
1519 			goto error;
1520 		if (dev->data->promiscuous &&
1521 		    tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC) < 0)
1522 			goto error;
1523 		if (dev->data->all_multicast &&
1524 		    tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI) < 0)
1525 			goto error;
1526 	}
1527 	return 0;
1528 error:
1529 	pmd->flow_isolate = 0;
1530 	return rte_flow_error_set(
1531 		error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1532 		"TC rule creation failed");
1533 }
1534 
1535 /**
1536  * Destroy all flows.
1537  *
1538  * @see rte_flow_flush()
1539  * @see rte_flow_ops
1540  */
1541 int
1542 tap_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
1543 {
1544 	struct pmd_internals *pmd = dev->data->dev_private;
1545 	struct rte_flow *flow;
1546 
1547 	while (!LIST_EMPTY(&pmd->flows)) {
1548 		flow = LIST_FIRST(&pmd->flows);
1549 		if (tap_flow_destroy(dev, flow, error) < 0)
1550 			return -1;
1551 	}
1552 	return 0;
1553 }
1554 
1555 /**
1556  * Add an implicit flow rule on the remote device to make sure traffic gets to
1557  * the tap netdevice from there.
1558  *
1559  * @param pmd
1560  *   Pointer to private structure.
1561  * @param[in] idx
1562  *   The idx in the implicit_rte_flows array specifying which rule to apply.
1563  *
1564  * @return -1 if the rule couldn't be applied, 0 otherwise.
1565  */
1566 int tap_flow_implicit_create(struct pmd_internals *pmd,
1567 			     enum implicit_rule_index idx)
1568 {
1569 	uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
1570 	struct rte_flow_action *actions = implicit_rte_flows[idx].actions;
1571 	struct rte_flow_action isolate_actions[2] = {
1572 		[1] = {
1573 			.type = RTE_FLOW_ACTION_TYPE_END,
1574 		},
1575 	};
1576 	struct rte_flow_item *items = implicit_rte_flows[idx].items;
1577 	struct rte_flow_attr *attr = &implicit_rte_flows[idx].attr;
1578 	struct rte_flow_item_eth eth_local = { .hdr.ether_type = 0 };
1579 	unsigned int if_index = pmd->remote_if_index;
1580 	struct rte_flow *remote_flow = NULL;
1581 	struct tap_nlmsg *msg = NULL;
1582 	int err = 0;
1583 	struct rte_flow_item items_local[2] = {
1584 		[0] = {
1585 			.type = items[0].type,
1586 			.spec = &eth_local,
1587 			.mask = items[0].mask,
1588 		},
1589 		[1] = {
1590 			.type = items[1].type,
1591 		}
1592 	};
1593 
1594 	remote_flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0);
1595 	if (!remote_flow) {
1596 		TAP_LOG(ERR, "Cannot allocate memory for rte_flow");
1597 		goto fail;
1598 	}
1599 	msg = &remote_flow->msg;
1600 	if (idx == TAP_REMOTE_TX) {
1601 		if_index = pmd->if_index;
1602 	} else if (idx == TAP_ISOLATE) {
1603 		if_index = pmd->if_index;
1604 		/* Don't be exclusive for this rule, it can be changed later. */
1605 		flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE;
1606 		isolate_actions[0].type = pmd->flow_isolate ?
1607 			RTE_FLOW_ACTION_TYPE_DROP :
1608 			RTE_FLOW_ACTION_TYPE_PASSTHRU;
1609 		actions = isolate_actions;
1610 	} else if (idx == TAP_REMOTE_LOCAL_MAC) {
1611 		/*
1612 		 * eth addr couldn't be set in implicit_rte_flows[] as it is not
1613 		 * known at compile time.
1614 		 */
1615 		memcpy(&eth_local.hdr.dst_addr, &pmd->eth_addr, sizeof(pmd->eth_addr));
1616 		items = items_local;
1617 	}
1618 	tc_init_msg(msg, if_index, RTM_NEWTFILTER, flags);
1619 	msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
1620 	/*
1621 	 * The ISOLATE rule is always present and must have a static handle, as
1622 	 * the action is changed whether the feature is enabled (DROP) or
1623 	 * disabled (PASSTHRU).
1624 	 * There is just one REMOTE_PROMISCUOUS rule in all cases. It should
1625 	 * have a static handle such that adding it twice will fail with EEXIST
1626 	 * with any kernel version. Remark: old kernels may falsely accept the
1627 	 * same REMOTE_PROMISCUOUS rules if they had different handles.
1628 	 */
1629 	if (idx == TAP_ISOLATE)
1630 		remote_flow->msg.t.tcm_handle = ISOLATE_HANDLE;
1631 	else if (idx == TAP_REMOTE_PROMISC)
1632 		remote_flow->msg.t.tcm_handle = REMOTE_PROMISCUOUS_HANDLE;
1633 	else
1634 		tap_flow_set_handle(remote_flow);
1635 	if (priv_flow_process(pmd, attr, items, actions, NULL,
1636 			      remote_flow, implicit_rte_flows[idx].mirred)) {
1637 		TAP_LOG(ERR, "rte flow rule validation failed");
1638 		goto fail;
1639 	}
1640 	err = tap_nl_send(pmd->nlsk_fd, &msg->nh);
1641 	if (err < 0) {
1642 		TAP_LOG(ERR, "Failure sending nl request");
1643 		goto fail;
1644 	}
1645 	err = tap_nl_recv_ack(pmd->nlsk_fd);
1646 	if (err < 0) {
1647 		/* Silently ignore re-entering existing rule */
1648 		if (errno == EEXIST)
1649 			goto success;
1650 		TAP_LOG(ERR,
1651 			"Kernel refused TC filter rule creation (%d): %s",
1652 			errno, strerror(errno));
1653 		goto fail;
1654 	}
1655 	LIST_INSERT_HEAD(&pmd->implicit_flows, remote_flow, next);
1656 success:
1657 	return 0;
1658 fail:
1659 	rte_free(remote_flow);
1660 	return -1;
1661 }
1662 
1663 /**
1664  * Remove a specific implicit flow rule on the remote device.
1665  *
1666  * @param[in, out] pmd
1667  *   Pointer to private structure.
1668  * @param[in] idx
1669  *   The idx in the implicit_rte_flows array specifying which rule to remove.
1670  *
1671  * @return -1 if the implicit rule couldn't be removed, 0 otherwise.
1672  */
1673 int tap_flow_implicit_destroy(struct pmd_internals *pmd,
1674 			      enum implicit_rule_index idx)
1675 {
1676 	struct rte_flow *remote_flow;
1677 	int cur_prio = -1;
1678 	int idx_prio = implicit_rte_flows[idx].attr.priority + PRIORITY_OFFSET;
1679 
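	/*
	 * priv_flow_process() stored the rule priority in the upper 16 bits
	 * of tcm_info; use it to find the implicit rule matching idx.
	 */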
1680 	for (remote_flow = LIST_FIRST(&pmd->implicit_flows);
1681 	     remote_flow;
1682 	     remote_flow = LIST_NEXT(remote_flow, next)) {
1683 		cur_prio = (remote_flow->msg.t.tcm_info >> 16) & PRIORITY_MASK;
1684 		if (cur_prio != idx_prio)
1685 			continue;
1686 		return tap_flow_destroy_pmd(pmd, remote_flow, NULL);
1687 	}
1688 	return 0;
1689 }
1690 
1691 /**
1692  * Destroy all implicit flows.
1693  *
1694  * @see rte_flow_flush()
1695  */
1696 int
1697 tap_flow_implicit_flush(struct pmd_internals *pmd, struct rte_flow_error *error)
1698 {
1699 	struct rte_flow *remote_flow;
1700 
1701 	while (!LIST_EMPTY(&pmd->implicit_flows)) {
1702 		remote_flow = LIST_FIRST(&pmd->implicit_flows);
1703 		if (tap_flow_destroy_pmd(pmd, remote_flow, error) < 0)
1704 			return -1;
1705 	}
1706 	return 0;
1707 }
1708 
1709 /**
1710  * Cleanup when device is closed
1711  */
1712 void tap_flow_bpf_destroy(struct pmd_internals *pmd __rte_unused)
1713 {
1714 #ifdef HAVE_BPF_RSS
1715 	tap_rss__destroy(pmd->rss);
1716 	pmd->rss = NULL;
1717 #endif
1718 }
1719 
1720 #ifdef HAVE_BPF_RSS
1721 /**
1722  * Enable RSS on tap: create TC rules for queuing.
1723  * Enable RSS on tap: load and attach the BPF RSS program.
1724  * @param[in, out] pmd
1725  *   Pointer to private structure.
1726  *
1730  * @param[out] error
1731  *   Pointer to error reporting if not NULL.
1732  *
1733  * @return 0 on success, negative value on failure.
1734  */
1735 static int rss_enable(struct pmd_internals *pmd, struct rte_flow_error *error)
1736 {
1737 	int err;
1738 
1739 	/* Load the BPF program from the generated skeleton (tap_rss.skel.h) */
1740 	pmd->rss = tap_rss__open_and_load();
1741 	if (pmd->rss == NULL) {
1742 		TAP_LOG(ERR, "Failed to load BPF object: %s", strerror(errno));
1743 		rte_flow_error_set(error, errno, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
1744 			"BPF object could not be loaded");
1745 		return -errno;
1746 	}
1747 
1748 	/* Attach the BPF object built from the skeleton */
1749 	err = tap_rss__attach(pmd->rss);
1750 	if (err < 0) {
1751 		TAP_LOG(ERR, "Failed to attach BPF object: %d", err);
1752 		rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
1753 			"BPF object could not be attached");
1754 		tap_flow_bpf_destroy(pmd);
1755 		return err;
1756 	}
1757 
1758 	return 0;
1759 }
1760 
1761 /* Default RSS hash key also used by mlx devices */
1762 static const uint8_t rss_hash_default_key[] = {
1763 	0x2c, 0xc6, 0x81, 0xd1,
1764 	0x5b, 0xdb, 0xf4, 0xf7,
1765 	0xfc, 0xa2, 0x83, 0x19,
1766 	0xdb, 0x1a, 0x3e, 0x94,
1767 	0x6b, 0x9e, 0x38, 0xd9,
1768 	0x2c, 0x9c, 0x03, 0xd1,
1769 	0xad, 0x99, 0x44, 0xa7,
1770 	0xd9, 0x56, 0x3d, 0x59,
1771 	0x06, 0x3c, 0x25, 0xf3,
1772 	0xfc, 0x1f, 0xdc, 0x2a,
1773 };
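/* Used verbatim when the RSS action is given with key_len == 0, see rss_add_actions(). */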
1774 
1775 /**
1776  * Add RSS hash calculations and queue selection
1777  *
1778  * @param[in, out] pmd
1779  *   Pointer to internal structure. Used to set/get RSS map fd
1780  *
1781  * @param[in] rss
1782  *   Pointer to RSS flow actions
1783  *
1784  * @param[out] error
1785  *   Pointer to error reporting if not NULL.
1786  *
1787  * @return 0 on success, negative value on failure
1788  */
1789 static int rss_add_actions(struct rte_flow *flow, struct pmd_internals *pmd,
1790 			   const struct rte_flow_action_rss *rss,
1791 			   struct rte_flow_error *error)
1792 {
1793 	const struct bpf_program *rss_prog = pmd->rss->progs.rss_flow_action;
1794 	struct rss_key rss_entry = { };
1795 	const uint8_t *key_in;
1796 	uint32_t hash_type = 0;
1797 	uint32_t handle = flow->msg.t.tcm_handle;
1798 	unsigned int i;
1799 	int err;
1800 
1801 	/* Check supported RSS features */
1802 	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT)
1803 		return rte_flow_error_set
1804 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1805 			 "non-default RSS hash functions are not supported");
1806 	if (rss->level)
1807 		return rte_flow_error_set
1808 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1809 			 "a nonzero RSS encapsulation level is not supported");
1810 
1811 	if (rss->queue_num == 0 || rss->queue_num >= TAP_MAX_QUEUES)
1812 		return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1813 					  "invalid number of queues");
1814 
1815 	/*
1816 	 * Follow the semantics of RSS key (see rte_ethdev.h)
1817 	 * There are two valid cases:
1818 	 *   1. key_length of zero, and key must be NULL;
1819 	 *      this uses the default driver key.
1820 	 *
1821 	 *   2. key_length is the TAP_RSS_HASH_KEY_SIZE (40 bytes)
1822 	 *      and the key must not be NULL.
1823 	 *
1824 	 * Anything else is an error.
1825 	 */
1826 	if (rss->key_len == 0) {
1827 		if (rss->key != NULL)
1828 			return rte_flow_error_set(error, ENOTSUP,
1829 						  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1830 						  &rss->key_len, "RSS hash key length 0");
1831 		key_in = rss_hash_default_key;
1832 	} else {
1833 		if (rss->key_len != TAP_RSS_HASH_KEY_SIZE)
1834 			return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1835 						  NULL, "RSS hash invalid key length");
1836 		if (rss->key == NULL)
1837 			return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1838 						  NULL, "RSS hash key is NULL");
1839 		key_in = rss->key;
1840 	}
1841 
1842 	if (rss->types & TAP_RSS_HF_MASK)
1843 		return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1844 					  NULL, "RSS hash type not supported");
1845 
1846 	if (rss->types & (RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV4_TCP))
1847 		hash_type |= RTE_BIT32(HASH_FIELD_IPV4_L3_L4);
1848 	else if (rss->types & (RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4))
1849 		hash_type |= RTE_BIT32(HASH_FIELD_IPV4_L3);
1850 
1851 	if (rss->types & (RTE_ETH_RSS_NONFRAG_IPV6_UDP | RTE_ETH_RSS_NONFRAG_IPV6_TCP))
1852 		hash_type |= RTE_BIT32(HASH_FIELD_IPV6_L3_L4);
1853 	else if (rss->types & (RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 | RTE_ETH_RSS_IPV6_EX))
1854 		hash_type |= RTE_BIT32(HASH_FIELD_IPV6_L3);
1855 
1856 	rss_entry.hash_fields = hash_type;
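	/*
	 * rte_convert_rss_key() lays the Toeplitz key out as 32-bit words in
	 * the byte order expected by the hash computation in the BPF program.
	 */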
1857 	rte_convert_rss_key((const uint32_t *)key_in, (uint32_t *)rss_entry.key,
1858 			    TAP_RSS_HASH_KEY_SIZE);
1859 
1860 	/* Update RSS map entry with queues */
1861 	rss_entry.nb_queues = rss->queue_num;
1862 	for (i = 0; i < rss->queue_num; i++)
1863 		rss_entry.queues[i] = rss->queue[i];
1864 
1866 	/* Key the map entry by the TC handle so the BPF program can look it up */
1867 	err = bpf_map__update_elem(pmd->rss->maps.rss_map,
1868 				   &handle, sizeof(handle),
1869 				   &rss_entry, sizeof(rss_entry), 0);
1870 	if (err) {
1871 		TAP_LOG(ERR,
1872 			"Failed to update BPF map entry %#x (%d): %s",
1873 			handle,  errno, strerror(errno));
1874 		rte_flow_error_set(
1875 			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
1876 			"Kernel too old or not configured "
1877 			"to support BPF maps updates");
1878 
1879 		return -ENOTSUP;
1880 	}
1881 
1882 	/* Add actions to mark packet then run the RSS BPF program */
1883 	struct action_data adata[] = {
1884 		{
1885 			.id = "skbedit",
1886 			.skbedit = {
1887 				.skbedit.action = TC_ACT_PIPE,
1888 				.mark = handle,
1889 			},
1890 		},
1891 		{
1892 			.id = "bpf",
1893 			.bpf = {
1894 				.bpf.action = TC_ACT_PIPE,
1895 				.annotation = "tap_rss",
1896 				.bpf_fd = bpf_program__fd(rss_prog),
1897 			},
1898 		},
1899 	};
1900 
1901 	return add_actions(flow, RTE_DIM(adata), adata, TCA_FLOWER_ACT);
1902 }
1903 #endif
1904 
1905 /**
1906  * Get rte_flow operations.
1907  *
1908  * @param dev
1909  *   Pointer to Ethernet device structure.
1910  * @param ops
1911  *   Pointer to operation-specific structure.
1912  *
1913  * @return
1914  *   0 on success, negative errno value on failure.
1915  */
1916 int
1917 tap_dev_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
1918 		     const struct rte_flow_ops **ops)
1919 {
1920 	*ops = &tap_flow_ops;
1921 	return 0;
1922 }
1923
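/*
 * Application-side usage (illustrative sketch only; the port id and queue
 * index are arbitrary): a rule created through the generic rte_flow API ends
 * up in tap_flow_create() above.
 *
 *	struct rte_flow_error err;
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 1 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow *f = rte_flow_create(port_id, &attr, pattern, actions, &err);
 */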