xref: /dpdk/app/test-flow-perf/main.c (revision cb440babbd45a80c059f8bc80e87c48d09086fd7)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  *
 * This file contains the application's main function.
 * This application provides the user the ability to test the
 * insertion rate for specific rte_flow rules under stress state (~4M rules).
7  *
8  * Then it will also provide packet per second measurement after installing
9  * all rules, the user may send traffic to test the PPS that match the rules
10  * after all rules are installed, to check performance or functionality after
11  * the stress.
12  *
13  * The flows insertion will go for all ports first, then it will print the
14  * results, after that the application will go into forwarding packets mode
15  * it will start receiving traffic if any and then forwarding it back and
16  * gives packet per second measurement.
17  */
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <stdint.h>
23 #include <inttypes.h>
24 #include <stdarg.h>
25 #include <errno.h>
26 #include <getopt.h>
27 #include <stdbool.h>
28 #include <sys/time.h>
29 #include <signal.h>
30 #include <unistd.h>
31 
32 #include <rte_malloc.h>
33 #include <rte_mempool.h>
34 #include <rte_mbuf.h>
35 #include <rte_ethdev.h>
36 #include <rte_flow.h>
37 
38 #include "config.h"
39 #include "flow_gen.h"
40 
/* Number of per-iteration timing slots kept by the insertion/deletion loops */
#define MAX_ITERATIONS             100
/* Default value of flows_count (flows inserted per port) */
#define DEFAULT_RULES_COUNT    4000000
/* Default value of iterations_number (flows per timed iteration) */
#define DEFAULT_ITERATION       100000
44 
/* Last flow returned by generate_flow(), written by flows_handler() */
struct rte_flow *flow;
/* Group the measured flows are created in, set by --group (default 0) */
static uint8_t flow_group;

/* Bitmaps filled by args_parse() from the command line options */
static uint64_t flow_items;
static uint64_t flow_actions;
static uint64_t flow_attrs;

/* Set by signal_handler() on SIGINT/SIGTERM to stop all loops */
static volatile bool force_quit;
static bool dump_iterations;      /* --dump-iterations */
static bool delete_flag;          /* --deletion-rate */
static bool dump_socket_mem_flag; /* --dump-socket-mem */
static bool enable_fwd;           /* --enable-fwd */

/* Mbuf pool shared by the Rx queues of all ports, created in init_port() */
static struct rte_mempool *mbuf_mp;
/* Value of rte_lcore_count(), stored by main() */
static uint32_t nb_lcores;
/* Number of flows to insert per port (--flows-count) */
static uint32_t flows_count;
/* Number of flows making up one timed iteration */
static uint32_t iterations_number;
static uint32_t hairpin_queues_num; /* total hairpin q number - default: 0 */
64 
/* Maximum number of packets requested per Rx burst (size of lcore_info.pkts) */
#define MAX_PKT_BURST    32
/* lcore_info.mode: the core forwards packets */
#define LCORE_MODE_PKT    1
/* lcore_info.mode: the core prints the packet-per-second statistics */
#define LCORE_MODE_STATS  2
/* Number of stream slots per lcore */
#define MAX_STREAMS      64
/* Number of entries in lcore_infos[] */
#define MAX_LCORES       64
70 
/*
 * One forwarding stream: packets received on (rx_port, rx_queue) are
 * transmitted on (tx_port, tx_queue). init_lcore_info() sets all four
 * fields to -1 to mark a slot as unused.
 */
struct stream {
	int tx_port;
	int tx_queue;
	int rx_port;
	int rx_queue;
};
77 
/* Per-lcore forwarding state and counters. */
struct lcore_info {
	int mode; /* 0 (idle), LCORE_MODE_PKT or LCORE_MODE_STATS */
	int streams_nb; /* number of streams assigned by init_lcore_info() */
	struct stream streams[MAX_STREAMS];
	/* stats */
	uint64_t tx_pkts; /* packets accepted by rte_eth_tx_burst() */
	uint64_t tx_drops; /* received packets that could not be sent */
	uint64_t rx_pkts; /* packets returned by rte_eth_rx_burst() */
	struct rte_mbuf *pkts[MAX_PKT_BURST]; /* burst buffer shared by Rx and Tx */
} __rte_cache_aligned;
88 
/* Per-lcore state, indexed by lcore id (see start_forwarding()) */
static struct lcore_info lcore_infos[MAX_LCORES];
90 
/*
 * Print the command line help to stdout.
 * progname is shown on the "usage:" line.
 */
static void
usage(char *progname)
{
	/* Help text in output order; each entry is written verbatim. */
	static const char * const help_text[] = {
		"\nControl configurations:\n",
		"  --flows-count=N: to set the number of needed"
		" flows to insert, default is 4,000,000\n",
		"  --dump-iterations: To print rates for each"
		" iteration\n",
		"  --deletion-rate: Enable deletion rate"
		" calculations\n",
		"  --dump-socket-mem: To dump all socket memory\n",
		"  --enable-fwd: To enable packets forwarding"
		" after insertion\n",

		"To set flow attributes:\n",
		"  --ingress: set ingress attribute in flows\n",
		"  --egress: set egress attribute in flows\n",
		"  --transfer: set transfer attribute in flows\n",
		"  --group=N: set group for all flows,"
		" default is 0\n",

		"To set flow items:\n",
		"  --ether: add ether layer in flow items\n",
		"  --vlan: add vlan layer in flow items\n",
		"  --ipv4: add ipv4 layer in flow items\n",
		"  --ipv6: add ipv6 layer in flow items\n",
		"  --tcp: add tcp layer in flow items\n",
		"  --udp: add udp layer in flow items\n",
		"  --vxlan: add vxlan layer in flow items\n",
		"  --vxlan-gpe: add vxlan-gpe layer in flow items\n",
		"  --gre: add gre layer in flow items\n",
		"  --geneve: add geneve layer in flow items\n",
		"  --gtp: add gtp layer in flow items\n",
		"  --meta: add meta layer in flow items\n",
		"  --tag: add tag layer in flow items\n",

		"To set flow actions:\n",
		"  --port-id: add port-id action in flow actions\n",
		"  --rss: add rss action in flow actions\n",
		"  --queue: add queue action in flow actions\n",
		"  --jump: add jump action in flow actions\n",
		"  --mark: add mark action in flow actions\n",
		"  --count: add count action in flow actions\n",
		"  --set-meta: add set meta action in flow actions\n",
		"  --set-tag: add set tag action in flow actions\n",
		"  --drop: add drop action in flow actions\n",
		"  --hairpin-queue=N: add hairpin-queue action in flow actions\n",
		"  --hairpin-rss=N: add hairpin-rss action in flow actions\n",
	};
	size_t idx;

	printf("\nusage: %s\n", progname);
	for (idx = 0; idx < sizeof(help_text) / sizeof(help_text[0]); idx++)
		fputs(help_text[idx], stdout);
}
141 
142 static void
143 args_parse(int argc, char **argv)
144 {
145 	char **argvopt;
146 	int n, opt;
147 	int opt_idx;
148 	size_t i;
149 
150 	static const struct option_dict {
151 		const char *str;
152 		const uint64_t mask;
153 		uint64_t *bitmap;
154 	} flow_options[] = {
155 		{
156 			.str = "ether",
157 			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_ETH),
158 			.bitmap = &flow_items
159 		},
160 		{
161 			.str = "ipv4",
162 			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_IPV4),
163 			.bitmap = &flow_items
164 		},
165 		{
166 			.str = "ipv6",
167 			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_IPV6),
168 			.bitmap = &flow_items
169 		},
170 		{
171 			.str = "vlan",
172 			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_VLAN),
173 			.bitmap = &flow_items
174 		},
175 		{
176 			.str = "tcp",
177 			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_TCP),
178 			.bitmap = &flow_items
179 		},
180 		{
181 			.str = "udp",
182 			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_UDP),
183 			.bitmap = &flow_items
184 		},
185 		{
186 			.str = "vxlan",
187 			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_VXLAN),
188 			.bitmap = &flow_items
189 		},
190 		{
191 			.str = "vxlan-gpe",
192 			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
193 			.bitmap = &flow_items
194 		},
195 		{
196 			.str = "gre",
197 			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_GRE),
198 			.bitmap = &flow_items
199 		},
200 		{
201 			.str = "geneve",
202 			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_GENEVE),
203 			.bitmap = &flow_items
204 		},
205 		{
206 			.str = "gtp",
207 			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_GTP),
208 			.bitmap = &flow_items
209 		},
210 		{
211 			.str = "meta",
212 			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_META),
213 			.bitmap = &flow_items
214 		},
215 		{
216 			.str = "tag",
217 			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_TAG),
218 			.bitmap = &flow_items
219 		},
220 		{
221 			.str = "ingress",
222 			.mask = INGRESS,
223 			.bitmap = &flow_attrs
224 		},
225 		{
226 			.str = "egress",
227 			.mask = EGRESS,
228 			.bitmap = &flow_attrs
229 		},
230 		{
231 			.str = "transfer",
232 			.mask = TRANSFER,
233 			.bitmap = &flow_attrs
234 		},
235 		{
236 			.str = "port-id",
237 			.mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_PORT_ID),
238 			.bitmap = &flow_actions
239 		},
240 		{
241 			.str = "rss",
242 			.mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_RSS),
243 			.bitmap = &flow_actions
244 		},
245 		{
246 			.str = "queue",
247 			.mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_QUEUE),
248 			.bitmap = &flow_actions
249 		},
250 		{
251 			.str = "jump",
252 			.mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_JUMP),
253 			.bitmap = &flow_actions
254 		},
255 		{
256 			.str = "mark",
257 			.mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_MARK),
258 			.bitmap = &flow_actions
259 		},
260 		{
261 			.str = "count",
262 			.mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_COUNT),
263 			.bitmap = &flow_actions
264 		},
265 		{
266 			.str = "set-meta",
267 			.mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_SET_META),
268 			.bitmap = &flow_actions
269 		},
270 		{
271 			.str = "set-tag",
272 			.mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_SET_TAG),
273 			.bitmap = &flow_actions
274 		},
275 		{
276 			.str = "drop",
277 			.mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_DROP),
278 			.bitmap = &flow_actions
279 		}
280 	};
281 
282 	static const struct option lgopts[] = {
283 		/* Control */
284 		{ "help",                       0, 0, 0 },
285 		{ "flows-count",                1, 0, 0 },
286 		{ "dump-iterations",            0, 0, 0 },
287 		{ "deletion-rate",              0, 0, 0 },
288 		{ "dump-socket-mem",            0, 0, 0 },
289 		{ "enable-fwd",                 0, 0, 0 },
290 		/* Attributes */
291 		{ "ingress",                    0, 0, 0 },
292 		{ "egress",                     0, 0, 0 },
293 		{ "transfer",                   0, 0, 0 },
294 		{ "group",                      1, 0, 0 },
295 		/* Items */
296 		{ "ether",                      0, 0, 0 },
297 		{ "vlan",                       0, 0, 0 },
298 		{ "ipv4",                       0, 0, 0 },
299 		{ "ipv6",                       0, 0, 0 },
300 		{ "tcp",                        0, 0, 0 },
301 		{ "udp",                        0, 0, 0 },
302 		{ "vxlan",                      0, 0, 0 },
303 		{ "vxlan-gpe",                  0, 0, 0 },
304 		{ "gre",                        0, 0, 0 },
305 		{ "geneve",                     0, 0, 0 },
306 		{ "gtp",                        0, 0, 0 },
307 		{ "meta",                       0, 0, 0 },
308 		{ "tag",                        0, 0, 0 },
309 		/* Actions */
310 		{ "port-id",                    0, 0, 0 },
311 		{ "rss",                        0, 0, 0 },
312 		{ "queue",                      0, 0, 0 },
313 		{ "jump",                       0, 0, 0 },
314 		{ "mark",                       0, 0, 0 },
315 		{ "count",                      0, 0, 0 },
316 		{ "set-meta",                   0, 0, 0 },
317 		{ "set-tag",                    0, 0, 0 },
318 		{ "drop",                       0, 0, 0 },
319 		{ "hairpin-queue",              1, 0, 0 },
320 		{ "hairpin-rss",                1, 0, 0 },
321 	};
322 
323 	flow_items = 0;
324 	flow_actions = 0;
325 	flow_attrs = 0;
326 	hairpin_queues_num = 0;
327 	argvopt = argv;
328 
329 	printf(":: Flow -> ");
330 	while ((opt = getopt_long(argc, argvopt, "",
331 				lgopts, &opt_idx)) != EOF) {
332 		switch (opt) {
333 		case 0:
334 			if (strcmp(lgopts[opt_idx].name, "help") == 0) {
335 				usage(argv[0]);
336 				rte_exit(EXIT_SUCCESS, "Displayed help\n");
337 			}
338 
339 			if (strcmp(lgopts[opt_idx].name, "group") == 0) {
340 				n = atoi(optarg);
341 				if (n >= 0)
342 					flow_group = n;
343 				else
344 					rte_exit(EXIT_SUCCESS,
345 						"flow group should be >= 0\n");
346 				printf("group %d ", flow_group);
347 			}
348 
349 			for (i = 0; i < RTE_DIM(flow_options); i++)
350 				if (strcmp(lgopts[opt_idx].name,
351 						flow_options[i].str) == 0) {
352 					*flow_options[i].bitmap |=
353 						flow_options[i].mask;
354 					printf("%s / ", flow_options[i].str);
355 				}
356 
357 			if (strcmp(lgopts[opt_idx].name,
358 					"hairpin-rss") == 0) {
359 				n = atoi(optarg);
360 				if (n > 0)
361 					hairpin_queues_num = n;
362 				else
363 					rte_exit(EXIT_SUCCESS,
364 						"Hairpin queues should be > 0\n");
365 
366 				flow_actions |= HAIRPIN_RSS_ACTION;
367 				printf("hairpin-rss / ");
368 			}
369 			if (strcmp(lgopts[opt_idx].name,
370 					"hairpin-queue") == 0) {
371 				n = atoi(optarg);
372 				if (n > 0)
373 					hairpin_queues_num = n;
374 				else
375 					rte_exit(EXIT_SUCCESS,
376 						"Hairpin queues should be > 0\n");
377 
378 				flow_actions |= HAIRPIN_QUEUE_ACTION;
379 				printf("hairpin-queue / ");
380 			}
381 
382 			/* Control */
383 			if (strcmp(lgopts[opt_idx].name,
384 					"flows-count") == 0) {
385 				n = atoi(optarg);
386 				if (n > (int) iterations_number)
387 					flows_count = n;
388 				else {
389 					printf("\n\nflows_count should be > %d\n",
390 						iterations_number);
391 					rte_exit(EXIT_SUCCESS, " ");
392 				}
393 			}
394 			if (strcmp(lgopts[opt_idx].name,
395 					"dump-iterations") == 0)
396 				dump_iterations = true;
397 			if (strcmp(lgopts[opt_idx].name,
398 					"deletion-rate") == 0)
399 				delete_flag = true;
400 			if (strcmp(lgopts[opt_idx].name,
401 					"dump-socket-mem") == 0)
402 				dump_socket_mem_flag = true;
403 			if (strcmp(lgopts[opt_idx].name,
404 					"enable-fwd") == 0)
405 				enable_fwd = true;
406 			break;
407 		default:
408 			fprintf(stderr, "Invalid option: %s\n", argv[optind]);
409 			usage(argv[0]);
410 			rte_exit(EXIT_SUCCESS, "Invalid option\n");
411 			break;
412 		}
413 	}
414 	printf("end_flow\n");
415 }
416 
417 /* Dump the socket memory statistics on console */
418 static size_t
419 dump_socket_mem(FILE *f)
420 {
421 	struct rte_malloc_socket_stats socket_stats;
422 	unsigned int i = 0;
423 	size_t total = 0;
424 	size_t alloc = 0;
425 	size_t free = 0;
426 	unsigned int n_alloc = 0;
427 	unsigned int n_free = 0;
428 	bool active_nodes = false;
429 
430 
431 	for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
432 		if (rte_malloc_get_socket_stats(i, &socket_stats) ||
433 		    !socket_stats.heap_totalsz_bytes)
434 			continue;
435 		active_nodes = true;
436 		total += socket_stats.heap_totalsz_bytes;
437 		alloc += socket_stats.heap_allocsz_bytes;
438 		free += socket_stats.heap_freesz_bytes;
439 		n_alloc += socket_stats.alloc_count;
440 		n_free += socket_stats.free_count;
441 		if (dump_socket_mem_flag) {
442 			fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
443 			fprintf(f,
444 				"\nSocket %u:\nsize(M) total: %.6lf\nalloc:"
445 				" %.6lf(%.3lf%%)\nfree: %.6lf"
446 				"\nmax: %.6lf"
447 				"\ncount alloc: %u\nfree: %u\n",
448 				i,
449 				socket_stats.heap_totalsz_bytes / 1.0e6,
450 				socket_stats.heap_allocsz_bytes / 1.0e6,
451 				(double)socket_stats.heap_allocsz_bytes * 100 /
452 				(double)socket_stats.heap_totalsz_bytes,
453 				socket_stats.heap_freesz_bytes / 1.0e6,
454 				socket_stats.greatest_free_size / 1.0e6,
455 				socket_stats.alloc_count,
456 				socket_stats.free_count);
457 				fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
458 		}
459 	}
460 	if (dump_socket_mem_flag && active_nodes) {
461 		fprintf(f,
462 			"\nTotal: size(M)\ntotal: %.6lf"
463 			"\nalloc: %.6lf(%.3lf%%)\nfree: %.6lf"
464 			"\ncount alloc: %u\nfree: %u\n",
465 			total / 1.0e6, alloc / 1.0e6,
466 			(double)alloc * 100 / (double)total, free / 1.0e6,
467 			n_alloc, n_free);
468 		fprintf(f, "::::::::::::::::::::::::::::::::::::::::\n");
469 	}
470 	return alloc;
471 }
472 
473 static void
474 print_flow_error(struct rte_flow_error error)
475 {
476 	printf("Flow can't be created %d message: %s\n",
477 		error.type,
478 		error.message ? error.message : "(no stated reason)");
479 }
480 
481 static inline void
482 destroy_flows(int port_id, struct rte_flow **flow_list)
483 {
484 	struct rte_flow_error error;
485 	clock_t start_iter, end_iter;
486 	double cpu_time_used = 0;
487 	double flows_rate;
488 	double cpu_time_per_iter[MAX_ITERATIONS];
489 	double delta;
490 	uint32_t i;
491 	int iter_id;
492 
493 	for (i = 0; i < MAX_ITERATIONS; i++)
494 		cpu_time_per_iter[i] = -1;
495 
496 	if (iterations_number > flows_count)
497 		iterations_number = flows_count;
498 
499 	/* Deletion Rate */
500 	printf("Flows Deletion on port = %d\n", port_id);
501 	start_iter = clock();
502 	for (i = 0; i < flows_count; i++) {
503 		if (flow_list[i] == 0)
504 			break;
505 
506 		memset(&error, 0x33, sizeof(error));
507 		if (rte_flow_destroy(port_id, flow_list[i], &error)) {
508 			print_flow_error(error);
509 			rte_exit(EXIT_FAILURE, "Error in deleting flow");
510 		}
511 
512 		if (i && !((i + 1) % iterations_number)) {
513 			/* Save the deletion rate of each iter */
514 			end_iter = clock();
515 			delta = (double) (end_iter - start_iter);
516 			iter_id = ((i + 1) / iterations_number) - 1;
517 			cpu_time_per_iter[iter_id] =
518 				delta / CLOCKS_PER_SEC;
519 			cpu_time_used += cpu_time_per_iter[iter_id];
520 			start_iter = clock();
521 		}
522 	}
523 
524 	/* Deletion rate per iteration */
525 	if (dump_iterations)
526 		for (i = 0; i < MAX_ITERATIONS; i++) {
527 			if (cpu_time_per_iter[i] == -1)
528 				continue;
529 			delta = (double)(iterations_number /
530 				cpu_time_per_iter[i]);
531 			flows_rate = delta / 1000;
532 			printf(":: Iteration #%d: %d flows "
533 				"in %f sec[ Rate = %f K/Sec ]\n",
534 				i, iterations_number,
535 				cpu_time_per_iter[i], flows_rate);
536 		}
537 
538 	/* Deletion rate for all flows */
539 	flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
540 	printf("\n:: Total flow deletion rate -> %f K/Sec\n",
541 		flows_rate);
542 	printf(":: The time for deleting %d in flows %f seconds\n",
543 		flows_count, cpu_time_used);
544 }
545 
546 static inline void
547 flows_handler(void)
548 {
549 	struct rte_flow **flow_list;
550 	struct rte_flow_error error;
551 	clock_t start_iter, end_iter;
552 	double cpu_time_used;
553 	double flows_rate;
554 	double cpu_time_per_iter[MAX_ITERATIONS];
555 	double delta;
556 	uint16_t nr_ports;
557 	uint32_t i;
558 	int port_id;
559 	int iter_id;
560 	uint32_t flow_index;
561 
562 	nr_ports = rte_eth_dev_count_avail();
563 
564 	for (i = 0; i < MAX_ITERATIONS; i++)
565 		cpu_time_per_iter[i] = -1;
566 
567 	if (iterations_number > flows_count)
568 		iterations_number = flows_count;
569 
570 	printf(":: Flows Count per port: %d\n", flows_count);
571 
572 	flow_list = rte_zmalloc("flow_list",
573 		(sizeof(struct rte_flow *) * flows_count) + 1, 0);
574 	if (flow_list == NULL)
575 		rte_exit(EXIT_FAILURE, "No Memory available!");
576 
577 	for (port_id = 0; port_id < nr_ports; port_id++) {
578 		cpu_time_used = 0;
579 		flow_index = 0;
580 		if (flow_group > 0) {
581 			/*
582 			 * Create global rule to jump into flow_group,
583 			 * this way the app will avoid the default rules.
584 			 *
585 			 * Global rule:
586 			 * group 0 eth / end actions jump group <flow_group>
587 			 *
588 			 */
589 			flow = generate_flow(port_id, 0, flow_attrs,
590 				FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_ETH),
591 				FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_JUMP),
592 				flow_group, 0, 0, &error);
593 
594 			if (flow == NULL) {
595 				print_flow_error(error);
596 				rte_exit(EXIT_FAILURE, "error in creating flow");
597 			}
598 			flow_list[flow_index++] = flow;
599 		}
600 
601 		/* Insertion Rate */
602 		printf("Flows insertion on port = %d\n", port_id);
603 		start_iter = clock();
604 		for (i = 0; i < flows_count; i++) {
605 			flow = generate_flow(port_id, flow_group,
606 				flow_attrs, flow_items, flow_actions,
607 				JUMP_ACTION_TABLE, i,
608 				hairpin_queues_num, &error);
609 
610 			if (force_quit)
611 				i = flows_count;
612 
613 			if (!flow) {
614 				print_flow_error(error);
615 				rte_exit(EXIT_FAILURE, "error in creating flow");
616 			}
617 
618 			flow_list[flow_index++] = flow;
619 
620 			if (i && !((i + 1) % iterations_number)) {
621 				/* Save the insertion rate of each iter */
622 				end_iter = clock();
623 				delta = (double) (end_iter - start_iter);
624 				iter_id = ((i + 1) / iterations_number) - 1;
625 				cpu_time_per_iter[iter_id] =
626 					delta / CLOCKS_PER_SEC;
627 				cpu_time_used += cpu_time_per_iter[iter_id];
628 				start_iter = clock();
629 			}
630 		}
631 
632 		/* Iteration rate per iteration */
633 		if (dump_iterations)
634 			for (i = 0; i < MAX_ITERATIONS; i++) {
635 				if (cpu_time_per_iter[i] == -1)
636 					continue;
637 				delta = (double)(iterations_number /
638 					cpu_time_per_iter[i]);
639 				flows_rate = delta / 1000;
640 				printf(":: Iteration #%d: %d flows "
641 					"in %f sec[ Rate = %f K/Sec ]\n",
642 					i, iterations_number,
643 					cpu_time_per_iter[i], flows_rate);
644 			}
645 
646 		/* Insertion rate for all flows */
647 		flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
648 		printf("\n:: Total flow insertion rate -> %f K/Sec\n",
649 						flows_rate);
650 		printf(":: The time for creating %d in flows %f seconds\n",
651 						flows_count, cpu_time_used);
652 
653 		if (delete_flag)
654 			destroy_flows(port_id, flow_list);
655 	}
656 }
657 
658 static void
659 signal_handler(int signum)
660 {
661 	if (signum == SIGINT || signum == SIGTERM) {
662 		printf("\n\nSignal %d received, preparing to exit...\n",
663 					signum);
664 		printf("Error: Stats are wrong due to sudden signal!\n\n");
665 		force_quit = true;
666 	}
667 }
668 
669 static inline uint16_t
670 do_rx(struct lcore_info *li, uint16_t rx_port, uint16_t rx_queue)
671 {
672 	uint16_t cnt = 0;
673 	cnt = rte_eth_rx_burst(rx_port, rx_queue, li->pkts, MAX_PKT_BURST);
674 	li->rx_pkts += cnt;
675 	return cnt;
676 }
677 
678 static inline void
679 do_tx(struct lcore_info *li, uint16_t cnt, uint16_t tx_port,
680 			uint16_t tx_queue)
681 {
682 	uint16_t nr_tx = 0;
683 	uint16_t i;
684 
685 	nr_tx = rte_eth_tx_burst(tx_port, tx_queue, li->pkts, cnt);
686 	li->tx_pkts  += nr_tx;
687 	li->tx_drops += cnt - nr_tx;
688 
689 	for (i = nr_tx; i < cnt; i++)
690 		rte_pktmbuf_free(li->pkts[i]);
691 }
692 
/*
 * Convert a number into a string that is easy to read by inserting a
 * comma between each group of three digits.
 *
 * For example if n = 1799321, the output will be 1,799,321.
 *
 * n:   value to format, the whole uint64_t range is supported.
 * buf: output buffer; it must hold at least 28 bytes, which is the
 *      longest result (26 characters) plus the temporary trailing comma
 *      and the terminating NUL.
 *
 * Returns buf.
 */
static char *
pretty_number(uint64_t n, char *buf)
{
	/* UINT64_MAX has 20 digits: at most 7 groups of up to 3 digits */
	char groups[7][4];
	int nb_groups = 0;
	int off = 0;

	/* Peel off the low-order groups, zero padded to three digits */
	while (n >= 1000) {
		snprintf(groups[nb_groups], sizeof(groups[nb_groups]),
			"%03u", (unsigned int)(n % 1000));
		n /= 1000;
		nb_groups++;
	}

	/* The leading group is printed without padding */
	snprintf(groups[nb_groups], sizeof(groups[nb_groups]),
		"%u", (unsigned int)n);
	nb_groups++;

	/* Emit the groups from the most significant one */
	while (nb_groups--)
		off += sprintf(buf + off, "%s,", groups[nb_groups]);
	/* Drop the comma written after the last group */
	buf[off - 1] = '\0';

	return buf;
}
722 
723 static void
724 packet_per_second_stats(void)
725 {
726 	struct lcore_info *old;
727 	struct lcore_info *li, *oli;
728 	int nr_lines = 0;
729 	int i;
730 
731 	old = rte_zmalloc("old",
732 		sizeof(struct lcore_info) * MAX_LCORES, 0);
733 	if (old == NULL)
734 		rte_exit(EXIT_FAILURE, "No Memory available!");
735 
736 	memcpy(old, lcore_infos,
737 		sizeof(struct lcore_info) * MAX_LCORES);
738 
739 	while (!force_quit) {
740 		uint64_t total_tx_pkts = 0;
741 		uint64_t total_rx_pkts = 0;
742 		uint64_t total_tx_drops = 0;
743 		uint64_t tx_delta, rx_delta, drops_delta;
744 		char buf[3][32];
745 		int nr_valid_core = 0;
746 
747 		sleep(1);
748 
749 		if (nr_lines) {
750 			char go_up_nr_lines[16];
751 
752 			sprintf(go_up_nr_lines, "%c[%dA\r", 27, nr_lines);
753 			printf("%s\r", go_up_nr_lines);
754 		}
755 
756 		printf("\n%6s %16s %16s %16s\n", "core", "tx", "tx drops", "rx");
757 		printf("%6s %16s %16s %16s\n", "------", "----------------",
758 			"----------------", "----------------");
759 		nr_lines = 3;
760 		for (i = 0; i < MAX_LCORES; i++) {
761 			li  = &lcore_infos[i];
762 			oli = &old[i];
763 			if (li->mode != LCORE_MODE_PKT)
764 				continue;
765 
766 			tx_delta    = li->tx_pkts  - oli->tx_pkts;
767 			rx_delta    = li->rx_pkts  - oli->rx_pkts;
768 			drops_delta = li->tx_drops - oli->tx_drops;
769 			printf("%6d %16s %16s %16s\n", i,
770 				pretty_number(tx_delta,    buf[0]),
771 				pretty_number(drops_delta, buf[1]),
772 				pretty_number(rx_delta,    buf[2]));
773 
774 			total_tx_pkts  += tx_delta;
775 			total_rx_pkts  += rx_delta;
776 			total_tx_drops += drops_delta;
777 
778 			nr_valid_core++;
779 			nr_lines += 1;
780 		}
781 
782 		if (nr_valid_core > 1) {
783 			printf("%6s %16s %16s %16s\n", "total",
784 				pretty_number(total_tx_pkts,  buf[0]),
785 				pretty_number(total_tx_drops, buf[1]),
786 				pretty_number(total_rx_pkts,  buf[2]));
787 			nr_lines += 1;
788 		}
789 
790 		memcpy(old, lcore_infos,
791 			sizeof(struct lcore_info) * MAX_LCORES);
792 	}
793 }
794 
795 static int
796 start_forwarding(void *data __rte_unused)
797 {
798 	int lcore = rte_lcore_id();
799 	int stream_id;
800 	uint16_t cnt;
801 	struct lcore_info *li = &lcore_infos[lcore];
802 
803 	if (!li->mode)
804 		return 0;
805 
806 	if (li->mode == LCORE_MODE_STATS) {
807 		printf(":: started stats on lcore %u\n", lcore);
808 		packet_per_second_stats();
809 		return 0;
810 	}
811 
812 	while (!force_quit)
813 		for (stream_id = 0; stream_id < MAX_STREAMS; stream_id++) {
814 			if (li->streams[stream_id].rx_port == -1)
815 				continue;
816 
817 			cnt = do_rx(li,
818 					li->streams[stream_id].rx_port,
819 					li->streams[stream_id].rx_queue);
820 			if (cnt)
821 				do_tx(li, cnt,
822 					li->streams[stream_id].tx_port,
823 					li->streams[stream_id].tx_queue);
824 		}
825 	return 0;
826 }
827 
828 static void
829 init_lcore_info(void)
830 {
831 	int i, j;
832 	unsigned int lcore;
833 	uint16_t nr_port;
834 	uint16_t queue;
835 	int port;
836 	int stream_id = 0;
837 	int streams_per_core;
838 	int unassigned_streams;
839 	int nb_fwd_streams;
840 	nr_port = rte_eth_dev_count_avail();
841 
842 	/* First logical core is reserved for stats printing */
843 	lcore = rte_get_next_lcore(-1, 0, 0);
844 	lcore_infos[lcore].mode = LCORE_MODE_STATS;
845 
846 	/*
847 	 * Initialize all cores
848 	 * All cores at first must have -1 value in all streams
849 	 * This means that this stream is not used, or not set
850 	 * yet.
851 	 */
852 	for (i = 0; i < MAX_LCORES; i++)
853 		for (j = 0; j < MAX_STREAMS; j++) {
854 			lcore_infos[i].streams[j].tx_port = -1;
855 			lcore_infos[i].streams[j].rx_port = -1;
856 			lcore_infos[i].streams[j].tx_queue = -1;
857 			lcore_infos[i].streams[j].rx_queue = -1;
858 			lcore_infos[i].streams_nb = 0;
859 		}
860 
861 	/*
862 	 * Calculate the total streams count.
863 	 * Also distribute those streams count between the available
864 	 * logical cores except first core, since it's reserved for
865 	 * stats prints.
866 	 */
867 	nb_fwd_streams = nr_port * RXQ_NUM;
868 	if ((int)(nb_lcores - 1) >= nb_fwd_streams)
869 		for (i = 0; i < (int)(nb_lcores - 1); i++) {
870 			lcore = rte_get_next_lcore(lcore, 0, 0);
871 			lcore_infos[lcore].streams_nb = 1;
872 		}
873 	else {
874 		streams_per_core = nb_fwd_streams / (nb_lcores - 1);
875 		unassigned_streams = nb_fwd_streams % (nb_lcores - 1);
876 		for (i = 0; i < (int)(nb_lcores - 1); i++) {
877 			lcore = rte_get_next_lcore(lcore, 0, 0);
878 			lcore_infos[lcore].streams_nb = streams_per_core;
879 			if (unassigned_streams) {
880 				lcore_infos[lcore].streams_nb++;
881 				unassigned_streams--;
882 			}
883 		}
884 	}
885 
886 	/*
887 	 * Set the streams for the cores according to each logical
888 	 * core stream count.
889 	 * The streams is built on the design of what received should
890 	 * forward as well, this means that if you received packets on
891 	 * port 0 queue 0 then the same queue should forward the
892 	 * packets, using the same logical core.
893 	 */
894 	lcore = rte_get_next_lcore(-1, 0, 0);
895 	for (port = 0; port < nr_port; port++) {
896 		/* Create FWD stream */
897 		for (queue = 0; queue < RXQ_NUM; queue++) {
898 			if (!lcore_infos[lcore].streams_nb ||
899 				!(stream_id % lcore_infos[lcore].streams_nb)) {
900 				lcore = rte_get_next_lcore(lcore, 0, 0);
901 				lcore_infos[lcore].mode = LCORE_MODE_PKT;
902 				stream_id = 0;
903 			}
904 			lcore_infos[lcore].streams[stream_id].rx_queue = queue;
905 			lcore_infos[lcore].streams[stream_id].tx_queue = queue;
906 			lcore_infos[lcore].streams[stream_id].rx_port = port;
907 			lcore_infos[lcore].streams[stream_id].tx_port = port;
908 			stream_id++;
909 		}
910 	}
911 
912 	/* Print all streams */
913 	printf(":: Stream -> core id[N]: (rx_port, rx_queue)->(tx_port, tx_queue)\n");
914 	for (i = 0; i < MAX_LCORES; i++)
915 		for (j = 0; j < MAX_STREAMS; j++) {
916 			/* No streams for this core */
917 			if (lcore_infos[i].streams[j].tx_port == -1)
918 				break;
919 			printf("Stream -> core id[%d]: (%d,%d)->(%d,%d)\n",
920 				i,
921 				lcore_infos[i].streams[j].rx_port,
922 				lcore_infos[i].streams[j].rx_queue,
923 				lcore_infos[i].streams[j].tx_port,
924 				lcore_infos[i].streams[j].tx_queue);
925 		}
926 }
927 
928 static void
929 init_port(void)
930 {
931 	int ret;
932 	uint16_t std_queue;
933 	uint16_t hairpin_queue;
934 	uint16_t port_id;
935 	uint16_t nr_ports;
936 	uint16_t nr_queues;
937 	struct rte_eth_hairpin_conf hairpin_conf = {
938 		.peer_count = 1,
939 	};
940 	struct rte_eth_conf port_conf = {
941 		.rx_adv_conf = {
942 			.rss_conf.rss_hf =
943 				GET_RSS_HF(),
944 		}
945 	};
946 	struct rte_eth_txconf txq_conf;
947 	struct rte_eth_rxconf rxq_conf;
948 	struct rte_eth_dev_info dev_info;
949 
950 	nr_queues = RXQ_NUM;
951 	if (hairpin_queues_num != 0)
952 		nr_queues = RXQ_NUM + hairpin_queues_num;
953 
954 	nr_ports = rte_eth_dev_count_avail();
955 	if (nr_ports == 0)
956 		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
957 
958 	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
959 					TOTAL_MBUF_NUM, MBUF_CACHE_SIZE,
960 					0, MBUF_SIZE,
961 					rte_socket_id());
962 	if (mbuf_mp == NULL)
963 		rte_exit(EXIT_FAILURE, "Error: can't init mbuf pool\n");
964 
965 	for (port_id = 0; port_id < nr_ports; port_id++) {
966 		ret = rte_eth_dev_info_get(port_id, &dev_info);
967 		if (ret != 0)
968 			rte_exit(EXIT_FAILURE,
969 				"Error during getting device"
970 				" (port %u) info: %s\n",
971 				port_id, strerror(-ret));
972 
973 		port_conf.txmode.offloads &= dev_info.tx_offload_capa;
974 		port_conf.rxmode.offloads &= dev_info.rx_offload_capa;
975 
976 		printf(":: initializing port: %d\n", port_id);
977 
978 		ret = rte_eth_dev_configure(port_id, nr_queues,
979 				nr_queues, &port_conf);
980 		if (ret < 0)
981 			rte_exit(EXIT_FAILURE,
982 				":: cannot configure device: err=%d, port=%u\n",
983 				ret, port_id);
984 
985 		rxq_conf = dev_info.default_rxconf;
986 		for (std_queue = 0; std_queue < RXQ_NUM; std_queue++) {
987 			ret = rte_eth_rx_queue_setup(port_id, std_queue, NR_RXD,
988 					rte_eth_dev_socket_id(port_id),
989 					&rxq_conf,
990 					mbuf_mp);
991 			if (ret < 0)
992 				rte_exit(EXIT_FAILURE,
993 					":: Rx queue setup failed: err=%d, port=%u\n",
994 					ret, port_id);
995 		}
996 
997 		txq_conf = dev_info.default_txconf;
998 		for (std_queue = 0; std_queue < TXQ_NUM; std_queue++) {
999 			ret = rte_eth_tx_queue_setup(port_id, std_queue, NR_TXD,
1000 					rte_eth_dev_socket_id(port_id),
1001 					&txq_conf);
1002 			if (ret < 0)
1003 				rte_exit(EXIT_FAILURE,
1004 					":: Tx queue setup failed: err=%d, port=%u\n",
1005 					ret, port_id);
1006 		}
1007 
1008 		/* Catch all packets from traffic generator. */
1009 		ret = rte_eth_promiscuous_enable(port_id);
1010 		if (ret != 0)
1011 			rte_exit(EXIT_FAILURE,
1012 				":: promiscuous mode enable failed: err=%s, port=%u\n",
1013 				rte_strerror(-ret), port_id);
1014 
1015 		if (hairpin_queues_num != 0) {
1016 			/*
1017 			 * Configure peer which represents hairpin Tx.
1018 			 * Hairpin queue numbers start after standard queues
1019 			 * (RXQ_NUM and TXQ_NUM).
1020 			 */
1021 			for (hairpin_queue = RXQ_NUM, std_queue = 0;
1022 					hairpin_queue < nr_queues;
1023 					hairpin_queue++, std_queue++) {
1024 				hairpin_conf.peers[0].port = port_id;
1025 				hairpin_conf.peers[0].queue =
1026 					std_queue + TXQ_NUM;
1027 				ret = rte_eth_rx_hairpin_queue_setup(
1028 						port_id, hairpin_queue,
1029 						NR_RXD, &hairpin_conf);
1030 				if (ret != 0)
1031 					rte_exit(EXIT_FAILURE,
1032 						":: Hairpin rx queue setup failed: err=%d, port=%u\n",
1033 						ret, port_id);
1034 			}
1035 
1036 			for (hairpin_queue = TXQ_NUM, std_queue = 0;
1037 					hairpin_queue < nr_queues;
1038 					hairpin_queue++, std_queue++) {
1039 				hairpin_conf.peers[0].port = port_id;
1040 				hairpin_conf.peers[0].queue =
1041 					std_queue + RXQ_NUM;
1042 				ret = rte_eth_tx_hairpin_queue_setup(
1043 						port_id, hairpin_queue,
1044 						NR_TXD, &hairpin_conf);
1045 				if (ret != 0)
1046 					rte_exit(EXIT_FAILURE,
1047 						":: Hairpin tx queue setup failed: err=%d, port=%u\n",
1048 						ret, port_id);
1049 			}
1050 		}
1051 
1052 		ret = rte_eth_dev_start(port_id);
1053 		if (ret < 0)
1054 			rte_exit(EXIT_FAILURE,
1055 				"rte_eth_dev_start:err=%d, port=%u\n",
1056 				ret, port_id);
1057 
1058 		printf(":: initializing port: %d done\n", port_id);
1059 	}
1060 }
1061 
1062 int
1063 main(int argc, char **argv)
1064 {
1065 	int ret;
1066 	uint16_t port;
1067 	struct rte_flow_error error;
1068 	int64_t alloc, last_alloc;
1069 
1070 	ret = rte_eal_init(argc, argv);
1071 	if (ret < 0)
1072 		rte_exit(EXIT_FAILURE, "EAL init failed\n");
1073 
1074 	force_quit = false;
1075 	dump_iterations = false;
1076 	flows_count = DEFAULT_RULES_COUNT;
1077 	iterations_number = DEFAULT_ITERATION;
1078 	delete_flag = false;
1079 	dump_socket_mem_flag = false;
1080 	flow_group = 0;
1081 
1082 	signal(SIGINT, signal_handler);
1083 	signal(SIGTERM, signal_handler);
1084 
1085 	argc -= ret;
1086 	argv += ret;
1087 	if (argc > 1)
1088 		args_parse(argc, argv);
1089 
1090 	init_port();
1091 
1092 	nb_lcores = rte_lcore_count();
1093 	if (nb_lcores <= 1)
1094 		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
1095 
1096 	last_alloc = (int64_t)dump_socket_mem(stdout);
1097 	flows_handler();
1098 	alloc = (int64_t)dump_socket_mem(stdout);
1099 
1100 	if (last_alloc)
1101 		fprintf(stdout, ":: Memory allocation change(M): %.6lf\n",
1102 		(alloc - last_alloc) / 1.0e6);
1103 
1104 	if (enable_fwd) {
1105 		init_lcore_info();
1106 		rte_eal_mp_remote_launch(start_forwarding, NULL, CALL_MASTER);
1107 	}
1108 
1109 	RTE_ETH_FOREACH_DEV(port) {
1110 		rte_flow_flush(port, &error);
1111 		rte_eth_dev_stop(port);
1112 		rte_eth_dev_close(port);
1113 	}
1114 	return 0;
1115 }
1116