xref: /dpdk/examples/vhost/main.c (revision e5ffdd1457c0fb4e8365f524ee2529ac726edcf3)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <arpa/inet.h>
35 #include <getopt.h>
36 #include <linux/if_ether.h>
37 #include <linux/if_vlan.h>
38 #include <linux/virtio_net.h>
39 #include <linux/virtio_ring.h>
40 #include <signal.h>
41 #include <stdint.h>
42 #include <sys/eventfd.h>
43 #include <sys/param.h>
44 #include <unistd.h>
45 
46 #include <rte_atomic.h>
47 #include <rte_cycles.h>
48 #include <rte_ethdev.h>
49 #include <rte_log.h>
50 #include <rte_string_fns.h>
51 
52 #include "main.h"
53 #include "virtio-net.h"
54 #include "vhost-net-cdev.h"
55 
56 #define MAX_QUEUES 128
57 
58 /* the maximum number of external ports supported */
59 #define MAX_SUP_PORTS 1
60 
61 /*
62  * Calculate the number of buffers needed per port
63  */
64 #define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) +  		\
65 							(num_switching_cores*MAX_PKT_BURST) +  			\
66 							(num_switching_cores*RTE_TEST_TX_DESC_DEFAULT) +\
67 							(num_switching_cores*MBUF_CACHE_SIZE))
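/*
 * Illustrative example only: with the default values below and, say, two
 * switching cores this works out to 128*1024 + 2*32 + 2*512 + 2*128 = 132416
 * mbufs per port. The actual value depends on num_switching_cores, which is
 * only known at runtime.
 */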
68 
69 #define MBUF_CACHE_SIZE 128
70 #define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
71 
72 /*
73  * RX and TX Prefetch, Host, and Write-back threshold values should be
74  * carefully set for optimal performance. Consult the network
75  * controller's datasheet and supporting DPDK documentation for guidance
76  * on how these parameters should be set.
77  */
78 #define RX_PTHRESH 8 /* Default values of RX prefetch threshold reg. */
79 #define RX_HTHRESH 8 /* Default values of RX host threshold reg. */
80 #define RX_WTHRESH 4 /* Default values of RX write-back threshold reg. */
81 
82 /*
83  * These default values are optimized for use with the Intel(R) 82599 10 GbE
84  * Controller and the DPDK ixgbe PMD. Consider using other values for other
85  * network controllers and/or network drivers.
86  */
87 #define TX_PTHRESH 36 /* Default values of TX prefetch threshold reg. */
88 #define TX_HTHRESH 0  /* Default values of TX host threshold reg. */
89 #define TX_WTHRESH 0  /* Default values of TX write-back threshold reg. */
90 
91 #define MAX_PKT_BURST 32 		/* Max burst size for RX/TX */
92 #define MAX_MRG_PKT_BURST 16 	/* Max burst size for RX mergeable buffers. */
93 #define BURST_TX_DRAIN_US 100 	/* TX drain every ~100us */
94 
95 #define BURST_RX_WAIT_US 15 	/* Defines how long we wait between retries on RX */
96 #define BURST_RX_RETRIES 4		/* Number of retries on RX. */
97 
98 /* State of virtio device. */
99 #define DEVICE_MAC_LEARNING 0
100 #define DEVICE_RX			1
101 #define DEVICE_SAFE_REMOVE	2
102 
103 /* Config_core_flag status definitions. */
104 #define REQUEST_DEV_REMOVAL 1
105 #define ACK_DEV_REMOVAL 0
106 
107 /* Configurable number of RX/TX ring descriptors */
108 #define RTE_TEST_RX_DESC_DEFAULT 1024
109 #define RTE_TEST_TX_DESC_DEFAULT 512
110 
111 #define INVALID_PORT_ID 0xFF
112 
113 /* Max number of devices. Limited by vmdq. */
114 #define MAX_DEVICES 64
115 
116 /* Size of buffers used for rte_snprintfs. */
117 #define MAX_PRINT_BUFF 6072
118 
119 /* Maximum character device basename size. */
120 #define MAX_BASENAME_SZ 10
121 
122 /* Maximum long option length for option parsing. */
123 #define MAX_LONG_OPT_SZ 64
124 
125 /* Used to compare MAC addresses. */
126 #define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL
127 
128 /* Number of descriptors per cacheline. */
129 #define DESC_PER_CACHELINE (CACHE_LINE_SIZE / sizeof(struct vring_desc))
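/*
 * For example, on a target with a 64-byte cache line this evaluates to 4,
 * since struct vring_desc is 16 bytes (u64 addr, u32 len, u16 flags, u16 next).
 */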
130 
131 /* mask of enabled ports */
132 static uint32_t enabled_port_mask = 0;
133 
134 /*Number of switching cores enabled*/
135 static uint32_t num_switching_cores = 0;
136 
137 /* number of devices/queues to support*/
138 static uint32_t num_queues = 0;
139 uint32_t num_devices = 0;
140 
141 /* Enable VM2VM communications. If this is disabled then the MAC address compare is skipped. */
142 static uint32_t enable_vm2vm = 1;
143 /* Enable stats. */
144 static uint32_t enable_stats = 0;
145 /* Enable retries on RX. */
146 static uint32_t enable_retry = 1;
147 /* Specify timeout (in microseconds) between retries on RX. */
148 static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
149 /* Specify the number of retries on RX. */
150 static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;
151 
152 /* Character device basename. Can be set by user. */
153 static char dev_basename[MAX_BASENAME_SZ] = "vhost-net";
154 
155 /* Character device index. Can be set by user. */
156 static uint32_t dev_index = 0;
157 
158 /* This can be set by the user so it is made available here. */
159 extern uint64_t VHOST_FEATURES;
160 
161 /* Default configuration for rx and tx thresholds etc. */
162 static const struct rte_eth_rxconf rx_conf_default = {
163 	.rx_thresh = {
164 		.pthresh = RX_PTHRESH,
165 		.hthresh = RX_HTHRESH,
166 		.wthresh = RX_WTHRESH,
167 	},
168 	.rx_drop_en = 1,
169 };
170 
171 /*
172  * These default values are optimized for use with the Intel(R) 82599 10 GbE
173  * Controller and the DPDK ixgbe/igb PMD. Consider using other values for other
174  * network controllers and/or network drivers.
175  */
176 static const struct rte_eth_txconf tx_conf_default = {
177 	.tx_thresh = {
178 		.pthresh = TX_PTHRESH,
179 		.hthresh = TX_HTHRESH,
180 		.wthresh = TX_WTHRESH,
181 	},
182 	.tx_free_thresh = 0, /* Use PMD default values */
183 	.tx_rs_thresh = 0, /* Use PMD default values */
184 };
185 
186 /* Empty VMDQ configuration structure. Filled in programmatically. */
187 static const struct rte_eth_conf vmdq_conf_default = {
188 	.rxmode = {
189 		.mq_mode        = ETH_MQ_RX_VMDQ_ONLY,
190 		.split_hdr_size = 0,
191 		.header_split   = 0, /**< Header Split disabled */
192 		.hw_ip_checksum = 0, /**< IP checksum offload disabled */
193 		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
194 		/*
195 		 * VLAN stripping is necessary for 1G NICs such as the I350;
196 		 * it fixes a bug where IPv4 forwarding in the guest could not
197 		 * forward packets from one virtio device to another virtio device.
198 		 */
199 		.hw_vlan_strip  = 1, /**< VLAN strip enabled. */
200 		.jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
201 		.hw_strip_crc   = 0, /**< CRC stripped by hardware */
202 	},
203 
204 	.txmode = {
205 		.mq_mode = ETH_MQ_TX_NONE,
206 	},
207 	.rx_adv_conf = {
208 		/*
209 		 * should be overridden separately in code with
210 		 * appropriate values
211 		 */
212 		.vmdq_rx_conf = {
213 			.nb_queue_pools = ETH_8_POOLS,
214 			.enable_default_pool = 0,
215 			.default_pool = 0,
216 			.nb_pool_maps = 0,
217 			.pool_map = {{0, 0},},
218 		},
219 	},
220 };
221 
222 static unsigned lcore_ids[RTE_MAX_LCORE];
223 static uint8_t ports[RTE_MAX_ETHPORTS];
224 static unsigned num_ports = 0; /**< The number of ports specified in command line */
225 
226 const uint16_t vlan_tags[] = {
227 	1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007,
228 	1008, 1009, 1010, 1011,	1012, 1013, 1014, 1015,
229 	1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
230 	1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031,
231 	1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039,
232 	1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
233 	1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
234 	1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
235 };
236 
237 /* ethernet addresses of ports */
238 static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
239 
240 /* heads for the main used and free linked lists for the data path. */
241 static struct virtio_net_data_ll *ll_root_used = NULL;
242 static struct virtio_net_data_ll *ll_root_free = NULL;
243 
244 /* Array of data core structures containing information on individual core linked lists. */
245 static struct lcore_info lcore_info[RTE_MAX_LCORE];
246 
247 /* Used for queueing bursts of TX packets. */
248 struct mbuf_table {
249 	unsigned len;
250 	unsigned txq_id;
251 	struct rte_mbuf *m_table[MAX_PKT_BURST];
252 };
253 
254 /* TX queue for each data core. */
255 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
256 
257 /* Vlan header struct used to insert vlan tags on TX. */
258 struct vlan_ethhdr {
259 	unsigned char   h_dest[ETH_ALEN];
260 	unsigned char   h_source[ETH_ALEN];
261 	__be16          h_vlan_proto;
262 	__be16          h_vlan_TCI;
263 	__be16          h_vlan_encapsulated_proto;
264 };
265 
266 /* Header lengths. */
267 #define VLAN_HLEN       4
268 #define VLAN_ETH_HLEN   18
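/* VLAN_ETH_HLEN is the 14 byte Ethernet header plus the 4 byte 802.1Q tag. */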
269 
270 /* Per-device statistics struct */
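/*
 * The rx counters are atomic because a device's RX statistics may be updated
 * both by its own data core and by other cores doing VM2VM transmission; the
 * tx counters are only ever updated by the core that owns the device.
 */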
271 struct device_statistics {
272 	uint64_t tx_total;
273 	rte_atomic64_t rx_total;
274 	uint64_t tx;
275 	rte_atomic64_t rx;
276 } __rte_cache_aligned;
277 struct device_statistics dev_statistics[MAX_DEVICES];
278 
279 /*
280  * Builds up the correct configuration for VMDQ VLAN pool map
281  * according to the pool & queue limits.
282  */
283 static inline int
284 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)
285 {
286 	struct rte_eth_vmdq_rx_conf conf;
287 	unsigned i;
288 
289 	memset(&conf, 0, sizeof(conf));
290 	conf.nb_queue_pools = (enum rte_eth_nb_pools)num_devices;
291 	conf.nb_pool_maps = num_devices;
292 
293 	for (i = 0; i < conf.nb_pool_maps; i++) {
294 		conf.pool_map[i].vlan_id = vlan_tags[ i ];
295 		conf.pool_map[i].pools = (1UL << i);
296 	}
297 
298 	(void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
299 	(void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
300 		   sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
301 	return 0;
302 }
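/*
 * For example, with num_devices == 8 the loop above maps VLAN IDs 1000-1007
 * (taken from vlan_tags[]) to VMDQ pools 0-7 respectively, one pool per device.
 */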
303 
304 /*
305  * Validate the device number against the maximum pool number obtained from dev_info.
306  * If the device number is invalid, print an error message and return -1.
307  * Each device must have its own pool.
308  */
309 static inline int
310 validate_num_devices(uint32_t max_nb_devices)
311 {
312 	if (num_devices > max_nb_devices) {
313 		RTE_LOG(ERR, PORT, "invalid number of devices\n");
314 		return -1;
315 	}
316 	return 0;
317 }
318 
319 /*
320  * Initialises a given port using global settings and with the RX buffers
321  * coming from the mbuf_pool passed as a parameter.
322  */
323 static inline int
324 port_init(uint8_t port, struct rte_mempool *mbuf_pool)
325 {
326 	struct rte_eth_dev_info dev_info;
327 	struct rte_eth_conf port_conf;
328 	uint16_t rx_rings, tx_rings = (uint16_t)rte_lcore_count();
329 	const uint16_t rx_ring_size = RTE_TEST_RX_DESC_DEFAULT, tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
330 	int retval;
331 	uint16_t q;
332 
333 	/* The max pool number from dev_info will be used to validate the pool number specified in cmd line */
334 	rte_eth_dev_info_get (port, &dev_info);
335 
336 	/* Configure the number of supported virtio devices based on VMDQ limits. */
337 	num_devices = dev_info.max_vmdq_pools;
338 	num_queues = dev_info.max_rx_queues;
339 
340 	retval = validate_num_devices(MAX_DEVICES);
341 	if (retval < 0)
342 		return retval;
343 
344 	/* Get port configuration. */
345 	retval = get_eth_conf(&port_conf, num_devices);
346 	if (retval < 0)
347 		return retval;
348 
349 	if (port >= rte_eth_dev_count()) return -1;
350 
351 	rx_rings = (uint16_t)num_queues;
352 	/* Configure ethernet device. */
353 	retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
354 	if (retval != 0)
355 		return retval;
356 
357 	/* Setup the queues. */
358 	for (q = 0; q < rx_rings; q ++) {
359 		retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
360 						rte_eth_dev_socket_id(port), &rx_conf_default,
361 						mbuf_pool);
362 		if (retval < 0)
363 			return retval;
364 	}
365 	for (q = 0; q < tx_rings; q ++) {
366 		retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
367 						rte_eth_dev_socket_id(port), &tx_conf_default);
368 		if (retval < 0)
369 			return retval;
370 	}
371 
372 	/* Start the device. */
373 	retval  = rte_eth_dev_start(port);
374 	if (retval < 0)
375 		return retval;
376 
377 	rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
378 	RTE_LOG(INFO, PORT, "Max virtio devices supported: %u\n", num_devices);
379 	RTE_LOG(INFO, PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
380 			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
381 			(unsigned)port,
382 			vmdq_ports_eth_addr[port].addr_bytes[0],
383 			vmdq_ports_eth_addr[port].addr_bytes[1],
384 			vmdq_ports_eth_addr[port].addr_bytes[2],
385 			vmdq_ports_eth_addr[port].addr_bytes[3],
386 			vmdq_ports_eth_addr[port].addr_bytes[4],
387 			vmdq_ports_eth_addr[port].addr_bytes[5]);
388 
389 	return 0;
390 }
391 
392 /*
393  * Set character device basename.
394  */
395 static int
396 us_vhost_parse_basename(const char *q_arg)
397 {
398 	/* parse number string */
399 
400 	if (strnlen(q_arg, MAX_BASENAME_SZ) >= MAX_BASENAME_SZ)
401 		return -1;
402 	else
403 		rte_snprintf((char*)&dev_basename, MAX_BASENAME_SZ, "%s", q_arg);
404 
405 	return 0;
406 }
407 
408 /*
409  * Parse the portmask provided at run time.
410  */
411 static int
412 parse_portmask(const char *portmask)
413 {
414 	char *end = NULL;
415 	unsigned long pm;
416 
417 	errno = 0;
418 
419 	/* parse hexadecimal string */
420 	pm = strtoul(portmask, &end, 16);
421 	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
422 		return -1;
423 
424 	if (pm == 0)
425 		return -1;
426 
427 	return pm;
428 
429 }
430 
431 /*
432  * Parse num options at run time.
433  */
434 static int
435 parse_num_opt(const char *q_arg, uint32_t max_valid_value)
436 {
437 	char *end = NULL;
438 	unsigned long num;
439 
440 	errno = 0;
441 
442 	/* parse unsigned int string */
443 	num = strtoul(q_arg, &end, 10);
444 	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
445 		return -1;
446 
447 	if (num > max_valid_value)
448 		return -1;
449 
450 	return num;
451 
452 }
453 
454 /*
455  * Display usage
456  */
457 static void
458 us_vhost_usage(const char *prgname)
459 {
460 	RTE_LOG(INFO, CONFIG, "%s [EAL options] -- -p PORTMASK --vm2vm [0|1] --rx_retry [0|1] --mergeable [0|1] --stats [0-N] --dev-basename <name> --dev-index [0-N] --nb-devices ND\n"
461 	"		-p PORTMASK: Set mask for ports to be used by application\n"
462 	"		--vm2vm [0|1]: disable/enable(default) vm2vm comms\n"
463 	"		--rx-retry [0|1]: disable/enable(default) retries on rx. Enable retry if destination queue is full\n"
464 	"		--rx-retry-delay [0-N]: timeout(in microseconds) between retries on RX. Only takes effect if retries on rx are enabled\n"
465 	"		--rx-retry-num [0-N]: the number of retries on rx. Only takes effect if retries on rx are enabled\n"
466 	"		--mergeable [0|1]: disable(default)/enable RX mergeable buffers\n"
467 	"		--stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
468 	"		--dev-basename: The basename to be used for the character device.\n"
469 	"		--dev-index [0-N]: Defaults to zero if not used. Index is appended to basename.\n",
470 	       prgname);
471 }
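/*
 * A hypothetical invocation (the binary name and the EAL core/memory options
 * depend on the build and target system):
 *   ./build/vhost-switch -c 0xf -n 4 -- -p 0x1 --vm2vm 1 --stats 2
 */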
472 
473 /*
474  * Parse the arguments given in the command line of the application.
475  */
476 static int
477 us_vhost_parse_args(int argc, char **argv)
478 {
479 	int opt, ret;
480 	int option_index;
481 	unsigned i;
482 	const char *prgname = argv[0];
483 	static struct option long_option[] = {
484 		{"vm2vm", required_argument, NULL, 0},
485 		{"rx-retry", required_argument, NULL, 0},
486 		{"rx-retry-delay", required_argument, NULL, 0},
487 		{"rx-retry-num", required_argument, NULL, 0},
488 		{"mergeable", required_argument, NULL, 0},
489 		{"stats", required_argument, NULL, 0},
490 		{"dev-basename", required_argument, NULL, 0},
491 		{"dev-index", required_argument, NULL, 0},
492 		{NULL, 0, 0, 0}
493 	};
494 
495 	/* Parse command line */
496 	while ((opt = getopt_long(argc, argv, "p:",long_option, &option_index)) != EOF) {
497 		switch (opt) {
498 		/* Portmask */
499 		case 'p':
500 			enabled_port_mask = parse_portmask(optarg);
501 			if (enabled_port_mask == 0) {
502 				RTE_LOG(INFO, CONFIG, "Invalid portmask\n");
503 				us_vhost_usage(prgname);
504 				return -1;
505 			}
506 			break;
507 
508 		case 0:
509 			/* Enable/disable vm2vm comms. */
510 			if (!strncmp(long_option[option_index].name, "vm2vm", MAX_LONG_OPT_SZ)) {
511 				ret = parse_num_opt(optarg, 1);
512 				if (ret == -1) {
513 					RTE_LOG(INFO, CONFIG, "Invalid argument for vm2vm [0|1]\n");
514 					us_vhost_usage(prgname);
515 					return -1;
516 				} else {
517 					enable_vm2vm = ret;
518 				}
519 			}
520 
521 			/* Enable/disable retries on RX. */
522 			if (!strncmp(long_option[option_index].name, "rx-retry", MAX_LONG_OPT_SZ)) {
523 				ret = parse_num_opt(optarg, 1);
524 				if (ret == -1) {
525 					RTE_LOG(INFO, CONFIG, "Invalid argument for rx-retry [0|1]\n");
526 					us_vhost_usage(prgname);
527 					return -1;
528 				} else {
529 					enable_retry = ret;
530 				}
531 			}
532 
533 			/* Specify the retry delay time (in microseconds) on RX. */
534 			if (!strncmp(long_option[option_index].name, "rx-retry-delay", MAX_LONG_OPT_SZ)) {
535 				ret = parse_num_opt(optarg, INT32_MAX);
536 				if (ret == -1) {
537 					RTE_LOG(INFO, CONFIG, "Invalid argument for rx-retry-delay [0-N]\n");
538 					us_vhost_usage(prgname);
539 					return -1;
540 				} else {
541 					burst_rx_delay_time = ret;
542 				}
543 			}
544 
545 			/* Specify the retries number on RX. */
546 			if (!strncmp(long_option[option_index].name, "rx-retry-num", MAX_LONG_OPT_SZ)) {
547 				ret = parse_num_opt(optarg, INT32_MAX);
548 				if (ret == -1) {
549 					RTE_LOG(INFO, CONFIG, "Invalid argument for rx-retry-num [0-N]\n");
550 					us_vhost_usage(prgname);
551 					return -1;
552 				} else {
553 					burst_rx_retry_num = ret;
554 				}
555 			}
556 
557 			/* Enable/disable RX mergeable buffers. */
558 			if (!strncmp(long_option[option_index].name, "mergeable", MAX_LONG_OPT_SZ)) {
559 				ret = parse_num_opt(optarg, 1);
560 				if (ret == -1) {
561 					RTE_LOG(INFO, CONFIG, "Invalid argument for mergeable [0|1]\n");
562 					us_vhost_usage(prgname);
563 					return -1;
564 				} else {
565 					if (ret)
566 						VHOST_FEATURES = (1ULL << VIRTIO_NET_F_MRG_RXBUF);
567 				}
568 			}
569 
570 			/* Enable/disable stats. */
571 			if (!strncmp(long_option[option_index].name, "stats", MAX_LONG_OPT_SZ)) {
572 				ret = parse_num_opt(optarg, INT32_MAX);
573 				if (ret == -1) {
574 					RTE_LOG(INFO, CONFIG, "Invalid argument for stats [0..N]\n");
575 					us_vhost_usage(prgname);
576 					return -1;
577 				} else {
578 					enable_stats = ret;
579 				}
580 			}
581 
582 			/* Set character device basename. */
583 			if (!strncmp(long_option[option_index].name, "dev-basename", MAX_LONG_OPT_SZ)) {
584 				if (us_vhost_parse_basename(optarg) == -1) {
585 					RTE_LOG(INFO, CONFIG, "Invalid argument for character device basename (Max %d characters)\n", MAX_BASENAME_SZ);
586 					us_vhost_usage(prgname);
587 					return -1;
588 				}
589 			}
590 
591 			/* Set character device index. */
592 			if (!strncmp(long_option[option_index].name, "dev-index", MAX_LONG_OPT_SZ)) {
593 				ret = parse_num_opt(optarg, INT32_MAX);
594 				if (ret == -1) {
595 					RTE_LOG(INFO, CONFIG, "Invalid argument for character device index [0..N]\n");
596 					us_vhost_usage(prgname);
597 					return -1;
598 				} else {
599 					dev_index = ret;
600 				}
601 			}
602 
603 			break;
604 
605 			/* Invalid option - print options. */
606 		default:
607 			us_vhost_usage(prgname);
608 			return -1;
609 		}
610 	}
611 
612 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
613 		if (enabled_port_mask & (1 << i))
614 			ports[num_ports++] = (uint8_t)i;
615 	}
616 
617 	if ((num_ports ==  0) || (num_ports > MAX_SUP_PORTS)) {
618 		RTE_LOG(INFO, PORT, "Current enabled port number is %u, "
619 			"but only %u port can be enabled\n", num_ports, MAX_SUP_PORTS);
620 		return -1;
621 	}
622 
623 	return 0;
624 }
625 
626 /*
627  * Update the global variable num_ports and the array ports according to the
628  * number of system ports, and return the number of valid ports.
629  */
630 static unsigned check_ports_num(unsigned nb_ports)
631 {
632 	unsigned valid_num_ports = num_ports;
633 	unsigned portid;
634 
635 	if (num_ports > nb_ports) {
636 		RTE_LOG(INFO, PORT, "\nSpecified port number(%u) exceeds total system port number(%u)\n",
637 			num_ports, nb_ports);
638 		num_ports = nb_ports;
639 	}
640 
641 	for (portid = 0; portid < num_ports; portid ++) {
642 		if (ports[portid] >= nb_ports) {
643 			RTE_LOG(INFO, PORT, "\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
644 				ports[portid], (nb_ports - 1));
645 			ports[portid] = INVALID_PORT_ID;
646 			valid_num_ports--;
647 		}
648 	}
649 	return valid_num_ports;
650 }
651 
652 /*
653  * Macro to print out packet contents. Wrapped in debug define so that the
654  * data path is not affected when debug is disabled.
655  */
656 #ifdef DEBUG
657 #define PRINT_PACKET(device, addr, size, header) do {																\
658 	char *pkt_addr = (char*)(addr);																					\
659 	unsigned int index;																								\
660 	char packet[MAX_PRINT_BUFF];																					\
661 																													\
662 	if ((header))																									\
663 		rte_snprintf(packet, MAX_PRINT_BUFF, "(%"PRIu64") Header size %d: ", (device->device_fh), (size));				\
664 	else																											\
665 		rte_snprintf(packet, MAX_PRINT_BUFF, "(%"PRIu64") Packet size %d: ", (device->device_fh), (size));				\
666 	for (index = 0; index < (size); index++) {																		\
667 		rte_snprintf(packet + strnlen(packet, MAX_PRINT_BUFF), MAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF),	\
668 			"%02hhx ", pkt_addr[index]);																			\
669 	}																												\
670 	rte_snprintf(packet + strnlen(packet, MAX_PRINT_BUFF), MAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF), "\n");	\
671 																													\
672 	LOG_DEBUG(DATA, "%s", packet);																					\
673 } while(0)
674 #else
675 #define PRINT_PACKET(device, addr, size, header) do{} while(0)
676 #endif
677 
678 /*
679  * Function to convert guest physical addresses to vhost virtual addresses. This
680  * is used to convert virtio buffer addresses.
681  */
682 static inline uint64_t __attribute__((always_inline))
683 gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
684 {
685 	struct virtio_memory_regions *region;
686 	uint32_t regionidx;
687 	uint64_t vhost_va = 0;
688 
689 	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
690 		region = &dev->mem->regions[regionidx];
691 		if ((guest_pa >= region->guest_phys_address) &&
692 			(guest_pa <= region->guest_phys_address_end)) {
693 			vhost_va = region->address_offset + guest_pa;
694 			break;
695 		}
696 	}
697 	LOG_DEBUG(DATA, "(%"PRIu64") GPA %p| VVA %p\n",
698 		dev->device_fh, (void*)(uintptr_t)guest_pa, (void*)(uintptr_t)vhost_va);
699 
700 	return vhost_va;
701 }
702 
703 /*
704  * This function adds buffers to the virtio device's RX virtqueue. Buffers can
705  * be received from the physical port or from another virtio device. A packet
706  * count is returned to indicate the number of packets that were successfully
707  * added to the RX queue.
708  */
709 static inline uint32_t __attribute__((always_inline))
710 virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count)
711 {
712 	struct vhost_virtqueue *vq;
713 	struct vring_desc *desc;
714 	struct rte_mbuf *buff;
715 	/* The virtio_hdr is initialised to 0. */
716 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0,0,0,0,0,0},0};
717 	uint64_t buff_addr = 0;
718 	uint64_t buff_hdr_addr = 0;
719 	uint32_t head[MAX_PKT_BURST], packet_len = 0;
720 	uint32_t head_idx, packet_success = 0;
721 	uint32_t mergeable, mrg_count = 0;
722 	uint32_t retry = 0;
723 	uint16_t avail_idx, res_cur_idx;
724 	uint16_t res_base_idx, res_end_idx;
725 	uint16_t free_entries;
726 	uint8_t success = 0;
727 
728 	LOG_DEBUG(DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
729 	vq = dev->virtqueue[VIRTIO_RXQ];
730 	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
731 	/* As many data cores may want access to available buffers, they need to be reserved. */
732 	do {
733 		res_base_idx = vq->last_used_idx_res;
734 		avail_idx = *((volatile uint16_t *)&vq->avail->idx);
735 
736 		free_entries = (avail_idx - res_base_idx);
737 		/* If retry is enabled and the queue is full then we wait and retry to avoid packet loss. */
738 		if (enable_retry && unlikely(count > free_entries)) {
739 			for (retry = 0; retry < burst_rx_retry_num; retry++) {
740 				rte_delay_us(burst_rx_delay_time);
741 				avail_idx =
742 					*((volatile uint16_t *)&vq->avail->idx);
743 				free_entries = (avail_idx - res_base_idx);
744 				if (count <= free_entries)
745 					break;
746 			}
747 		}
748 
749 		/*check that we have enough buffers*/
750 		if (unlikely(count > free_entries))
751 			count = free_entries;
752 
753 		if (count == 0)
754 			return 0;
755 
756 		res_end_idx = res_base_idx + count;
757 		/* vq->last_used_idx_res is atomically updated. */
758 		success = rte_atomic16_cmpset(&vq->last_used_idx_res, res_base_idx,
759 									res_end_idx);
760 	} while (unlikely(success == 0));
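	/*
	 * At this point the range [res_base_idx, res_end_idx) is reserved for this
	 * core: the cmpset above guarantees that other cores enqueueing to the same
	 * guest RX ring (e.g. via VM2VM) cannot claim the same slots.
	 */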
761 	res_cur_idx = res_base_idx;
762 	LOG_DEBUG(DATA, "(%"PRIu64") Current Index %d| End Index %d\n", dev->device_fh, res_cur_idx, res_end_idx);
763 
764 	/* Prefetch available ring to retrieve indexes. */
765 	rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);
766 
767 	/* Check if the VIRTIO_NET_F_MRG_RXBUF feature is enabled. */
768 	mergeable = dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF);
769 
770 	/* Retrieve all of the head indexes first to avoid caching issues. */
771 	for (head_idx = 0; head_idx < count; head_idx++)
772 		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)];
773 
774 	/*Prefetch descriptor index. */
775 	rte_prefetch0(&vq->desc[head[packet_success]]);
776 
777 	while (res_cur_idx != res_end_idx) {
778 		/* Get descriptor from available ring */
779 		desc = &vq->desc[head[packet_success]];
780 
781 		buff = pkts[packet_success];
782 
783 		/* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
784 		buff_addr = gpa_to_vva(dev, desc->addr);
785 		/* Prefetch buffer address. */
786 		rte_prefetch0((void*)(uintptr_t)buff_addr);
787 
788 		if (mergeable && (mrg_count != 0)) {
789 			desc->len = packet_len = rte_pktmbuf_data_len(buff);
790 		} else {
791 			/* Copy virtio_hdr to packet and increment buffer address */
792 			buff_hdr_addr = buff_addr;
793 			packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
794 
795 			/*
796 			 * If the descriptors are chained the header and data are placed in
797 			 * separate buffers.
798 			 */
799 			if (desc->flags & VRING_DESC_F_NEXT) {
800 				desc->len = vq->vhost_hlen;
801 				desc = &vq->desc[desc->next];
802 				/* Buffer address translation. */
803 				buff_addr = gpa_to_vva(dev, desc->addr);
804 				desc->len = rte_pktmbuf_data_len(buff);
805 			} else {
806 				buff_addr += vq->vhost_hlen;
807 				desc->len = packet_len;
808 			}
809 		}
810 
811 		PRINT_PACKET(dev, (uintptr_t)buff_addr, rte_pktmbuf_data_len(buff), 0);
812 
813 		/* Update used ring with desc information */
814 		vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];
815 		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
816 
817 		/* Copy mbuf data to buffer */
818 		rte_memcpy((void *)(uintptr_t)buff_addr, (const void*)buff->pkt.data, rte_pktmbuf_data_len(buff));
819 
820 		res_cur_idx++;
821 		packet_success++;
822 
823 		/* If mergeable is disabled then a header is required per buffer. */
824 		if (!mergeable) {
825 			rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void*)&virtio_hdr, vq->vhost_hlen);
826 			PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
827 		} else {
828 			mrg_count++;
829 			/* A mergeable buffer header can only describe a limited number of buffers at a time. Tell the guest when this limit is reached. */
830 			if ((mrg_count == MAX_MRG_PKT_BURST) || (res_cur_idx == res_end_idx)) {
831 				virtio_hdr.num_buffers = mrg_count;
832 				LOG_DEBUG(DATA, "(%"PRIu64") RX: Num merge buffers %d\n", dev->device_fh, virtio_hdr.num_buffers);
833 				rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void*)&virtio_hdr, vq->vhost_hlen);
834 				PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
835 				mrg_count = 0;
836 			}
837 		}
838 		if (res_cur_idx < res_end_idx) {
839 			/* Prefetch descriptor index. */
840 			rte_prefetch0(&vq->desc[head[packet_success]]);
841 		}
842 	}
843 
844 	rte_compiler_barrier();
845 
846 	/* Wait until it's our turn to add our buffer to the used ring. */
847 	while (unlikely(vq->last_used_idx != res_base_idx))
848 		rte_pause();
849 
850 	*(volatile uint16_t *)&vq->used->idx += count;
851 	vq->last_used_idx = res_end_idx;
852 
853 	/* Kick the guest if necessary. */
854 	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
855 		eventfd_write((int)vq->kickfd, 1);
856 	return count;
857 }
858 
859 /*
860  * Compares a packet destination MAC address to a device MAC address.
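 * Both addresses are read with a single 64-bit load and masked with MAC_ADDR_CMP,
 * so only the six MAC bytes (the low-order bytes on a little-endian host) take
 * part in the comparison.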
861  */
862 static inline int __attribute__((always_inline))
863 ether_addr_cmp(struct ether_addr *ea, struct ether_addr *eb)
864 {
865 	return (((*(uint64_t *)ea ^ *(uint64_t *)eb) & MAC_ADDR_CMP) == 0);
866 }
867 
868 /*
869  * This function learns the MAC address of the device and registers it, along with a
870  * VLAN tag, with a VMDQ pool.
871  */
872 static int
873 link_vmdq(struct virtio_net *dev, struct rte_mbuf *m)
874 {
875 	struct ether_hdr *pkt_hdr;
876 	struct virtio_net_data_ll *dev_ll;
877 	int i, ret;
878 
879 	/* Learn MAC address of guest device from packet */
880 	pkt_hdr = (struct ether_hdr *)m->pkt.data;
881 
882 	dev_ll = ll_root_used;
883 
884 	while (dev_ll != NULL) {
885 		if (ether_addr_cmp(&(pkt_hdr->s_addr), &dev_ll->dev->mac_address)) {
886 			RTE_LOG(INFO, DATA, "(%"PRIu64") WARNING: This device is using an existing MAC address and has not been registered.\n", dev->device_fh);
887 			return -1;
888 		}
889 		dev_ll = dev_ll->next;
890 	}
891 
892 	for (i = 0; i < ETHER_ADDR_LEN; i++)
893 		dev->mac_address.addr_bytes[i] = pkt_hdr->s_addr.addr_bytes[i];
894 
895 	/* vlan_tag currently uses the device_id. */
896 	dev->vlan_tag = vlan_tags[dev->device_fh];
897 	dev->vmdq_rx_q = dev->device_fh * (num_queues/num_devices);
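	/*
	 * For example, if the NIC reports 128 RX queues and 64 VMDQ pools (figures
	 * typical of an 82599, used here purely as an illustration), each device is
	 * assigned every second queue: vmdq_rx_q == device_fh * 2.
	 */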
898 
899 	/* Print out VMDQ registration info. */
900 	RTE_LOG(INFO, DATA, "(%"PRIu64") MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VLAN_TAG %d registered\n",
901 		dev->device_fh,
902 		dev->mac_address.addr_bytes[0], dev->mac_address.addr_bytes[1],
903 		dev->mac_address.addr_bytes[2], dev->mac_address.addr_bytes[3],
904 		dev->mac_address.addr_bytes[4], dev->mac_address.addr_bytes[5],
905 		dev->vlan_tag);
906 
907 	/* Register the MAC address. */
908 	ret = rte_eth_dev_mac_addr_add(ports[0], &dev->mac_address, (uint32_t)dev->device_fh);
909 	if (ret)
910 		RTE_LOG(ERR, DATA, "(%"PRIu64") Failed to add device MAC address to VMDQ\n",
911 					dev->device_fh);
912 
913 	/* Enable stripping of the vlan tag as we handle routing. */
914 	rte_eth_dev_set_vlan_strip_on_queue(ports[0], (uint16_t)dev->vmdq_rx_q, 1);
915 
916 	/* Set device as ready for RX. */
917 	dev->ready = DEVICE_RX;
918 
919 	return 0;
920 }
921 
922 /*
923  * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding buffers to the RX
924  * queue before disabling RX on the device.
925  */
926 static inline void
927 unlink_vmdq(struct virtio_net *dev)
928 {
929 	unsigned i = 0;
930 	unsigned rx_count;
931 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
932 
933 	if (dev->ready == DEVICE_RX) {
934 		/*clear MAC and VLAN settings*/
935 		rte_eth_dev_mac_addr_remove(ports[0], &dev->mac_address);
936 		for (i = 0; i < 6; i++)
937 			dev->mac_address.addr_bytes[i] = 0;
938 
939 		dev->vlan_tag = 0;
940 
941 		/*Clear out the receive buffers*/
942 		rx_count = rte_eth_rx_burst(ports[0],
943 					(uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
944 
945 		while (rx_count) {
946 			for (i = 0; i < rx_count; i++)
947 				rte_pktmbuf_free(pkts_burst[i]);
948 
949 			rx_count = rte_eth_rx_burst(ports[0],
950 					(uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
951 		}
952 
953 		dev->ready = DEVICE_MAC_LEARNING;
954 	}
955 }
956 
957 /*
958  * Check if the packet destination MAC address is for a local device. If so then put
959  * the packet on that device's RX queue. If not then return.
960  */
961 static inline unsigned __attribute__((always_inline))
962 virtio_tx_local(struct virtio_net *dev, struct rte_mbuf *m)
963 {
964 	struct virtio_net_data_ll *dev_ll;
965 	struct ether_hdr *pkt_hdr;
966 	uint64_t ret = 0;
967 
968 	pkt_hdr = (struct ether_hdr *)m->pkt.data;
969 
970 	/*get the used devices list*/
971 	dev_ll = ll_root_used;
972 
973 	while (dev_ll != NULL) {
974 		if ((dev_ll->dev->ready == DEVICE_RX) && ether_addr_cmp(&(pkt_hdr->d_addr),
975 				          &dev_ll->dev->mac_address)) {
976 
977 			/* Drop the packet if the TX packet is destined for the TX device. */
978 			if (dev_ll->dev->device_fh == dev->device_fh) {
979 				LOG_DEBUG(DATA, "(%"PRIu64") TX: Source and destination MAC addresses are the same. Dropping packet.\n",
980 							dev_ll->dev->device_fh);
981 				return 0;
982 			}
983 
984 
985 			LOG_DEBUG(DATA, "(%"PRIu64") TX: MAC address is local\n", dev_ll->dev->device_fh);
986 
987 			if (dev_ll->dev->remove) {
988 				/*drop the packet if the device is marked for removal*/
989 				LOG_DEBUG(DATA, "(%"PRIu64") Device is marked for removal\n", dev_ll->dev->device_fh);
990 			} else {
991 				/*send the packet to the local virtio device*/
992 				ret = virtio_dev_rx(dev_ll->dev, &m, 1);
993 				if (enable_stats) {
994 					rte_atomic64_add(&dev_statistics[dev_ll->dev->device_fh].rx_total, 1);
995 					rte_atomic64_add(&dev_statistics[dev_ll->dev->device_fh].rx, ret);
996 					dev_statistics[dev->device_fh].tx_total++;
997 					dev_statistics[dev->device_fh].tx += ret;
998 				}
999 			}
1000 
1001 			return 0;
1002 		}
1003 		dev_ll = dev_ll->next;
1004 	}
1005 
1006 	return -1;
1007 }
1008 
1009 /*
1010  * This function routes the TX packet to the correct interface. This may be a local device
1011  * or the physical port.
1012  */
1013 static inline void __attribute__((always_inline))
1014 virtio_tx_route(struct virtio_net* dev, struct rte_mbuf *m, struct rte_mempool *mbuf_pool, uint16_t vlan_tag)
1015 {
1016 	struct mbuf_table *tx_q;
1017 	struct vlan_ethhdr *vlan_hdr;
1018 	struct rte_mbuf **m_table;
1019 	struct rte_mbuf *mbuf;
1020 	unsigned len, ret;
1021 	const uint16_t lcore_id = rte_lcore_id();
1022 
1023 	/*check if destination is local VM*/
1024 	if (enable_vm2vm && (virtio_tx_local(dev, m) == 0)) {
1025 		return;
1026 	}
1027 
1028 	LOG_DEBUG(DATA, "(%"PRIu64") TX: MAC address is external\n", dev->device_fh);
1029 
1030 	/*Add packet to the port tx queue*/
1031 	tx_q = &lcore_tx_queue[lcore_id];
1032 	len = tx_q->len;
1033 
1034 	/* Allocate an mbuf and populate the structure. */
1035 	mbuf = rte_pktmbuf_alloc(mbuf_pool);
1036 	if (unlikely(mbuf == NULL)) {
1037 		RTE_LOG(ERR, DATA, "Failed to allocate memory for mbuf.\n");
1038 		return;
1039 	}
1040 
1041 	mbuf->pkt.data_len = m->pkt.data_len + VLAN_HLEN;
1042 	mbuf->pkt.pkt_len = mbuf->pkt.data_len;
1043 
1044 	/* Copy ethernet header to mbuf. */
1045 	rte_memcpy((void*)mbuf->pkt.data, (const void*)m->pkt.data, ETH_HLEN);
1046 
1047 
1048 	/* Set up the VLAN header. Multi-byte fields are converted to network byte order with htons(). */
1049 	vlan_hdr = (struct vlan_ethhdr *) mbuf->pkt.data;
1050 	vlan_hdr->h_vlan_encapsulated_proto = vlan_hdr->h_vlan_proto;
1051 	vlan_hdr->h_vlan_proto = htons(ETH_P_8021Q);
1052 	vlan_hdr->h_vlan_TCI = htons(vlan_tag);
1053 
1054 	/* Copy the remaining packet contents to the mbuf. */
1055 	rte_memcpy((void*) ((uint8_t*)mbuf->pkt.data + VLAN_ETH_HLEN),
1056 		(const void*) ((uint8_t*)m->pkt.data + ETH_HLEN), (m->pkt.data_len - ETH_HLEN));
1057 	tx_q->m_table[len] = mbuf;
1058 	len++;
1059 	if (enable_stats) {
1060 		dev_statistics[dev->device_fh].tx_total++;
1061 		dev_statistics[dev->device_fh].tx++;
1062 	}
1063 
1064 	if (unlikely(len == MAX_PKT_BURST)) {
1065 		m_table = (struct rte_mbuf **)tx_q->m_table;
1066 		ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id, m_table, (uint16_t) len);
1067 		/* Free any buffers not handled by TX and update the port stats. */
1068 		if (unlikely(ret < len)) {
1069 			do {
1070 				rte_pktmbuf_free(m_table[ret]);
1071 			} while (++ret < len);
1072 		}
1073 
1074 		len = 0;
1075 	}
1076 
1077 	tx_q->len = len;
1078 	return;
1079 }
1080 
1081 static inline void __attribute__((always_inline))
1082 virtio_dev_tx(struct virtio_net* dev, struct rte_mempool *mbuf_pool)
1083 {
1084 	struct rte_mbuf m;
1085 	struct vhost_virtqueue *vq;
1086 	struct vring_desc *desc;
1087 	uint64_t buff_addr = 0;
1088 	uint32_t head[MAX_PKT_BURST];
1089 	uint32_t used_idx;
1090 	uint32_t i;
1091 	uint16_t free_entries, packet_success = 0;
1092 	uint16_t avail_idx;
1093 
1094 	vq = dev->virtqueue[VIRTIO_TXQ];
1095 	avail_idx =  *((volatile uint16_t *)&vq->avail->idx);
1096 
1097 	/* If there are no available buffers then return. */
1098 	if (vq->last_used_idx == avail_idx)
1099 		return;
1100 
1101 	LOG_DEBUG(DATA, "(%"PRIu64") virtio_dev_tx()\n", dev->device_fh);
1102 
1103 	/* Prefetch available ring to retrieve head indexes. */
1104 	rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);
1105 
1106 	/*get the number of free entries in the ring*/
1107 	free_entries = (avail_idx - vq->last_used_idx);
1108 
1109 	/* Limit to MAX_PKT_BURST. */
1110 	if (free_entries > MAX_PKT_BURST)
1111 		free_entries = MAX_PKT_BURST;
1112 
1113 	LOG_DEBUG(DATA, "(%"PRIu64") Buffers available %d\n", dev->device_fh, free_entries);
1114 	/* Retrieve all of the head indexes first to avoid caching issues. */
1115 	for (i = 0; i < free_entries; i++)
1116 		head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];
1117 
1118 	/* Prefetch descriptor index. */
1119 	rte_prefetch0(&vq->desc[head[packet_success]]);
1120 	rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);
1121 
1122 	while (packet_success < free_entries) {
1123 		desc = &vq->desc[head[packet_success]];
1124 
1125 		/* Discard first buffer as it is the virtio header */
1126 		desc = &vq->desc[desc->next];
1127 
1128 		/* Buffer address translation. */
1129 		buff_addr = gpa_to_vva(dev, desc->addr);
1130 		/* Prefetch buffer address. */
1131 		rte_prefetch0((void*)(uintptr_t)buff_addr);
1132 
1133 		used_idx = vq->last_used_idx & (vq->size - 1);
1134 
1135 		if (packet_success < (free_entries - 1)) {
1136 			/* Prefetch descriptor index. */
1137 			rte_prefetch0(&vq->desc[head[packet_success+1]]);
1138 			rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]);
1139 		}
1140 
1141 		/* Update used index buffer information. */
1142 		vq->used->ring[used_idx].id = head[packet_success];
1143 		vq->used->ring[used_idx].len = 0;
1144 
1145 		/* Setup dummy mbuf. This is copied to a real mbuf if transmitted out the physical port. */
1146 		m.pkt.data_len = desc->len;
1147 		m.pkt.data = (void*)(uintptr_t)buff_addr;
1148 
1149 		PRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0);
1150 
1151 		/* If this is the first received packet we need to learn the MAC and setup VMDQ */
1152 		if (dev->ready == DEVICE_MAC_LEARNING) {
1153 			if (dev->remove || (link_vmdq(dev, &m) == -1)) {
1154 				/*discard frame if device is scheduled for removal or a duplicate MAC address is found. */
1155 				packet_success += free_entries;
1156 				vq->last_used_idx += packet_success;
1157 				break;
1158 			}
1159 		}
1160 		virtio_tx_route(dev, &m, mbuf_pool, (uint16_t)dev->device_fh);
1161 
1162 		vq->last_used_idx++;
1163 		packet_success++;
1164 	}
1165 
1166 	rte_compiler_barrier();
1167 	vq->used->idx += packet_success;
1168 	/* Kick guest if required. */
1169 	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
1170 		eventfd_write((int)vq->kickfd, 1);
1171 }
1172 
1173 /*
1174  * This function is called by each data core. It handles all RX/TX registered with the
1175  * core. For TX the specific lcore linked list is used. For RX, MAC addresses are compared
1176  * with all devices in the main linked list.
1177  */
1178 static int
1179 switch_worker(__attribute__((unused)) void *arg)
1180 {
1181 	struct rte_mempool *mbuf_pool = arg;
1182 	struct virtio_net *dev = NULL;
1183 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
1184 	struct virtio_net_data_ll *dev_ll;
1185 	struct mbuf_table *tx_q;
1186 	volatile struct lcore_ll_info *lcore_ll;
1187 	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
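	/*
	 * For example, with a (hypothetical) 2 GHz TSC this works out to roughly
	 * 2000 cycles per microsecond * 100 us = 200000 cycles between drains.
	 */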
1188 	uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;
1189 	unsigned ret, i;
1190 	const uint16_t lcore_id = rte_lcore_id();
1191 	const uint16_t num_cores = (uint16_t)rte_lcore_count();
1192 	uint16_t rx_count = 0;
1193 
1194 	RTE_LOG(INFO, DATA, "Processing on core %u started\n", lcore_id);
1195 	lcore_ll = lcore_info[lcore_id].lcore_ll;
1196 	prev_tsc = 0;
1197 
1198 	tx_q = &lcore_tx_queue[lcore_id];
1199 	for (i = 0; i < num_cores; i ++) {
1200 		if (lcore_ids[i] == lcore_id) {
1201 			tx_q->txq_id = i;
1202 			break;
1203 		}
1204 	}
1205 
1206 	while(1) {
1207 		cur_tsc = rte_rdtsc();
1208 		/*
1209 		 * TX burst queue drain
1210 		 */
1211 		diff_tsc = cur_tsc - prev_tsc;
1212 		if (unlikely(diff_tsc > drain_tsc)) {
1213 
1214 			if (tx_q->len) {
1215 				LOG_DEBUG(DATA, "TX queue drained after timeout with burst size %u \n", tx_q->len);
1216 
1217 				/*Tx any packets in the queue*/
1218 				ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id,
1219 									   (struct rte_mbuf **)tx_q->m_table,
1220 									   (uint16_t)tx_q->len);
1221 				if (unlikely(ret < tx_q->len)) {
1222 					do {
1223 						rte_pktmbuf_free(tx_q->m_table[ret]);
1224 					} while (++ret < tx_q->len);
1225 				}
1226 
1227 				tx_q->len = 0;
1228 			}
1229 
1230 			prev_tsc = cur_tsc;
1231 
1232 		}
1233 
1234 		rte_prefetch0(lcore_ll->ll_root_used);
1235 		/*
1236 		 * If requested, inform the configuration core that we have exited the linked
1237 		 * list and that no devices are in use.
1238 		 */
1239 		if (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL)
1240 			lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;
1241 
1242 		/*
1243 		 * Process devices
1244 		 */
1245 		dev_ll = lcore_ll->ll_root_used;
1246 
1247 		while (dev_ll != NULL) {
1248 			/* Get the virtio device. */
1249 			dev = dev_ll->dev;
1250 
1251 			if (dev->remove) {
1252 				dev_ll = dev_ll->next;
1253 				unlink_vmdq(dev);
1254 				dev->ready = DEVICE_SAFE_REMOVE;
1255 				continue;
1256 			}
1257 			if (likely(dev->ready == DEVICE_RX)) {
1258 				/*Handle guest RX*/
1259 				rx_count = rte_eth_rx_burst(ports[0],
1260 					(uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
1261 
1262 				if (rx_count) {
1263 					ret_count = virtio_dev_rx(dev, pkts_burst, rx_count);
1264 					if (enable_stats) {
1265 						rte_atomic64_add(&dev_statistics[dev_ll->dev->device_fh].rx_total, rx_count);
1266 						rte_atomic64_add(&dev_statistics[dev_ll->dev->device_fh].rx, ret_count);
1267 					}
1268 					while (likely(rx_count)) {
1269 						rx_count--;
1270 						rte_pktmbuf_free_seg(pkts_burst[rx_count]);
1271 					}
1272 
1273 				}
1274 			}
1275 
1276 			if (!dev->remove)
1277 				/*Handle guest TX*/
1278 				virtio_dev_tx(dev, mbuf_pool);
1279 
1280 			/*move to the next device in the list*/
1281 			dev_ll = dev_ll->next;
1282 		}
1283 	}
1284 
1285 	return 0;
1286 }
1287 
1288 /*
1289  * Add an entry to a used linked list. A free entry must first be found in the free linked list
1290  * using get_data_ll_free_entry();
1291  */
1292 static void
1293 add_data_ll_entry(struct virtio_net_data_ll **ll_root_addr, struct virtio_net_data_ll *ll_dev)
1294 {
1295 	struct virtio_net_data_ll *ll = *ll_root_addr;
1296 
1297 	/* Set next as NULL and use a compiler barrier to avoid reordering. */
1298 	ll_dev->next = NULL;
1299 	rte_compiler_barrier();
1300 
1301 	/* If ll == NULL then this is the first device. */
1302 	if (ll) {
1303 		/* Increment to the tail of the linked list. */
1304 		while ((ll->next != NULL) )
1305 			ll = ll->next;
1306 
1307 		ll->next = ll_dev;
1308 	} else {
1309 		*ll_root_addr = ll_dev;
1310 	}
1311 }
1312 
1313 /*
1314  * Remove an entry from a used linked list. The entry must then be added to the free linked list
1315  * using put_data_ll_free_entry().
1316  */
1317 static void
1318 rm_data_ll_entry(struct virtio_net_data_ll **ll_root_addr, struct virtio_net_data_ll *ll_dev, struct virtio_net_data_ll *ll_dev_last)
1319 {
1320 	struct virtio_net_data_ll *ll = *ll_root_addr;
1321 
1322 	if (unlikely((ll == NULL) || (ll_dev == NULL)))
1323 		return;
1324 
1325 	if (ll_dev == ll)
1326 		*ll_root_addr = ll_dev->next;
1327 	else
1328 		if (likely(ll_dev_last != NULL))
1329 			ll_dev_last->next = ll_dev->next;
1330 		else
1331 			RTE_LOG(ERR, CONFIG, "Remove entry from ll failed.\n");
1332 }
1333 
1334 /*
1335  * Find and return an entry from the free linked list.
1336  */
1337 static struct virtio_net_data_ll *
1338 get_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr)
1339 {
1340 	struct virtio_net_data_ll *ll_free = *ll_root_addr;
1341 	struct virtio_net_data_ll *ll_dev;
1342 
1343 	if (ll_free == NULL)
1344 		return NULL;
1345 
1346 	ll_dev = ll_free;
1347 	*ll_root_addr = ll_free->next;
1348 
1349 	return ll_dev;
1350 }
1351 
1352 /*
1353  * Place an entry back on to the free linked list.
1354  */
1355 static void
1356 put_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr, struct virtio_net_data_ll *ll_dev)
1357 {
1358 	struct virtio_net_data_ll *ll_free = *ll_root_addr;
1359 
1360 	if (ll_dev == NULL)
1361 		return;
1362 
1363 	ll_dev->next = ll_free;
1364 	*ll_root_addr = ll_dev;
1365 }
1366 
1367 /*
1368  * Creates a linked list of a given size.
1369  */
1370 static struct virtio_net_data_ll *
1371 alloc_data_ll(uint32_t size)
1372 {
1373 	struct virtio_net_data_ll *ll_new;
1374 	uint32_t i;
1375 
1376 	/* Malloc and then chain the linked list. */
1377 	ll_new = malloc(size * sizeof(struct virtio_net_data_ll));
1378 	if (ll_new == NULL) {
1379 		RTE_LOG(ERR, CONFIG, "Failed to allocate memory for ll_new.\n");
1380 		return NULL;
1381 	}
1382 
1383 	for (i = 0; i < size - 1; i++) {
1384 		ll_new[i].dev = NULL;
1385 		ll_new[i].next = &ll_new[i+1];
1386 	}
1387 	ll_new[i].next = NULL;
1388 
1389 	return (ll_new);
1390 }
1391 
1392 /*
1393  * Create the main linked list along with each individual core's linked list. A used and a free list
1394  * are created to manage entries.
1395  */
1396 static int
1397 init_data_ll (void)
1398 {
1399 	int lcore;
1400 
1401 	RTE_LCORE_FOREACH_SLAVE(lcore) {
1402 		lcore_info[lcore].lcore_ll = malloc(sizeof(struct lcore_ll_info));
1403 		if (lcore_info[lcore].lcore_ll == NULL) {
1404 			RTE_LOG(ERR, CONFIG, "Failed to allocate memory for lcore_ll.\n");
1405 			return -1;
1406 		}
1407 
1408 		lcore_info[lcore].lcore_ll->device_num = 0;
1409 		lcore_info[lcore].lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;
1410 		lcore_info[lcore].lcore_ll->ll_root_used = NULL;
1411 		if (num_devices % num_switching_cores)
1412 			lcore_info[lcore].lcore_ll->ll_root_free = alloc_data_ll((num_devices / num_switching_cores) + 1);
1413 		else
1414 			lcore_info[lcore].lcore_ll->ll_root_free = alloc_data_ll(num_devices / num_switching_cores);
1415 	}
1416 
1417 	/* Allocate devices up to a maximum of MAX_DEVICES. */
1418 	ll_root_free = alloc_data_ll(MIN((num_devices), MAX_DEVICES));
1419 
1420 	return 0;
1421 }
1422 
1423 /*
1424  * Set virtqueue flags so that the guest does not notify (kick) us; the data cores poll the rings instead.
1425  */
1426 static void
1427 set_irq_status (struct virtio_net *dev)
1428 {
1429 	dev->virtqueue[VIRTIO_RXQ]->used->flags = VRING_USED_F_NO_NOTIFY;
1430 	dev->virtqueue[VIRTIO_TXQ]->used->flags = VRING_USED_F_NO_NOTIFY;
1431 }
1432 
1433 /*
1434  * Remove a device from the specific data core linked list and from the main linked list. Synchronization
1435  * occurs through the use of the lcore dev_removal_flag. Device is made volatile here to avoid re-ordering
1436  * of dev->remove=1 which can cause an infinite loop in the rte_pause loop.
1437  */
1438 static void
1439 destroy_device (volatile struct virtio_net *dev)
1440 {
1441 	struct virtio_net_data_ll *ll_lcore_dev_cur;
1442 	struct virtio_net_data_ll *ll_main_dev_cur;
1443 	struct virtio_net_data_ll *ll_lcore_dev_last = NULL;
1444 	struct virtio_net_data_ll *ll_main_dev_last = NULL;
1445 	int lcore;
1446 
1447 	dev->flags &= ~VIRTIO_DEV_RUNNING;
1448 
1449 	/*set the remove flag. */
1450 	dev->remove = 1;
1451 
1452 	while(dev->ready != DEVICE_SAFE_REMOVE) {
1453 		rte_pause();
1454 	}
1455 
1456 	/* Search for entry to be removed from lcore ll */
1457 	ll_lcore_dev_cur = lcore_info[dev->coreid].lcore_ll->ll_root_used;
1458 	while (ll_lcore_dev_cur != NULL) {
1459 		if (ll_lcore_dev_cur->dev == dev) {
1460 			break;
1461 		} else {
1462 			ll_lcore_dev_last = ll_lcore_dev_cur;
1463 			ll_lcore_dev_cur = ll_lcore_dev_cur->next;
1464 		}
1465 	}
1466 
1467 	if (ll_lcore_dev_cur == NULL) {
1468 		RTE_LOG(ERR, CONFIG, "Failed to find the device to be destroyed.\n");
1469 		return;
1470 	}
1471 
1472 	/* Search for entry to be removed from main ll */
1473 	ll_main_dev_cur = ll_root_used;
1474 	ll_main_dev_last = NULL;
1475 	while (ll_main_dev_cur != NULL) {
1476 		if (ll_main_dev_cur->dev == dev) {
1477 			break;
1478 		} else {
1479 			ll_main_dev_last = ll_main_dev_cur;
1480 			ll_main_dev_cur = ll_main_dev_cur->next;
1481 		}
1482 	}
1483 
1484 	/* Remove entries from the lcore and main ll. */
1485 	rm_data_ll_entry(&lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->ll_root_used, ll_lcore_dev_cur, ll_lcore_dev_last);
1486 	rm_data_ll_entry(&ll_root_used, ll_main_dev_cur, ll_main_dev_last);
1487 
1488 	/* Set the dev_removal_flag on each lcore. */
1489 	RTE_LCORE_FOREACH_SLAVE(lcore) {
1490 		lcore_info[lcore].lcore_ll->dev_removal_flag = REQUEST_DEV_REMOVAL;
1491 	}
1492 
1493 	/*
1494 	 * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL we can be sure that
1495 	 * they can no longer access the device removed from the linked lists and that the devices
1496 	 * are no longer in use.
1497 	 */
1498 	RTE_LCORE_FOREACH_SLAVE(lcore) {
1499 		while (lcore_info[lcore].lcore_ll->dev_removal_flag != ACK_DEV_REMOVAL) {
1500 			rte_pause();
1501 		}
1502 	}
1503 
1504 	/* Add the entries back to the lcore and main free ll.*/
1505 	put_data_ll_free_entry(&lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->ll_root_free, ll_lcore_dev_cur);
1506 	put_data_ll_free_entry(&ll_root_free, ll_main_dev_cur);
1507 
1508 	/* Decrement number of device on the lcore. */
1509 	lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->device_num--;
1510 
1511 	RTE_LOG(INFO, DATA, "(%"PRIu64") Device has been removed from data core\n", dev->device_fh);
1512 }
1513 
1514 /*
1515  * A new device is added to a data core. First the device is added to the main linked list
1516  * and then allocated to a specific data core.
1517  */
1518 static int
1519 new_device (struct virtio_net *dev)
1520 {
1521 	struct virtio_net_data_ll *ll_dev;
1522 	int lcore, core_add = 0;
1523 	uint32_t device_num_min = num_devices;
1524 
1525 	/* Add device to main ll */
1526 	ll_dev = get_data_ll_free_entry(&ll_root_free);
1527 	if (ll_dev == NULL) {
1528 		RTE_LOG(INFO, DATA, "(%"PRIu64") No free entry found in linked list. Device limit "
1529 			"of %d devices per core has been reached\n",
1530 			dev->device_fh, num_devices);
1531 		return -1;
1532 	}
1533 	ll_dev->dev = dev;
1534 	add_data_ll_entry(&ll_root_used, ll_dev);
1535 
1536 	/*reset ready flag*/
1537 	dev->ready = DEVICE_MAC_LEARNING;
1538 	dev->remove = 0;
1539 
1540 	/* Find a suitable lcore to add the device. */
1541 	RTE_LCORE_FOREACH_SLAVE(lcore) {
1542 		if (lcore_info[lcore].lcore_ll->device_num < device_num_min) {
1543 			device_num_min = lcore_info[lcore].lcore_ll->device_num;
1544 			core_add = lcore;
1545 		}
1546 	}
1547 	/* Add device to lcore ll */
1548 	ll_dev->dev->coreid = core_add;
1549 	ll_dev = get_data_ll_free_entry(&lcore_info[ll_dev->dev->coreid].lcore_ll->ll_root_free);
1550 	if (ll_dev == NULL) {
1551 		RTE_LOG(INFO, DATA, "(%"PRIu64") Failed to add device to data core\n", dev->device_fh);
1552 		destroy_device(dev);
1553 		return -1;
1554 	}
1555 	ll_dev->dev = dev;
1556 	add_data_ll_entry(&lcore_info[ll_dev->dev->coreid].lcore_ll->ll_root_used, ll_dev);
1557 
1558 	/* Initialize device stats */
1559 	memset(&dev_statistics[dev->device_fh], 0, sizeof(struct device_statistics));
1560 
1561 	/* Disable notifications. */
1562 	set_irq_status(dev);
1563 	lcore_info[ll_dev->dev->coreid].lcore_ll->device_num++;
1564 	dev->flags |= VIRTIO_DEV_RUNNING;
1565 
1566 	RTE_LOG(INFO, DATA, "(%"PRIu64") Device has been added to data core %d\n", dev->device_fh, dev->coreid);
1567 
1568 	return 0;
1569 }
1570 
1571 /*
1572  * These callbacks allow devices to be added to the data core when configuration
1573  * has been fully completed.
1574  */
1575 static const struct virtio_net_device_ops virtio_net_device_ops =
1576 {
1577 	.new_device =  new_device,
1578 	.destroy_device = destroy_device,
1579 };
1580 
1581 /*
1582  * This thread wakes up after a set period to print stats if the user has
1583  * enabled them.
1584  */
1585 static void
1586 print_stats(void)
1587 {
1588 	struct virtio_net_data_ll *dev_ll;
1589 	uint64_t tx_dropped, rx_dropped;
1590 	uint64_t tx, tx_total, rx, rx_total;
1591 	uint32_t device_fh;
1592 	const char clr[] = { 27, '[', '2', 'J', '\0' };
1593 	const char top_left[] = { 27, '[', '1', ';', '1', 'H','\0' };
1594 
1595 	while(1) {
1596 		sleep(enable_stats);
1597 
1598 		/* Clear screen and move to top left */
1599 		printf("%s%s", clr, top_left);
1600 
1601 		printf("\nDevice statistics ====================================");
1602 
1603 		dev_ll = ll_root_used;
1604 		while (dev_ll != NULL) {
1605 			device_fh = (uint32_t)dev_ll->dev->device_fh;
1606 			tx_total = dev_statistics[device_fh].tx_total;
1607 			tx = dev_statistics[device_fh].tx;
1608 			tx_dropped = tx_total - tx;
1609 			rx_total = rte_atomic64_read(&dev_statistics[device_fh].rx_total);
1610 			rx = rte_atomic64_read(&dev_statistics[device_fh].rx);
1611 			rx_dropped = rx_total - rx;
1612 
1613 			printf("\nStatistics for device %"PRIu32" ------------------------------"
1614 					"\nTX total: 		%"PRIu64""
1615 					"\nTX dropped: 		%"PRIu64""
1616 					"\nTX successful: 		%"PRIu64""
1617 					"\nRX total: 		%"PRIu64""
1618 					"\nRX dropped: 		%"PRIu64""
1619 					"\nRX successful: 		%"PRIu64"",
1620 					device_fh,
1621 					tx_total,
1622 					tx_dropped,
1623 					tx,
1624 					rx_total,
1625 					rx_dropped,
1626 					rx);
1627 
1628 			dev_ll = dev_ll->next;
1629 		}
1630 		printf("\n======================================================\n");
1631 	}
1632 }
1633 
1634 /*
1635  * Main function, does initialisation and calls the per-lcore functions. The CUSE
1636  * device is also registered here to handle the IOCTLs.
1637  */
1638 int
1639 MAIN(int argc, char *argv[])
1640 {
1641 	struct rte_mempool *mbuf_pool;
1642 	unsigned lcore_id, core_id = 0;
1643 	unsigned nb_ports, valid_num_ports;
1644 	int ret;
1645 	uint8_t portid;
1646 	static pthread_t tid;
1647 
1648 	/* init EAL */
1649 	ret = rte_eal_init(argc, argv);
1650 	if (ret < 0)
1651 		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
1652 	argc -= ret;
1653 	argv += ret;
1654 
1655 	/* parse app arguments */
1656 	ret = us_vhost_parse_args(argc, argv);
1657 	if (ret < 0)
1658 		rte_exit(EXIT_FAILURE, "Invalid argument\n");
1659 
1660 	if (rte_eal_pci_probe() != 0)
1661 		rte_exit(EXIT_FAILURE, "Error with NIC driver initialization\n");
1662 
1663 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id ++)
1664 		if (rte_lcore_is_enabled(lcore_id))
1665 			lcore_ids[core_id ++] = lcore_id;
1666 
1667 	if (rte_lcore_count() > RTE_MAX_LCORE)
1668 		rte_exit(EXIT_FAILURE,"Not enough cores\n");
1669 
1670 	/* Set the number of switching cores available. */
1671 	num_switching_cores = rte_lcore_count()-1;
1672 
1673 	/* Get the number of physical ports. */
1674 	nb_ports = rte_eth_dev_count();
1675 	if (nb_ports > RTE_MAX_ETHPORTS)
1676 		nb_ports = RTE_MAX_ETHPORTS;
1677 
1678 	/*
1679 	 * Update the global variable num_ports and the global array ports, and get
1680 	 * the value of valid_num_ports according to the number of system ports.
1681 	 */
1682 	valid_num_ports = check_ports_num(nb_ports);
1683 
1684 	if ((valid_num_ports ==  0) || (valid_num_ports > MAX_SUP_PORTS)) {
1685 		RTE_LOG(INFO, PORT, "Current enabled port number is %u, "
1686 			"but only %u port can be enabled\n", num_ports, MAX_SUP_PORTS);
1687 		return -1;
1688 	}
1689 
1690 	/* Create the mbuf pool. */
1691 	mbuf_pool = rte_mempool_create("MBUF_POOL", NUM_MBUFS_PER_PORT * valid_num_ports,
1692 				       MBUF_SIZE, MBUF_CACHE_SIZE,
1693 				       sizeof(struct rte_pktmbuf_pool_private),
1694 				       rte_pktmbuf_pool_init, NULL,
1695 				       rte_pktmbuf_init, NULL,
1696 				       rte_socket_id(), 0);
1697 	if (mbuf_pool == NULL)
1698 		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
1699 
1700 	/* Set log level. */
1701 	rte_set_log_level(LOG_LEVEL);
1702 
1703 	/* initialize all ports */
1704 	for (portid = 0; portid < nb_ports; portid++) {
1705 		/* skip ports that are not enabled */
1706 		if ((enabled_port_mask & (1 << portid)) == 0) {
1707 			RTE_LOG(INFO, PORT, "Skipping disabled port %d\n", portid);
1708 			continue;
1709 		}
1710 		if (port_init(portid, mbuf_pool) != 0)
1711 			rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
1712 	}
1713 
1714 	/* Initialise all linked lists. */
1715 	if (init_data_ll() == -1)
1716 		rte_exit(EXIT_FAILURE, "Failed to initialize linked list\n");
1717 
1718 	/* Initialize device stats */
1719 	memset(&dev_statistics, 0, sizeof(dev_statistics));
1720 
1721 	/* Enable stats if the user option is set. */
1722 	if (enable_stats)
1723 		pthread_create(&tid, NULL, (void*)print_stats, NULL );
1724 
1725 	/* Launch all data cores. */
1726 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
1727 		rte_eal_remote_launch(switch_worker, mbuf_pool, lcore_id);
1728 	}
1729 
1730 	/* Register CUSE device to handle IOCTLs. */
1731 	ret = register_cuse_device((char*)&dev_basename, dev_index, get_virtio_net_callbacks());
1732 	if (ret != 0)
1733 		rte_exit(EXIT_FAILURE,"CUSE device setup failure.\n");
1734 
1735 	init_virtio_net(&virtio_net_device_ops);
1736 
1737 	/* Start CUSE session. */
1738 	start_cuse_session_loop();
1739 	return 0;
1740 
1741 }
1742 
1743