xref: /dpdk/drivers/net/virtio/virtio_ethdev.c (revision 945acb4a0d644d194f1823084a234f9c286dcf8c)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <unistd.h>
10 
11 #include <rte_ethdev.h>
12 #include <rte_ethdev_pci.h>
13 #include <rte_memcpy.h>
14 #include <rte_string_fns.h>
15 #include <rte_memzone.h>
16 #include <rte_malloc.h>
17 #include <rte_atomic.h>
18 #include <rte_branch_prediction.h>
19 #include <rte_pci.h>
20 #include <rte_bus_pci.h>
21 #include <rte_ether.h>
22 #include <rte_common.h>
23 #include <rte_errno.h>
24 #include <rte_cpuflags.h>
25 
26 #include <rte_memory.h>
27 #include <rte_eal.h>
28 #include <rte_dev.h>
29 #include <rte_cycles.h>
30 
31 #include "virtio_ethdev.h"
32 #include "virtio_pci.h"
33 #include "virtio_logs.h"
34 #include "virtqueue.h"
35 #include "virtio_rxtx.h"
36 
37 static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev);
38 static int  virtio_dev_configure(struct rte_eth_dev *dev);
39 static int  virtio_dev_start(struct rte_eth_dev *dev);
40 static void virtio_dev_stop(struct rte_eth_dev *dev);
41 static void virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
42 static void virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
43 static void virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
44 static void virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
45 static void virtio_dev_info_get(struct rte_eth_dev *dev,
46 				struct rte_eth_dev_info *dev_info);
47 static int virtio_dev_link_update(struct rte_eth_dev *dev,
48 	int wait_to_complete);
49 static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
50 
51 static void virtio_set_hwaddr(struct virtio_hw *hw);
52 static void virtio_get_hwaddr(struct virtio_hw *hw);
53 
54 static int virtio_dev_stats_get(struct rte_eth_dev *dev,
55 				 struct rte_eth_stats *stats);
56 static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
57 				 struct rte_eth_xstat *xstats, unsigned n);
58 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
59 				       struct rte_eth_xstat_name *xstats_names,
60 				       unsigned limit);
61 static void virtio_dev_stats_reset(struct rte_eth_dev *dev);
62 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
63 static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
64 				uint16_t vlan_id, int on);
65 static int virtio_mac_addr_add(struct rte_eth_dev *dev,
66 				struct ether_addr *mac_addr,
67 				uint32_t index, uint32_t vmdq);
68 static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
69 static void virtio_mac_addr_set(struct rte_eth_dev *dev,
70 				struct ether_addr *mac_addr);
71 
72 static int virtio_intr_enable(struct rte_eth_dev *dev);
73 static int virtio_intr_disable(struct rte_eth_dev *dev);
74 
75 static int virtio_dev_queue_stats_mapping_set(
76 	struct rte_eth_dev *eth_dev,
77 	uint16_t queue_id,
78 	uint8_t stat_idx,
79 	uint8_t is_rx);
80 
81 int virtio_logtype_init;
82 int virtio_logtype_driver;
83 
84 /*
85  * The set of PCI devices this driver supports
86  */
87 static const struct rte_pci_id pci_id_virtio_map[] = {
88 	{ RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_LEGACY_DEVICEID_NET) },
89 	{ RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_MODERN_DEVICEID_NET) },
90 	{ .vendor_id = 0, /* sentinel */ },
91 };
92 
93 struct rte_virtio_xstats_name_off {
94 	char name[RTE_ETH_XSTATS_NAME_SIZE];
95 	unsigned offset;
96 };
97 
98 /* [rt]x_qX_ is prepended to the name string here */
99 static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
100 	{"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
101 	{"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
102 	{"errors",                 offsetof(struct virtnet_rx, stats.errors)},
103 	{"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
104 	{"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
105 	{"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
106 	{"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
107 	{"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
108 	{"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
109 	{"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
110 	{"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
111 	{"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
112 	{"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
113 };
114 
115 /* [rt]x_qX_ is prepended to the name string here */
116 static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
117 	{"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
118 	{"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
119 	{"errors",                 offsetof(struct virtnet_tx, stats.errors)},
120 	{"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
121 	{"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
122 	{"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
123 	{"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
124 	{"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
125 	{"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
126 	{"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
127 	{"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
128 	{"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
129 	{"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
130 };
131 
132 #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
133 			    sizeof(rte_virtio_rxq_stat_strings[0]))
134 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
135 			    sizeof(rte_virtio_txq_stat_strings[0]))
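/* The exported xstat names combine a queue prefix with the table entry,
 * e.g. "rx_q0_good_packets" or "tx_q1_size_64_packets".
 */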
136 
137 struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
138 
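/*
 * Send one command on the control queue: the request is copied into the
 * control-queue header memzone, chained as descriptors (header, data
 * arguments, device-writable status slot), the device is notified, and the
 * function busy-waits (sleeping 100 us per poll) until the device marks the
 * chain as used, then returns the ACK status written by the device.
 */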
139 static int
140 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
141 		int *dlen, int pkt_num)
142 {
143 	uint32_t head, i;
144 	int k, sum = 0;
145 	virtio_net_ctrl_ack status = ~0;
146 	struct virtio_pmd_ctrl *result;
147 	struct virtqueue *vq;
148 
149 	ctrl->status = status;
150 
151 	if (!cvq || !cvq->vq) {
152 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
153 		return -1;
154 	}
155 
156 	rte_spinlock_lock(&cvq->lock);
157 	vq = cvq->vq;
158 	head = vq->vq_desc_head_idx;
159 
160 	PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
161 		"vq->hw->cvq = %p vq = %p",
162 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
163 
164 	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
165 		rte_spinlock_unlock(&cvq->lock);
166 		return -1;
167 	}
168 
169 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
170 		sizeof(struct virtio_pmd_ctrl));
171 
172 	/*
173 	 * Format is enforced in qemu code:
174 	 * One read-only descriptor for the command header;
175 	 * At least one read-only descriptor per command argument;
176 	 * One device-writable descriptor for the ACK status.
177 	 */
178 	vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT;
179 	vq->vq_ring.desc[head].addr = cvq->virtio_net_hdr_mem;
180 	vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
181 	vq->vq_free_cnt--;
182 	i = vq->vq_ring.desc[head].next;
183 
184 	for (k = 0; k < pkt_num; k++) {
185 		vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT;
186 		vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem
187 			+ sizeof(struct virtio_net_ctrl_hdr)
188 			+ sizeof(ctrl->status) + sizeof(uint8_t)*sum;
189 		vq->vq_ring.desc[i].len = dlen[k];
190 		sum += dlen[k];
191 		vq->vq_free_cnt--;
192 		i = vq->vq_ring.desc[i].next;
193 	}
194 
195 	vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
196 	vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem
197 			+ sizeof(struct virtio_net_ctrl_hdr);
198 	vq->vq_ring.desc[i].len = sizeof(ctrl->status);
199 	vq->vq_free_cnt--;
200 
201 	vq->vq_desc_head_idx = vq->vq_ring.desc[i].next;
202 
203 	vq_update_avail_ring(vq, head);
204 	vq_update_avail_idx(vq);
205 
206 	PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
207 
208 	virtqueue_notify(vq);
209 
210 	rte_rmb();
211 	while (VIRTQUEUE_NUSED(vq) == 0) {
212 		rte_rmb();
213 		usleep(100);
214 	}
215 
216 	while (VIRTQUEUE_NUSED(vq)) {
217 		uint32_t idx, desc_idx, used_idx;
218 		struct vring_used_elem *uep;
219 
220 		used_idx = (uint32_t)(vq->vq_used_cons_idx
221 				& (vq->vq_nentries - 1));
222 		uep = &vq->vq_ring.used->ring[used_idx];
223 		idx = (uint32_t) uep->id;
224 		desc_idx = idx;
225 
226 		while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) {
227 			desc_idx = vq->vq_ring.desc[desc_idx].next;
228 			vq->vq_free_cnt++;
229 		}
230 
231 		vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx;
232 		vq->vq_desc_head_idx = idx;
233 
234 		vq->vq_used_cons_idx++;
235 		vq->vq_free_cnt++;
236 	}
237 
238 	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
239 			vq->vq_free_cnt, vq->vq_desc_head_idx);
240 
241 	result = cvq->virtio_net_hdr_mz->addr;
242 
243 	rte_spinlock_unlock(&cvq->lock);
244 	return result->status;
245 }
246 
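/* Tell the device how many queue pairs to enable, using the
 * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET control command.
 */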
247 static int
248 virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
249 {
250 	struct virtio_hw *hw = dev->data->dev_private;
251 	struct virtio_pmd_ctrl ctrl;
252 	int dlen[1];
253 	int ret;
254 
255 	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
256 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
257 	memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
258 
259 	dlen[0] = sizeof(uint16_t);
260 
261 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
262 	if (ret) {
263 		PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
264 			  "failed, this is too late now...");
265 		return -EINVAL;
266 	}
267 
268 	return 0;
269 }
270 
271 static void
272 virtio_dev_queue_release(void *queue __rte_unused)
273 {
274 	/* do nothing */
275 }
276 
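/*
 * Virtqueue index layout: even indexes are receive queues, odd indexes are
 * transmit queues, and index (max_queue_pairs * 2) is the control queue
 * when VIRTIO_NET_F_CTRL_VQ has been negotiated.
 */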
277 static int
278 virtio_get_queue_type(struct virtio_hw *hw, uint16_t vtpci_queue_idx)
279 {
280 	if (vtpci_queue_idx == hw->max_queue_pairs * 2)
281 		return VTNET_CQ;
282 	else if (vtpci_queue_idx % 2 == 0)
283 		return VTNET_RQ;
284 	else
285 		return VTNET_TQ;
286 }
287 
288 static uint16_t
289 virtio_get_nr_vq(struct virtio_hw *hw)
290 {
291 	uint16_t nr_vq = hw->max_queue_pairs * 2;
292 
293 	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
294 		nr_vq += 1;
295 
296 	return nr_vq;
297 }
298 
299 static void
300 virtio_init_vring(struct virtqueue *vq)
301 {
302 	int size = vq->vq_nentries;
303 	struct vring *vr = &vq->vq_ring;
304 	uint8_t *ring_mem = vq->vq_ring_virt_mem;
305 
306 	PMD_INIT_FUNC_TRACE();
307 
308 	/*
309 	 * Reinitialise since virtio port might have been stopped and restarted
310 	 */
311 	memset(ring_mem, 0, vq->vq_ring_size);
312 	vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
313 	vq->vq_used_cons_idx = 0;
314 	vq->vq_desc_head_idx = 0;
315 	vq->vq_avail_idx = 0;
316 	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
317 	vq->vq_free_cnt = vq->vq_nentries;
318 	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
319 
320 	vring_desc_init(vr->desc, size);
321 
322 	/*
323 	 * Disable interrupts from the device (host) to the guest.
324 	 */
325 	virtqueue_disable_intr(vq);
326 }
327 
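/*
 * Allocate and initialize a single virtqueue: read its size from the
 * device, reserve a memzone for the vring (plus a header memzone for
 * Tx/control queues and a SW ring for Rx queues), and register the queue
 * with the backend through VTPCI_OPS(hw)->setup_queue().
 */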
328 static int
329 virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
330 {
331 	char vq_name[VIRTQUEUE_MAX_NAME_SZ];
332 	char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
333 	const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
334 	unsigned int vq_size, size;
335 	struct virtio_hw *hw = dev->data->dev_private;
336 	struct virtnet_rx *rxvq = NULL;
337 	struct virtnet_tx *txvq = NULL;
338 	struct virtnet_ctl *cvq = NULL;
339 	struct virtqueue *vq;
340 	size_t sz_hdr_mz = 0;
341 	void *sw_ring = NULL;
342 	int queue_type = virtio_get_queue_type(hw, vtpci_queue_idx);
343 	int ret;
344 
345 	PMD_INIT_LOG(DEBUG, "setting up queue: %u", vtpci_queue_idx);
346 
347 	/*
348 	 * Read the virtqueue size from the Queue Size field.
349 	 * It is always a power of 2; if it is 0, the virtqueue does not exist.
350 	 */
351 	vq_size = VTPCI_OPS(hw)->get_queue_num(hw, vtpci_queue_idx);
352 	PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
353 	if (vq_size == 0) {
354 		PMD_INIT_LOG(ERR, "virtqueue does not exist");
355 		return -EINVAL;
356 	}
357 
358 	if (!rte_is_power_of_2(vq_size)) {
359 		PMD_INIT_LOG(ERR, "virtqueue size is not power of 2");
360 		return -EINVAL;
361 	}
362 
363 	snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
364 		 dev->data->port_id, vtpci_queue_idx);
365 
366 	size = RTE_ALIGN_CEIL(sizeof(*vq) +
367 				vq_size * sizeof(struct vq_desc_extra),
368 				RTE_CACHE_LINE_SIZE);
369 	if (queue_type == VTNET_TQ) {
370 		/*
371 		 * For each xmit packet, allocate a virtio_net_hdr
372 		 * and indirect ring elements
373 		 */
374 		sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
375 	} else if (queue_type == VTNET_CQ) {
376 		/* Allocate a page for control vq command, data and status */
377 		sz_hdr_mz = PAGE_SIZE;
378 	}
379 
380 	vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
381 				SOCKET_ID_ANY);
382 	if (vq == NULL) {
383 		PMD_INIT_LOG(ERR, "can not allocate vq");
384 		return -ENOMEM;
385 	}
386 	hw->vqs[vtpci_queue_idx] = vq;
387 
388 	vq->hw = hw;
389 	vq->vq_queue_index = vtpci_queue_idx;
390 	vq->vq_nentries = vq_size;
391 
392 	/*
393 	 * Reserve a memzone for vring elements
394 	 */
395 	size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
396 	vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
397 	PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
398 		     size, vq->vq_ring_size);
399 
400 	mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
401 					 SOCKET_ID_ANY,
402 					 0, VIRTIO_PCI_VRING_ALIGN);
403 	if (mz == NULL) {
404 		if (rte_errno == EEXIST)
405 			mz = rte_memzone_lookup(vq_name);
406 		if (mz == NULL) {
407 			ret = -ENOMEM;
408 			goto fail_q_alloc;
409 		}
410 	}
411 
412 	memset(mz->addr, 0, mz->len);
413 
414 	vq->vq_ring_mem = mz->iova;
415 	vq->vq_ring_virt_mem = mz->addr;
416 	PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem:      0x%" PRIx64,
417 		     (uint64_t)mz->iova);
418 	PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64,
419 		     (uint64_t)(uintptr_t)mz->addr);
420 
421 	virtio_init_vring(vq);
422 
423 	if (sz_hdr_mz) {
424 		snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
425 			 dev->data->port_id, vtpci_queue_idx);
426 		hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
427 						     SOCKET_ID_ANY, 0,
428 						     RTE_CACHE_LINE_SIZE);
429 		if (hdr_mz == NULL) {
430 			if (rte_errno == EEXIST)
431 				hdr_mz = rte_memzone_lookup(vq_hdr_name);
432 			if (hdr_mz == NULL) {
433 				ret = -ENOMEM;
434 				goto fail_q_alloc;
435 			}
436 		}
437 	}
438 
439 	if (queue_type == VTNET_RQ) {
440 		size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
441 			       sizeof(vq->sw_ring[0]);
442 
443 		sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
444 				RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
445 		if (!sw_ring) {
446 			PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
447 			ret = -ENOMEM;
448 			goto fail_q_alloc;
449 		}
450 
451 		vq->sw_ring = sw_ring;
452 		rxvq = &vq->rxq;
453 		rxvq->vq = vq;
454 		rxvq->port_id = dev->data->port_id;
455 		rxvq->mz = mz;
456 	} else if (queue_type == VTNET_TQ) {
457 		txvq = &vq->txq;
458 		txvq->vq = vq;
459 		txvq->port_id = dev->data->port_id;
460 		txvq->mz = mz;
461 		txvq->virtio_net_hdr_mz = hdr_mz;
462 		txvq->virtio_net_hdr_mem = hdr_mz->iova;
463 	} else if (queue_type == VTNET_CQ) {
464 		cvq = &vq->cq;
465 		cvq->vq = vq;
466 		cvq->mz = mz;
467 		cvq->virtio_net_hdr_mz = hdr_mz;
468 		cvq->virtio_net_hdr_mem = hdr_mz->iova;
469 		memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
470 
471 		hw->cvq = cvq;
472 	}
473 
474 	/* For the virtio_user case (that is, when hw->dev is NULL), we use
475 	 * virtual addresses, and _offset_ must be set accordingly; see
476 	 * VIRTIO_MBUF_DATA_DMA_ADDR in virtqueue.h for more information.
477 	 */
478 	if (!hw->virtio_user_dev)
479 		vq->offset = offsetof(struct rte_mbuf, buf_iova);
480 	else {
481 		vq->vq_ring_mem = (uintptr_t)mz->addr;
482 		vq->offset = offsetof(struct rte_mbuf, buf_addr);
483 		if (queue_type == VTNET_TQ)
484 			txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
485 		else if (queue_type == VTNET_CQ)
486 			cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
487 	}
488 
489 	if (queue_type == VTNET_TQ) {
490 		struct virtio_tx_region *txr;
491 		unsigned int i;
492 
493 		txr = hdr_mz->addr;
494 		memset(txr, 0, vq_size * sizeof(*txr));
495 		for (i = 0; i < vq_size; i++) {
496 			struct vring_desc *start_dp = txr[i].tx_indir;
497 
498 			vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir));
499 
500 			/* first indirect descriptor is always the tx header */
501 			start_dp->addr = txvq->virtio_net_hdr_mem
502 				+ i * sizeof(*txr)
503 				+ offsetof(struct virtio_tx_region, tx_hdr);
504 
505 			start_dp->len = hw->vtnet_hdr_size;
506 			start_dp->flags = VRING_DESC_F_NEXT;
507 		}
508 	}
509 
510 	if (VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
511 		PMD_INIT_LOG(ERR, "setup_queue failed");
512 		return -EINVAL;
513 	}
514 
515 	return 0;
516 
517 fail_q_alloc:
518 	rte_free(sw_ring);
519 	rte_memzone_free(hdr_mz);
520 	rte_memzone_free(mz);
521 	rte_free(vq);
522 
523 	return ret;
524 }
525 
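/* Release everything allocated by virtio_init_queue() for each virtqueue,
 * then free the hw->vqs array itself.
 */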
526 static void
527 virtio_free_queues(struct virtio_hw *hw)
528 {
529 	uint16_t nr_vq = virtio_get_nr_vq(hw);
530 	struct virtqueue *vq;
531 	int queue_type;
532 	uint16_t i;
533 
534 	if (hw->vqs == NULL)
535 		return;
536 
537 	for (i = 0; i < nr_vq; i++) {
538 		vq = hw->vqs[i];
539 		if (!vq)
540 			continue;
541 
542 		queue_type = virtio_get_queue_type(hw, i);
543 		if (queue_type == VTNET_RQ) {
544 			rte_free(vq->sw_ring);
545 			rte_memzone_free(vq->rxq.mz);
546 		} else if (queue_type == VTNET_TQ) {
547 			rte_memzone_free(vq->txq.mz);
548 			rte_memzone_free(vq->txq.virtio_net_hdr_mz);
549 		} else {
550 			rte_memzone_free(vq->cq.mz);
551 			rte_memzone_free(vq->cq.virtio_net_hdr_mz);
552 		}
553 
554 		rte_free(vq);
555 		hw->vqs[i] = NULL;
556 	}
557 
558 	rte_free(hw->vqs);
559 	hw->vqs = NULL;
560 }
561 
562 static int
563 virtio_alloc_queues(struct rte_eth_dev *dev)
564 {
565 	struct virtio_hw *hw = dev->data->dev_private;
566 	uint16_t nr_vq = virtio_get_nr_vq(hw);
567 	uint16_t i;
568 	int ret;
569 
570 	hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
571 	if (!hw->vqs) {
572 		PMD_INIT_LOG(ERR, "failed to allocate vqs");
573 		return -ENOMEM;
574 	}
575 
576 	for (i = 0; i < nr_vq; i++) {
577 		ret = virtio_init_queue(dev, i);
578 		if (ret < 0) {
579 			virtio_free_queues(hw);
580 			return ret;
581 		}
582 	}
583 
584 	return 0;
585 }
586 
587 static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);
588 
589 static void
590 virtio_dev_close(struct rte_eth_dev *dev)
591 {
592 	struct virtio_hw *hw = dev->data->dev_private;
593 	struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
594 
595 	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
596 
597 	/* reset the NIC */
598 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
599 		VTPCI_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
600 	if (intr_conf->rxq)
601 		virtio_queues_unbind_intr(dev);
602 
603 	if (intr_conf->lsc || intr_conf->rxq) {
604 		virtio_intr_disable(dev);
605 		rte_intr_efd_disable(dev->intr_handle);
606 		rte_free(dev->intr_handle->intr_vec);
607 		dev->intr_handle->intr_vec = NULL;
608 	}
609 
610 	vtpci_reset(hw);
611 	virtio_dev_free_mbufs(dev);
612 	virtio_free_queues(hw);
613 }
614 
615 static void
616 virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
617 {
618 	struct virtio_hw *hw = dev->data->dev_private;
619 	struct virtio_pmd_ctrl ctrl;
620 	int dlen[1];
621 	int ret;
622 
623 	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
624 		PMD_INIT_LOG(INFO, "host does not support rx control");
625 		return;
626 	}
627 
628 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
629 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
630 	ctrl.data[0] = 1;
631 	dlen[0] = 1;
632 
633 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
634 	if (ret)
635 		PMD_INIT_LOG(ERR, "Failed to enable promisc");
636 }
637 
638 static void
639 virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
640 {
641 	struct virtio_hw *hw = dev->data->dev_private;
642 	struct virtio_pmd_ctrl ctrl;
643 	int dlen[1];
644 	int ret;
645 
646 	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
647 		PMD_INIT_LOG(INFO, "host does not support rx control");
648 		return;
649 	}
650 
651 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
652 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
653 	ctrl.data[0] = 0;
654 	dlen[0] = 1;
655 
656 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
657 	if (ret)
658 		PMD_INIT_LOG(ERR, "Failed to disable promisc");
659 }
660 
661 static void
662 virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
663 {
664 	struct virtio_hw *hw = dev->data->dev_private;
665 	struct virtio_pmd_ctrl ctrl;
666 	int dlen[1];
667 	int ret;
668 
669 	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
670 		PMD_INIT_LOG(INFO, "host does not support rx control");
671 		return;
672 	}
673 
674 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
675 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
676 	ctrl.data[0] = 1;
677 	dlen[0] = 1;
678 
679 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
680 	if (ret)
681 		PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
682 }
683 
684 static void
685 virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
686 {
687 	struct virtio_hw *hw = dev->data->dev_private;
688 	struct virtio_pmd_ctrl ctrl;
689 	int dlen[1];
690 	int ret;
691 
692 	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
693 		PMD_INIT_LOG(INFO, "host does not support rx control");
694 		return;
695 	}
696 
697 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
698 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
699 	ctrl.data[0] = 0;
700 	dlen[0] = 1;
701 
702 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
703 	if (ret)
704 		PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
705 }
706 
707 #define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
708 static int
709 virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
710 {
711 	struct virtio_hw *hw = dev->data->dev_private;
712 	uint32_t ether_hdr_len = ETHER_HDR_LEN + VLAN_TAG_LEN +
713 				 hw->vtnet_hdr_size;
714 	uint32_t frame_size = mtu + ether_hdr_len;
715 	uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;
716 
717 	max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);
718 
719 	if (mtu < ETHER_MIN_MTU || frame_size > max_frame_size) {
720 		PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
721 			ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
722 		return -EINVAL;
723 	}
724 	return 0;
725 }
726 
727 static int
728 virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
729 {
730 	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
731 	struct virtqueue *vq = rxvq->vq;
732 
733 	virtqueue_enable_intr(vq);
734 	return 0;
735 }
736 
737 static int
738 virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
739 {
740 	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
741 	struct virtqueue *vq = rxvq->vq;
742 
743 	virtqueue_disable_intr(vq);
744 	return 0;
745 }
746 
747 /*
748  * dev_ops for virtio, bare necessities for basic operation
749  */
750 static const struct eth_dev_ops virtio_eth_dev_ops = {
751 	.dev_configure           = virtio_dev_configure,
752 	.dev_start               = virtio_dev_start,
753 	.dev_stop                = virtio_dev_stop,
754 	.dev_close               = virtio_dev_close,
755 	.promiscuous_enable      = virtio_dev_promiscuous_enable,
756 	.promiscuous_disable     = virtio_dev_promiscuous_disable,
757 	.allmulticast_enable     = virtio_dev_allmulticast_enable,
758 	.allmulticast_disable    = virtio_dev_allmulticast_disable,
759 	.mtu_set                 = virtio_mtu_set,
760 	.dev_infos_get           = virtio_dev_info_get,
761 	.stats_get               = virtio_dev_stats_get,
762 	.xstats_get              = virtio_dev_xstats_get,
763 	.xstats_get_names        = virtio_dev_xstats_get_names,
764 	.stats_reset             = virtio_dev_stats_reset,
765 	.xstats_reset            = virtio_dev_stats_reset,
766 	.link_update             = virtio_dev_link_update,
767 	.vlan_offload_set        = virtio_dev_vlan_offload_set,
768 	.rx_queue_setup          = virtio_dev_rx_queue_setup,
769 	.rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
770 	.rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
771 	.rx_queue_release        = virtio_dev_queue_release,
772 	.rx_descriptor_done      = virtio_dev_rx_queue_done,
773 	.tx_queue_setup          = virtio_dev_tx_queue_setup,
774 	.tx_queue_release        = virtio_dev_queue_release,
775 	/* collect stats per queue */
776 	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
777 	.vlan_filter_set         = virtio_vlan_filter_set,
778 	.mac_addr_add            = virtio_mac_addr_add,
779 	.mac_addr_remove         = virtio_mac_addr_remove,
780 	.mac_addr_set            = virtio_mac_addr_set,
781 };
782 
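/**
 * Atomically reads the link status information from the global structure
 * rte_eth_dev into the caller-supplied buffer.
 *
 * @param dev
 *   Pointer to the struct rte_eth_dev to read from.
 * @param link
 *   Pointer to the buffer that receives the link status.
 *
 * @return
 *   - On success, zero.
 *   - On failure, negative value.
 */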
783 static inline int
784 virtio_dev_atomic_read_link_status(struct rte_eth_dev *dev,
785 				struct rte_eth_link *link)
786 {
787 	struct rte_eth_link *dst = link;
788 	struct rte_eth_link *src = &(dev->data->dev_link);
789 
790 	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
791 			*(uint64_t *)src) == 0)
792 		return -1;
793 
794 	return 0;
795 }
796 
797 /**
798  * Atomically writes the link status information into global
799  * structure rte_eth_dev.
800  *
801  * @param dev
802  *   Pointer to the struct rte_eth_dev to be updated.
803  * @param link
804  *   Pointer to the link status to be written.
805  * @return
806  *   - On success, zero.
807  *   - On failure, negative value.
808  */
809 static inline int
810 virtio_dev_atomic_write_link_status(struct rte_eth_dev *dev,
811 		struct rte_eth_link *link)
812 {
813 	struct rte_eth_link *dst = &(dev->data->dev_link);
814 	struct rte_eth_link *src = link;
815 
816 	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
817 					*(uint64_t *)src) == 0)
818 		return -1;
819 
820 	return 0;
821 }
822 
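/* Accumulate the per-queue software counters into the generic
 * rte_eth_stats layout; only the first RTE_ETHDEV_QUEUE_STAT_CNTRS queues
 * get individual per-queue entries.
 */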
823 static void
824 virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
825 {
826 	unsigned i;
827 
828 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
829 		const struct virtnet_tx *txvq = dev->data->tx_queues[i];
830 		if (txvq == NULL)
831 			continue;
832 
833 		stats->opackets += txvq->stats.packets;
834 		stats->obytes += txvq->stats.bytes;
835 		stats->oerrors += txvq->stats.errors;
836 
837 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
838 			stats->q_opackets[i] = txvq->stats.packets;
839 			stats->q_obytes[i] = txvq->stats.bytes;
840 		}
841 	}
842 
843 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
844 		const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
845 		if (rxvq == NULL)
846 			continue;
847 
848 		stats->ipackets += rxvq->stats.packets;
849 		stats->ibytes += rxvq->stats.bytes;
850 		stats->ierrors += rxvq->stats.errors;
851 
852 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
853 			stats->q_ipackets[i] = rxvq->stats.packets;
854 			stats->q_ibytes[i] = rxvq->stats.bytes;
855 		}
856 	}
857 
858 	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
859 }
860 
861 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
862 				       struct rte_eth_xstat_name *xstats_names,
863 				       __rte_unused unsigned limit)
864 {
865 	unsigned i;
866 	unsigned count = 0;
867 	unsigned t;
868 
869 	unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
870 		dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
871 
872 	if (xstats_names != NULL) {
873 		/* Note: limit checked in rte_eth_xstats_names() */
874 
875 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
876 			struct virtnet_rx *rxvq = dev->data->rx_queues[i];
877 			if (rxvq == NULL)
878 				continue;
879 			for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
880 				snprintf(xstats_names[count].name,
881 					sizeof(xstats_names[count].name),
882 					"rx_q%u_%s", i,
883 					rte_virtio_rxq_stat_strings[t].name);
884 				count++;
885 			}
886 		}
887 
888 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
889 			struct virtnet_tx *txvq = dev->data->tx_queues[i];
890 			if (txvq == NULL)
891 				continue;
892 			for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
893 				snprintf(xstats_names[count].name,
894 					sizeof(xstats_names[count].name),
895 					"tx_q%u_%s", i,
896 					rte_virtio_txq_stat_strings[t].name);
897 				count++;
898 			}
899 		}
900 		return count;
901 	}
902 	return nstats;
903 }
904 
905 static int
906 virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
907 		      unsigned n)
908 {
909 	unsigned i;
910 	unsigned count = 0;
911 
912 	unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
913 		dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
914 
915 	if (n < nstats)
916 		return nstats;
917 
918 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
919 		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
920 
921 		if (rxvq == NULL)
922 			continue;
923 
924 		unsigned t;
925 
926 		for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
927 			xstats[count].value = *(uint64_t *)(((char *)rxvq) +
928 				rte_virtio_rxq_stat_strings[t].offset);
929 			xstats[count].id = count;
930 			count++;
931 		}
932 	}
933 
934 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
935 		struct virtnet_tx *txvq = dev->data->tx_queues[i];
936 
937 		if (txvq == NULL)
938 			continue;
939 
940 		unsigned t;
941 
942 		for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
943 			xstats[count].value = *(uint64_t *)(((char *)txvq) +
944 				rte_virtio_txq_stat_strings[t].offset);
945 			xstats[count].id = count;
946 			count++;
947 		}
948 	}
949 
950 	return count;
951 }
952 
953 static int
954 virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
955 {
956 	virtio_update_stats(dev, stats);
957 
958 	return 0;
959 }
960 
961 static void
962 virtio_dev_stats_reset(struct rte_eth_dev *dev)
963 {
964 	unsigned int i;
965 
966 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
967 		struct virtnet_tx *txvq = dev->data->tx_queues[i];
968 		if (txvq == NULL)
969 			continue;
970 
971 		txvq->stats.packets = 0;
972 		txvq->stats.bytes = 0;
973 		txvq->stats.errors = 0;
974 		txvq->stats.multicast = 0;
975 		txvq->stats.broadcast = 0;
976 		memset(txvq->stats.size_bins, 0,
977 		       sizeof(txvq->stats.size_bins[0]) * 8);
978 	}
979 
980 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
981 		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
982 		if (rxvq == NULL)
983 			continue;
984 
985 		rxvq->stats.packets = 0;
986 		rxvq->stats.bytes = 0;
987 		rxvq->stats.errors = 0;
988 		rxvq->stats.multicast = 0;
989 		rxvq->stats.broadcast = 0;
990 		memset(rxvq->stats.size_bins, 0,
991 		       sizeof(rxvq->stats.size_bins[0]) * 8);
992 	}
993 }
994 
995 static void
996 virtio_set_hwaddr(struct virtio_hw *hw)
997 {
998 	vtpci_write_dev_config(hw,
999 			offsetof(struct virtio_net_config, mac),
1000 			&hw->mac_addr, ETHER_ADDR_LEN);
1001 }
1002 
1003 static void
1004 virtio_get_hwaddr(struct virtio_hw *hw)
1005 {
1006 	if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) {
1007 		vtpci_read_dev_config(hw,
1008 			offsetof(struct virtio_net_config, mac),
1009 			&hw->mac_addr, ETHER_ADDR_LEN);
1010 	} else {
1011 		eth_random_addr(&hw->mac_addr[0]);
1012 		virtio_set_hwaddr(hw);
1013 	}
1014 }
1015 
1016 static int
1017 virtio_mac_table_set(struct virtio_hw *hw,
1018 		     const struct virtio_net_ctrl_mac *uc,
1019 		     const struct virtio_net_ctrl_mac *mc)
1020 {
1021 	struct virtio_pmd_ctrl ctrl;
1022 	int err, len[2];
1023 
1024 	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1025 		PMD_DRV_LOG(INFO, "host does not support mac table");
1026 		return -1;
1027 	}
1028 
1029 	ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1030 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1031 
1032 	len[0] = uc->entries * ETHER_ADDR_LEN + sizeof(uc->entries);
1033 	memcpy(ctrl.data, uc, len[0]);
1034 
1035 	len[1] = mc->entries * ETHER_ADDR_LEN + sizeof(mc->entries);
1036 	memcpy(ctrl.data + len[0], mc, len[1]);
1037 
1038 	err = virtio_send_command(hw->cvq, &ctrl, len, 2);
1039 	if (err != 0)
1040 		PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
1041 	return err;
1042 }
1043 
1044 static int
1045 virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
1046 		    uint32_t index, uint32_t vmdq __rte_unused)
1047 {
1048 	struct virtio_hw *hw = dev->data->dev_private;
1049 	const struct ether_addr *addrs = dev->data->mac_addrs;
1050 	unsigned int i;
1051 	struct virtio_net_ctrl_mac *uc, *mc;
1052 
1053 	if (index >= VIRTIO_MAX_MAC_ADDRS) {
1054 		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1055 		return -EINVAL;
1056 	}
1057 
1058 	uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
1059 	uc->entries = 0;
1060 	mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
1061 	mc->entries = 0;
1062 
1063 	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1064 		const struct ether_addr *addr
1065 			= (i == index) ? mac_addr : addrs + i;
1066 		struct virtio_net_ctrl_mac *tbl
1067 			= is_multicast_ether_addr(addr) ? mc : uc;
1068 
1069 		memcpy(&tbl->macs[tbl->entries++], addr, ETHER_ADDR_LEN);
1070 	}
1071 
1072 	return virtio_mac_table_set(hw, uc, mc);
1073 }
1074 
1075 static void
1076 virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
1077 {
1078 	struct virtio_hw *hw = dev->data->dev_private;
1079 	struct ether_addr *addrs = dev->data->mac_addrs;
1080 	struct virtio_net_ctrl_mac *uc, *mc;
1081 	unsigned int i;
1082 
1083 	if (index >= VIRTIO_MAX_MAC_ADDRS) {
1084 		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1085 		return;
1086 	}
1087 
1088 	uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
1089 	uc->entries = 0;
1090 	mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
1091 	mc->entries = 0;
1092 
1093 	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1094 		struct virtio_net_ctrl_mac *tbl;
1095 
1096 		if (i == index || is_zero_ether_addr(addrs + i))
1097 			continue;
1098 
1099 		tbl = is_multicast_ether_addr(addrs + i) ? mc : uc;
1100 		memcpy(&tbl->macs[tbl->entries++], addrs + i, ETHER_ADDR_LEN);
1101 	}
1102 
1103 	virtio_mac_table_set(hw, uc, mc);
1104 }
1105 
1106 static void
1107 virtio_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
1108 {
1109 	struct virtio_hw *hw = dev->data->dev_private;
1110 
1111 	memcpy(hw->mac_addr, mac_addr, ETHER_ADDR_LEN);
1112 
1113 	/* Use atomic update if available */
1114 	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1115 		struct virtio_pmd_ctrl ctrl;
1116 		int len = ETHER_ADDR_LEN;
1117 
1118 		ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1119 		ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
1120 
1121 		memcpy(ctrl.data, mac_addr, ETHER_ADDR_LEN);
1122 		virtio_send_command(hw->cvq, &ctrl, &len, 1);
1123 	} else if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC))
1124 		virtio_set_hwaddr(hw);
1125 }
1126 
1127 static int
1128 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1129 {
1130 	struct virtio_hw *hw = dev->data->dev_private;
1131 	struct virtio_pmd_ctrl ctrl;
1132 	int len;
1133 
1134 	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
1135 		return -ENOTSUP;
1136 
1137 	ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
1138 	ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
1139 	memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
1140 	len = sizeof(vlan_id);
1141 
1142 	return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1143 }
1144 
1145 static int
1146 virtio_intr_enable(struct rte_eth_dev *dev)
1147 {
1148 	struct virtio_hw *hw = dev->data->dev_private;
1149 
1150 	if (rte_intr_enable(dev->intr_handle) < 0)
1151 		return -1;
1152 
1153 	if (!hw->virtio_user_dev)
1154 		hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev));
1155 
1156 	return 0;
1157 }
1158 
1159 static int
1160 virtio_intr_disable(struct rte_eth_dev *dev)
1161 {
1162 	struct virtio_hw *hw = dev->data->dev_private;
1163 
1164 	if (rte_intr_disable(dev->intr_handle) < 0)
1165 		return -1;
1166 
1167 	if (!hw->virtio_user_dev)
1168 		hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev));
1169 
1170 	return 0;
1171 }
1172 
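/*
 * Negotiate the feature set: start from req_features, drop
 * VIRTIO_NET_F_MTU if the device reports an MTU below ETHER_MIN_MTU, and
 * for modern devices confirm the result by setting FEATURES_OK.
 */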
1173 static int
1174 virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
1175 {
1176 	uint64_t host_features;
1177 
1178 	/* Prepare guest_features: features that the driver wants to support */
1179 	PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
1180 		req_features);
1181 
1182 	/* Read device(host) feature bits */
1183 	host_features = VTPCI_OPS(hw)->get_features(hw);
1184 	PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
1185 		host_features);
1186 
1187 	/* If supported, ensure MTU value is valid before acknowledging it. */
1188 	if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
1189 		struct virtio_net_config config;
1190 
1191 		vtpci_read_dev_config(hw,
1192 			offsetof(struct virtio_net_config, mtu),
1193 			&config.mtu, sizeof(config.mtu));
1194 
1195 		if (config.mtu < ETHER_MIN_MTU)
1196 			req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
1197 	}
1198 
1199 	/*
1200 	 * Negotiate features: the subset of device feature bits accepted by
1201 	 * the driver is written back as the guest feature bits.
1202 	 */
1203 	hw->guest_features = req_features;
1204 	hw->guest_features = vtpci_negotiate_features(hw, host_features);
1205 	PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
1206 		hw->guest_features);
1207 
1208 	if (hw->modern) {
1209 		if (!vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) {
1210 			PMD_INIT_LOG(ERR,
1211 				"VIRTIO_F_VERSION_1 features is not enabled.");
1212 			return -1;
1213 		}
1214 		vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1215 		if (!(vtpci_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
1216 			PMD_INIT_LOG(ERR,
1217 				"failed to set FEATURES_OK status!");
1218 			return -1;
1219 		}
1220 	}
1221 
1222 	hw->req_guest_features = req_features;
1223 
1224 	return 0;
1225 }
1226 
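/*
 * Pause the datapath: take the state lock and clear hw->started so that
 * the worker threads stop touching the queues. On success the lock stays
 * held until virtio_dev_resume() releases it; on failure (device already
 * stopped) the lock is dropped and -1 is returned.
 */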
1227 int
1228 virtio_dev_pause(struct rte_eth_dev *dev)
1229 {
1230 	struct virtio_hw *hw = dev->data->dev_private;
1231 
1232 	rte_spinlock_lock(&hw->state_lock);
1233 
1234 	if (hw->started == 0) {
1235 		/* Device has already been stopped. */
1236 		rte_spinlock_unlock(&hw->state_lock);
1237 		return -1;
1238 	}
1239 	hw->started = 0;
1240 	/*
1241 	 * Prevent the worker threads from touching the queues to avoid
1242 	 * contention; 1 ms should be enough for the ongoing Tx function to finish.
1243 	 */
1244 	rte_delay_ms(1);
1245 	return 0;
1246 }
1247 
1248 /*
1249  * Recover hw state to let the worker threads continue.
1250  */
1251 void
1252 virtio_dev_resume(struct rte_eth_dev *dev)
1253 {
1254 	struct virtio_hw *hw = dev->data->dev_private;
1255 
1256 	hw->started = 1;
1257 	rte_spinlock_unlock(&hw->state_lock);
1258 }
1259 
1260 /*
1261  * Should be called only after device is paused.
1262  */
1263 int
1264 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
1265 		int nb_pkts)
1266 {
1267 	struct virtio_hw *hw = dev->data->dev_private;
1268 	struct virtnet_tx *txvq = dev->data->tx_queues[0];
1269 	int ret;
1270 
1271 	hw->inject_pkts = tx_pkts;
1272 	ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
1273 	hw->inject_pkts = NULL;
1274 
1275 	return ret;
1276 }
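
/*
 * A minimal usage sketch of the pause/inject/resume helpers (the actual
 * callers live outside this file):
 *
 *	if (virtio_dev_pause(dev) == 0) {
 *		virtio_inject_pkts(dev, pkts, nb_pkts);
 *		virtio_dev_resume(dev);
 *	}
 */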
1277 
1278 /*
1279  * Process Virtio Config changed interrupt and call the callback
1280  * if link state changed.
1281  */
1282 void
1283 virtio_interrupt_handler(void *param)
1284 {
1285 	struct rte_eth_dev *dev = param;
1286 	struct virtio_hw *hw = dev->data->dev_private;
1287 	uint8_t isr;
1288 
1289 	/* Read interrupt status which clears interrupt */
1290 	isr = vtpci_isr(hw);
1291 	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
1292 
1293 	if (virtio_intr_enable(dev) < 0)
1294 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1295 
1296 	if (isr & VIRTIO_PCI_ISR_CONFIG) {
1297 		if (virtio_dev_link_update(dev, 0) == 0)
1298 			_rte_eth_dev_callback_process(dev,
1299 						      RTE_ETH_EVENT_INTR_LSC,
1300 						      NULL);
1301 	}
1302 
1303 }
1304 
1305 /* set rx and tx handlers according to what is supported */
1306 static void
1307 set_rxtx_funcs(struct rte_eth_dev *eth_dev)
1308 {
1309 	struct virtio_hw *hw = eth_dev->data->dev_private;
1310 
1311 	if (hw->use_simple_rx) {
1312 		PMD_INIT_LOG(INFO, "virtio: using simple Rx path on port %u",
1313 			eth_dev->data->port_id);
1314 		eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
1315 	} else if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1316 		PMD_INIT_LOG(INFO,
1317 			"virtio: using mergeable buffer Rx path on port %u",
1318 			eth_dev->data->port_id);
1319 		eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
1320 	} else {
1321 		PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
1322 			eth_dev->data->port_id);
1323 		eth_dev->rx_pkt_burst = &virtio_recv_pkts;
1324 	}
1325 
1326 	if (hw->use_simple_tx) {
1327 		PMD_INIT_LOG(INFO, "virtio: using simple Tx path on port %u",
1328 			eth_dev->data->port_id);
1329 		eth_dev->tx_pkt_burst = virtio_xmit_pkts_simple;
1330 	} else {
1331 		PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
1332 			eth_dev->data->port_id);
1333 		eth_dev->tx_pkt_burst = virtio_xmit_pkts;
1334 	}
1335 }
1336 
1337 /* Only support 1:1 queue/interrupt mapping so far.
1338  * TODO: support n:1 queue/interrupt mapping when there is a limited number
1339  * of interrupt vectors (fewer than N + 1).
1340  */
1341 static int
1342 virtio_queues_bind_intr(struct rte_eth_dev *dev)
1343 {
1344 	uint32_t i;
1345 	struct virtio_hw *hw = dev->data->dev_private;
1346 
1347 	PMD_INIT_LOG(INFO, "queue/interrupt binding");
1348 	for (i = 0; i < dev->data->nb_rx_queues; ++i) {
1349 		dev->intr_handle->intr_vec[i] = i + 1;
1350 		if (VTPCI_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
1351 						 VIRTIO_MSI_NO_VECTOR) {
1352 			PMD_DRV_LOG(ERR, "failed to set queue vector");
1353 			return -EBUSY;
1354 		}
1355 	}
1356 
1357 	return 0;
1358 }
1359 
1360 static void
1361 virtio_queues_unbind_intr(struct rte_eth_dev *dev)
1362 {
1363 	uint32_t i;
1364 	struct virtio_hw *hw = dev->data->dev_private;
1365 
1366 	PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
1367 	for (i = 0; i < dev->data->nb_rx_queues; ++i)
1368 		VTPCI_OPS(hw)->set_queue_irq(hw,
1369 					     hw->vqs[i * VTNET_CQ],
1370 					     VIRTIO_MSI_NO_VECTOR);
1371 }
1372 
1373 static int
1374 virtio_configure_intr(struct rte_eth_dev *dev)
1375 {
1376 	struct virtio_hw *hw = dev->data->dev_private;
1377 
1378 	if (!rte_intr_cap_multiple(dev->intr_handle)) {
1379 		PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
1380 		return -ENOTSUP;
1381 	}
1382 
1383 	if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1384 		PMD_INIT_LOG(ERR, "Fail to create eventfd");
1385 		return -1;
1386 	}
1387 
1388 	if (!dev->intr_handle->intr_vec) {
1389 		dev->intr_handle->intr_vec =
1390 			rte_zmalloc("intr_vec",
1391 				    hw->max_queue_pairs * sizeof(int), 0);
1392 		if (!dev->intr_handle->intr_vec) {
1393 			PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1394 				     hw->max_queue_pairs);
1395 			return -ENOMEM;
1396 		}
1397 	}
1398 
1399 	/* Re-register callback to update max_intr */
1400 	rte_intr_callback_unregister(dev->intr_handle,
1401 				     virtio_interrupt_handler,
1402 				     dev);
1403 	rte_intr_callback_register(dev->intr_handle,
1404 				   virtio_interrupt_handler,
1405 				   dev);
1406 
1407 	/* DO NOT try to remove this! This call enables MSI-X; without it, QEMU
1408 	 * encounters a SIGSEGV when DRIVER_OK is sent.
1409 	 * For legacy devices it must also be done before queue/vector binding,
1410 	 * so that the config size grows from 20 to 24 bytes; otherwise writes to
1411 	 * VIRTIO_MSI_QUEUE_VECTOR (offset 22) are ignored.
1412 	 */
1413 	if (virtio_intr_enable(dev) < 0) {
1414 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1415 		return -1;
1416 	}
1417 
1418 	if (virtio_queues_bind_intr(dev) < 0) {
1419 		PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1420 		return -1;
1421 	}
1422 
1423 	return 0;
1424 }
1425 
1426 /* reset device and renegotiate features if needed */
1427 static int
1428 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
1429 {
1430 	struct virtio_hw *hw = eth_dev->data->dev_private;
1431 	struct virtio_net_config *config;
1432 	struct virtio_net_config local_config;
1433 	struct rte_pci_device *pci_dev = NULL;
1434 	int ret;
1435 
1436 	/* Reset the device although not necessary at startup */
1437 	vtpci_reset(hw);
1438 
1439 	/* Tell the host we've noticed this device. */
1440 	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
1441 
1442 	/* Tell the host we know how to drive the device. */
1443 	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
1444 	if (virtio_negotiate_features(hw, req_features) < 0)
1445 		return -1;
1446 
1447 	if (!hw->virtio_user_dev) {
1448 		pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
1449 		rte_eth_copy_pci_info(eth_dev, pci_dev);
1450 	}
1451 
1452 	/* LSC requires both the status feature and MSI-X; disable it otherwise */
1453 	if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS) &&
1454 	    hw->use_msix != VIRTIO_MSIX_NONE)
1455 		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
1456 	else
1457 		eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
1458 
1459 	/* Setting up rx_header size for the device */
1460 	if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
1461 	    vtpci_with_feature(hw, VIRTIO_F_VERSION_1))
1462 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1463 	else
1464 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
1465 
1466 	/* Copy the permanent MAC address into the virtio_hw structure */
1467 	virtio_get_hwaddr(hw);
1468 	ether_addr_copy((struct ether_addr *) hw->mac_addr,
1469 			&eth_dev->data->mac_addrs[0]);
1470 	PMD_INIT_LOG(DEBUG,
1471 		     "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
1472 		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
1473 		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
1474 
1475 	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
1476 		config = &local_config;
1477 
1478 		vtpci_read_dev_config(hw,
1479 			offsetof(struct virtio_net_config, mac),
1480 			&config->mac, sizeof(config->mac));
1481 
1482 		if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1483 			vtpci_read_dev_config(hw,
1484 				offsetof(struct virtio_net_config, status),
1485 				&config->status, sizeof(config->status));
1486 		} else {
1487 			PMD_INIT_LOG(DEBUG,
1488 				     "VIRTIO_NET_F_STATUS is not supported");
1489 			config->status = 0;
1490 		}
1491 
1492 		if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
1493 			vtpci_read_dev_config(hw,
1494 				offsetof(struct virtio_net_config, max_virtqueue_pairs),
1495 				&config->max_virtqueue_pairs,
1496 				sizeof(config->max_virtqueue_pairs));
1497 		} else {
1498 			PMD_INIT_LOG(DEBUG,
1499 				     "VIRTIO_NET_F_MQ is not supported");
1500 			config->max_virtqueue_pairs = 1;
1501 		}
1502 
1503 		hw->max_queue_pairs = config->max_virtqueue_pairs;
1504 
1505 		if (vtpci_with_feature(hw, VIRTIO_NET_F_MTU)) {
1506 			vtpci_read_dev_config(hw,
1507 				offsetof(struct virtio_net_config, mtu),
1508 				&config->mtu,
1509 				sizeof(config->mtu));
1510 
1511 			/*
1512 			 * MTU value has already been checked at negotiation
1513 			 * time, but check again in case it has changed since
1514 			 * then, which should not happen.
1515 			 */
1516 			if (config->mtu < ETHER_MIN_MTU) {
1517 				PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
1518 						config->mtu);
1519 				return -1;
1520 			}
1521 
1522 			hw->max_mtu = config->mtu;
1523 			/* Set the initial MTU to the maximum one supported by vhost */
1524 			eth_dev->data->mtu = config->mtu;
1525 
1526 		} else {
1527 			hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN -
1528 				VLAN_TAG_LEN - hw->vtnet_hdr_size;
1529 		}
1530 
1531 		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
1532 				config->max_virtqueue_pairs);
1533 		PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
1534 		PMD_INIT_LOG(DEBUG,
1535 				"PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
1536 				config->mac[0], config->mac[1],
1537 				config->mac[2], config->mac[3],
1538 				config->mac[4], config->mac[5]);
1539 	} else {
1540 		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
1541 		hw->max_queue_pairs = 1;
1542 		hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN -
1543 			VLAN_TAG_LEN - hw->vtnet_hdr_size;
1544 	}
1545 
1546 	ret = virtio_alloc_queues(eth_dev);
1547 	if (ret < 0)
1548 		return ret;
1549 
1550 	if (eth_dev->data->dev_conf.intr_conf.rxq) {
1551 		if (virtio_configure_intr(eth_dev) < 0) {
1552 			PMD_INIT_LOG(ERR, "failed to configure interrupt");
1553 			return -1;
1554 		}
1555 	}
1556 
1557 	vtpci_reinit_complete(hw);
1558 
1559 	if (pci_dev)
1560 		PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
1561 			eth_dev->data->port_id, pci_dev->id.vendor_id,
1562 			pci_dev->id.device_id);
1563 
1564 	return 0;
1565 }
1566 
1567 /*
1568  * Remap the PCI device again (IO port map for legacy device and
1569  * memory map for modern device), so that the secondary process
1570  * can have the PCI device initialized correctly.
1571  */
1572 static int
1573 virtio_remap_pci(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
1574 {
1575 	if (hw->modern) {
1576 		/*
1577 		 * We don't have to re-parse the PCI config space, since
1578 		 * rte_pci_map_device() makes sure the mapped address
1579 		 * in the secondary process equals the one mapped in the
1580 		 * primary process; an error is returned if that
1581 		 * requirement is not met.
1582 		 *
1583 		 * That said, we could simply reuse all cap pointers
1584 		 * (such as dev_cfg, common_cfg, etc.) parsed from the
1585 		 * primary process, which is stored in shared memory.
1586 		 */
1587 		if (rte_pci_map_device(pci_dev)) {
1588 			PMD_INIT_LOG(DEBUG, "failed to map pci device!");
1589 			return -1;
1590 		}
1591 	} else {
1592 		if (rte_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0)
1593 			return -1;
1594 	}
1595 
1596 	return 0;
1597 }
1598 
1599 static void
1600 virtio_set_vtpci_ops(struct virtio_hw *hw)
1601 {
1602 #ifdef RTE_VIRTIO_USER
1603 	if (hw->virtio_user_dev)
1604 		VTPCI_OPS(hw) = &virtio_user_ops;
1605 	else
1606 #endif
1607 	if (hw->modern)
1608 		VTPCI_OPS(hw) = &modern_ops;
1609 	else
1610 		VTPCI_OPS(hw) = &legacy_ops;
1611 }
1612 
1613 /*
1614  * This function is based on probe() function in virtio_pci.c
1615  * It returns 0 on success.
1616  */
1617 int
1618 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
1619 {
1620 	struct virtio_hw *hw = eth_dev->data->dev_private;
1621 	int ret;
1622 
1623 	RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf));
1624 
1625 	eth_dev->dev_ops = &virtio_eth_dev_ops;
1626 
1627 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1628 		if (!hw->virtio_user_dev) {
1629 			ret = virtio_remap_pci(RTE_ETH_DEV_TO_PCI(eth_dev), hw);
1630 			if (ret)
1631 				return ret;
1632 		}
1633 
1634 		virtio_set_vtpci_ops(hw);
1635 		set_rxtx_funcs(eth_dev);
1636 
1637 		return 0;
1638 	}
1639 
1640 	/* Allocate memory for storing MAC addresses */
1641 	eth_dev->data->mac_addrs = rte_zmalloc("virtio", VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0);
1642 	if (eth_dev->data->mac_addrs == NULL) {
1643 		PMD_INIT_LOG(ERR,
1644 			"Failed to allocate %d bytes needed to store MAC addresses",
1645 			VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN);
1646 		return -ENOMEM;
1647 	}
1648 
1649 	hw->port_id = eth_dev->data->port_id;
1650 	/* For the virtio_user case, hw->virtio_user_dev is populated by
1651 	 * virtio_user_eth_dev_alloc() before eth_virtio_dev_init() is called.
1652 	 */
1653 	if (!hw->virtio_user_dev) {
1654 		ret = vtpci_init(RTE_ETH_DEV_TO_PCI(eth_dev), hw);
1655 		if (ret)
1656 			goto out;
1657 	}
1658 
1659 	/* reset device and negotiate default features */
1660 	ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
1661 	if (ret < 0)
1662 		goto out;
1663 
1664 	/* Setup interrupt callback  */
1665 	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1666 		rte_intr_callback_register(eth_dev->intr_handle,
1667 			virtio_interrupt_handler, eth_dev);
1668 
1669 	return 0;
1670 
1671 out:
1672 	rte_free(eth_dev->data->mac_addrs);
1673 	return ret;
1674 }
1675 
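/* Undo eth_virtio_dev_init(): stop and close the device, clear the ops and
 * burst function pointers, free the MAC address table, unregister the LSC
 * interrupt callback and unmap the PCI device.
 */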
1676 static int
1677 eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
1678 {
1679 	PMD_INIT_FUNC_TRACE();
1680 
1681 	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
1682 		return -EPERM;
1683 
1684 	virtio_dev_stop(eth_dev);
1685 	virtio_dev_close(eth_dev);
1686 
1687 	eth_dev->dev_ops = NULL;
1688 	eth_dev->tx_pkt_burst = NULL;
1689 	eth_dev->rx_pkt_burst = NULL;
1690 
1691 	rte_free(eth_dev->data->mac_addrs);
1692 	eth_dev->data->mac_addrs = NULL;
1693 
1694 	/* reset interrupt callback  */
1695 	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1696 		rte_intr_callback_unregister(eth_dev->intr_handle,
1697 						virtio_interrupt_handler,
1698 						eth_dev);
1699 	if (eth_dev->device)
1700 		rte_pci_unmap_device(RTE_ETH_DEV_TO_PCI(eth_dev));
1701 
1702 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
1703 
1704 	return 0;
1705 }
1706 
1707 static int eth_virtio_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1708 	struct rte_pci_device *pci_dev)
1709 {
1710 	return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct virtio_hw),
1711 		eth_virtio_dev_init);
1712 }
1713 
1714 static int eth_virtio_pci_remove(struct rte_pci_device *pci_dev)
1715 {
1716 	return rte_eth_dev_pci_generic_remove(pci_dev, eth_virtio_dev_uninit);
1717 }
1718 
1719 static struct rte_pci_driver rte_virtio_pmd = {
1720 	.driver = {
1721 		.name = "net_virtio",
1722 	},
1723 	.id_table = pci_id_virtio_map,
1724 	.drv_flags = 0,
1725 	.probe = eth_virtio_pci_probe,
1726 	.remove = eth_virtio_pci_remove,
1727 };
1728 
1729 RTE_INIT(rte_virtio_pmd_init);
1730 static void
1731 rte_virtio_pmd_init(void)
1732 {
1733 	if (rte_eal_iopl_init() != 0) {
1734 		PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD");
1735 		return;
1736 	}
1737 
1738 	rte_pci_register(&rte_virtio_pmd);
1739 }
1740 
1741 /*
1742  * Configure virtio device
1743  * It returns 0 on success.
1744  */
1745 static int
1746 virtio_dev_configure(struct rte_eth_dev *dev)
1747 {
1748 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
1749 	struct virtio_hw *hw = dev->data->dev_private;
1750 	uint64_t req_features;
1751 	int ret;
1752 
1753 	PMD_INIT_LOG(DEBUG, "configure");
1754 	req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
1755 
1756 	if (dev->data->dev_conf.intr_conf.rxq) {
1757 		ret = virtio_init_device(dev, hw->req_guest_features);
1758 		if (ret < 0)
1759 			return ret;
1760 	}
1761 
1762 	/* The name hw_ip_checksum is a bit confusing since it can be
1763 	 * set by the application to request L3 and/or L4 checksums. In
1764 	 * case of virtio, only L4 checksum is supported.
1765 	 */
1766 	if (rxmode->hw_ip_checksum)
1767 		req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
1768 
1769 	if (rxmode->enable_lro)
1770 		req_features |=
1771 			(1ULL << VIRTIO_NET_F_GUEST_TSO4) |
1772 			(1ULL << VIRTIO_NET_F_GUEST_TSO6);
1773 
1774 	/* if the requested features changed, reinit the device */
1775 	if (req_features != hw->req_guest_features) {
1776 		ret = virtio_init_device(dev, req_features);
1777 		if (ret < 0)
1778 			return ret;
1779 	}
1780 
1781 	if (rxmode->hw_ip_checksum &&
1782 		!vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
1783 		PMD_DRV_LOG(ERR,
1784 			"rx checksum not available on this host");
1785 		return -ENOTSUP;
1786 	}
1787 
1788 	if (rxmode->enable_lro &&
1789 		(!vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
1790 		 !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
1791 		PMD_DRV_LOG(ERR,
1792 			"Large Receive Offload not available on this host");
1793 		return -ENOTSUP;
1794 	}
1795 
1796 	/* start control queue */
1797 	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
1798 		virtio_dev_cq_start(dev);
1799 
1800 	hw->vlan_strip = rxmode->hw_vlan_strip;
1801 
1802 	if (rxmode->hw_vlan_filter
1803 	    && !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
1804 		PMD_DRV_LOG(ERR,
1805 			    "vlan filtering not available on this host");
1806 		return -ENOTSUP;
1807 	}
1808 
1809 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1810 		/* Enable vector (0) for Link State Interrupt */
1811 		if (VTPCI_OPS(hw)->set_config_irq(hw, 0) ==
1812 				VIRTIO_MSI_NO_VECTOR) {
1813 			PMD_DRV_LOG(ERR, "failed to set config vector");
1814 			return -EBUSY;
1815 		}
1816 
1817 	rte_spinlock_init(&hw->state_lock);
1818 
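	/* Select the datapath: the simple (vectorized) Rx/Tx handlers are the
	 * default, but they are disabled on ARM when NEON is not available,
	 * when mergeable Rx buffers have been negotiated, and (for Rx only)
	 * when Rx checksum offload is requested.
	 */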
1819 	hw->use_simple_rx = 1;
1820 	hw->use_simple_tx = 1;
1821 
1822 #if defined RTE_ARCH_ARM64 || defined RTE_ARCH_ARM
1823 	if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
1824 		hw->use_simple_rx = 0;
1825 		hw->use_simple_tx = 0;
1826 	}
1827 #endif
1828 	if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1829 		hw->use_simple_rx = 0;
1830 		hw->use_simple_tx = 0;
1831 	}
1832 
1833 	if (rxmode->hw_ip_checksum)
1834 		hw->use_simple_rx = 0;
1835 
1836 	return 0;
1837 }
1838 
1839 
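/* Start the device: finish Rx/Tx queue setup, (re-)enable interrupts when LSC
 * or Rx interrupts are configured, program the number of active queue pairs,
 * and notify every queue so the backend starts processing.
 * Returns 0 on success.
 */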
1840 static int
1841 virtio_dev_start(struct rte_eth_dev *dev)
1842 {
1843 	uint16_t nb_queues, i;
1844 	struct virtnet_rx *rxvq;
1845 	struct virtnet_tx *txvq __rte_unused;
1846 	struct virtio_hw *hw = dev->data->dev_private;
1847 	int ret;
1848 
1849 	/* Finish the initialization of the queues */
1850 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1851 		ret = virtio_dev_rx_queue_setup_finish(dev, i);
1852 		if (ret < 0)
1853 			return ret;
1854 	}
1855 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1856 		ret = virtio_dev_tx_queue_setup_finish(dev, i);
1857 		if (ret < 0)
1858 			return ret;
1859 	}
1860 
1861 	/* check if lsc interrupt feature is enabled */
1862 	if (dev->data->dev_conf.intr_conf.lsc) {
1863 		if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1864 			PMD_DRV_LOG(ERR, "link status not supported by host");
1865 			return -ENOTSUP;
1866 		}
1867 	}
1868 
1869 	/* Enable uio/vfio intr/eventfd mapping: although we already did that
1870 	 * in device configure, it could have been unmapped when the device was
1871 	 * stopped.
1872 	 */
1873 	if (dev->data->dev_conf.intr_conf.lsc ||
1874 	    dev->data->dev_conf.intr_conf.rxq) {
1875 		virtio_intr_disable(dev);
1876 
1877 		if (virtio_intr_enable(dev) < 0) {
1878 			PMD_DRV_LOG(ERR, "interrupt enable failed");
1879 			return -EIO;
1880 		}
1881 	}
1882 
1883 	/* Notify the backend.
1884 	 * Otherwise the tap backend might already stop its queue due to fullness,
1885 	 * and the vhost backend would have no chance to be woken up.
1886 	 */
1887 	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
1888 	if (hw->max_queue_pairs > 1) {
1889 		if (virtio_set_multiple_queues(dev, nb_queues) != 0)
1890 			return -EINVAL;
1891 	}
1892 
1893 	PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
1894 
1895 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1896 		rxvq = dev->data->rx_queues[i];
1897 		/* Flush the old packets */
1898 		virtqueue_rxvq_flush(rxvq->vq);
1899 		virtqueue_notify(rxvq->vq);
1900 	}
1901 
1902 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1903 		txvq = dev->data->tx_queues[i];
1904 		virtqueue_notify(txvq->vq);
1905 	}
1906 
1907 	PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
1908 
1909 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1910 		rxvq = dev->data->rx_queues[i];
1911 		VIRTQUEUE_DUMP(rxvq->vq);
1912 	}
1913 
1914 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1915 		txvq = dev->data->tx_queues[i];
1916 		VIRTQUEUE_DUMP(txvq->vq);
1917 	}
1918 
1919 	set_rxtx_funcs(dev);
1920 	hw->started = 1;
1921 
1922 	/* Initialize Link state */
1923 	virtio_dev_link_update(dev, 0);
1924 
1925 	return 0;
1926 }
1927 
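/* Return all mbufs still held by the Rx and Tx virtqueues to their mempools. */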
1928 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
1929 {
1930 	struct rte_mbuf *buf;
1931 	int i, mbuf_num = 0;
1932 
1933 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1934 		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1935 
1936 		PMD_INIT_LOG(DEBUG,
1937 			     "Before freeing rxq[%d] used and unused buf", i);
1938 		VIRTQUEUE_DUMP(rxvq->vq);
1939 
1940 		PMD_INIT_LOG(DEBUG, "rx_queues[%d]=%p", i, rxvq);
1941 		while ((buf = virtqueue_detatch_unused(rxvq->vq)) != NULL) {
1942 			rte_pktmbuf_free(buf);
1943 			mbuf_num++;
1944 		}
1945 
1946 		PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num);
1947 		PMD_INIT_LOG(DEBUG,
1948 			     "After freeing rxq[%d] used and unused buf", i);
1949 		VIRTQUEUE_DUMP(rxvq->vq);
1950 	}
1951 
1952 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1953 		struct virtnet_tx *txvq = dev->data->tx_queues[i];
1954 
1955 		PMD_INIT_LOG(DEBUG,
1956 			     "Before freeing txq[%d] used and unused bufs",
1957 			     i);
1958 		VIRTQUEUE_DUMP(txvq->vq);
1959 
1960 		mbuf_num = 0;
1961 		while ((buf = virtqueue_detatch_unused(txvq->vq)) != NULL) {
1962 			rte_pktmbuf_free(buf);
1963 			mbuf_num++;
1964 		}
1965 
1966 		PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num);
1967 		PMD_INIT_LOG(DEBUG,
1968 			     "After freeing txq[%d] used and unused buf", i);
1969 		VIRTQUEUE_DUMP(txvq->vq);
1970 	}
1971 }
1972 
1973 /*
1974  * Stop device: disable interrupt and mark link down
1975  */
1976 static void
1977 virtio_dev_stop(struct rte_eth_dev *dev)
1978 {
1979 	struct virtio_hw *hw = dev->data->dev_private;
1980 	struct rte_eth_link link;
1981 	struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
1982 
1983 	PMD_INIT_LOG(DEBUG, "stop");
1984 
1985 	rte_spinlock_lock(&hw->state_lock);
1986 	if (intr_conf->lsc || intr_conf->rxq)
1987 		virtio_intr_disable(dev);
1988 
1989 	hw->started = 0;
1990 	memset(&link, 0, sizeof(link));
1991 	virtio_dev_atomic_write_link_status(dev, &link);
1992 	rte_spinlock_unlock(&hw->state_lock);
1993 }
1994 
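/* Report the current link status. When VIRTIO_NET_F_STATUS has been negotiated
 * the status is read from the device config space; otherwise the link is
 * assumed to be up. Returns 0 if the status changed, -1 otherwise.
 */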
1995 static int
1996 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
1997 {
1998 	struct rte_eth_link link, old;
1999 	uint16_t status;
2000 	struct virtio_hw *hw = dev->data->dev_private;
2001 	memset(&link, 0, sizeof(link));
2002 	virtio_dev_atomic_read_link_status(dev, &link);
2003 	old = link;
2004 	link.link_duplex = ETH_LINK_FULL_DUPLEX;
2005 	link.link_speed  = ETH_SPEED_NUM_10G;
2006 
2007 	if (hw->started == 0) {
2008 		link.link_status = ETH_LINK_DOWN;
2009 	} else if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2010 		PMD_INIT_LOG(DEBUG, "Get link status from hw");
2011 		vtpci_read_dev_config(hw,
2012 				offsetof(struct virtio_net_config, status),
2013 				&status, sizeof(status));
2014 		if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2015 			link.link_status = ETH_LINK_DOWN;
2016 			PMD_INIT_LOG(DEBUG, "Port %d is down",
2017 				     dev->data->port_id);
2018 		} else {
2019 			link.link_status = ETH_LINK_UP;
2020 			PMD_INIT_LOG(DEBUG, "Port %d is up",
2021 				     dev->data->port_id);
2022 		}
2023 	} else {
2024 		link.link_status = ETH_LINK_UP;
2025 	}
2026 	virtio_dev_atomic_write_link_status(dev, &link);
2027 
2028 	return (old.link_status == link.link_status) ? -1 : 0;
2029 }
2030 
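/* Apply VLAN offload changes: VLAN filtering requires VIRTIO_NET_F_CTRL_VLAN
 * on the host, while VLAN stripping simply follows the configured rxmode.
 */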
2031 static int
2032 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2033 {
2034 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2035 	struct virtio_hw *hw = dev->data->dev_private;
2036 
2037 	if (mask & ETH_VLAN_FILTER_MASK) {
2038 		if (rxmode->hw_vlan_filter &&
2039 				!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2040 
2041 			PMD_DRV_LOG(NOTICE,
2042 				"vlan filtering not available on this host");
2043 
2044 			return -ENOTSUP;
2045 		}
2046 	}
2047 
2048 	if (mask & ETH_VLAN_STRIP_MASK)
2049 		hw->vlan_strip = rxmode->hw_vlan_strip;
2050 
2051 	return 0;
2052 }
2053 
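/* Report device capabilities: queue limits, buffer sizes and the Rx/Tx
 * offloads derived from the host and negotiated feature bits.
 */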
2054 static void
2055 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2056 {
2057 	uint64_t tso_mask, host_features;
2058 	struct virtio_hw *hw = dev->data->dev_private;
2059 
2060 	dev_info->speed_capa = ETH_LINK_SPEED_10G; /* fake value */
2061 
2062 	dev_info->pci_dev = dev->device ? RTE_ETH_DEV_TO_PCI(dev) : NULL;
2063 	dev_info->max_rx_queues =
2064 		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
2065 	dev_info->max_tx_queues =
2066 		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
2067 	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
2068 	dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
2069 	dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
2070 	dev_info->default_txconf = (struct rte_eth_txconf) {
2071 		.txq_flags = ETH_TXQ_FLAGS_NOOFFLOADS
2072 	};
2073 
2074 	host_features = VTPCI_OPS(hw)->get_features(hw);
2075 	dev_info->rx_offload_capa = 0;
2076 	if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
2077 		dev_info->rx_offload_capa |=
2078 			DEV_RX_OFFLOAD_TCP_CKSUM |
2079 			DEV_RX_OFFLOAD_UDP_CKSUM;
2080 	}
2081 	tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2082 		(1ULL << VIRTIO_NET_F_GUEST_TSO6);
2083 	if ((host_features & tso_mask) == tso_mask)
2084 		dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;
2085 
2086 	dev_info->tx_offload_capa = 0;
2087 	if (hw->guest_features & (1ULL << VIRTIO_NET_F_CSUM)) {
2088 		dev_info->tx_offload_capa |=
2089 			DEV_TX_OFFLOAD_UDP_CKSUM |
2090 			DEV_TX_OFFLOAD_TCP_CKSUM;
2091 	}
2092 	tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2093 		(1ULL << VIRTIO_NET_F_HOST_TSO6);
2094 	if ((hw->guest_features & tso_mask) == tso_mask)
2095 		dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
2096 }
2097 
2098 /*
2099  * Stub that only exists so testpmd can collect per-queue stats.
2100  */
2101 static int
2102 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
2103 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
2104 __rte_unused uint8_t is_rx)
2105 {
2106 	return 0;
2107 }
2108 
2109 RTE_PMD_EXPORT_NAME(net_virtio, __COUNTER__);
2110 RTE_PMD_REGISTER_PCI_TABLE(net_virtio, pci_id_virtio_map);
2111 RTE_PMD_REGISTER_KMOD_DEP(net_virtio, "* igb_uio | uio_pci_generic | vfio-pci");
2112 
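/* Register the virtio log types; both default to NOTICE level. */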
2113 RTE_INIT(virtio_init_log);
2114 static void
2115 virtio_init_log(void)
2116 {
2117 	virtio_logtype_init = rte_log_register("pmd.virtio.init");
2118 	if (virtio_logtype_init >= 0)
2119 		rte_log_set_level(virtio_logtype_init, RTE_LOG_NOTICE);
2120 	virtio_logtype_driver = rte_log_register("pmd.virtio.driver");
2121 	if (virtio_logtype_driver >= 0)
2122 		rte_log_set_level(virtio_logtype_driver, RTE_LOG_NOTICE);
2123 }
2124