1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <unistd.h>
10 
11 #include <rte_ethdev_driver.h>
12 #include <rte_ethdev_pci.h>
13 #include <rte_memcpy.h>
14 #include <rte_string_fns.h>
15 #include <rte_memzone.h>
16 #include <rte_malloc.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_pci.h>
19 #include <rte_bus_pci.h>
20 #include <rte_ether.h>
21 #include <rte_ip.h>
22 #include <rte_arp.h>
23 #include <rte_common.h>
24 #include <rte_errno.h>
25 #include <rte_cpuflags.h>
26 
27 #include <rte_memory.h>
28 #include <rte_eal.h>
29 #include <rte_dev.h>
30 #include <rte_cycles.h>
31 #include <rte_kvargs.h>
32 
33 #include "virtio_ethdev.h"
34 #include "virtio_pci.h"
35 #include "virtio_logs.h"
36 #include "virtqueue.h"
37 #include "virtio_rxtx.h"
38 
39 static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev);
40 static int  virtio_dev_configure(struct rte_eth_dev *dev);
41 static int  virtio_dev_start(struct rte_eth_dev *dev);
42 static void virtio_dev_stop(struct rte_eth_dev *dev);
43 static void virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
44 static void virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
45 static void virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
46 static void virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
47 static void virtio_dev_info_get(struct rte_eth_dev *dev,
48 				struct rte_eth_dev_info *dev_info);
49 static int virtio_dev_link_update(struct rte_eth_dev *dev,
50 	int wait_to_complete);
51 static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
52 
53 static void virtio_set_hwaddr(struct virtio_hw *hw);
54 static void virtio_get_hwaddr(struct virtio_hw *hw);
55 
56 static int virtio_dev_stats_get(struct rte_eth_dev *dev,
57 				 struct rte_eth_stats *stats);
58 static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
59 				 struct rte_eth_xstat *xstats, unsigned n);
60 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
61 				       struct rte_eth_xstat_name *xstats_names,
62 				       unsigned limit);
63 static void virtio_dev_stats_reset(struct rte_eth_dev *dev);
64 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
65 static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
66 				uint16_t vlan_id, int on);
67 static int virtio_mac_addr_add(struct rte_eth_dev *dev,
68 				struct ether_addr *mac_addr,
69 				uint32_t index, uint32_t vmdq);
70 static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
71 static int virtio_mac_addr_set(struct rte_eth_dev *dev,
72 				struct ether_addr *mac_addr);
73 
74 static int virtio_intr_enable(struct rte_eth_dev *dev);
75 static int virtio_intr_disable(struct rte_eth_dev *dev);
76 
77 static int virtio_dev_queue_stats_mapping_set(
78 	struct rte_eth_dev *eth_dev,
79 	uint16_t queue_id,
80 	uint8_t stat_idx,
81 	uint8_t is_rx);
82 
83 int virtio_logtype_init;
84 int virtio_logtype_driver;
85 
86 static void virtio_notify_peers(struct rte_eth_dev *dev);
87 static void virtio_ack_link_announce(struct rte_eth_dev *dev);
88 
89 /*
90  * The set of PCI devices this driver supports
91  */
92 static const struct rte_pci_id pci_id_virtio_map[] = {
93 	{ RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_LEGACY_DEVICEID_NET) },
94 	{ RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_MODERN_DEVICEID_NET) },
95 	{ .vendor_id = 0, /* sentinel */ },
96 };
97 
98 struct rte_virtio_xstats_name_off {
99 	char name[RTE_ETH_XSTATS_NAME_SIZE];
100 	unsigned offset;
101 };
102 
103 /* [rt]x_qX_ is prepended to the name string here */
104 static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
105 	{"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
106 	{"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
107 	{"errors",                 offsetof(struct virtnet_rx, stats.errors)},
108 	{"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
109 	{"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
110 	{"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
111 	{"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
112 	{"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
113 	{"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
114 	{"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
115 	{"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
116 	{"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
117 	{"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
118 };
119 
120 /* [rt]x_qX_ is prepended to the name string here */
121 static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
122 	{"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
123 	{"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
124 	{"errors",                 offsetof(struct virtnet_tx, stats.errors)},
125 	{"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
126 	{"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
127 	{"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
128 	{"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
129 	{"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
130 	{"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
131 	{"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
132 	{"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
133 	{"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
134 	{"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
135 };
136 
137 #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
138 			    sizeof(rte_virtio_rxq_stat_strings[0]))
139 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
140 			    sizeof(rte_virtio_txq_stat_strings[0]))
141 
142 struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
143 
144 static struct virtio_pmd_ctrl *
145 virtio_pq_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
146 		       int *dlen, int pkt_num)
147 {
148 	struct virtqueue *vq = cvq->vq;
149 	int head;
150 	struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
151 	struct virtio_pmd_ctrl *result;
152 	bool avail_wrap_counter, used_wrap_counter;
153 	uint16_t flags;
154 	int sum = 0;
155 	int k;
156 
157 	/*
158 	 * Format is enforced in qemu code:
159 	 * One TX packet for header;
160 	 * At least one TX packet per argument;
161 	 * One RX packet for ACK.
162 	 */
163 	head = vq->vq_avail_idx;
164 	avail_wrap_counter = vq->avail_wrap_counter;
165 	used_wrap_counter = vq->used_wrap_counter;
166 	desc[head].flags = VRING_DESC_F_NEXT;
167 	desc[head].addr = cvq->virtio_net_hdr_mem;
168 	desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
169 	vq->vq_free_cnt--;
170 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
171 		vq->vq_avail_idx -= vq->vq_nentries;
172 		vq->avail_wrap_counter ^= 1;
173 	}
174 
175 	for (k = 0; k < pkt_num; k++) {
176 		desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
177 			+ sizeof(struct virtio_net_ctrl_hdr)
178 			+ sizeof(ctrl->status) + sizeof(uint8_t) * sum;
179 		desc[vq->vq_avail_idx].len = dlen[k];
180 		flags = VRING_DESC_F_NEXT;
181 		sum += dlen[k];
182 		vq->vq_free_cnt--;
183 		flags |= VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
184 			 VRING_DESC_F_USED(!vq->avail_wrap_counter);
185 		desc[vq->vq_avail_idx].flags = flags;
186 		rte_smp_wmb();
188 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
189 			vq->vq_avail_idx -= vq->vq_nentries;
190 			vq->avail_wrap_counter ^= 1;
191 		}
192 	}
193 
194 
195 	desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
196 		+ sizeof(struct virtio_net_ctrl_hdr);
197 	desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
198 	flags = VRING_DESC_F_WRITE;
199 	flags |= VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
200 		 VRING_DESC_F_USED(!vq->avail_wrap_counter);
201 	desc[vq->vq_avail_idx].flags = flags;
202 	flags = VRING_DESC_F_NEXT;
203 	flags |= VRING_DESC_F_AVAIL(avail_wrap_counter) |
204 		 VRING_DESC_F_USED(!avail_wrap_counter);
205 	desc[head].flags = flags;
206 	rte_smp_wmb();
207 
208 	vq->vq_free_cnt--;
209 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
210 		vq->vq_avail_idx -= vq->vq_nentries;
211 		vq->avail_wrap_counter ^= 1;
212 	}
213 
214 	virtqueue_notify(vq);
215 
216 	/* wait for used descriptors in virtqueue */
217 	do {
218 		rte_rmb();
219 		usleep(100);
220 	} while (!__desc_is_used(&desc[head], used_wrap_counter));
221 
222 	/* now get used descriptors */
223 	while (desc_is_used(&desc[vq->vq_used_cons_idx], vq)) {
224 		vq->vq_free_cnt++;
225 		if (++vq->vq_used_cons_idx >= vq->vq_nentries) {
226 			vq->vq_used_cons_idx -= vq->vq_nentries;
227 			vq->used_wrap_counter ^= 1;
228 		}
229 	}
230 
231 	result = cvq->virtio_net_hdr_mz->addr;
232 	return result;
233 }
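/*
 * Editorial sketch (not part of the driver): how the packed-ring descriptor
 * flags written above encode ownership.  A descriptor is "used" once the
 * device has flipped both the AVAIL and USED bits to match the wrap counter
 * the driver expects.  A minimal check, assuming the VRING_DESC_F_AVAIL()/
 * VRING_DESC_F_USED() helpers encode the wrap bit as they are used above:
 *
 *	static inline bool
 *	example_desc_is_used(const struct vring_packed_desc *d,
 *			     bool used_wrap_counter)
 *	{
 *		bool avail = !!(d->flags & VRING_DESC_F_AVAIL(1));
 *		bool used  = !!(d->flags & VRING_DESC_F_USED(1));
 *
 *		// Used by the device once both bits equal the wrap
 *		// counter value the driver is currently expecting.
 *		return avail == used_wrap_counter &&
 *		       used == used_wrap_counter;
 *	}
 */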
234 
235 static int
236 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
237 		int *dlen, int pkt_num)
238 {
239 	uint32_t head, i;
240 	int k, sum = 0;
241 	virtio_net_ctrl_ack status = ~0;
242 	struct virtio_pmd_ctrl *result;
243 	struct virtqueue *vq;
244 
245 	ctrl->status = status;
246 
247 	if (!cvq || !cvq->vq) {
248 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
249 		return -1;
250 	}
251 
252 	rte_spinlock_lock(&cvq->lock);
253 	vq = cvq->vq;
254 	head = vq->vq_desc_head_idx;
255 
256 	PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
257 		"vq->hw->cvq = %p vq = %p",
258 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
259 
260 	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
261 		rte_spinlock_unlock(&cvq->lock);
262 		return -1;
263 	}
264 
265 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
266 		sizeof(struct virtio_pmd_ctrl));
267 
268 	if (vtpci_packed_queue(vq->hw)) {
269 		result = virtio_pq_send_command(cvq, ctrl, dlen, pkt_num);
270 		goto out_unlock;
271 	}
272 
273 	/*
274 	 * Format is enforced in qemu code:
275 	 * One TX packet for header;
276 	 * At least one TX packet per argument;
277 	 * One RX packet for ACK.
278 	 */
279 	vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT;
280 	vq->vq_ring.desc[head].addr = cvq->virtio_net_hdr_mem;
281 	vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
282 	vq->vq_free_cnt--;
283 	i = vq->vq_ring.desc[head].next;
284 
285 	for (k = 0; k < pkt_num; k++) {
286 		vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT;
287 		vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem
288 			+ sizeof(struct virtio_net_ctrl_hdr)
289 			+ sizeof(ctrl->status) + sizeof(uint8_t)*sum;
290 		vq->vq_ring.desc[i].len = dlen[k];
291 		sum += dlen[k];
292 		vq->vq_free_cnt--;
293 		i = vq->vq_ring.desc[i].next;
294 	}
295 
296 	vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
297 	vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem
298 			+ sizeof(struct virtio_net_ctrl_hdr);
299 	vq->vq_ring.desc[i].len = sizeof(ctrl->status);
300 	vq->vq_free_cnt--;
301 
302 	vq->vq_desc_head_idx = vq->vq_ring.desc[i].next;
303 
304 	vq_update_avail_ring(vq, head);
305 	vq_update_avail_idx(vq);
306 
307 	PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
308 
309 	virtqueue_notify(vq);
310 
311 	rte_rmb();
312 	while (VIRTQUEUE_NUSED(vq) == 0) {
313 		rte_rmb();
314 		usleep(100);
315 	}
316 
317 	while (VIRTQUEUE_NUSED(vq)) {
318 		uint32_t idx, desc_idx, used_idx;
319 		struct vring_used_elem *uep;
320 
321 		used_idx = (uint32_t)(vq->vq_used_cons_idx
322 				& (vq->vq_nentries - 1));
323 		uep = &vq->vq_ring.used->ring[used_idx];
324 		idx = (uint32_t) uep->id;
325 		desc_idx = idx;
326 
327 		while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) {
328 			desc_idx = vq->vq_ring.desc[desc_idx].next;
329 			vq->vq_free_cnt++;
330 		}
331 
332 		vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx;
333 		vq->vq_desc_head_idx = idx;
334 
335 		vq->vq_used_cons_idx++;
336 		vq->vq_free_cnt++;
337 	}
338 
339 	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
340 			vq->vq_free_cnt, vq->vq_desc_head_idx);
341 
342 	result = cvq->virtio_net_hdr_mz->addr;
343 
344 out_unlock:
345 	rte_spinlock_unlock(&cvq->lock);
346 	return result->status;
347 }
348 
349 static int
350 virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
351 {
352 	struct virtio_hw *hw = dev->data->dev_private;
353 	struct virtio_pmd_ctrl ctrl;
354 	int dlen[1];
355 	int ret;
356 
357 	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
358 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
359 	memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
360 
361 	dlen[0] = sizeof(uint16_t);
362 
363 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
364 	if (ret) {
365 		PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
366 			  "failed, this is too late now...");
367 		return -EINVAL;
368 	}
369 
370 	return 0;
371 }
372 
373 static void
374 virtio_dev_queue_release(void *queue __rte_unused)
375 {
376 	/* do nothing */
377 }
378 
379 static uint16_t
380 virtio_get_nr_vq(struct virtio_hw *hw)
381 {
382 	uint16_t nr_vq = hw->max_queue_pairs * 2;
383 
384 	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
385 		nr_vq += 1;
386 
387 	return nr_vq;
388 }
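/*
 * Editorial note: with the conventional virtio-net layout assumed throughout
 * this file, queue pair q maps to virtqueue 2*q (Rx) and 2*q+1 (Tx), and the
 * control queue, when negotiated, takes the last index.  For example,
 * max_queue_pairs = 4 with VIRTIO_NET_F_CTRL_VQ gives nr_vq = 9:
 *
 *	vq 0/1 -> Rx/Tx of pair 0
 *	vq 2/3 -> Rx/Tx of pair 1
 *	vq 4/5 -> Rx/Tx of pair 2
 *	vq 6/7 -> Rx/Tx of pair 3
 *	vq 8   -> control queue
 */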
389 
390 static void
391 virtio_init_vring(struct virtqueue *vq)
392 {
393 	int size = vq->vq_nentries;
394 	struct vring *vr = &vq->vq_ring;
395 	uint8_t *ring_mem = vq->vq_ring_virt_mem;
396 
397 	PMD_INIT_FUNC_TRACE();
398 
399 	memset(ring_mem, 0, vq->vq_ring_size);
400 
401 	vq->vq_used_cons_idx = 0;
402 	vq->vq_desc_head_idx = 0;
403 	vq->vq_avail_idx = 0;
404 	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
405 	vq->vq_free_cnt = vq->vq_nentries;
406 	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
407 	if (vtpci_packed_queue(vq->hw)) {
408 		vring_init_packed(&vq->ring_packed, ring_mem,
409 				  VIRTIO_PCI_VRING_ALIGN, size);
410 		vring_desc_init_packed(vq, size);
411 	} else {
412 		vring_init_split(vr, ring_mem, VIRTIO_PCI_VRING_ALIGN, size);
413 		vring_desc_init_split(vr->desc, size);
414 	}
415 	/*
416 	 * Disable interrupts from the device (host) to the guest.
417 	 */
418 	virtqueue_disable_intr(vq);
419 }
420 
421 static int
422 virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
423 {
424 	char vq_name[VIRTQUEUE_MAX_NAME_SZ];
425 	char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
426 	const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
427 	unsigned int vq_size, size;
428 	struct virtio_hw *hw = dev->data->dev_private;
429 	struct virtnet_rx *rxvq = NULL;
430 	struct virtnet_tx *txvq = NULL;
431 	struct virtnet_ctl *cvq = NULL;
432 	struct virtqueue *vq;
433 	size_t sz_hdr_mz = 0;
434 	void *sw_ring = NULL;
435 	int queue_type = virtio_get_queue_type(hw, vtpci_queue_idx);
436 	int ret;
437 	int numa_node = dev->device->numa_node;
438 
439 	PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
440 			vtpci_queue_idx, numa_node);
441 
442 	/*
443 	 * Read the virtqueue size from the Queue Size field.
444 	 * It is always a power of 2; a value of 0 means the virtqueue does not exist.
445 	 */
446 	vq_size = VTPCI_OPS(hw)->get_queue_num(hw, vtpci_queue_idx);
447 	PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
448 	if (vq_size == 0) {
449 		PMD_INIT_LOG(ERR, "virtqueue does not exist");
450 		return -EINVAL;
451 	}
452 
453 	if (!rte_is_power_of_2(vq_size)) {
454 		PMD_INIT_LOG(ERR, "virtqueue size is not a power of 2");
455 		return -EINVAL;
456 	}
457 
458 	snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
459 		 dev->data->port_id, vtpci_queue_idx);
460 
461 	size = RTE_ALIGN_CEIL(sizeof(*vq) +
462 				vq_size * sizeof(struct vq_desc_extra),
463 				RTE_CACHE_LINE_SIZE);
464 	if (queue_type == VTNET_TQ) {
465 		/*
466 		 * For each xmit packet, allocate a virtio_net_hdr
467 		 * and indirect ring elements
468 		 */
469 		sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
470 	} else if (queue_type == VTNET_CQ) {
471 		/* Allocate a page for control vq command, data and status */
472 		sz_hdr_mz = PAGE_SIZE;
473 	}
474 
475 	vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
476 				numa_node);
477 	if (vq == NULL) {
478 		PMD_INIT_LOG(ERR, "can not allocate vq");
479 		return -ENOMEM;
480 	}
481 	hw->vqs[vtpci_queue_idx] = vq;
482 
483 	vq->hw = hw;
484 	vq->vq_queue_index = vtpci_queue_idx;
485 	vq->vq_nentries = vq_size;
486 	vq->event_flags_shadow = 0;
487 	if (vtpci_packed_queue(hw)) {
488 		vq->avail_wrap_counter = 1;
489 		vq->used_wrap_counter = 1;
490 		vq->avail_used_flags =
491 			VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
492 			VRING_DESC_F_USED(!vq->avail_wrap_counter);
493 	}
494 
495 	/*
496 	 * Reserve a memzone for vring elements
497 	 */
498 	size = vring_size(hw, vq_size, VIRTIO_PCI_VRING_ALIGN);
499 	vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
500 	PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
501 		     size, vq->vq_ring_size);
502 
503 	mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
504 			numa_node, RTE_MEMZONE_IOVA_CONTIG,
505 			VIRTIO_PCI_VRING_ALIGN);
506 	if (mz == NULL) {
507 		if (rte_errno == EEXIST)
508 			mz = rte_memzone_lookup(vq_name);
509 		if (mz == NULL) {
510 			ret = -ENOMEM;
511 			goto fail_q_alloc;
512 		}
513 	}
514 
515 	memset(mz->addr, 0, mz->len);
516 
517 	vq->vq_ring_mem = mz->iova;
518 	vq->vq_ring_virt_mem = mz->addr;
519 	PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem:      0x%" PRIx64,
520 		     (uint64_t)mz->iova);
521 	PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64,
522 		     (uint64_t)(uintptr_t)mz->addr);
523 
524 	virtio_init_vring(vq);
525 
526 	if (sz_hdr_mz) {
527 		snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
528 			 dev->data->port_id, vtpci_queue_idx);
529 		hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
530 				numa_node, RTE_MEMZONE_IOVA_CONTIG,
531 				RTE_CACHE_LINE_SIZE);
532 		if (hdr_mz == NULL) {
533 			if (rte_errno == EEXIST)
534 				hdr_mz = rte_memzone_lookup(vq_hdr_name);
535 			if (hdr_mz == NULL) {
536 				ret = -ENOMEM;
537 				goto fail_q_alloc;
538 			}
539 		}
540 	}
541 
542 	if (queue_type == VTNET_RQ) {
543 		size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
544 			       sizeof(vq->sw_ring[0]);
545 
546 		sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
547 				RTE_CACHE_LINE_SIZE, numa_node);
548 		if (!sw_ring) {
549 			PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
550 			ret = -ENOMEM;
551 			goto fail_q_alloc;
552 		}
553 
554 		vq->sw_ring = sw_ring;
555 		rxvq = &vq->rxq;
556 		rxvq->vq = vq;
557 		rxvq->port_id = dev->data->port_id;
558 		rxvq->mz = mz;
559 	} else if (queue_type == VTNET_TQ) {
560 		txvq = &vq->txq;
561 		txvq->vq = vq;
562 		txvq->port_id = dev->data->port_id;
563 		txvq->mz = mz;
564 		txvq->virtio_net_hdr_mz = hdr_mz;
565 		txvq->virtio_net_hdr_mem = hdr_mz->iova;
566 	} else if (queue_type == VTNET_CQ) {
567 		cvq = &vq->cq;
568 		cvq->vq = vq;
569 		cvq->mz = mz;
570 		cvq->virtio_net_hdr_mz = hdr_mz;
571 		cvq->virtio_net_hdr_mem = hdr_mz->iova;
572 		memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
573 
574 		hw->cvq = cvq;
575 	}
576 
577 	/* For the virtio_user case (that is, when hw->virtio_user_dev is set),
578 	 * we use virtual addresses and need to set _offset_ accordingly; see
579 	 * VIRTIO_MBUF_DATA_DMA_ADDR in virtqueue.h for more information.
580 	 */
581 	if (!hw->virtio_user_dev)
582 		vq->offset = offsetof(struct rte_mbuf, buf_iova);
583 	else {
584 		vq->vq_ring_mem = (uintptr_t)mz->addr;
585 		vq->offset = offsetof(struct rte_mbuf, buf_addr);
586 		if (queue_type == VTNET_TQ)
587 			txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
588 		else if (queue_type == VTNET_CQ)
589 			cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
590 	}
591 
592 	if (queue_type == VTNET_TQ) {
593 		struct virtio_tx_region *txr;
594 		unsigned int i;
595 
596 		txr = hdr_mz->addr;
597 		memset(txr, 0, vq_size * sizeof(*txr));
598 		for (i = 0; i < vq_size; i++) {
599 			struct vring_desc *start_dp = txr[i].tx_indir;
600 			struct vring_packed_desc *start_dp_packed =
601 				txr[i].tx_indir_pq;
602 
603 			/* first indirect descriptor is always the tx header */
604 			if (vtpci_packed_queue(hw)) {
605 				start_dp_packed->addr = txvq->virtio_net_hdr_mem
606 					+ i * sizeof(*txr)
607 					+ offsetof(struct virtio_tx_region,
608 						   tx_hdr);
609 				start_dp_packed->len = hw->vtnet_hdr_size;
610 			} else {
611 				vring_desc_init_split(start_dp,
612 						      RTE_DIM(txr[i].tx_indir));
613 				start_dp->addr = txvq->virtio_net_hdr_mem
614 					+ i * sizeof(*txr)
615 					+ offsetof(struct virtio_tx_region,
616 						   tx_hdr);
617 				start_dp->len = hw->vtnet_hdr_size;
618 				start_dp->flags = VRING_DESC_F_NEXT;
619 			}
620 		}
621 	}
622 
623 	if (VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
624 		PMD_INIT_LOG(ERR, "setup_queue failed");
625 		return -EINVAL;
626 	}
627 
628 	return 0;
629 
630 fail_q_alloc:
631 	rte_free(sw_ring);
632 	rte_memzone_free(hdr_mz);
633 	rte_memzone_free(mz);
634 	rte_free(vq);
635 
636 	return ret;
637 }
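/*
 * Editorial sketch: the Tx header memzone populated above is an array of
 * struct virtio_tx_region, one per descriptor slot, so the device-visible
 * address of slot i's header is a fixed-stride offset.  A hypothetical helper
 * mirroring the computation in the loop above (name and return type are
 * illustrative only):
 *
 *	static inline rte_iova_t
 *	example_tx_hdr_iova(const struct virtnet_tx *txvq, uint16_t slot)
 *	{
 *		return txvq->virtio_net_hdr_mem +
 *		       slot * sizeof(struct virtio_tx_region) +
 *		       offsetof(struct virtio_tx_region, tx_hdr);
 *	}
 */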
638 
639 static void
640 virtio_free_queues(struct virtio_hw *hw)
641 {
642 	uint16_t nr_vq = virtio_get_nr_vq(hw);
643 	struct virtqueue *vq;
644 	int queue_type;
645 	uint16_t i;
646 
647 	if (hw->vqs == NULL)
648 		return;
649 
650 	for (i = 0; i < nr_vq; i++) {
651 		vq = hw->vqs[i];
652 		if (!vq)
653 			continue;
654 
655 		queue_type = virtio_get_queue_type(hw, i);
656 		if (queue_type == VTNET_RQ) {
657 			rte_free(vq->sw_ring);
658 			rte_memzone_free(vq->rxq.mz);
659 		} else if (queue_type == VTNET_TQ) {
660 			rte_memzone_free(vq->txq.mz);
661 			rte_memzone_free(vq->txq.virtio_net_hdr_mz);
662 		} else {
663 			rte_memzone_free(vq->cq.mz);
664 			rte_memzone_free(vq->cq.virtio_net_hdr_mz);
665 		}
666 
667 		rte_free(vq);
668 		hw->vqs[i] = NULL;
669 	}
670 
671 	rte_free(hw->vqs);
672 	hw->vqs = NULL;
673 }
674 
675 static int
676 virtio_alloc_queues(struct rte_eth_dev *dev)
677 {
678 	struct virtio_hw *hw = dev->data->dev_private;
679 	uint16_t nr_vq = virtio_get_nr_vq(hw);
680 	uint16_t i;
681 	int ret;
682 
683 	hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
684 	if (!hw->vqs) {
685 		PMD_INIT_LOG(ERR, "failed to allocate vqs");
686 		return -ENOMEM;
687 	}
688 
689 	for (i = 0; i < nr_vq; i++) {
690 		ret = virtio_init_queue(dev, i);
691 		if (ret < 0) {
692 			virtio_free_queues(hw);
693 			return ret;
694 		}
695 	}
696 
697 	return 0;
698 }
699 
700 static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);
701 
702 static void
703 virtio_dev_close(struct rte_eth_dev *dev)
704 {
705 	struct virtio_hw *hw = dev->data->dev_private;
706 	struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
707 
708 	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
709 
710 	if (!hw->opened)
711 		return;
712 	hw->opened = false;
713 
714 	/* reset the NIC */
715 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
716 		VTPCI_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
717 	if (intr_conf->rxq)
718 		virtio_queues_unbind_intr(dev);
719 
720 	if (intr_conf->lsc || intr_conf->rxq) {
721 		virtio_intr_disable(dev);
722 		rte_intr_efd_disable(dev->intr_handle);
723 		rte_free(dev->intr_handle->intr_vec);
724 		dev->intr_handle->intr_vec = NULL;
725 	}
726 
727 	vtpci_reset(hw);
728 	virtio_dev_free_mbufs(dev);
729 	virtio_free_queues(hw);
730 }
731 
732 static void
733 virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
734 {
735 	struct virtio_hw *hw = dev->data->dev_private;
736 	struct virtio_pmd_ctrl ctrl;
737 	int dlen[1];
738 	int ret;
739 
740 	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
741 		PMD_INIT_LOG(INFO, "host does not support rx control");
742 		return;
743 	}
744 
745 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
746 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
747 	ctrl.data[0] = 1;
748 	dlen[0] = 1;
749 
750 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
751 	if (ret)
752 		PMD_INIT_LOG(ERR, "Failed to enable promisc");
753 }
754 
755 static void
756 virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
757 {
758 	struct virtio_hw *hw = dev->data->dev_private;
759 	struct virtio_pmd_ctrl ctrl;
760 	int dlen[1];
761 	int ret;
762 
763 	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
764 		PMD_INIT_LOG(INFO, "host does not support rx control");
765 		return;
766 	}
767 
768 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
769 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
770 	ctrl.data[0] = 0;
771 	dlen[0] = 1;
772 
773 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
774 	if (ret)
775 		PMD_INIT_LOG(ERR, "Failed to disable promisc");
776 }
777 
778 static void
779 virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
780 {
781 	struct virtio_hw *hw = dev->data->dev_private;
782 	struct virtio_pmd_ctrl ctrl;
783 	int dlen[1];
784 	int ret;
785 
786 	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
787 		PMD_INIT_LOG(INFO, "host does not support rx control");
788 		return;
789 	}
790 
791 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
792 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
793 	ctrl.data[0] = 1;
794 	dlen[0] = 1;
795 
796 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
797 	if (ret)
798 		PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
799 }
800 
801 static void
802 virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
803 {
804 	struct virtio_hw *hw = dev->data->dev_private;
805 	struct virtio_pmd_ctrl ctrl;
806 	int dlen[1];
807 	int ret;
808 
809 	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
810 		PMD_INIT_LOG(INFO, "host does not support rx control");
811 		return;
812 	}
813 
814 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
815 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
816 	ctrl.data[0] = 0;
817 	dlen[0] = 1;
818 
819 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
820 	if (ret)
821 		PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
822 }
823 
824 #define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
825 static int
826 virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
827 {
828 	struct virtio_hw *hw = dev->data->dev_private;
829 	uint32_t ether_hdr_len = ETHER_HDR_LEN + VLAN_TAG_LEN +
830 				 hw->vtnet_hdr_size;
831 	uint32_t frame_size = mtu + ether_hdr_len;
832 	uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;
833 
834 	max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);
835 
836 	if (mtu < ETHER_MIN_MTU || frame_size > max_frame_size) {
837 		PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
838 			ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
839 		return -EINVAL;
840 	}
841 	return 0;
842 }
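/*
 * Editorial example of the arithmetic above: with the mergeable-rxbuf header,
 * hw->vtnet_hdr_size is 12, so ether_hdr_len = 14 (ETHER_HDR_LEN) + 4
 * (VLAN_TAG_LEN) + 12 = 30.  An MTU of 1500 then yields frame_size = 1530,
 * which is accepted as long as it does not exceed
 * min(hw->max_mtu + 30, VIRTIO_MAX_RX_PKTLEN).
 */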
843 
844 static int
845 virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
846 {
847 	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
848 	struct virtqueue *vq = rxvq->vq;
849 
850 	virtqueue_enable_intr(vq);
851 	return 0;
852 }
853 
854 static int
855 virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
856 {
857 	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
858 	struct virtqueue *vq = rxvq->vq;
859 
860 	virtqueue_disable_intr(vq);
861 	return 0;
862 }
863 
864 /*
865  * dev_ops for virtio, bare necessities for basic operation
866  */
867 static const struct eth_dev_ops virtio_eth_dev_ops = {
868 	.dev_configure           = virtio_dev_configure,
869 	.dev_start               = virtio_dev_start,
870 	.dev_stop                = virtio_dev_stop,
871 	.dev_close               = virtio_dev_close,
872 	.promiscuous_enable      = virtio_dev_promiscuous_enable,
873 	.promiscuous_disable     = virtio_dev_promiscuous_disable,
874 	.allmulticast_enable     = virtio_dev_allmulticast_enable,
875 	.allmulticast_disable    = virtio_dev_allmulticast_disable,
876 	.mtu_set                 = virtio_mtu_set,
877 	.dev_infos_get           = virtio_dev_info_get,
878 	.stats_get               = virtio_dev_stats_get,
879 	.xstats_get              = virtio_dev_xstats_get,
880 	.xstats_get_names        = virtio_dev_xstats_get_names,
881 	.stats_reset             = virtio_dev_stats_reset,
882 	.xstats_reset            = virtio_dev_stats_reset,
883 	.link_update             = virtio_dev_link_update,
884 	.vlan_offload_set        = virtio_dev_vlan_offload_set,
885 	.rx_queue_setup          = virtio_dev_rx_queue_setup,
886 	.rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
887 	.rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
888 	.rx_queue_release        = virtio_dev_queue_release,
889 	.rx_descriptor_done      = virtio_dev_rx_queue_done,
890 	.tx_queue_setup          = virtio_dev_tx_queue_setup,
891 	.tx_queue_release        = virtio_dev_queue_release,
892 	/* collect stats per queue */
893 	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
894 	.vlan_filter_set         = virtio_vlan_filter_set,
895 	.mac_addr_add            = virtio_mac_addr_add,
896 	.mac_addr_remove         = virtio_mac_addr_remove,
897 	.mac_addr_set            = virtio_mac_addr_set,
898 };
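/*
 * Editorial sketch (application side, not driver code): these dev_ops are
 * reached through the generic ethdev API.  A minimal, hypothetical usage
 * sequence for a virtio port, assuming port_id and mbuf_pool were set up
 * elsewhere and error checking is omitted:
 *
 *	struct rte_eth_conf conf = { 0 };
 *
 *	rte_eth_dev_configure(port_id, 1, 1, &conf);   // -> virtio_dev_configure
 *	rte_eth_rx_queue_setup(port_id, 0, 256,
 *			rte_eth_dev_socket_id(port_id),
 *			NULL, mbuf_pool);              // -> virtio_dev_rx_queue_setup
 *	rte_eth_tx_queue_setup(port_id, 0, 256,
 *			rte_eth_dev_socket_id(port_id),
 *			NULL);                         // -> virtio_dev_tx_queue_setup
 *	rte_eth_dev_start(port_id);                    // -> virtio_dev_start
 *	rte_eth_promiscuous_enable(port_id);           // -> virtio_dev_promiscuous_enable
 */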
899 
900 static void
901 virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
902 {
903 	unsigned i;
904 
905 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
906 		const struct virtnet_tx *txvq = dev->data->tx_queues[i];
907 		if (txvq == NULL)
908 			continue;
909 
910 		stats->opackets += txvq->stats.packets;
911 		stats->obytes += txvq->stats.bytes;
912 		stats->oerrors += txvq->stats.errors;
913 
914 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
915 			stats->q_opackets[i] = txvq->stats.packets;
916 			stats->q_obytes[i] = txvq->stats.bytes;
917 		}
918 	}
919 
920 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
921 		const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
922 		if (rxvq == NULL)
923 			continue;
924 
925 		stats->ipackets += rxvq->stats.packets;
926 		stats->ibytes += rxvq->stats.bytes;
927 		stats->ierrors += rxvq->stats.errors;
928 
929 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
930 			stats->q_ipackets[i] = rxvq->stats.packets;
931 			stats->q_ibytes[i] = rxvq->stats.bytes;
932 		}
933 	}
934 
935 	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
936 }
937 
938 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
939 				       struct rte_eth_xstat_name *xstats_names,
940 				       __rte_unused unsigned limit)
941 {
942 	unsigned i;
943 	unsigned count = 0;
944 	unsigned t;
945 
946 	unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
947 		dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
948 
949 	if (xstats_names != NULL) {
950 		/* Note: limit checked in rte_eth_xstats_get_names() */
951 
952 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
953 			struct virtnet_rx *rxvq = dev->data->rx_queues[i];
954 			if (rxvq == NULL)
955 				continue;
956 			for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
957 				snprintf(xstats_names[count].name,
958 					sizeof(xstats_names[count].name),
959 					"rx_q%u_%s", i,
960 					rte_virtio_rxq_stat_strings[t].name);
961 				count++;
962 			}
963 		}
964 
965 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
966 			struct virtnet_tx *txvq = dev->data->tx_queues[i];
967 			if (txvq == NULL)
968 				continue;
969 			for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
970 				snprintf(xstats_names[count].name,
971 					sizeof(xstats_names[count].name),
972 					"tx_q%u_%s", i,
973 					rte_virtio_txq_stat_strings[t].name);
974 				count++;
975 			}
976 		}
977 		return count;
978 	}
979 	return nstats;
980 }
981 
982 static int
983 virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
984 		      unsigned n)
985 {
986 	unsigned i;
987 	unsigned count = 0;
988 
989 	unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
990 		dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
991 
992 	if (n < nstats)
993 		return nstats;
994 
995 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
996 		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
997 
998 		if (rxvq == NULL)
999 			continue;
1000 
1001 		unsigned t;
1002 
1003 		for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1004 			xstats[count].value = *(uint64_t *)(((char *)rxvq) +
1005 				rte_virtio_rxq_stat_strings[t].offset);
1006 			xstats[count].id = count;
1007 			count++;
1008 		}
1009 	}
1010 
1011 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1012 		struct virtnet_tx *txvq = dev->data->tx_queues[i];
1013 
1014 		if (txvq == NULL)
1015 			continue;
1016 
1017 		unsigned t;
1018 
1019 		for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1020 			xstats[count].value = *(uint64_t *)(((char *)txvq) +
1021 				rte_virtio_txq_stat_strings[t].offset);
1022 			xstats[count].id = count;
1023 			count++;
1024 		}
1025 	}
1026 
1027 	return count;
1028 }
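/*
 * Editorial sketch (application side): the two xstats callbacks above are
 * normally driven with the usual two-pass pattern; a hypothetical example
 * with error checking omitted:
 *
 *	int n = rte_eth_xstats_get_names(port_id, NULL, 0);  // count only
 *	struct rte_eth_xstat_name *names = malloc(n * sizeof(*names));
 *	struct rte_eth_xstat *values = malloc(n * sizeof(*values));
 *
 *	rte_eth_xstats_get_names(port_id, names, n);
 *	rte_eth_xstats_get(port_id, values, n);
 *	// values[i].id indexes into names[]
 */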
1029 
1030 static int
1031 virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1032 {
1033 	virtio_update_stats(dev, stats);
1034 
1035 	return 0;
1036 }
1037 
1038 static void
1039 virtio_dev_stats_reset(struct rte_eth_dev *dev)
1040 {
1041 	unsigned int i;
1042 
1043 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1044 		struct virtnet_tx *txvq = dev->data->tx_queues[i];
1045 		if (txvq == NULL)
1046 			continue;
1047 
1048 		txvq->stats.packets = 0;
1049 		txvq->stats.bytes = 0;
1050 		txvq->stats.errors = 0;
1051 		txvq->stats.multicast = 0;
1052 		txvq->stats.broadcast = 0;
1053 		memset(txvq->stats.size_bins, 0,
1054 		       sizeof(txvq->stats.size_bins[0]) * 8);
1055 	}
1056 
1057 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1058 		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1059 		if (rxvq == NULL)
1060 			continue;
1061 
1062 		rxvq->stats.packets = 0;
1063 		rxvq->stats.bytes = 0;
1064 		rxvq->stats.errors = 0;
1065 		rxvq->stats.multicast = 0;
1066 		rxvq->stats.broadcast = 0;
1067 		memset(rxvq->stats.size_bins, 0,
1068 		       sizeof(rxvq->stats.size_bins[0]) * 8);
1069 	}
1070 }
1071 
1072 static void
1073 virtio_set_hwaddr(struct virtio_hw *hw)
1074 {
1075 	vtpci_write_dev_config(hw,
1076 			offsetof(struct virtio_net_config, mac),
1077 			&hw->mac_addr, ETHER_ADDR_LEN);
1078 }
1079 
1080 static void
1081 virtio_get_hwaddr(struct virtio_hw *hw)
1082 {
1083 	if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) {
1084 		vtpci_read_dev_config(hw,
1085 			offsetof(struct virtio_net_config, mac),
1086 			&hw->mac_addr, ETHER_ADDR_LEN);
1087 	} else {
1088 		eth_random_addr(&hw->mac_addr[0]);
1089 		virtio_set_hwaddr(hw);
1090 	}
1091 }
1092 
1093 static int
1094 virtio_mac_table_set(struct virtio_hw *hw,
1095 		     const struct virtio_net_ctrl_mac *uc,
1096 		     const struct virtio_net_ctrl_mac *mc)
1097 {
1098 	struct virtio_pmd_ctrl ctrl;
1099 	int err, len[2];
1100 
1101 	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1102 		PMD_DRV_LOG(INFO, "host does not support mac table");
1103 		return -1;
1104 	}
1105 
1106 	ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1107 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1108 
1109 	len[0] = uc->entries * ETHER_ADDR_LEN + sizeof(uc->entries);
1110 	memcpy(ctrl.data, uc, len[0]);
1111 
1112 	len[1] = mc->entries * ETHER_ADDR_LEN + sizeof(mc->entries);
1113 	memcpy(ctrl.data + len[0], mc, len[1]);
1114 
1115 	err = virtio_send_command(hw->cvq, &ctrl, len, 2);
1116 	if (err != 0)
1117 		PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
1118 	return err;
1119 }
1120 
1121 static int
1122 virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
1123 		    uint32_t index, uint32_t vmdq __rte_unused)
1124 {
1125 	struct virtio_hw *hw = dev->data->dev_private;
1126 	const struct ether_addr *addrs = dev->data->mac_addrs;
1127 	unsigned int i;
1128 	struct virtio_net_ctrl_mac *uc, *mc;
1129 
1130 	if (index >= VIRTIO_MAX_MAC_ADDRS) {
1131 		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1132 		return -EINVAL;
1133 	}
1134 
1135 	uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
1136 	uc->entries = 0;
1137 	mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
1138 	mc->entries = 0;
1139 
1140 	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1141 		const struct ether_addr *addr
1142 			= (i == index) ? mac_addr : addrs + i;
1143 		struct virtio_net_ctrl_mac *tbl
1144 			= is_multicast_ether_addr(addr) ? mc : uc;
1145 
1146 		memcpy(&tbl->macs[tbl->entries++], addr, ETHER_ADDR_LEN);
1147 	}
1148 
1149 	return virtio_mac_table_set(hw, uc, mc);
1150 }
1151 
1152 static void
1153 virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
1154 {
1155 	struct virtio_hw *hw = dev->data->dev_private;
1156 	struct ether_addr *addrs = dev->data->mac_addrs;
1157 	struct virtio_net_ctrl_mac *uc, *mc;
1158 	unsigned int i;
1159 
1160 	if (index >= VIRTIO_MAX_MAC_ADDRS) {
1161 		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1162 		return;
1163 	}
1164 
1165 	uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
1166 	uc->entries = 0;
1167 	mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
1168 	mc->entries = 0;
1169 
1170 	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1171 		struct virtio_net_ctrl_mac *tbl;
1172 
1173 		if (i == index || is_zero_ether_addr(addrs + i))
1174 			continue;
1175 
1176 		tbl = is_multicast_ether_addr(addrs + i) ? mc : uc;
1177 		memcpy(&tbl->macs[tbl->entries++], addrs + i, ETHER_ADDR_LEN);
1178 	}
1179 
1180 	virtio_mac_table_set(hw, uc, mc);
1181 }
1182 
1183 static int
1184 virtio_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
1185 {
1186 	struct virtio_hw *hw = dev->data->dev_private;
1187 
1188 	memcpy(hw->mac_addr, mac_addr, ETHER_ADDR_LEN);
1189 
1190 	/* Use atomic update if available */
1191 	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1192 		struct virtio_pmd_ctrl ctrl;
1193 		int len = ETHER_ADDR_LEN;
1194 
1195 		ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1196 		ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
1197 
1198 		memcpy(ctrl.data, mac_addr, ETHER_ADDR_LEN);
1199 		return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1200 	}
1201 
1202 	if (!vtpci_with_feature(hw, VIRTIO_NET_F_MAC))
1203 		return -ENOTSUP;
1204 
1205 	virtio_set_hwaddr(hw);
1206 	return 0;
1207 }
1208 
1209 static int
1210 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1211 {
1212 	struct virtio_hw *hw = dev->data->dev_private;
1213 	struct virtio_pmd_ctrl ctrl;
1214 	int len;
1215 
1216 	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
1217 		return -ENOTSUP;
1218 
1219 	ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
1220 	ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
1221 	memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
1222 	len = sizeof(vlan_id);
1223 
1224 	return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1225 }
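/*
 * Editorial sketch (application side): VLAN filtering above is driven through
 * the generic API; a hypothetical example that enables the filter offload and
 * then accepts VLAN 100:
 *
 *	struct rte_eth_conf conf = { 0 };
 *
 *	conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
 *	rte_eth_dev_configure(port_id, 1, 1, &conf);
 *	...
 *	rte_eth_dev_vlan_filter(port_id, 100, 1);   // -> virtio_vlan_filter_set
 */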
1226 
1227 static int
1228 virtio_intr_enable(struct rte_eth_dev *dev)
1229 {
1230 	struct virtio_hw *hw = dev->data->dev_private;
1231 
1232 	if (rte_intr_enable(dev->intr_handle) < 0)
1233 		return -1;
1234 
1235 	if (!hw->virtio_user_dev)
1236 		hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev));
1237 
1238 	return 0;
1239 }
1240 
1241 static int
1242 virtio_intr_disable(struct rte_eth_dev *dev)
1243 {
1244 	struct virtio_hw *hw = dev->data->dev_private;
1245 
1246 	if (rte_intr_disable(dev->intr_handle) < 0)
1247 		return -1;
1248 
1249 	if (!hw->virtio_user_dev)
1250 		hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev));
1251 
1252 	return 0;
1253 }
1254 
1255 static int
1256 virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
1257 {
1258 	uint64_t host_features;
1259 
1260 	/* Prepare guest_features: feature that driver wants to support */
1261 	PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
1262 		req_features);
1263 
1264 	/* Read device(host) feature bits */
1265 	host_features = VTPCI_OPS(hw)->get_features(hw);
1266 	PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
1267 		host_features);
1268 
1269 	/* If supported, ensure MTU value is valid before acknowledging it. */
1270 	if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
1271 		struct virtio_net_config config;
1272 
1273 		vtpci_read_dev_config(hw,
1274 			offsetof(struct virtio_net_config, mtu),
1275 			&config.mtu, sizeof(config.mtu));
1276 
1277 		if (config.mtu < ETHER_MIN_MTU)
1278 			req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
1279 	}
1280 
1281 	/*
1282 	 * Negotiate features: the subset of device feature bits accepted by
1283 	 * the driver is written back as the guest feature bits.
1284 	 */
1285 	hw->guest_features = req_features;
1286 	hw->guest_features = vtpci_negotiate_features(hw, host_features);
1287 	PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
1288 		hw->guest_features);
1289 
1290 	if (hw->modern) {
1291 		if (!vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) {
1292 			PMD_INIT_LOG(ERR,
1293 				"VIRTIO_F_VERSION_1 feature is not enabled.");
1294 			return -1;
1295 		}
1296 		vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1297 		if (!(vtpci_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
1298 			PMD_INIT_LOG(ERR,
1299 				"failed to set FEATURES_OK status!");
1300 			return -1;
1301 		}
1302 	}
1303 
1304 	hw->req_guest_features = req_features;
1305 
1306 	return 0;
1307 }
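/*
 * Editorial note: after negotiation, individual features are tested against
 * hw->guest_features.  vtpci_with_feature() (defined in virtio_pci.h) is, in
 * essence, a single-bit check; a sketch assuming that layout:
 *
 *	static inline int
 *	example_with_feature(struct virtio_hw *hw, uint64_t bit)
 *	{
 *		return (hw->guest_features & (1ULL << bit)) != 0;
 *	}
 */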
1308 
1309 int
1310 virtio_dev_pause(struct rte_eth_dev *dev)
1311 {
1312 	struct virtio_hw *hw = dev->data->dev_private;
1313 
1314 	rte_spinlock_lock(&hw->state_lock);
1315 
1316 	if (hw->started == 0) {
1317 		/* Device has already been stopped. */
1318 		rte_spinlock_unlock(&hw->state_lock);
1319 		return -1;
1320 	}
1321 	hw->started = 0;
1322 	/*
1323 	 * Prevent the worker threads from touching queues to avoid contention;
1324 	 * 1 ms should be enough for any ongoing Tx function to finish.
1325 	 */
1326 	rte_delay_ms(1);
1327 	return 0;
1328 }
1329 
1330 /*
1331  * Recover hw state to let the worker threads continue.
1332  */
1333 void
1334 virtio_dev_resume(struct rte_eth_dev *dev)
1335 {
1336 	struct virtio_hw *hw = dev->data->dev_private;
1337 
1338 	hw->started = 1;
1339 	rte_spinlock_unlock(&hw->state_lock);
1340 }
1341 
1342 /*
1343  * Should be called only after device is paused.
1344  */
1345 int
1346 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
1347 		int nb_pkts)
1348 {
1349 	struct virtio_hw *hw = dev->data->dev_private;
1350 	struct virtnet_tx *txvq = dev->data->tx_queues[0];
1351 	int ret;
1352 
1353 	hw->inject_pkts = tx_pkts;
1354 	ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
1355 	hw->inject_pkts = NULL;
1356 
1357 	return ret;
1358 }
1359 
1360 static void
1361 virtio_notify_peers(struct rte_eth_dev *dev)
1362 {
1363 	struct virtio_hw *hw = dev->data->dev_private;
1364 	struct virtnet_rx *rxvq;
1365 	struct rte_mbuf *rarp_mbuf;
1366 
1367 	if (!dev->data->rx_queues)
1368 		return;
1369 
1370 	rxvq = dev->data->rx_queues[0];
1371 	if (!rxvq)
1372 		return;
1373 
1374 	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
1375 			(struct ether_addr *)hw->mac_addr);
1376 	if (rarp_mbuf == NULL) {
1377 		PMD_DRV_LOG(ERR, "failed to make RARP packet.");
1378 		return;
1379 	}
1380 
1381 	/* If virtio port just stopped, no need to send RARP */
1382 	if (virtio_dev_pause(dev) < 0) {
1383 		rte_pktmbuf_free(rarp_mbuf);
1384 		return;
1385 	}
1386 
1387 	virtio_inject_pkts(dev, &rarp_mbuf, 1);
1388 	virtio_dev_resume(dev);
1389 }
1390 
1391 static void
1392 virtio_ack_link_announce(struct rte_eth_dev *dev)
1393 {
1394 	struct virtio_hw *hw = dev->data->dev_private;
1395 	struct virtio_pmd_ctrl ctrl;
1396 
1397 	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
1398 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
1399 
1400 	virtio_send_command(hw->cvq, &ctrl, NULL, 0);
1401 }
1402 
1403 /*
1404  * Process virtio config changed interrupt. Call the callback
1405  * if link state changed, generate gratuitous RARP packet if
1406  * the status indicates an ANNOUNCE.
1407  */
1408 void
1409 virtio_interrupt_handler(void *param)
1410 {
1411 	struct rte_eth_dev *dev = param;
1412 	struct virtio_hw *hw = dev->data->dev_private;
1413 	uint8_t isr;
1414 	uint16_t status;
1415 
1416 	/* Read interrupt status which clears interrupt */
1417 	isr = vtpci_isr(hw);
1418 	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
1419 
1420 	if (virtio_intr_enable(dev) < 0)
1421 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1422 
1423 	if (isr & VIRTIO_PCI_ISR_CONFIG) {
1424 		if (virtio_dev_link_update(dev, 0) == 0)
1425 			_rte_eth_dev_callback_process(dev,
1426 						      RTE_ETH_EVENT_INTR_LSC,
1427 						      NULL);
1428 
1429 		if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1430 			vtpci_read_dev_config(hw,
1431 				offsetof(struct virtio_net_config, status),
1432 				&status, sizeof(status));
1433 			if (status & VIRTIO_NET_S_ANNOUNCE) {
1434 				virtio_notify_peers(dev);
1435 				if (hw->cvq)
1436 					virtio_ack_link_announce(dev);
1437 			}
1438 		}
1439 	}
1440 }
1441 
1442 /* set rx and tx handlers according to what is supported */
1443 static void
1444 set_rxtx_funcs(struct rte_eth_dev *eth_dev)
1445 {
1446 	struct virtio_hw *hw = eth_dev->data->dev_private;
1447 
1448 	if (vtpci_packed_queue(hw)) {
1449 		PMD_INIT_LOG(INFO,
1450 			"virtio: using packed ring standard Tx path on port %u",
1451 			eth_dev->data->port_id);
1452 		eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
1453 	} else {
1454 		if (hw->use_inorder_tx) {
1455 			PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
1456 				eth_dev->data->port_id);
1457 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
1458 		} else {
1459 			PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
1460 				eth_dev->data->port_id);
1461 			eth_dev->tx_pkt_burst = virtio_xmit_pkts;
1462 		}
1463 	}
1464 
1465 	if (vtpci_packed_queue(hw)) {
1466 		if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1467 			PMD_INIT_LOG(INFO,
1468 				"virtio: using packed ring mergeable buffer Rx path on port %u",
1469 				eth_dev->data->port_id);
1470 			eth_dev->rx_pkt_burst =
1471 				&virtio_recv_mergeable_pkts_packed;
1472 		} else {
1473 			PMD_INIT_LOG(INFO,
1474 				"virtio: using packed ring standard Rx path on port %u",
1475 				eth_dev->data->port_id);
1476 			eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
1477 		}
1478 	} else {
1479 		if (hw->use_simple_rx) {
1480 			PMD_INIT_LOG(INFO, "virtio: using simple Rx path on port %u",
1481 				eth_dev->data->port_id);
1482 			eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
1483 		} else if (hw->use_inorder_rx) {
1484 			PMD_INIT_LOG(INFO,
1485 				"virtio: using inorder Rx path on port %u",
1486 				eth_dev->data->port_id);
1487 			eth_dev->rx_pkt_burst = &virtio_recv_pkts_inorder;
1488 		} else if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1489 			PMD_INIT_LOG(INFO,
1490 				"virtio: using mergeable buffer Rx path on port %u",
1491 				eth_dev->data->port_id);
1492 			eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
1493 		} else {
1494 			PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
1495 				eth_dev->data->port_id);
1496 			eth_dev->rx_pkt_burst = &virtio_recv_pkts;
1497 		}
1498 	}
1499 
1500 }
1501 
1502 /* Only support 1:1 queue/interrupt mapping so far.
1503  * TODO: support n:1 queue/interrupt mapping when there is a limited number of
1504  * interrupt vectors (<N+1).
1505  */
1506 static int
1507 virtio_queues_bind_intr(struct rte_eth_dev *dev)
1508 {
1509 	uint32_t i;
1510 	struct virtio_hw *hw = dev->data->dev_private;
1511 
1512 	PMD_INIT_LOG(INFO, "queue/interrupt binding");
1513 	for (i = 0; i < dev->data->nb_rx_queues; ++i) {
1514 		dev->intr_handle->intr_vec[i] = i + 1;
1515 		if (VTPCI_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
1516 						 VIRTIO_MSI_NO_VECTOR) {
1517 			PMD_DRV_LOG(ERR, "failed to set queue vector");
1518 			return -EBUSY;
1519 		}
1520 	}
1521 
1522 	return 0;
1523 }
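/*
 * Editorial note: with the 1:1 mapping above, MSI-X vector 0 is reserved for
 * config-change/link-state interrupts (set via set_config_irq()) and Rx queue
 * i is bound to vector i + 1.  For example, with 2 Rx queues:
 *
 *	vector 0 -> config change
 *	vector 1 -> Rx queue 0 (virtqueue 0)
 *	vector 2 -> Rx queue 1 (virtqueue 2)
 */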
1524 
1525 static void
1526 virtio_queues_unbind_intr(struct rte_eth_dev *dev)
1527 {
1528 	uint32_t i;
1529 	struct virtio_hw *hw = dev->data->dev_private;
1530 
1531 	PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
1532 	for (i = 0; i < dev->data->nb_rx_queues; ++i)
1533 		VTPCI_OPS(hw)->set_queue_irq(hw,
1534 					     hw->vqs[i * 2],
1535 					     VIRTIO_MSI_NO_VECTOR);
1536 }
1537 
1538 static int
1539 virtio_configure_intr(struct rte_eth_dev *dev)
1540 {
1541 	struct virtio_hw *hw = dev->data->dev_private;
1542 
1543 	if (!rte_intr_cap_multiple(dev->intr_handle)) {
1544 		PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
1545 		return -ENOTSUP;
1546 	}
1547 
1548 	if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1549 		PMD_INIT_LOG(ERR, "Fail to create eventfd");
1550 		return -1;
1551 	}
1552 
1553 	if (!dev->intr_handle->intr_vec) {
1554 		dev->intr_handle->intr_vec =
1555 			rte_zmalloc("intr_vec",
1556 				    hw->max_queue_pairs * sizeof(int), 0);
1557 		if (!dev->intr_handle->intr_vec) {
1558 			PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1559 				     hw->max_queue_pairs);
1560 			return -ENOMEM;
1561 		}
1562 	}
1563 
1564 	/* Re-register callback to update max_intr */
1565 	rte_intr_callback_unregister(dev->intr_handle,
1566 				     virtio_interrupt_handler,
1567 				     dev);
1568 	rte_intr_callback_register(dev->intr_handle,
1569 				   virtio_interrupt_handler,
1570 				   dev);
1571 
1572 	/* DO NOT try to remove this! This function will enable msix, or QEMU
1573 	 * will encounter SIGSEGV when DRIVER_OK is sent.
1574 	 * And for legacy devices, this should be done before queue/vec binding
1575 	 * to change the config size from 20 to 24, or VIRTIO_MSI_QUEUE_VECTOR
1576 	 * (22) will be ignored.
1577 	 */
1578 	if (virtio_intr_enable(dev) < 0) {
1579 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1580 		return -1;
1581 	}
1582 
1583 	if (virtio_queues_bind_intr(dev) < 0) {
1584 		PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1585 		return -1;
1586 	}
1587 
1588 	return 0;
1589 }
1590 
1591 /* reset device and renegotiate features if needed */
1592 static int
1593 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
1594 {
1595 	struct virtio_hw *hw = eth_dev->data->dev_private;
1596 	struct virtio_net_config *config;
1597 	struct virtio_net_config local_config;
1598 	struct rte_pci_device *pci_dev = NULL;
1599 	int ret;
1600 
1601 	/* Reset the device although not necessary at startup */
1602 	vtpci_reset(hw);
1603 
1604 	if (hw->vqs) {
1605 		virtio_dev_free_mbufs(eth_dev);
1606 		virtio_free_queues(hw);
1607 	}
1608 
1609 	/* Tell the host we've noticed this device. */
1610 	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
1611 
1612 	/* Tell the host we know how to drive the device. */
1613 	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
1614 	if (virtio_negotiate_features(hw, req_features) < 0)
1615 		return -1;
1616 
1617 	hw->weak_barriers = !vtpci_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);
1618 
1619 	if (!hw->virtio_user_dev) {
1620 		pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
1621 		rte_eth_copy_pci_info(eth_dev, pci_dev);
1622 	}
1623 
1624 	/* Enable LSC only if the host supports the status feature and MSI-X is in use */
1625 	if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS) &&
1626 	    hw->use_msix != VIRTIO_MSIX_NONE)
1627 		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
1628 	else
1629 		eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
1630 
1631 	/* Setting up rx_header size for the device */
1632 	if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
1633 	    vtpci_with_feature(hw, VIRTIO_F_VERSION_1) ||
1634 	    vtpci_with_feature(hw, VIRTIO_F_RING_PACKED))
1635 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1636 	else
1637 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
1638 
1639 	/* Copy the permanent MAC address to: virtio_hw */
1640 	virtio_get_hwaddr(hw);
1641 	ether_addr_copy((struct ether_addr *) hw->mac_addr,
1642 			&eth_dev->data->mac_addrs[0]);
1643 	PMD_INIT_LOG(DEBUG,
1644 		     "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
1645 		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
1646 		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
1647 
1648 	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
1649 		config = &local_config;
1650 
1651 		vtpci_read_dev_config(hw,
1652 			offsetof(struct virtio_net_config, mac),
1653 			&config->mac, sizeof(config->mac));
1654 
1655 		if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1656 			vtpci_read_dev_config(hw,
1657 				offsetof(struct virtio_net_config, status),
1658 				&config->status, sizeof(config->status));
1659 		} else {
1660 			PMD_INIT_LOG(DEBUG,
1661 				     "VIRTIO_NET_F_STATUS is not supported");
1662 			config->status = 0;
1663 		}
1664 
1665 		if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
1666 			vtpci_read_dev_config(hw,
1667 				offsetof(struct virtio_net_config, max_virtqueue_pairs),
1668 				&config->max_virtqueue_pairs,
1669 				sizeof(config->max_virtqueue_pairs));
1670 		} else {
1671 			PMD_INIT_LOG(DEBUG,
1672 				     "VIRTIO_NET_F_MQ is not supported");
1673 			config->max_virtqueue_pairs = 1;
1674 		}
1675 
1676 		hw->max_queue_pairs = config->max_virtqueue_pairs;
1677 
1678 		if (vtpci_with_feature(hw, VIRTIO_NET_F_MTU)) {
1679 			vtpci_read_dev_config(hw,
1680 				offsetof(struct virtio_net_config, mtu),
1681 				&config->mtu,
1682 				sizeof(config->mtu));
1683 
1684 			/*
1685 			 * MTU value has already been checked at negotiation
1686 			 * time, but check again in case it has changed since
1687 			 * then, which should not happen.
1688 			 */
1689 			if (config->mtu < ETHER_MIN_MTU) {
1690 				PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
1691 						config->mtu);
1692 				return -1;
1693 			}
1694 
1695 			hw->max_mtu = config->mtu;
1696 			/* Set initial MTU to maximum one supported by vhost */
1697 			eth_dev->data->mtu = config->mtu;
1698 
1699 		} else {
1700 			hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN -
1701 				VLAN_TAG_LEN - hw->vtnet_hdr_size;
1702 		}
1703 
1704 		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
1705 				config->max_virtqueue_pairs);
1706 		PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
1707 		PMD_INIT_LOG(DEBUG,
1708 				"PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
1709 				config->mac[0], config->mac[1],
1710 				config->mac[2], config->mac[3],
1711 				config->mac[4], config->mac[5]);
1712 	} else {
1713 		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
1714 		hw->max_queue_pairs = 1;
1715 		hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN -
1716 			VLAN_TAG_LEN - hw->vtnet_hdr_size;
1717 	}
1718 
1719 	ret = virtio_alloc_queues(eth_dev);
1720 	if (ret < 0)
1721 		return ret;
1722 
1723 	if (eth_dev->data->dev_conf.intr_conf.rxq) {
1724 		if (virtio_configure_intr(eth_dev) < 0) {
1725 			PMD_INIT_LOG(ERR, "failed to configure interrupt");
1726 			return -1;
1727 		}
1728 	}
1729 
1730 	vtpci_reinit_complete(hw);
1731 
1732 	if (pci_dev)
1733 		PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
1734 			eth_dev->data->port_id, pci_dev->id.vendor_id,
1735 			pci_dev->id.device_id);
1736 
1737 	return 0;
1738 }
1739 
1740 /*
1741  * Remap the PCI device again (IO port map for a legacy device and
1742  * memory map for a modern device), so that the secondary process
1743  * gets the PCI resources initialized correctly.
1744  */
1745 static int
1746 virtio_remap_pci(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
1747 {
1748 	if (hw->modern) {
1749 		/*
1750 		 * We don't have to re-parse the PCI config space, since
1751 		 * rte_pci_map_device() makes sure the mapped address
1752 		 * in secondary process would equal to the one mapped in
1753 		 * the primary process: error will be returned if that
1754 		 * requirement is not met.
1755 		 * That said, we can simply reuse all the capability pointers
1756 		 * (such as dev_cfg, common_cfg, etc.) parsed by the
1757 		 * primary process, since they are stored in shared memory.
1758 		 * primary process, which is stored in shared memory.
1759 		 */
1760 		if (rte_pci_map_device(pci_dev)) {
1761 			PMD_INIT_LOG(DEBUG, "failed to map pci device!");
1762 			return -1;
1763 		}
1764 	} else {
1765 		if (rte_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0)
1766 			return -1;
1767 	}
1768 
1769 	return 0;
1770 }
1771 
1772 static void
1773 virtio_set_vtpci_ops(struct virtio_hw *hw)
1774 {
1775 #ifdef RTE_VIRTIO_USER
1776 	if (hw->virtio_user_dev)
1777 		VTPCI_OPS(hw) = &virtio_user_ops;
1778 	else
1779 #endif
1780 	if (hw->modern)
1781 		VTPCI_OPS(hw) = &modern_ops;
1782 	else
1783 		VTPCI_OPS(hw) = &legacy_ops;
1784 }
1785 
1786 /*
1787  * This function is based on the probe() function in virtio_pci.c.
1788  * It returns 0 on success.
1789  */
1790 int
1791 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
1792 {
1793 	struct virtio_hw *hw = eth_dev->data->dev_private;
1794 	int ret;
1795 
1796 	RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf));
1797 
1798 	eth_dev->dev_ops = &virtio_eth_dev_ops;
1799 
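	/*
	 * In a secondary process the device has already been initialized by
	 * the primary process; only remap the PCI resources (for PCI devices)
	 * and restore the per-process function pointers.
	 */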
1800 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1801 		if (!hw->virtio_user_dev) {
1802 			ret = virtio_remap_pci(RTE_ETH_DEV_TO_PCI(eth_dev), hw);
1803 			if (ret)
1804 				return ret;
1805 		}
1806 
1807 		virtio_set_vtpci_ops(hw);
1808 		set_rxtx_funcs(eth_dev);
1809 
1810 		return 0;
1811 	}
1812 
1813 	/* Allocate memory for storing MAC addresses */
1814 	eth_dev->data->mac_addrs = rte_zmalloc("virtio", VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0);
1815 	if (eth_dev->data->mac_addrs == NULL) {
1816 		PMD_INIT_LOG(ERR,
1817 			"Failed to allocate %d bytes needed to store MAC addresses",
1818 			VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN);
1819 		return -ENOMEM;
1820 	}
1821 
1822 	hw->port_id = eth_dev->data->port_id;
1823 	/* For the virtio_user case, hw->virtio_user_dev is populated by
1824 	 * virtio_user_eth_dev_alloc() before eth_virtio_dev_init() is called.
1825 	 */
1826 	if (!hw->virtio_user_dev) {
1827 		ret = vtpci_init(RTE_ETH_DEV_TO_PCI(eth_dev), hw);
1828 		if (ret)
1829 			goto out;
1830 	}
1831 
1832 	/* reset device and negotiate default features */
1833 	ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
1834 	if (ret < 0)
1835 		goto out;
1836 
1837 	return 0;
1838 
1839 out:
1840 	rte_free(eth_dev->data->mac_addrs);
1841 	return ret;
1842 }
1843 
1844 static int
1845 eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
1846 {
1847 	PMD_INIT_FUNC_TRACE();
1848 
1849 	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
1850 		return 0;
1851 
1852 	virtio_dev_stop(eth_dev);
1853 	virtio_dev_close(eth_dev);
1854 
1855 	eth_dev->dev_ops = NULL;
1856 	eth_dev->tx_pkt_burst = NULL;
1857 	eth_dev->rx_pkt_burst = NULL;
1858 
1859 	if (eth_dev->device)
1860 		rte_pci_unmap_device(RTE_ETH_DEV_TO_PCI(eth_dev));
1861 
1862 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
1863 
1864 	return 0;
1865 }
1866 
1867 static int vdpa_check_handler(__rte_unused const char *key,
1868 		const char *value, __rte_unused void *opaque)
1869 {
1870 	if (strcmp(value, "1"))
1871 		return -1;
1872 
1873 	return 0;
1874 }
1875 
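/*
 * Return 1 when the device arguments contain "vdpa=1", meaning the device
 * should be driven in vDPA mode by another driver rather than by this PMD
 * (for instance a devargs string such as "0000:00:04.0,vdpa=1"; the PCI
 * address here is only an illustrative example).
 */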
1876 static int
1877 vdpa_mode_selected(struct rte_devargs *devargs)
1878 {
1879 	struct rte_kvargs *kvlist;
1880 	const char *key = "vdpa";
1881 	int ret = 0;
1882 
1883 	if (devargs == NULL)
1884 		return 0;
1885 
1886 	kvlist = rte_kvargs_parse(devargs->args, NULL);
1887 	if (kvlist == NULL)
1888 		return 0;
1889 
1890 	if (!rte_kvargs_count(kvlist, key))
1891 		goto exit;
1892 
1893 	/* vdpa mode selected when there's a key-value pair: vdpa=1 */
1894 	if (rte_kvargs_process(kvlist, key,
1895 				vdpa_check_handler, NULL) < 0) {
1896 		goto exit;
1897 	}
1898 	ret = 1;
1899 
1900 exit:
1901 	rte_kvargs_free(kvlist);
1902 	return ret;
1903 }
1904 
1905 static int eth_virtio_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1906 	struct rte_pci_device *pci_dev)
1907 {
1908 	if (rte_eal_iopl_init() != 0) {
1909 		PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD");
1910 		return 1;
1911 	}
1912 
1913 	/* virtio pmd skips probe if device needs to work in vdpa mode */
1914 	if (vdpa_mode_selected(pci_dev->device.devargs))
1915 		return 1;
1916 
1917 	return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct virtio_hw),
1918 		eth_virtio_dev_init);
1919 }
1920 
1921 static int eth_virtio_pci_remove(struct rte_pci_device *pci_dev)
1922 {
1923 	return rte_eth_dev_pci_generic_remove(pci_dev, eth_virtio_dev_uninit);
1924 }
1925 
1926 static struct rte_pci_driver rte_virtio_pmd = {
1927 	.driver = {
1928 		.name = "net_virtio",
1929 	},
1930 	.id_table = pci_id_virtio_map,
1931 	.drv_flags = 0,
1932 	.probe = eth_virtio_pci_probe,
1933 	.remove = eth_virtio_pci_remove,
1934 };
1935 
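/*
 * Constructor: request IO port privileges (legacy virtio devices are
 * accessed through IO ports) and register the PCI driver.
 */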
1936 RTE_INIT(rte_virtio_pmd_init)
1937 {
1938 	rte_eal_iopl_init();
1939 	rte_pci_register(&rte_virtio_pmd);
1940 }
1941 
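/* True when any guest (Rx) offload feature has been negotiated. */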
1942 static bool
1943 rx_offload_enabled(struct virtio_hw *hw)
1944 {
1945 	return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
1946 		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
1947 		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
1948 }
1949 
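/* True when any host (Tx) offload feature has been negotiated. */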
1950 static bool
1951 tx_offload_enabled(struct virtio_hw *hw)
1952 {
1953 	return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
1954 		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
1955 		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
1956 }
1957 
1958 /*
1959  * Configure virtio device
1960  * It returns 0 on success.
1961  */
1962 static int
1963 virtio_dev_configure(struct rte_eth_dev *dev)
1964 {
1965 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
1966 	const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
1967 	struct virtio_hw *hw = dev->data->dev_private;
1968 	uint64_t rx_offloads = rxmode->offloads;
1969 	uint64_t tx_offloads = txmode->offloads;
1970 	uint64_t req_features;
1971 	int ret;
1972 
1973 	PMD_INIT_LOG(DEBUG, "configure");
1974 	req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
1975 
1976 	if (dev->data->dev_conf.intr_conf.rxq) {
1977 		ret = virtio_init_device(dev, hw->req_guest_features);
1978 		if (ret < 0)
1979 			return ret;
1980 	}
1981 
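	/* Translate the requested Rx/Tx offloads into virtio feature bits. */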
1982 	if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
1983 			   DEV_RX_OFFLOAD_TCP_CKSUM))
1984 		req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
1985 
1986 	if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO)
1987 		req_features |=
1988 			(1ULL << VIRTIO_NET_F_GUEST_TSO4) |
1989 			(1ULL << VIRTIO_NET_F_GUEST_TSO6);
1990 
1991 	if (tx_offloads & (DEV_TX_OFFLOAD_UDP_CKSUM |
1992 			   DEV_TX_OFFLOAD_TCP_CKSUM))
1993 		req_features |= (1ULL << VIRTIO_NET_F_CSUM);
1994 
1995 	if (tx_offloads & DEV_TX_OFFLOAD_TCP_TSO)
1996 		req_features |=
1997 			(1ULL << VIRTIO_NET_F_HOST_TSO4) |
1998 			(1ULL << VIRTIO_NET_F_HOST_TSO6);
1999 
2000 	/* if the requested features changed, reinit the device */
2001 	if (req_features != hw->req_guest_features) {
2002 		ret = virtio_init_device(dev, req_features);
2003 		if (ret < 0)
2004 			return ret;
2005 	}
2006 
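	/*
	 * Check that the features negotiated above actually provide the
	 * requested offloads.
	 */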
2007 	if ((rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2008 			    DEV_RX_OFFLOAD_TCP_CKSUM)) &&
2009 		!vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
2010 		PMD_DRV_LOG(ERR,
2011 			"rx checksum not available on this host");
2012 		return -ENOTSUP;
2013 	}
2014 
2015 	if ((rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) &&
2016 		(!vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2017 		 !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
2018 		PMD_DRV_LOG(ERR,
2019 			"Large Receive Offload not available on this host");
2020 		return -ENOTSUP;
2021 	}
2022 
2023 	/* start control queue */
2024 	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
2025 		virtio_dev_cq_start(dev);
2026 
2027 	if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
2028 		hw->vlan_strip = 1;
2029 
2030 	if ((rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER)
2031 	    && !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2032 		PMD_DRV_LOG(ERR,
2033 			    "vlan filtering not available on this host");
2034 		return -ENOTSUP;
2035 	}
2036 
2037 	hw->has_tx_offload = tx_offload_enabled(hw);
2038 	hw->has_rx_offload = rx_offload_enabled(hw);
2039 
2040 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2041 		/* Enable vector (0) for Link State Interrupt */
2042 		if (VTPCI_OPS(hw)->set_config_irq(hw, 0) ==
2043 				VIRTIO_MSI_NO_VECTOR) {
2044 			PMD_DRV_LOG(ERR, "failed to set config vector");
2045 			return -EBUSY;
2046 		}
2047 
2048 	rte_spinlock_init(&hw->state_lock);
2049 
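	/*
	 * Select the datapath variants: default to the simple (vectorized) Rx
	 * path and fall back whenever in-order or packed virtqueues, mergeable
	 * Rx buffers, a missing NEON extension on Arm, or any requested Rx
	 * offload make it unusable.
	 */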
2050 	hw->use_simple_rx = 1;
2051 
2052 	if (vtpci_with_feature(hw, VIRTIO_F_IN_ORDER)) {
2053 		hw->use_inorder_tx = 1;
2054 		hw->use_inorder_rx = 1;
2055 		hw->use_simple_rx = 0;
2056 	}
2057 
2058 	if (vtpci_packed_queue(hw)) {
2059 		hw->use_simple_rx = 0;
2060 		hw->use_inorder_rx = 0;
2061 		hw->use_inorder_tx = 0;
2062 	}
2063 
2064 #if defined RTE_ARCH_ARM64 || defined RTE_ARCH_ARM
2065 	if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
2066 		hw->use_simple_rx = 0;
2067 	}
2068 #endif
2069 	if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2070 		hw->use_simple_rx = 0;
2071 	}
2072 
2073 	if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2074 			   DEV_RX_OFFLOAD_TCP_CKSUM |
2075 			   DEV_RX_OFFLOAD_TCP_LRO |
2076 			   DEV_RX_OFFLOAD_VLAN_STRIP))
2077 		hw->use_simple_rx = 0;
2078 
2079 	hw->opened = true;
2080 
2081 	return 0;
2082 }
2083 
2084 
2085 static int
2086 virtio_dev_start(struct rte_eth_dev *dev)
2087 {
2088 	uint16_t nb_queues, i;
2089 	struct virtnet_rx *rxvq;
2090 	struct virtnet_tx *txvq __rte_unused;
2091 	struct virtio_hw *hw = dev->data->dev_private;
2092 	int ret;
2093 
2094 	/* Finish the initialization of the queues */
2095 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2096 		ret = virtio_dev_rx_queue_setup_finish(dev, i);
2097 		if (ret < 0)
2098 			return ret;
2099 	}
2100 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2101 		ret = virtio_dev_tx_queue_setup_finish(dev, i);
2102 		if (ret < 0)
2103 			return ret;
2104 	}
2105 
2106 	/* check if lsc interrupt feature is enabled */
2107 	if (dev->data->dev_conf.intr_conf.lsc) {
2108 		if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2109 			PMD_DRV_LOG(ERR, "link status not supported by host");
2110 			return -ENOTSUP;
2111 		}
2112 	}
2113 
2114 	/* Enable uio/vfio intr/eventfd mapping: although we already did that
2115 	 * in device configure, it could be unmapped when the device is
2116 	 * stopped.
2117 	 */
2118 	if (dev->data->dev_conf.intr_conf.lsc ||
2119 	    dev->data->dev_conf.intr_conf.rxq) {
2120 		virtio_intr_disable(dev);
2121 
2122 		/* Setup interrupt callback  */
2123 		if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2124 			rte_intr_callback_register(dev->intr_handle,
2125 						   virtio_interrupt_handler,
2126 						   dev);
2127 
2128 		if (virtio_intr_enable(dev) < 0) {
2129 			PMD_DRV_LOG(ERR, "interrupt enable failed");
2130 			return -EIO;
2131 		}
2132 	}
2133 
2134 	/* Notify the backend.
2135 	 * Otherwise the tap backend might already have stopped its queue due to
2136 	 * fullness, and the vhost backend would have no chance to be woken up.
2137 	 */
2138 	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2139 	if (hw->max_queue_pairs > 1) {
2140 		if (virtio_set_multiple_queues(dev, nb_queues) != 0)
2141 			return -EINVAL;
2142 	}
2143 
2144 	PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
2145 
2146 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2147 		rxvq = dev->data->rx_queues[i];
2148 		/* Flush the old packets */
2149 		virtqueue_rxvq_flush(rxvq->vq);
2150 		virtqueue_notify(rxvq->vq);
2151 	}
2152 
2153 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2154 		txvq = dev->data->tx_queues[i];
2155 		virtqueue_notify(txvq->vq);
2156 	}
2157 
2158 	PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
2159 
2160 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2161 		rxvq = dev->data->rx_queues[i];
2162 		VIRTQUEUE_DUMP(rxvq->vq);
2163 	}
2164 
2165 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2166 		txvq = dev->data->tx_queues[i];
2167 		VIRTQUEUE_DUMP(txvq->vq);
2168 	}
2169 
2170 	set_rxtx_funcs(dev);
2171 	hw->started = true;
2172 
2173 	/* Initialize Link state */
2174 	virtio_dev_link_update(dev, 0);
2175 
2176 	return 0;
2177 }
2178 
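/* Detach and free the mbufs still held by the Rx/Tx virtqueues. */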
2179 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
2180 {
2181 	struct virtio_hw *hw = dev->data->dev_private;
2182 	uint16_t nr_vq = virtio_get_nr_vq(hw);
2183 	const char *type __rte_unused;
2184 	unsigned int i, mbuf_num = 0;
2185 	struct virtqueue *vq;
2186 	struct rte_mbuf *buf;
2187 	int queue_type;
2188 
2189 	if (hw->vqs == NULL)
2190 		return;
2191 
2192 	for (i = 0; i < nr_vq; i++) {
2193 		vq = hw->vqs[i];
2194 		if (!vq)
2195 			continue;
2196 
2197 		queue_type = virtio_get_queue_type(hw, i);
2198 		if (queue_type == VTNET_RQ)
2199 			type = "rxq";
2200 		else if (queue_type == VTNET_TQ)
2201 			type = "txq";
2202 		else
2203 			continue;
2204 
2205 		PMD_INIT_LOG(DEBUG,
2206 			"Before freeing %s[%d] used and unused buf",
2207 			type, i);
2208 		VIRTQUEUE_DUMP(vq);
2209 
2210 		while ((buf = virtqueue_detach_unused(vq)) != NULL) {
2211 			rte_pktmbuf_free(buf);
2212 			mbuf_num++;
2213 		}
2214 
2215 		PMD_INIT_LOG(DEBUG,
2216 			"After freeing %s[%d] used and unused buf",
2217 			type, i);
2218 		VIRTQUEUE_DUMP(vq);
2219 	}
2220 
2221 	PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
2222 }
2223 
2224 /*
2225  * Stop device: disable interrupt and mark link down
2226  */
2227 static void
2228 virtio_dev_stop(struct rte_eth_dev *dev)
2229 {
2230 	struct virtio_hw *hw = dev->data->dev_private;
2231 	struct rte_eth_link link;
2232 	struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
2233 
2234 	PMD_INIT_LOG(DEBUG, "stop");
2235 
2236 	rte_spinlock_lock(&hw->state_lock);
2237 	if (!hw->started)
2238 		goto out_unlock;
2239 	hw->started = false;
2240 
2241 	if (intr_conf->lsc || intr_conf->rxq) {
2242 		virtio_intr_disable(dev);
2243 
2244 		/* Reset interrupt callback  */
2245 		if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
2246 			rte_intr_callback_unregister(dev->intr_handle,
2247 						     virtio_interrupt_handler,
2248 						     dev);
2249 		}
2250 	}
2251 
2252 	memset(&link, 0, sizeof(link));
2253 	rte_eth_linkstatus_set(dev, &link);
2254 out_unlock:
2255 	rte_spinlock_unlock(&hw->state_lock);
2256 }
2257 
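/*
 * Report link status: read it from the device config space when
 * VIRTIO_NET_F_STATUS has been negotiated, otherwise assume the link is up
 * whenever the port has been started.
 */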
2258 static int
2259 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
2260 {
2261 	struct rte_eth_link link;
2262 	uint16_t status;
2263 	struct virtio_hw *hw = dev->data->dev_private;
2264 
2265 	memset(&link, 0, sizeof(link));
2266 	link.link_duplex = ETH_LINK_FULL_DUPLEX;
2267 	link.link_speed  = ETH_SPEED_NUM_10G;
2268 	link.link_autoneg = ETH_LINK_FIXED;
2269 
2270 	if (!hw->started) {
2271 		link.link_status = ETH_LINK_DOWN;
2272 	} else if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2273 		PMD_INIT_LOG(DEBUG, "Get link status from hw");
2274 		vtpci_read_dev_config(hw,
2275 				offsetof(struct virtio_net_config, status),
2276 				&status, sizeof(status));
2277 		if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2278 			link.link_status = ETH_LINK_DOWN;
2279 			PMD_INIT_LOG(DEBUG, "Port %d is down",
2280 				     dev->data->port_id);
2281 		} else {
2282 			link.link_status = ETH_LINK_UP;
2283 			PMD_INIT_LOG(DEBUG, "Port %d is up",
2284 				     dev->data->port_id);
2285 		}
2286 	} else {
2287 		link.link_status = ETH_LINK_UP;
2288 	}
2289 
2290 	return rte_eth_linkstatus_set(dev, &link);
2291 }
2292 
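/*
 * Apply VLAN offload settings. VLAN filtering requires
 * VIRTIO_NET_F_CTRL_VLAN support from the host; VLAN stripping only
 * updates the hw->vlan_strip flag consumed by the Rx path.
 */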
2293 static int
2294 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2295 {
2296 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2297 	struct virtio_hw *hw = dev->data->dev_private;
2298 	uint64_t offloads = rxmode->offloads;
2299 
2300 	if (mask & ETH_VLAN_FILTER_MASK) {
2301 		if ((offloads & DEV_RX_OFFLOAD_VLAN_FILTER) &&
2302 				!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2303 
2304 			PMD_DRV_LOG(NOTICE,
2305 				"vlan filtering not available on this host");
2306 
2307 			return -ENOTSUP;
2308 		}
2309 	}
2310 
2311 	if (mask & ETH_VLAN_STRIP_MASK)
2312 		hw->vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
2313 
2314 	return 0;
2315 }
2316 
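/*
 * Report device information. The offload capabilities are derived from the
 * feature bits offered by the host (get_features()), not from the currently
 * negotiated ones.
 */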
2317 static void
2318 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2319 {
2320 	uint64_t tso_mask, host_features;
2321 	struct virtio_hw *hw = dev->data->dev_private;
2322 
2323 	dev_info->speed_capa = ETH_LINK_SPEED_10G; /* fake value */
2324 
2325 	dev_info->max_rx_queues =
2326 		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
2327 	dev_info->max_tx_queues =
2328 		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
2329 	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
2330 	dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
2331 	dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
2332 
2333 	host_features = VTPCI_OPS(hw)->get_features(hw);
2334 	dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2335 	if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
2336 		dev_info->rx_offload_capa |=
2337 			DEV_RX_OFFLOAD_TCP_CKSUM |
2338 			DEV_RX_OFFLOAD_UDP_CKSUM;
2339 	}
2340 	if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
2341 		dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_VLAN_FILTER;
2342 	tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2343 		(1ULL << VIRTIO_NET_F_GUEST_TSO6);
2344 	if ((host_features & tso_mask) == tso_mask)
2345 		dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;
2346 
2347 	dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
2348 				    DEV_TX_OFFLOAD_VLAN_INSERT;
2349 	if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
2350 		dev_info->tx_offload_capa |=
2351 			DEV_TX_OFFLOAD_UDP_CKSUM |
2352 			DEV_TX_OFFLOAD_TCP_CKSUM;
2353 	}
2354 	tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2355 		(1ULL << VIRTIO_NET_F_HOST_TSO6);
2356 	if ((host_features & tso_mask) == tso_mask)
2357 		dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
2358 }
2359 
2360 /*
2361  * No-op stub; it only exists so that testpmd can collect per-queue stats.
2362  */
2363 static int
2364 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
2365 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
2366 __rte_unused uint8_t is_rx)
2367 {
2368 	return 0;
2369 }
2370 
2371 RTE_PMD_EXPORT_NAME(net_virtio, __COUNTER__);
2372 RTE_PMD_REGISTER_PCI_TABLE(net_virtio, pci_id_virtio_map);
2373 RTE_PMD_REGISTER_KMOD_DEP(net_virtio, "* igb_uio | uio_pci_generic | vfio-pci");
2374 
2375 RTE_INIT(virtio_init_log)
2376 {
2377 	virtio_logtype_init = rte_log_register("pmd.net.virtio.init");
2378 	if (virtio_logtype_init >= 0)
2379 		rte_log_set_level(virtio_logtype_init, RTE_LOG_NOTICE);
2380 	virtio_logtype_driver = rte_log_register("pmd.net.virtio.driver");
2381 	if (virtio_logtype_driver >= 0)
2382 		rte_log_set_level(virtio_logtype_driver, RTE_LOG_NOTICE);
2383 }
2384