xref: /dpdk/drivers/net/virtio/virtio_ethdev.c (revision c9902a15bd005b6d4fe072cf7b60fe4ee679155f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <unistd.h>
10 
11 #include <ethdev_driver.h>
12 #include <rte_memcpy.h>
13 #include <rte_string_fns.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_branch_prediction.h>
17 #include <rte_ether.h>
18 #include <rte_ip.h>
19 #include <rte_arp.h>
20 #include <rte_common.h>
21 #include <rte_errno.h>
22 #include <rte_cpuflags.h>
23 #include <rte_vect.h>
24 #include <rte_memory.h>
25 #include <rte_eal_paging.h>
26 #include <rte_eal.h>
27 #include <rte_dev.h>
28 #include <rte_cycles.h>
29 #include <rte_kvargs.h>
30 
31 #include "virtio_ethdev.h"
32 #include "virtio.h"
33 #include "virtio_logs.h"
34 #include "virtqueue.h"
35 #include "virtio_rxtx.h"
36 #include "virtio_rxtx_simple.h"
37 #include "virtio_user/virtio_user_dev.h"
38 
39 static int  virtio_dev_configure(struct rte_eth_dev *dev);
40 static int  virtio_dev_start(struct rte_eth_dev *dev);
41 static int virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
42 static int virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
43 static int virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
44 static int virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
45 static uint32_t virtio_dev_speed_capa_get(uint32_t speed);
46 static int virtio_dev_devargs_parse(struct rte_devargs *devargs,
47 	uint32_t *speed,
48 	int *vectorized);
49 static int virtio_dev_info_get(struct rte_eth_dev *dev,
50 				struct rte_eth_dev_info *dev_info);
51 static int virtio_dev_link_update(struct rte_eth_dev *dev,
52 	int wait_to_complete);
53 static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
54 
55 static void virtio_set_hwaddr(struct virtio_hw *hw);
56 static void virtio_get_hwaddr(struct virtio_hw *hw);
57 
58 static int virtio_dev_stats_get(struct rte_eth_dev *dev,
59 				 struct rte_eth_stats *stats);
60 static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
61 				 struct rte_eth_xstat *xstats, unsigned n);
62 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
63 				       struct rte_eth_xstat_name *xstats_names,
64 				       unsigned limit);
65 static int virtio_dev_stats_reset(struct rte_eth_dev *dev);
66 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
67 static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
68 				uint16_t vlan_id, int on);
69 static int virtio_mac_addr_add(struct rte_eth_dev *dev,
70 				struct rte_ether_addr *mac_addr,
71 				uint32_t index, uint32_t vmdq);
72 static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
73 static int virtio_mac_addr_set(struct rte_eth_dev *dev,
74 				struct rte_ether_addr *mac_addr);
75 
76 static int virtio_intr_disable(struct rte_eth_dev *dev);
77 
78 static int virtio_dev_queue_stats_mapping_set(
79 	struct rte_eth_dev *eth_dev,
80 	uint16_t queue_id,
81 	uint8_t stat_idx,
82 	uint8_t is_rx);
83 
84 static void virtio_notify_peers(struct rte_eth_dev *dev);
85 static void virtio_ack_link_announce(struct rte_eth_dev *dev);
86 
87 struct rte_virtio_xstats_name_off {
88 	char name[RTE_ETH_XSTATS_NAME_SIZE];
89 	unsigned offset;
90 };
91 
92 /* [rt]x_qX_ is prepended to the name string here */
93 static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
94 	{"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
95 	{"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
96 	{"errors",                 offsetof(struct virtnet_rx, stats.errors)},
97 	{"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
98 	{"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
99 	{"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
100 	{"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
101 	{"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
102 	{"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
103 	{"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
104 	{"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
105 	{"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
106 	{"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
107 };
108 
109 /* [rt]x_qX_ is prepended to the name string here */
110 static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
111 	{"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
112 	{"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
113 	{"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
114 	{"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
115 	{"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
116 	{"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
117 	{"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
118 	{"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
119 	{"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
120 	{"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
121 	{"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
122 	{"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
123 };
124 
125 #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
126 			    sizeof(rte_virtio_rxq_stat_strings[0]))
127 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
128 			    sizeof(rte_virtio_txq_stat_strings[0]))
129 
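/* Per-port internal state (transport/bus ops), looked up by port id via the VIRTIO_OPS() macro. */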
130 struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
131 
132 static struct virtio_pmd_ctrl *
133 virtio_send_command_packed(struct virtnet_ctl *cvq,
134 			   struct virtio_pmd_ctrl *ctrl,
135 			   int *dlen, int pkt_num)
136 {
137 	struct virtqueue *vq = virtnet_cq_to_vq(cvq);
138 	int head;
139 	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
140 	struct virtio_pmd_ctrl *result;
141 	uint16_t flags;
142 	int sum = 0;
143 	int nb_descs = 0;
144 	int k;
145 
146 	/*
147 	 * The layout is enforced by the QEMU code:
148 	 * one device-readable descriptor for the header,
149 	 * at least one device-readable descriptor per data argument,
150 	 * and one device-writable descriptor for the ACK status.
151 	 */
152 	head = vq->vq_avail_idx;
153 	flags = vq->vq_packed.cached_flags;
154 	desc[head].addr = cvq->virtio_net_hdr_mem;
155 	desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
156 	vq->vq_free_cnt--;
157 	nb_descs++;
158 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
159 		vq->vq_avail_idx -= vq->vq_nentries;
160 		vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
161 	}
162 
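	/* One device-readable descriptor per data argument, placed after the
	 * header and status in the control header memzone.
	 */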
163 	for (k = 0; k < pkt_num; k++) {
164 		desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
165 			+ sizeof(struct virtio_net_ctrl_hdr)
166 			+ sizeof(ctrl->status) + sizeof(uint8_t) * sum;
167 		desc[vq->vq_avail_idx].len = dlen[k];
168 		desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
169 			vq->vq_packed.cached_flags;
170 		sum += dlen[k];
171 		vq->vq_free_cnt--;
172 		nb_descs++;
173 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
174 			vq->vq_avail_idx -= vq->vq_nentries;
175 			vq->vq_packed.cached_flags ^=
176 				VRING_PACKED_DESC_F_AVAIL_USED;
177 		}
178 	}
179 
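	/* The final descriptor is device-writable and receives the ACK status. */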
180 	desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
181 		+ sizeof(struct virtio_net_ctrl_hdr);
182 	desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
183 	desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
184 		vq->vq_packed.cached_flags;
185 	vq->vq_free_cnt--;
186 	nb_descs++;
187 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
188 		vq->vq_avail_idx -= vq->vq_nentries;
189 		vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
190 	}
191 
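	/* Write the head descriptor's flags last (store-release when weak
	 * barriers are used) so the device never sees a partially built chain.
	 */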
192 	virtqueue_store_flags_packed(&desc[head], VRING_DESC_F_NEXT | flags,
193 			vq->hw->weak_barriers);
194 
195 	virtio_wmb(vq->hw->weak_barriers);
196 	virtqueue_notify(vq);
197 
198 	/* wait for used desc in virtqueue
199 	 * desc_is_used has a load-acquire or rte_io_rmb inside
200 	 */
201 	while (!desc_is_used(&desc[head], vq))
202 		usleep(100);
203 
204 	/* now get used descriptors */
205 	vq->vq_free_cnt += nb_descs;
206 	vq->vq_used_cons_idx += nb_descs;
207 	if (vq->vq_used_cons_idx >= vq->vq_nentries) {
208 		vq->vq_used_cons_idx -= vq->vq_nentries;
209 		vq->vq_packed.used_wrap_counter ^= 1;
210 	}
211 
212 	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
213 			"vq->vq_avail_idx=%d\n"
214 			"vq->vq_used_cons_idx=%d\n"
215 			"vq->vq_packed.cached_flags=0x%x\n"
216 			"vq->vq_packed.used_wrap_counter=%d\n",
217 			vq->vq_free_cnt,
218 			vq->vq_avail_idx,
219 			vq->vq_used_cons_idx,
220 			vq->vq_packed.cached_flags,
221 			vq->vq_packed.used_wrap_counter);
222 
223 	result = cvq->virtio_net_hdr_mz->addr;
224 	return result;
225 }
226 
227 static struct virtio_pmd_ctrl *
228 virtio_send_command_split(struct virtnet_ctl *cvq,
229 			  struct virtio_pmd_ctrl *ctrl,
230 			  int *dlen, int pkt_num)
231 {
232 	struct virtio_pmd_ctrl *result;
233 	struct virtqueue *vq = virtnet_cq_to_vq(cvq);
234 	uint32_t head, i;
235 	int k, sum = 0;
236 
237 	head = vq->vq_desc_head_idx;
238 
239 	/*
240 	 * The layout is enforced by the QEMU code:
241 	 * one device-readable descriptor for the header,
242 	 * at least one device-readable descriptor per data argument,
243 	 * and one device-writable descriptor for the ACK status.
244 	 */
245 	vq->vq_split.ring.desc[head].flags = VRING_DESC_F_NEXT;
246 	vq->vq_split.ring.desc[head].addr = cvq->virtio_net_hdr_mem;
247 	vq->vq_split.ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
248 	vq->vq_free_cnt--;
249 	i = vq->vq_split.ring.desc[head].next;
250 
251 	for (k = 0; k < pkt_num; k++) {
252 		vq->vq_split.ring.desc[i].flags = VRING_DESC_F_NEXT;
253 		vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
254 			+ sizeof(struct virtio_net_ctrl_hdr)
255 			+ sizeof(ctrl->status) + sizeof(uint8_t)*sum;
256 		vq->vq_split.ring.desc[i].len = dlen[k];
257 		sum += dlen[k];
258 		vq->vq_free_cnt--;
259 		i = vq->vq_split.ring.desc[i].next;
260 	}
261 
262 	vq->vq_split.ring.desc[i].flags = VRING_DESC_F_WRITE;
263 	vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
264 			+ sizeof(struct virtio_net_ctrl_hdr);
265 	vq->vq_split.ring.desc[i].len = sizeof(ctrl->status);
266 	vq->vq_free_cnt--;
267 
268 	vq->vq_desc_head_idx = vq->vq_split.ring.desc[i].next;
269 
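	/* Expose the chain head in the avail ring and publish the new avail index. */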
270 	vq_update_avail_ring(vq, head);
271 	vq_update_avail_idx(vq);
272 
273 	PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
274 
275 	virtqueue_notify(vq);
276 
277 	while (virtqueue_nused(vq) == 0)
278 		usleep(100);
279 
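	/* Reclaim the used descriptor chains and return them to the free list. */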
280 	while (virtqueue_nused(vq)) {
281 		uint32_t idx, desc_idx, used_idx;
282 		struct vring_used_elem *uep;
283 
284 		used_idx = (uint32_t)(vq->vq_used_cons_idx
285 				& (vq->vq_nentries - 1));
286 		uep = &vq->vq_split.ring.used->ring[used_idx];
287 		idx = (uint32_t) uep->id;
288 		desc_idx = idx;
289 
290 		while (vq->vq_split.ring.desc[desc_idx].flags &
291 				VRING_DESC_F_NEXT) {
292 			desc_idx = vq->vq_split.ring.desc[desc_idx].next;
293 			vq->vq_free_cnt++;
294 		}
295 
296 		vq->vq_split.ring.desc[desc_idx].next = vq->vq_desc_head_idx;
297 		vq->vq_desc_head_idx = idx;
298 
299 		vq->vq_used_cons_idx++;
300 		vq->vq_free_cnt++;
301 	}
302 
303 	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
304 			vq->vq_free_cnt, vq->vq_desc_head_idx);
305 
306 	result = cvq->virtio_net_hdr_mz->addr;
307 	return result;
308 }
309 
310 static int
311 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
312 		    int *dlen, int pkt_num)
313 {
314 	virtio_net_ctrl_ack status = ~0;
315 	struct virtio_pmd_ctrl *result;
316 	struct virtqueue *vq;
317 
318 	ctrl->status = status;
319 
320 	if (!cvq) {
321 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
322 		return -1;
323 	}
324 
325 	rte_spinlock_lock(&cvq->lock);
326 	vq = virtnet_cq_to_vq(cvq);
327 
328 	PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
329 		"vq->hw->cvq = %p vq = %p",
330 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
331 
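	/* Need one descriptor for the header, one per data argument and one for the status. */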
332 	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
333 		rte_spinlock_unlock(&cvq->lock);
334 		return -1;
335 	}
336 
337 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
338 		sizeof(struct virtio_pmd_ctrl));
339 
340 	if (virtio_with_packed_queue(vq->hw))
341 		result = virtio_send_command_packed(cvq, ctrl, dlen, pkt_num);
342 	else
343 		result = virtio_send_command_split(cvq, ctrl, dlen, pkt_num);
344 
345 	rte_spinlock_unlock(&cvq->lock);
346 	return result->status;
347 }
348 
349 static int
350 virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
351 {
352 	struct virtio_hw *hw = dev->data->dev_private;
353 	struct virtio_pmd_ctrl ctrl;
354 	int dlen[1];
355 	int ret;
356 
357 	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
358 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
359 	memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
360 
361 	dlen[0] = sizeof(uint16_t);
362 
363 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
364 	if (ret) {
365 		PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
366 			  "failed, this is too late now...");
367 		return -EINVAL;
368 	}
369 
370 	return 0;
371 }
372 
373 static void
374 virtio_dev_queue_release(void *queue __rte_unused)
375 {
376 	/* do nothing */
377 }
378 
379 static uint16_t
380 virtio_get_nr_vq(struct virtio_hw *hw)
381 {
382 	uint16_t nr_vq = hw->max_queue_pairs * 2;
383 
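	/* Add one more virtqueue for the control queue, if negotiated. */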
384 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
385 		nr_vq += 1;
386 
387 	return nr_vq;
388 }
389 
390 static void
391 virtio_init_vring(struct virtqueue *vq)
392 {
393 	int size = vq->vq_nentries;
394 	uint8_t *ring_mem = vq->vq_ring_virt_mem;
395 
396 	PMD_INIT_FUNC_TRACE();
397 
398 	memset(ring_mem, 0, vq->vq_ring_size);
399 
400 	vq->vq_used_cons_idx = 0;
401 	vq->vq_desc_head_idx = 0;
402 	vq->vq_avail_idx = 0;
403 	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
404 	vq->vq_free_cnt = vq->vq_nentries;
405 	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
406 	if (virtio_with_packed_queue(vq->hw)) {
407 		vring_init_packed(&vq->vq_packed.ring, ring_mem,
408 				  VIRTIO_VRING_ALIGN, size);
409 		vring_desc_init_packed(vq, size);
410 	} else {
411 		struct vring *vr = &vq->vq_split.ring;
412 
413 		vring_init_split(vr, ring_mem, VIRTIO_VRING_ALIGN, size);
414 		vring_desc_init_split(vr->desc, size);
415 	}
416 	/*
417 	 * Disable device (host) interrupts to the guest.
418 	 */
419 	virtqueue_disable_intr(vq);
420 }
421 
422 static int
423 virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
424 {
425 	char vq_name[VIRTQUEUE_MAX_NAME_SZ];
426 	char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
427 	const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
428 	unsigned int vq_size, size;
429 	struct virtio_hw *hw = dev->data->dev_private;
430 	struct virtnet_rx *rxvq = NULL;
431 	struct virtnet_tx *txvq = NULL;
432 	struct virtnet_ctl *cvq = NULL;
433 	struct virtqueue *vq;
434 	size_t sz_hdr_mz = 0;
435 	void *sw_ring = NULL;
436 	int queue_type = virtio_get_queue_type(hw, queue_idx);
437 	int ret;
438 	int numa_node = dev->device->numa_node;
439 	struct rte_mbuf *fake_mbuf = NULL;
440 
441 	PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
442 			queue_idx, numa_node);
443 
444 	/*
445 	 * Read the virtqueue size from the Queue Size field. A size of 0
446 	 * means the virtqueue does not exist; a split ring size is always a power of 2.
447 	 */
448 	vq_size = VIRTIO_OPS(hw)->get_queue_num(hw, queue_idx);
449 	PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
450 	if (vq_size == 0) {
451 		PMD_INIT_LOG(ERR, "virtqueue does not exist");
452 		return -EINVAL;
453 	}
454 
455 	if (!virtio_with_packed_queue(hw) && !rte_is_power_of_2(vq_size)) {
456 		PMD_INIT_LOG(ERR, "split virtqueue size is not power of 2");
457 		return -EINVAL;
458 	}
459 
460 	snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
461 		 dev->data->port_id, queue_idx);
462 
463 	size = RTE_ALIGN_CEIL(sizeof(*vq) +
464 				vq_size * sizeof(struct vq_desc_extra),
465 				RTE_CACHE_LINE_SIZE);
466 	if (queue_type == VTNET_TQ) {
467 		/*
468 		 * For each xmit packet, allocate a virtio_net_hdr
469 		 * and indirect ring elements
470 		 */
471 		sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
472 	} else if (queue_type == VTNET_CQ) {
473 		/* Allocate a page for control vq command, data and status */
474 		sz_hdr_mz = rte_mem_page_size();
475 	}
476 
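	/* Allocate the virtqueue together with its per-descriptor extra state
	 * on the queue's NUMA node.
	 */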
477 	vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
478 				numa_node);
479 	if (vq == NULL) {
480 		PMD_INIT_LOG(ERR, "can not allocate vq");
481 		return -ENOMEM;
482 	}
483 	hw->vqs[queue_idx] = vq;
484 
485 	vq->hw = hw;
486 	vq->vq_queue_index = queue_idx;
487 	vq->vq_nentries = vq_size;
488 	if (virtio_with_packed_queue(hw)) {
489 		vq->vq_packed.used_wrap_counter = 1;
490 		vq->vq_packed.cached_flags = VRING_PACKED_DESC_F_AVAIL;
491 		vq->vq_packed.event_flags_shadow = 0;
492 		if (queue_type == VTNET_RQ)
493 			vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE;
494 	}
495 
496 	/*
497 	 * Reserve a memzone for vring elements
498 	 */
499 	size = vring_size(hw, vq_size, VIRTIO_VRING_ALIGN);
500 	vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_VRING_ALIGN);
501 	PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
502 		     size, vq->vq_ring_size);
503 
504 	mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
505 			numa_node, RTE_MEMZONE_IOVA_CONTIG,
506 			VIRTIO_VRING_ALIGN);
507 	if (mz == NULL) {
508 		if (rte_errno == EEXIST)
509 			mz = rte_memzone_lookup(vq_name);
510 		if (mz == NULL) {
511 			ret = -ENOMEM;
512 			goto free_vq;
513 		}
514 	}
515 
516 	memset(mz->addr, 0, mz->len);
517 
518 	vq->vq_ring_mem = mz->iova;
519 	vq->vq_ring_virt_mem = mz->addr;
520 	PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem:      0x%" PRIx64,
521 		     (uint64_t)mz->iova);
522 	PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64,
523 		     (uint64_t)(uintptr_t)mz->addr);
524 
525 	virtio_init_vring(vq);
526 
527 	if (sz_hdr_mz) {
528 		snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
529 			 dev->data->port_id, queue_idx);
530 		hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
531 				numa_node, RTE_MEMZONE_IOVA_CONTIG,
532 				RTE_CACHE_LINE_SIZE);
533 		if (hdr_mz == NULL) {
534 			if (rte_errno == EEXIST)
535 				hdr_mz = rte_memzone_lookup(vq_hdr_name);
536 			if (hdr_mz == NULL) {
537 				ret = -ENOMEM;
538 				goto free_mz;
539 			}
540 		}
541 	}
542 
543 	if (queue_type == VTNET_RQ) {
544 		size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
545 			       sizeof(vq->sw_ring[0]);
546 
547 		sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
548 				RTE_CACHE_LINE_SIZE, numa_node);
549 		if (!sw_ring) {
550 			PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
551 			ret = -ENOMEM;
552 			goto free_hdr_mz;
553 		}
554 
555 		fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf),
556 				RTE_CACHE_LINE_SIZE, numa_node);
557 		if (!fake_mbuf) {
558 			PMD_INIT_LOG(ERR, "can not allocate fake mbuf");
559 			ret = -ENOMEM;
560 			goto free_sw_ring;
561 		}
562 
563 		vq->sw_ring = sw_ring;
564 		rxvq = &vq->rxq;
565 		rxvq->port_id = dev->data->port_id;
566 		rxvq->mz = mz;
567 		rxvq->fake_mbuf = fake_mbuf;
568 	} else if (queue_type == VTNET_TQ) {
569 		txvq = &vq->txq;
570 		txvq->port_id = dev->data->port_id;
571 		txvq->mz = mz;
572 		txvq->virtio_net_hdr_mz = hdr_mz;
573 		txvq->virtio_net_hdr_mem = hdr_mz->iova;
574 	} else if (queue_type == VTNET_CQ) {
575 		cvq = &vq->cq;
576 		cvq->mz = mz;
577 		cvq->virtio_net_hdr_mz = hdr_mz;
578 		cvq->virtio_net_hdr_mem = hdr_mz->iova;
579 		memset(cvq->virtio_net_hdr_mz->addr, 0, rte_mem_page_size());
580 
581 		hw->cvq = cvq;
582 	}
583 
584 	if (queue_type == VTNET_TQ) {
585 		struct virtio_tx_region *txr;
586 		unsigned int i;
587 
588 		txr = hdr_mz->addr;
589 		memset(txr, 0, vq_size * sizeof(*txr));
590 		for (i = 0; i < vq_size; i++) {
591 			/* first indirect descriptor is always the tx header */
592 			if (!virtio_with_packed_queue(hw)) {
593 				struct vring_desc *start_dp = txr[i].tx_indir;
594 				vring_desc_init_split(start_dp,
595 						      RTE_DIM(txr[i].tx_indir));
596 				start_dp->addr = txvq->virtio_net_hdr_mem
597 					+ i * sizeof(*txr)
598 					+ offsetof(struct virtio_tx_region,
599 						   tx_hdr);
600 				start_dp->len = hw->vtnet_hdr_size;
601 				start_dp->flags = VRING_DESC_F_NEXT;
602 			} else {
603 				struct vring_packed_desc *start_dp =
604 					txr[i].tx_packed_indir;
605 				vring_desc_init_indirect_packed(start_dp,
606 				      RTE_DIM(txr[i].tx_packed_indir));
607 				start_dp->addr = txvq->virtio_net_hdr_mem
608 					+ i * sizeof(*txr)
609 					+ offsetof(struct virtio_tx_region,
610 						   tx_hdr);
611 				start_dp->len = hw->vtnet_hdr_size;
612 			}
613 		}
614 	}
615 
616 	if (VIRTIO_OPS(hw)->setup_queue(hw, vq) < 0) {
617 		PMD_INIT_LOG(ERR, "setup_queue failed");
618 		ret = -EINVAL;
619 		goto clean_vq;
620 	}
621 
622 	return 0;
623 
624 clean_vq:
625 	hw->cvq = NULL;
626 	rte_free(fake_mbuf);
627 free_sw_ring:
628 	rte_free(sw_ring);
629 free_hdr_mz:
630 	rte_memzone_free(hdr_mz);
631 free_mz:
632 	rte_memzone_free(mz);
633 free_vq:
634 	rte_free(vq);
635 	hw->vqs[queue_idx] = NULL;
636 
637 	return ret;
638 }
639 
640 static void
641 virtio_free_queues(struct virtio_hw *hw)
642 {
643 	uint16_t nr_vq = virtio_get_nr_vq(hw);
644 	struct virtqueue *vq;
645 	int queue_type;
646 	uint16_t i;
647 
648 	if (hw->vqs == NULL)
649 		return;
650 
651 	for (i = 0; i < nr_vq; i++) {
652 		vq = hw->vqs[i];
653 		if (!vq)
654 			continue;
655 
656 		queue_type = virtio_get_queue_type(hw, i);
657 		if (queue_type == VTNET_RQ) {
658 			rte_free(vq->rxq.fake_mbuf);
659 			rte_free(vq->sw_ring);
660 			rte_memzone_free(vq->rxq.mz);
661 		} else if (queue_type == VTNET_TQ) {
662 			rte_memzone_free(vq->txq.mz);
663 			rte_memzone_free(vq->txq.virtio_net_hdr_mz);
664 		} else {
665 			rte_memzone_free(vq->cq.mz);
666 			rte_memzone_free(vq->cq.virtio_net_hdr_mz);
667 		}
668 
669 		rte_free(vq);
670 		hw->vqs[i] = NULL;
671 	}
672 
673 	rte_free(hw->vqs);
674 	hw->vqs = NULL;
675 }
676 
677 static int
678 virtio_alloc_queues(struct rte_eth_dev *dev)
679 {
680 	struct virtio_hw *hw = dev->data->dev_private;
681 	uint16_t nr_vq = virtio_get_nr_vq(hw);
682 	uint16_t i;
683 	int ret;
684 
685 	hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
686 	if (!hw->vqs) {
687 		PMD_INIT_LOG(ERR, "failed to allocate vqs");
688 		return -ENOMEM;
689 	}
690 
691 	for (i = 0; i < nr_vq; i++) {
692 		ret = virtio_init_queue(dev, i);
693 		if (ret < 0) {
694 			virtio_free_queues(hw);
695 			return ret;
696 		}
697 	}
698 
699 	return 0;
700 }
701 
702 static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);
703 
704 int
705 virtio_dev_close(struct rte_eth_dev *dev)
706 {
707 	struct virtio_hw *hw = dev->data->dev_private;
708 	struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
709 
710 	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
711 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
712 		return 0;
713 
714 	if (!hw->opened)
715 		return 0;
716 	hw->opened = 0;
717 
718 	/* reset the NIC */
719 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
720 		VIRTIO_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
721 	if (intr_conf->rxq)
722 		virtio_queues_unbind_intr(dev);
723 
724 	if (intr_conf->lsc || intr_conf->rxq) {
725 		virtio_intr_disable(dev);
726 		rte_intr_efd_disable(dev->intr_handle);
727 		rte_free(dev->intr_handle->intr_vec);
728 		dev->intr_handle->intr_vec = NULL;
729 	}
730 
731 	virtio_reset(hw);
732 	virtio_dev_free_mbufs(dev);
733 	virtio_free_queues(hw);
734 
735 	return VIRTIO_OPS(hw)->dev_close(hw);
736 }
737 
738 static int
739 virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
740 {
741 	struct virtio_hw *hw = dev->data->dev_private;
742 	struct virtio_pmd_ctrl ctrl;
743 	int dlen[1];
744 	int ret;
745 
746 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
747 		PMD_INIT_LOG(INFO, "host does not support rx control");
748 		return -ENOTSUP;
749 	}
750 
751 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
752 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
753 	ctrl.data[0] = 1;
754 	dlen[0] = 1;
755 
756 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
757 	if (ret) {
758 		PMD_INIT_LOG(ERR, "Failed to enable promisc");
759 		return -EAGAIN;
760 	}
761 
762 	return 0;
763 }
764 
765 static int
766 virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
767 {
768 	struct virtio_hw *hw = dev->data->dev_private;
769 	struct virtio_pmd_ctrl ctrl;
770 	int dlen[1];
771 	int ret;
772 
773 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
774 		PMD_INIT_LOG(INFO, "host does not support rx control");
775 		return -ENOTSUP;
776 	}
777 
778 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
779 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
780 	ctrl.data[0] = 0;
781 	dlen[0] = 1;
782 
783 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
784 	if (ret) {
785 		PMD_INIT_LOG(ERR, "Failed to disable promisc");
786 		return -EAGAIN;
787 	}
788 
789 	return 0;
790 }
791 
792 static int
793 virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
794 {
795 	struct virtio_hw *hw = dev->data->dev_private;
796 	struct virtio_pmd_ctrl ctrl;
797 	int dlen[1];
798 	int ret;
799 
800 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
801 		PMD_INIT_LOG(INFO, "host does not support rx control");
802 		return -ENOTSUP;
803 	}
804 
805 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
806 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
807 	ctrl.data[0] = 1;
808 	dlen[0] = 1;
809 
810 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
811 	if (ret) {
812 		PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
813 		return -EAGAIN;
814 	}
815 
816 	return 0;
817 }
818 
819 static int
820 virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
821 {
822 	struct virtio_hw *hw = dev->data->dev_private;
823 	struct virtio_pmd_ctrl ctrl;
824 	int dlen[1];
825 	int ret;
826 
827 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
828 		PMD_INIT_LOG(INFO, "host does not support rx control");
829 		return -ENOTSUP;
830 	}
831 
832 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
833 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
834 	ctrl.data[0] = 0;
835 	dlen[0] = 1;
836 
837 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
838 	if (ret) {
839 		PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
840 		return -EAGAIN;
841 	}
842 
843 	return 0;
844 }
845 
846 uint16_t
847 virtio_rx_mem_pool_buf_size(struct rte_mempool *mp)
848 {
849 	return rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
850 }
851 
852 bool
853 virtio_rx_check_scatter(uint16_t max_rx_pkt_len, uint16_t rx_buf_size,
854 			bool rx_scatter_enabled, const char **error)
855 {
856 	if (!rx_scatter_enabled && max_rx_pkt_len > rx_buf_size) {
857 		*error = "Rx scatter is disabled and RxQ mbuf pool object size is too small";
858 		return false;
859 	}
860 
861 	return true;
862 }
863 
864 static bool
865 virtio_check_scatter_on_all_rx_queues(struct rte_eth_dev *dev,
866 				      uint16_t frame_size)
867 {
868 	struct virtio_hw *hw = dev->data->dev_private;
869 	struct virtnet_rx *rxvq;
870 	struct virtqueue *vq;
871 	unsigned int qidx;
872 	uint16_t buf_size;
873 	const char *error;
874 
875 	if (hw->vqs == NULL)
876 		return true;
877 
878 	for (qidx = 0; (vq = hw->vqs[2 * qidx + VTNET_SQ_RQ_QUEUE_IDX]) != NULL;
879 	     qidx++) {
880 		rxvq = &vq->rxq;
881 		if (rxvq->mpool == NULL)
882 			continue;
883 		buf_size = virtio_rx_mem_pool_buf_size(rxvq->mpool);
884 
885 		if (!virtio_rx_check_scatter(frame_size, buf_size,
886 					     hw->rx_ol_scatter, &error)) {
887 			PMD_INIT_LOG(ERR, "MTU check for RxQ %u failed: %s",
888 				     qidx, error);
889 			return false;
890 		}
891 	}
892 
893 	return true;
894 }
895 
896 #define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
897 static int
898 virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
899 {
900 	struct virtio_hw *hw = dev->data->dev_private;
901 	uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
902 				 hw->vtnet_hdr_size;
903 	uint32_t frame_size = mtu + ether_hdr_len;
904 	uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;
905 
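	/* Cap the frame size at the largest Rx packet length the PMD supports. */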
906 	max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);
907 
908 	if (mtu < RTE_ETHER_MIN_MTU || frame_size > max_frame_size) {
909 		PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
910 			RTE_ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
911 		return -EINVAL;
912 	}
913 
914 	if (!virtio_check_scatter_on_all_rx_queues(dev, frame_size)) {
915 		PMD_INIT_LOG(ERR, "MTU vs Rx scatter and Rx buffers check failed");
916 		return -EINVAL;
917 	}
918 
919 	hw->max_rx_pkt_len = frame_size;
920 	dev->data->dev_conf.rxmode.max_rx_pkt_len = hw->max_rx_pkt_len;
921 
922 	return 0;
923 }
924 
925 static int
926 virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
927 {
928 	struct virtio_hw *hw = dev->data->dev_private;
929 	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
930 	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
931 
932 	virtqueue_enable_intr(vq);
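	/* The barrier below makes the re-enabled event/interrupt flags visible to the device. */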
933 	virtio_mb(hw->weak_barriers);
934 	return 0;
935 }
936 
937 static int
938 virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
939 {
940 	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
941 	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
942 
943 	virtqueue_disable_intr(vq);
944 	return 0;
945 }
946 
947 /*
948  * dev_ops for virtio, bare necessities for basic operation
949  */
950 static const struct eth_dev_ops virtio_eth_dev_ops = {
951 	.dev_configure           = virtio_dev_configure,
952 	.dev_start               = virtio_dev_start,
953 	.dev_stop                = virtio_dev_stop,
954 	.dev_close               = virtio_dev_close,
955 	.promiscuous_enable      = virtio_dev_promiscuous_enable,
956 	.promiscuous_disable     = virtio_dev_promiscuous_disable,
957 	.allmulticast_enable     = virtio_dev_allmulticast_enable,
958 	.allmulticast_disable    = virtio_dev_allmulticast_disable,
959 	.mtu_set                 = virtio_mtu_set,
960 	.dev_infos_get           = virtio_dev_info_get,
961 	.stats_get               = virtio_dev_stats_get,
962 	.xstats_get              = virtio_dev_xstats_get,
963 	.xstats_get_names        = virtio_dev_xstats_get_names,
964 	.stats_reset             = virtio_dev_stats_reset,
965 	.xstats_reset            = virtio_dev_stats_reset,
966 	.link_update             = virtio_dev_link_update,
967 	.vlan_offload_set        = virtio_dev_vlan_offload_set,
968 	.rx_queue_setup          = virtio_dev_rx_queue_setup,
969 	.rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
970 	.rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
971 	.rx_queue_release        = virtio_dev_queue_release,
972 	.tx_queue_setup          = virtio_dev_tx_queue_setup,
973 	.tx_queue_release        = virtio_dev_queue_release,
974 	/* collect stats per queue */
975 	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
976 	.vlan_filter_set         = virtio_vlan_filter_set,
977 	.mac_addr_add            = virtio_mac_addr_add,
978 	.mac_addr_remove         = virtio_mac_addr_remove,
979 	.mac_addr_set            = virtio_mac_addr_set,
980 };
981 
982 /*
983  * dev_ops for virtio-user in secondary processes; only limited
984  * functionality is supported currently.
985  */
986 const struct eth_dev_ops virtio_user_secondary_eth_dev_ops = {
987 	.dev_infos_get           = virtio_dev_info_get,
988 	.stats_get               = virtio_dev_stats_get,
989 	.xstats_get              = virtio_dev_xstats_get,
990 	.xstats_get_names        = virtio_dev_xstats_get_names,
991 	.stats_reset             = virtio_dev_stats_reset,
992 	.xstats_reset            = virtio_dev_stats_reset,
993 	/* collect stats per queue */
994 	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
995 };
996 
997 static void
998 virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
999 {
1000 	unsigned i;
1001 
1002 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1003 		const struct virtnet_tx *txvq = dev->data->tx_queues[i];
1004 		if (txvq == NULL)
1005 			continue;
1006 
1007 		stats->opackets += txvq->stats.packets;
1008 		stats->obytes += txvq->stats.bytes;
1009 
1010 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
1011 			stats->q_opackets[i] = txvq->stats.packets;
1012 			stats->q_obytes[i] = txvq->stats.bytes;
1013 		}
1014 	}
1015 
1016 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1017 		const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1018 		if (rxvq == NULL)
1019 			continue;
1020 
1021 		stats->ipackets += rxvq->stats.packets;
1022 		stats->ibytes += rxvq->stats.bytes;
1023 		stats->ierrors += rxvq->stats.errors;
1024 
1025 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
1026 			stats->q_ipackets[i] = rxvq->stats.packets;
1027 			stats->q_ibytes[i] = rxvq->stats.bytes;
1028 		}
1029 	}
1030 
1031 	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
1032 }
1033 
1034 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
1035 				       struct rte_eth_xstat_name *xstats_names,
1036 				       __rte_unused unsigned limit)
1037 {
1038 	unsigned i;
1039 	unsigned count = 0;
1040 	unsigned t;
1041 
1042 	unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1043 		dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1044 
1045 	if (xstats_names != NULL) {
1046 		/* Note: limit is checked in rte_eth_xstats_get_names() */
1047 
1048 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
1049 			struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1050 			if (rxvq == NULL)
1051 				continue;
1052 			for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1053 				snprintf(xstats_names[count].name,
1054 					sizeof(xstats_names[count].name),
1055 					"rx_q%u_%s", i,
1056 					rte_virtio_rxq_stat_strings[t].name);
1057 				count++;
1058 			}
1059 		}
1060 
1061 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
1062 			struct virtnet_tx *txvq = dev->data->tx_queues[i];
1063 			if (txvq == NULL)
1064 				continue;
1065 			for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1066 				snprintf(xstats_names[count].name,
1067 					sizeof(xstats_names[count].name),
1068 					"tx_q%u_%s", i,
1069 					rte_virtio_txq_stat_strings[t].name);
1070 				count++;
1071 			}
1072 		}
1073 		return count;
1074 	}
1075 	return nstats;
1076 }
1077 
1078 static int
1079 virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1080 		      unsigned n)
1081 {
1082 	unsigned i;
1083 	unsigned count = 0;
1084 
1085 	unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1086 		dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1087 
1088 	if (n < nstats)
1089 		return nstats;
1090 
1091 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1092 		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1093 
1094 		if (rxvq == NULL)
1095 			continue;
1096 
1097 		unsigned t;
1098 
1099 		for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1100 			xstats[count].value = *(uint64_t *)(((char *)rxvq) +
1101 				rte_virtio_rxq_stat_strings[t].offset);
1102 			xstats[count].id = count;
1103 			count++;
1104 		}
1105 	}
1106 
1107 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1108 		struct virtnet_tx *txvq = dev->data->tx_queues[i];
1109 
1110 		if (txvq == NULL)
1111 			continue;
1112 
1113 		unsigned t;
1114 
1115 		for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1116 			xstats[count].value = *(uint64_t *)(((char *)txvq) +
1117 				rte_virtio_txq_stat_strings[t].offset);
1118 			xstats[count].id = count;
1119 			count++;
1120 		}
1121 	}
1122 
1123 	return count;
1124 }
1125 
1126 static int
1127 virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1128 {
1129 	virtio_update_stats(dev, stats);
1130 
1131 	return 0;
1132 }
1133 
1134 static int
1135 virtio_dev_stats_reset(struct rte_eth_dev *dev)
1136 {
1137 	unsigned int i;
1138 
1139 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1140 		struct virtnet_tx *txvq = dev->data->tx_queues[i];
1141 		if (txvq == NULL)
1142 			continue;
1143 
1144 		txvq->stats.packets = 0;
1145 		txvq->stats.bytes = 0;
1146 		txvq->stats.multicast = 0;
1147 		txvq->stats.broadcast = 0;
1148 		memset(txvq->stats.size_bins, 0,
1149 		       sizeof(txvq->stats.size_bins[0]) * 8);
1150 	}
1151 
1152 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1153 		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1154 		if (rxvq == NULL)
1155 			continue;
1156 
1157 		rxvq->stats.packets = 0;
1158 		rxvq->stats.bytes = 0;
1159 		rxvq->stats.errors = 0;
1160 		rxvq->stats.multicast = 0;
1161 		rxvq->stats.broadcast = 0;
1162 		memset(rxvq->stats.size_bins, 0,
1163 		       sizeof(rxvq->stats.size_bins[0]) * 8);
1164 	}
1165 
1166 	return 0;
1167 }
1168 
1169 static void
1170 virtio_set_hwaddr(struct virtio_hw *hw)
1171 {
1172 	virtio_write_dev_config(hw,
1173 			offsetof(struct virtio_net_config, mac),
1174 			&hw->mac_addr, RTE_ETHER_ADDR_LEN);
1175 }
1176 
1177 static void
1178 virtio_get_hwaddr(struct virtio_hw *hw)
1179 {
1180 	if (virtio_with_feature(hw, VIRTIO_NET_F_MAC)) {
1181 		virtio_read_dev_config(hw,
1182 			offsetof(struct virtio_net_config, mac),
1183 			&hw->mac_addr, RTE_ETHER_ADDR_LEN);
1184 	} else {
1185 		rte_eth_random_addr(&hw->mac_addr[0]);
1186 		virtio_set_hwaddr(hw);
1187 	}
1188 }
1189 
1190 static int
1191 virtio_mac_table_set(struct virtio_hw *hw,
1192 		     const struct virtio_net_ctrl_mac *uc,
1193 		     const struct virtio_net_ctrl_mac *mc)
1194 {
1195 	struct virtio_pmd_ctrl ctrl;
1196 	int err, len[2];
1197 
1198 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1199 		PMD_DRV_LOG(INFO, "host does not support mac table");
1200 		return -1;
1201 	}
1202 
1203 	ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1204 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1205 
1206 	len[0] = uc->entries * RTE_ETHER_ADDR_LEN + sizeof(uc->entries);
1207 	memcpy(ctrl.data, uc, len[0]);
1208 
1209 	len[1] = mc->entries * RTE_ETHER_ADDR_LEN + sizeof(mc->entries);
1210 	memcpy(ctrl.data + len[0], mc, len[1]);
1211 
1212 	err = virtio_send_command(hw->cvq, &ctrl, len, 2);
1213 	if (err != 0)
1214 		PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
1215 	return err;
1216 }
1217 
1218 static int
1219 virtio_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
1220 		    uint32_t index, uint32_t vmdq __rte_unused)
1221 {
1222 	struct virtio_hw *hw = dev->data->dev_private;
1223 	const struct rte_ether_addr *addrs = dev->data->mac_addrs;
1224 	unsigned int i;
1225 	struct virtio_net_ctrl_mac *uc, *mc;
1226 
1227 	if (index >= VIRTIO_MAX_MAC_ADDRS) {
1228 		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1229 		return -EINVAL;
1230 	}
1231 
1232 	uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1233 		sizeof(uc->entries));
1234 	uc->entries = 0;
1235 	mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1236 		sizeof(mc->entries));
1237 	mc->entries = 0;
1238 
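	/* Rebuild the complete unicast and multicast tables, substituting the
	 * new address at the requested index.
	 */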
1239 	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1240 		const struct rte_ether_addr *addr
1241 			= (i == index) ? mac_addr : addrs + i;
1242 		struct virtio_net_ctrl_mac *tbl
1243 			= rte_is_multicast_ether_addr(addr) ? mc : uc;
1244 
1245 		memcpy(&tbl->macs[tbl->entries++], addr, RTE_ETHER_ADDR_LEN);
1246 	}
1247 
1248 	return virtio_mac_table_set(hw, uc, mc);
1249 }
1250 
1251 static void
1252 virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
1253 {
1254 	struct virtio_hw *hw = dev->data->dev_private;
1255 	struct rte_ether_addr *addrs = dev->data->mac_addrs;
1256 	struct virtio_net_ctrl_mac *uc, *mc;
1257 	unsigned int i;
1258 
1259 	if (index >= VIRTIO_MAX_MAC_ADDRS) {
1260 		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1261 		return;
1262 	}
1263 
1264 	uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1265 		sizeof(uc->entries));
1266 	uc->entries = 0;
1267 	mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1268 		sizeof(mc->entries));
1269 	mc->entries = 0;
1270 
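	/* Rebuild the tables from the remaining addresses, skipping the removed
	 * index and any unused (zero) entries.
	 */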
1271 	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1272 		struct virtio_net_ctrl_mac *tbl;
1273 
1274 		if (i == index || rte_is_zero_ether_addr(addrs + i))
1275 			continue;
1276 
1277 		tbl = rte_is_multicast_ether_addr(addrs + i) ? mc : uc;
1278 		memcpy(&tbl->macs[tbl->entries++], addrs + i,
1279 			RTE_ETHER_ADDR_LEN);
1280 	}
1281 
1282 	virtio_mac_table_set(hw, uc, mc);
1283 }
1284 
1285 static int
1286 virtio_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
1287 {
1288 	struct virtio_hw *hw = dev->data->dev_private;
1289 
1290 	memcpy(hw->mac_addr, mac_addr, RTE_ETHER_ADDR_LEN);
1291 
1292 	/* Use atomic update if available */
1293 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1294 		struct virtio_pmd_ctrl ctrl;
1295 		int len = RTE_ETHER_ADDR_LEN;
1296 
1297 		ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1298 		ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
1299 
1300 		memcpy(ctrl.data, mac_addr, RTE_ETHER_ADDR_LEN);
1301 		return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1302 	}
1303 
1304 	if (!virtio_with_feature(hw, VIRTIO_NET_F_MAC))
1305 		return -ENOTSUP;
1306 
1307 	virtio_set_hwaddr(hw);
1308 	return 0;
1309 }
1310 
1311 static int
1312 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1313 {
1314 	struct virtio_hw *hw = dev->data->dev_private;
1315 	struct virtio_pmd_ctrl ctrl;
1316 	int len;
1317 
1318 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
1319 		return -ENOTSUP;
1320 
1321 	ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
1322 	ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
1323 	memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
1324 	len = sizeof(vlan_id);
1325 
1326 	return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1327 }
1328 
1329 static int
1330 virtio_intr_unmask(struct rte_eth_dev *dev)
1331 {
1332 	struct virtio_hw *hw = dev->data->dev_private;
1333 
1334 	if (rte_intr_ack(dev->intr_handle) < 0)
1335 		return -1;
1336 
1337 	if (VIRTIO_OPS(hw)->intr_detect)
1338 		VIRTIO_OPS(hw)->intr_detect(hw);
1339 
1340 	return 0;
1341 }
1342 
1343 static int
1344 virtio_intr_enable(struct rte_eth_dev *dev)
1345 {
1346 	struct virtio_hw *hw = dev->data->dev_private;
1347 
1348 	if (rte_intr_enable(dev->intr_handle) < 0)
1349 		return -1;
1350 
1351 	if (VIRTIO_OPS(hw)->intr_detect)
1352 		VIRTIO_OPS(hw)->intr_detect(hw);
1353 
1354 	return 0;
1355 }
1356 
1357 static int
1358 virtio_intr_disable(struct rte_eth_dev *dev)
1359 {
1360 	struct virtio_hw *hw = dev->data->dev_private;
1361 
1362 	if (rte_intr_disable(dev->intr_handle) < 0)
1363 		return -1;
1364 
1365 	if (VIRTIO_OPS(hw)->intr_detect)
1366 		VIRTIO_OPS(hw)->intr_detect(hw);
1367 
1368 	return 0;
1369 }
1370 
1371 static int
1372 virtio_ethdev_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
1373 {
1374 	uint64_t host_features;
1375 
1376 	/* Prepare guest_features: features that the driver wants to support */
1377 	PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
1378 		req_features);
1379 
1380 	/* Read device(host) feature bits */
1381 	host_features = VIRTIO_OPS(hw)->get_features(hw);
1382 	PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
1383 		host_features);
1384 
1385 	/* If supported, ensure MTU value is valid before acknowledging it. */
1386 	if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
1387 		struct virtio_net_config config;
1388 
1389 		virtio_read_dev_config(hw,
1390 			offsetof(struct virtio_net_config, mtu),
1391 			&config.mtu, sizeof(config.mtu));
1392 
1393 		if (config.mtu < RTE_ETHER_MIN_MTU)
1394 			req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
1395 	}
1396 
1397 	/*
1398 	 * Negotiate features: the subset of device feature bits accepted by
1399 	 * the driver is written back as the guest feature bits.
1400 	 */
1401 	hw->guest_features = req_features;
1402 	hw->guest_features = virtio_negotiate_features(hw, host_features);
1403 	PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
1404 		hw->guest_features);
1405 
1406 	if (VIRTIO_OPS(hw)->features_ok(hw) < 0)
1407 		return -1;
1408 
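	/* For modern (VERSION_1) devices, set FEATURES_OK and read the status
	 * back to confirm the device accepted the negotiated feature set.
	 */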
1409 	if (virtio_with_feature(hw, VIRTIO_F_VERSION_1)) {
1410 		virtio_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1411 
1412 		if (!(virtio_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
1413 			PMD_INIT_LOG(ERR, "Failed to set FEATURES_OK status!");
1414 			return -1;
1415 		}
1416 	}
1417 
1418 	hw->req_guest_features = req_features;
1419 
1420 	return 0;
1421 }
1422 
1423 int
1424 virtio_dev_pause(struct rte_eth_dev *dev)
1425 {
1426 	struct virtio_hw *hw = dev->data->dev_private;
1427 
1428 	rte_spinlock_lock(&hw->state_lock);
1429 
1430 	if (hw->started == 0) {
1431 		/* Device is already stopped. */
1432 		rte_spinlock_unlock(&hw->state_lock);
1433 		return -1;
1434 	}
1435 	hw->started = 0;
1436 	/*
1437 	 * Prevent the worker threads from touching the queues to avoid
1438 	 * contention; 1 ms should be enough for the ongoing Tx function to finish.
1439 	 */
1440 	rte_delay_ms(1);
1441 	return 0;
1442 }
1443 
1444 /*
1445  * Recover hw state to let the worker threads continue.
1446  */
1447 void
1448 virtio_dev_resume(struct rte_eth_dev *dev)
1449 {
1450 	struct virtio_hw *hw = dev->data->dev_private;
1451 
1452 	hw->started = 1;
1453 	rte_spinlock_unlock(&hw->state_lock);
1454 }
1455 
1456 /*
1457  * Should be called only after the device has been paused.
1458  */
1459 int
1460 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
1461 		int nb_pkts)
1462 {
1463 	struct virtio_hw *hw = dev->data->dev_private;
1464 	struct virtnet_tx *txvq = dev->data->tx_queues[0];
1465 	int ret;
1466 
1467 	hw->inject_pkts = tx_pkts;
1468 	ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
1469 	hw->inject_pkts = NULL;
1470 
1471 	return ret;
1472 }
1473 
1474 static void
1475 virtio_notify_peers(struct rte_eth_dev *dev)
1476 {
1477 	struct virtio_hw *hw = dev->data->dev_private;
1478 	struct virtnet_rx *rxvq;
1479 	struct rte_mbuf *rarp_mbuf;
1480 
1481 	if (!dev->data->rx_queues)
1482 		return;
1483 
1484 	rxvq = dev->data->rx_queues[0];
1485 	if (!rxvq)
1486 		return;
1487 
1488 	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
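	/* Build a gratuitous RARP packet with our own MAC so that peers and
	 * switches refresh their tables (e.g. after a live migration).
	 */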
1489 			(struct rte_ether_addr *)hw->mac_addr);
1490 	if (rarp_mbuf == NULL) {
1491 		PMD_DRV_LOG(ERR, "failed to make RARP packet.");
1492 		return;
1493 	}
1494 
1495 	/* If the virtio port has just been stopped, there is no need to send RARP */
1496 	if (virtio_dev_pause(dev) < 0) {
1497 		rte_pktmbuf_free(rarp_mbuf);
1498 		return;
1499 	}
1500 
1501 	virtio_inject_pkts(dev, &rarp_mbuf, 1);
1502 	virtio_dev_resume(dev);
1503 }
1504 
1505 static void
1506 virtio_ack_link_announce(struct rte_eth_dev *dev)
1507 {
1508 	struct virtio_hw *hw = dev->data->dev_private;
1509 	struct virtio_pmd_ctrl ctrl;
1510 
1511 	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
1512 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
1513 
1514 	virtio_send_command(hw->cvq, &ctrl, NULL, 0);
1515 }
1516 
1517 /*
1518  * Process the virtio config-changed interrupt: call the LSC callback
1519  * if the link state changed, and generate a gratuitous RARP packet if
1520  * the status indicates an ANNOUNCE.
1521  */
1522 void
1523 virtio_interrupt_handler(void *param)
1524 {
1525 	struct rte_eth_dev *dev = param;
1526 	struct virtio_hw *hw = dev->data->dev_private;
1527 	uint8_t isr;
1528 	uint16_t status;
1529 
1530 	/* Read interrupt status which clears interrupt */
1531 	isr = virtio_get_isr(hw);
1532 	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
1533 
1534 	if (virtio_intr_unmask(dev) < 0)
1535 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1536 
1537 	if (isr & VIRTIO_ISR_CONFIG) {
1538 		if (virtio_dev_link_update(dev, 0) == 0)
1539 			rte_eth_dev_callback_process(dev,
1540 						     RTE_ETH_EVENT_INTR_LSC,
1541 						     NULL);
1542 
1543 		if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1544 			virtio_read_dev_config(hw,
1545 				offsetof(struct virtio_net_config, status),
1546 				&status, sizeof(status));
1547 			if (status & VIRTIO_NET_S_ANNOUNCE) {
1548 				virtio_notify_peers(dev);
1549 				if (hw->cvq)
1550 					virtio_ack_link_announce(dev);
1551 			}
1552 		}
1553 	}
1554 }
1555 
1556 /* set rx and tx handlers according to what is supported */
1557 static void
1558 set_rxtx_funcs(struct rte_eth_dev *eth_dev)
1559 {
1560 	struct virtio_hw *hw = eth_dev->data->dev_private;
1561 
1562 	eth_dev->tx_pkt_prepare = virtio_xmit_pkts_prepare;
1563 	if (virtio_with_packed_queue(hw)) {
1564 		PMD_INIT_LOG(INFO,
1565 			"virtio: using packed ring %s Tx path on port %u",
1566 			hw->use_vec_tx ? "vectorized" : "standard",
1567 			eth_dev->data->port_id);
1568 		if (hw->use_vec_tx)
1569 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
1570 		else
1571 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
1572 	} else {
1573 		if (hw->use_inorder_tx) {
1574 			PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
1575 				eth_dev->data->port_id);
1576 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
1577 		} else {
1578 			PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
1579 				eth_dev->data->port_id);
1580 			eth_dev->tx_pkt_burst = virtio_xmit_pkts;
1581 		}
1582 	}
1583 
1584 	if (virtio_with_packed_queue(hw)) {
1585 		if (hw->use_vec_rx) {
1586 			PMD_INIT_LOG(INFO,
1587 				"virtio: using packed ring vectorized Rx path on port %u",
1588 				eth_dev->data->port_id);
1589 			eth_dev->rx_pkt_burst =
1590 				&virtio_recv_pkts_packed_vec;
1591 		} else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1592 			PMD_INIT_LOG(INFO,
1593 				"virtio: using packed ring mergeable buffer Rx path on port %u",
1594 				eth_dev->data->port_id);
1595 			eth_dev->rx_pkt_burst =
1596 				&virtio_recv_mergeable_pkts_packed;
1597 		} else {
1598 			PMD_INIT_LOG(INFO,
1599 				"virtio: using packed ring standard Rx path on port %u",
1600 				eth_dev->data->port_id);
1601 			eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
1602 		}
1603 	} else {
1604 		if (hw->use_vec_rx) {
1605 			PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on port %u",
1606 				eth_dev->data->port_id);
1607 			eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
1608 		} else if (hw->use_inorder_rx) {
1609 			PMD_INIT_LOG(INFO,
1610 				"virtio: using inorder Rx path on port %u",
1611 				eth_dev->data->port_id);
1612 			eth_dev->rx_pkt_burst =	&virtio_recv_pkts_inorder;
1613 		} else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1614 			PMD_INIT_LOG(INFO,
1615 				"virtio: using mergeable buffer Rx path on port %u",
1616 				eth_dev->data->port_id);
1617 			eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
1618 		} else {
1619 			PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
1620 				eth_dev->data->port_id);
1621 			eth_dev->rx_pkt_burst = &virtio_recv_pkts;
1622 		}
1623 	}
1624 
1625 }
1626 
1627 /* Only 1:1 queue/interrupt mapping is supported so far.
1628  * TODO: support n:1 queue/interrupt mapping when the number of
1629  * interrupt vectors is limited (less than N + 1).
1630  */
1631 static int
1632 virtio_queues_bind_intr(struct rte_eth_dev *dev)
1633 {
1634 	uint32_t i;
1635 	struct virtio_hw *hw = dev->data->dev_private;
1636 
1637 	PMD_INIT_LOG(INFO, "queue/interrupt binding");
1638 	for (i = 0; i < dev->data->nb_rx_queues; ++i) {
1639 		dev->intr_handle->intr_vec[i] = i + 1;
1640 		if (VIRTIO_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
1641 						 VIRTIO_MSI_NO_VECTOR) {
1642 			PMD_DRV_LOG(ERR, "failed to set queue vector");
1643 			return -EBUSY;
1644 		}
1645 	}
1646 
1647 	return 0;
1648 }
1649 
1650 static void
1651 virtio_queues_unbind_intr(struct rte_eth_dev *dev)
1652 {
1653 	uint32_t i;
1654 	struct virtio_hw *hw = dev->data->dev_private;
1655 
1656 	PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
1657 	for (i = 0; i < dev->data->nb_rx_queues; ++i)
1658 		VIRTIO_OPS(hw)->set_queue_irq(hw,
1659 					     hw->vqs[i * VTNET_CQ],
1660 					     VIRTIO_MSI_NO_VECTOR);
1661 }
1662 
1663 static int
1664 virtio_configure_intr(struct rte_eth_dev *dev)
1665 {
1666 	struct virtio_hw *hw = dev->data->dev_private;
1667 
1668 	if (!rte_intr_cap_multiple(dev->intr_handle)) {
1669 		PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
1670 		return -ENOTSUP;
1671 	}
1672 
1673 	if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1674 		PMD_INIT_LOG(ERR, "Fail to create eventfd");
1675 		return -1;
1676 	}
1677 
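	/* Allocate the Rx interrupt vector table, sized for the maximum number
	 * of queue pairs.
	 */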
1678 	if (!dev->intr_handle->intr_vec) {
1679 		dev->intr_handle->intr_vec =
1680 			rte_zmalloc("intr_vec",
1681 				    hw->max_queue_pairs * sizeof(int), 0);
1682 		if (!dev->intr_handle->intr_vec) {
1683 			PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1684 				     hw->max_queue_pairs);
1685 			return -ENOMEM;
1686 		}
1687 	}
1688 
1689 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1690 		/* Re-register callback to update max_intr */
1691 		rte_intr_callback_unregister(dev->intr_handle,
1692 					     virtio_interrupt_handler,
1693 					     dev);
1694 		rte_intr_callback_register(dev->intr_handle,
1695 					   virtio_interrupt_handler,
1696 					   dev);
1697 	}
1698 
1699 	/* DO NOT try to remove this! This call enables MSI-X; without it QEMU
1700 	 * will encounter a SIGSEGV when DRIVER_OK is sent.
1701 	 * For legacy devices it must also be done before queue/vector binding,
1702 	 * so that the config size grows from 20 to 24 bytes; otherwise writes to
1703 	 * VIRTIO_MSI_QUEUE_VECTOR (offset 22) will be ignored.
1704 	 */
1705 	if (virtio_intr_enable(dev) < 0) {
1706 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1707 		return -1;
1708 	}
1709 
1710 	if (virtio_queues_bind_intr(dev) < 0) {
1711 		PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1712 		return -1;
1713 	}
1714 
1715 	return 0;
1716 }
1717 #define DUPLEX_UNKNOWN   0xff
1718 /* reset device and renegotiate features if needed */
1719 static int
1720 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
1721 {
1722 	struct virtio_hw *hw = eth_dev->data->dev_private;
1723 	struct virtio_net_config *config;
1724 	struct virtio_net_config local_config;
1725 	int ret;
1726 
1727 	/* Reset the device, although this is not strictly necessary at startup */
1728 	virtio_reset(hw);
1729 
1730 	if (hw->vqs) {
1731 		virtio_dev_free_mbufs(eth_dev);
1732 		virtio_free_queues(hw);
1733 	}
1734 
1735 	/* Tell the host we've noticed this device. */
1736 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
1737 
1738 	/* Tell the host we know how to drive the device. */
1739 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
1740 	if (virtio_ethdev_negotiate_features(hw, req_features) < 0)
1741 		return -1;
1742 
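	/* Weak (SMP) barriers are sufficient unless the device requires
	 * platform ordering (VIRTIO_F_ORDER_PLATFORM).
	 */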
1743 	hw->weak_barriers = !virtio_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);
1744 
1745 	/* Enable LSC only if the host supports the status feature and a config interrupt is available */
1746 	if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS) && hw->intr_lsc)
1747 		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
1748 	else
1749 		eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
1750 
1751 	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1752 
1753 	/* Setting up rx_header size for the device */
1754 	if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
1755 	    virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
1756 	    virtio_with_packed_queue(hw))
1757 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1758 	else
1759 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
1760 
1761 	/* Copy the permanent MAC address into virtio_hw */
1762 	virtio_get_hwaddr(hw);
1763 	rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
1764 			&eth_dev->data->mac_addrs[0]);
1765 	PMD_INIT_LOG(DEBUG,
1766 		     "PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
1767 		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
1768 		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
1769 
1770 	if (hw->speed == ETH_SPEED_NUM_UNKNOWN) {
1771 		if (virtio_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX)) {
1772 			config = &local_config;
1773 			virtio_read_dev_config(hw,
1774 				offsetof(struct virtio_net_config, speed),
1775 				&config->speed, sizeof(config->speed));
1776 			virtio_read_dev_config(hw,
1777 				offsetof(struct virtio_net_config, duplex),
1778 				&config->duplex, sizeof(config->duplex));
1779 			hw->speed = config->speed;
1780 			hw->duplex = config->duplex;
1781 		}
1782 	}
1783 	if (hw->duplex == DUPLEX_UNKNOWN)
1784 		hw->duplex = ETH_LINK_FULL_DUPLEX;
1785 	PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
1786 		hw->speed, hw->duplex);
1787 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
1788 		config = &local_config;
1789 
1790 		virtio_read_dev_config(hw,
1791 			offsetof(struct virtio_net_config, mac),
1792 			&config->mac, sizeof(config->mac));
1793 
1794 		if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1795 			virtio_read_dev_config(hw,
1796 				offsetof(struct virtio_net_config, status),
1797 				&config->status, sizeof(config->status));
1798 		} else {
1799 			PMD_INIT_LOG(DEBUG,
1800 				     "VIRTIO_NET_F_STATUS is not supported");
1801 			config->status = 0;
1802 		}
1803 
1804 		if (virtio_with_feature(hw, VIRTIO_NET_F_MQ)) {
1805 			virtio_read_dev_config(hw,
1806 				offsetof(struct virtio_net_config, max_virtqueue_pairs),
1807 				&config->max_virtqueue_pairs,
1808 				sizeof(config->max_virtqueue_pairs));
1809 		} else {
1810 			PMD_INIT_LOG(DEBUG,
1811 				     "VIRTIO_NET_F_MQ is not supported");
1812 			config->max_virtqueue_pairs = 1;
1813 		}
1814 
1815 		hw->max_queue_pairs = config->max_virtqueue_pairs;
1816 
1817 		if (virtio_with_feature(hw, VIRTIO_NET_F_MTU)) {
1818 			virtio_read_dev_config(hw,
1819 				offsetof(struct virtio_net_config, mtu),
1820 				&config->mtu,
1821 				sizeof(config->mtu));
1822 
1823 			/*
1824 			 * MTU value has already been checked at negotiation
1825 			 * time, but check again in case it has changed since
1826 			 * then, which should not happen.
1827 			 */
1828 			if (config->mtu < RTE_ETHER_MIN_MTU) {
1829 				PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
1830 						config->mtu);
1831 				return -1;
1832 			}
1833 
1834 			hw->max_mtu = config->mtu;
1835 			/* Set the initial MTU to the maximum supported by vhost */
1836 			eth_dev->data->mtu = config->mtu;
1837 
1838 		} else {
1839 			hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
1840 				VLAN_TAG_LEN - hw->vtnet_hdr_size;
1841 		}
1842 
1843 		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
1844 				config->max_virtqueue_pairs);
1845 		PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
1846 		PMD_INIT_LOG(DEBUG,
1847 				"PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
1848 				config->mac[0], config->mac[1],
1849 				config->mac[2], config->mac[3],
1850 				config->mac[4], config->mac[5]);
1851 	} else {
1852 		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
1853 		hw->max_queue_pairs = 1;
1854 		hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
1855 			VLAN_TAG_LEN - hw->vtnet_hdr_size;
1856 	}
1857 
1858 	ret = virtio_alloc_queues(eth_dev);
1859 	if (ret < 0)
1860 		return ret;
1861 
1862 	if (eth_dev->data->dev_conf.intr_conf.rxq) {
1863 		if (virtio_configure_intr(eth_dev) < 0) {
1864 			PMD_INIT_LOG(ERR, "failed to configure interrupt");
1865 			virtio_free_queues(hw);
1866 			return -1;
1867 		}
1868 	}
1869 
1870 	virtio_reinit_complete(hw);
1871 
1872 	return 0;
1873 }
1874 
1875 /*
1876  * This function is based on the probe() function in virtio_pci.c.
1877  * It returns 0 on success.
1878  */
1879 int
1880 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
1881 {
1882 	struct virtio_hw *hw = eth_dev->data->dev_private;
1883 	uint32_t speed = ETH_SPEED_NUM_UNKNOWN;
1884 	int vectorized = 0;
1885 	int ret;
1886 
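	/* The driver places the virtio-net header in the mbuf headroom on Tx,
	 * so the headroom must be large enough to hold the largest header.
	 */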
1887 	if (sizeof(struct virtio_net_hdr_mrg_rxbuf) > RTE_PKTMBUF_HEADROOM) {
1888 		PMD_INIT_LOG(ERR,
1889 			"Insufficient headroom: required = %d, avail = %d",
1890 			(int)sizeof(struct virtio_net_hdr_mrg_rxbuf),
1891 			RTE_PKTMBUF_HEADROOM);
1892 
1893 		return -1;
1894 	}
1895 
1896 	eth_dev->dev_ops = &virtio_eth_dev_ops;
1897 	eth_dev->rx_descriptor_done = virtio_dev_rx_queue_done;
1898 
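	/* A secondary process reuses the device already initialized by the
	 * primary; it only needs to select the Rx/Tx functions.
	 */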
1899 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1900 		set_rxtx_funcs(eth_dev);
1901 		return 0;
1902 	}
1903 
1904 	ret = virtio_dev_devargs_parse(eth_dev->device->devargs, &speed, &vectorized);
1905 	if (ret < 0)
1906 		return ret;
1907 	hw->speed = speed;
1908 	hw->duplex = DUPLEX_UNKNOWN;
1909 
1910 	/* Allocate memory for storing MAC addresses */
1911 	eth_dev->data->mac_addrs = rte_zmalloc("virtio",
1912 				VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN, 0);
1913 	if (eth_dev->data->mac_addrs == NULL) {
1914 		PMD_INIT_LOG(ERR,
1915 			"Failed to allocate %d bytes needed to store MAC addresses",
1916 			VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN);
1917 		return -ENOMEM;
1918 	}
1919 
1920 	rte_spinlock_init(&hw->state_lock);
1921 
1922 	/* reset device and negotiate default features */
1923 	ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
1924 	if (ret < 0)
1925 		goto err_virtio_init;
1926 
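	/* The "vectorized" devarg only requests the vectorized datapaths;
	 * virtio_dev_configure() may still fall back to the scalar paths if
	 * the runtime requirements are not met.
	 */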
1927 	if (vectorized) {
1928 		if (!virtio_with_packed_queue(hw)) {
1929 			hw->use_vec_rx = 1;
1930 		} else {
1931 #if defined(CC_AVX512_SUPPORT) || defined(RTE_ARCH_ARM)
1932 			hw->use_vec_rx = 1;
1933 			hw->use_vec_tx = 1;
1934 #else
1935 			PMD_DRV_LOG(INFO,
1936 				"build environment does not support packed ring vectorized path");
1937 #endif
1938 		}
1939 	}
1940 
1941 	hw->opened = 1;
1942 
1943 	return 0;
1944 
1945 err_virtio_init:
1946 	rte_free(eth_dev->data->mac_addrs);
1947 	eth_dev->data->mac_addrs = NULL;
1948 	return ret;
1949 }
1950 
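/* Map a link speed in Mbps to the matching ETH_LINK_SPEED_* capability flag.
 * Unsupported values map to 0, which link_speed_handler() treats as invalid.
 */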
1951 static uint32_t
1952 virtio_dev_speed_capa_get(uint32_t speed)
1953 {
1954 	switch (speed) {
1955 	case ETH_SPEED_NUM_10G:
1956 		return ETH_LINK_SPEED_10G;
1957 	case ETH_SPEED_NUM_20G:
1958 		return ETH_LINK_SPEED_20G;
1959 	case ETH_SPEED_NUM_25G:
1960 		return ETH_LINK_SPEED_25G;
1961 	case ETH_SPEED_NUM_40G:
1962 		return ETH_LINK_SPEED_40G;
1963 	case ETH_SPEED_NUM_50G:
1964 		return ETH_LINK_SPEED_50G;
1965 	case ETH_SPEED_NUM_56G:
1966 		return ETH_LINK_SPEED_56G;
1967 	case ETH_SPEED_NUM_100G:
1968 		return ETH_LINK_SPEED_100G;
1969 	case ETH_SPEED_NUM_200G:
1970 		return ETH_LINK_SPEED_200G;
1971 	default:
1972 		return 0;
1973 	}
1974 }
1975 
1976 static int vectorized_check_handler(__rte_unused const char *key,
1977 		const char *value, void *ret_val)
1978 {
1979 	if (strcmp(value, "1") == 0)
1980 		*(int *)ret_val = 1;
1981 	else
1982 		*(int *)ret_val = 0;
1983 
1984 	return 0;
1985 }
1986 
1987 #define VIRTIO_ARG_SPEED      "speed"
1988 #define VIRTIO_ARG_VECTORIZED "vectorized"
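/*
 * Example of passing these devargs on the EAL command line (the PCI address
 * below is only illustrative):
 *   -a 0000:00:04.0,speed=10000,vectorized=1
 */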
1989 
1990 static int
1991 link_speed_handler(const char *key __rte_unused,
1992 		const char *value, void *ret_val)
1993 {
1994 	uint32_t val;
1995 	if (!value || !ret_val)
1996 		return -EINVAL;
1997 	val = strtoul(value, NULL, 0);
1998 	/* validate input */
1999 	if (virtio_dev_speed_capa_get(val) == 0)
2000 		return -EINVAL;
2001 	*(uint32_t *)ret_val = val;
2002 
2003 	return 0;
2004 }
2006 
2007 static int
2008 virtio_dev_devargs_parse(struct rte_devargs *devargs, uint32_t *speed, int *vectorized)
2009 {
2010 	struct rte_kvargs *kvlist;
2011 	int ret = 0;
2012 
2013 	if (devargs == NULL)
2014 		return 0;
2015 
2016 	kvlist = rte_kvargs_parse(devargs->args, NULL);
2017 	if (kvlist == NULL) {
2018 		PMD_INIT_LOG(ERR, "Failed to parse device arguments");
2019 		return 0;
2020 	}
2021 
2022 	if (speed && rte_kvargs_count(kvlist, VIRTIO_ARG_SPEED) == 1) {
2023 		ret = rte_kvargs_process(kvlist,
2024 					VIRTIO_ARG_SPEED,
2025 					link_speed_handler, speed);
2026 		if (ret < 0) {
2027 			PMD_INIT_LOG(ERR, "Failed to parse %s",
2028 					VIRTIO_ARG_SPEED);
2029 			goto exit;
2030 		}
2031 	}
2032 
2033 	if (vectorized &&
2034 		rte_kvargs_count(kvlist, VIRTIO_ARG_VECTORIZED) == 1) {
2035 		ret = rte_kvargs_process(kvlist,
2036 				VIRTIO_ARG_VECTORIZED,
2037 				vectorized_check_handler, vectorized);
2038 		if (ret < 0) {
2039 			PMD_INIT_LOG(ERR, "Failed to parse %s",
2040 					VIRTIO_ARG_VECTORIZED);
2041 			goto exit;
2042 		}
2043 	}
2044 
2045 exit:
2046 	rte_kvargs_free(kvlist);
2047 	return ret;
2048 }
2049 
2050 static uint8_t
2051 rx_offload_enabled(struct virtio_hw *hw)
2052 {
2053 	return virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
2054 		virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2055 		virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
2056 }
2057 
2058 static uint8_t
2059 tx_offload_enabled(struct virtio_hw *hw)
2060 {
2061 	return virtio_with_feature(hw, VIRTIO_NET_F_CSUM) ||
2062 		virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
2063 		virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
2064 }
2065 
2066 /*
2067  * Configure the virtio device.
2068  * It returns 0 on success.
2069  */
2070 static int
2071 virtio_dev_configure(struct rte_eth_dev *dev)
2072 {
2073 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2074 	const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
2075 	struct virtio_hw *hw = dev->data->dev_private;
2076 	uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
2077 		hw->vtnet_hdr_size;
2078 	uint64_t rx_offloads = rxmode->offloads;
2079 	uint64_t tx_offloads = txmode->offloads;
2080 	uint64_t req_features;
2081 	int ret;
2082 
2083 	PMD_INIT_LOG(DEBUG, "configure");
2084 	req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
2085 
2086 	if (rxmode->mq_mode != ETH_MQ_RX_NONE) {
2087 		PMD_DRV_LOG(ERR,
2088 			"Unsupported Rx multi queue mode %d",
2089 			rxmode->mq_mode);
2090 		return -EINVAL;
2091 	}
2092 
2093 	if (txmode->mq_mode != ETH_MQ_TX_NONE) {
2094 		PMD_DRV_LOG(ERR,
2095 			"Unsupported Tx multi queue mode %d",
2096 			txmode->mq_mode);
2097 		return -EINVAL;
2098 	}
2099 
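	/* Rx queue interrupts were not requested when the device was first
	 * initialized at probe time, so reinitialize it here so that
	 * virtio_configure_intr() can allocate and bind the queue vectors.
	 */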
2100 	if (dev->data->dev_conf.intr_conf.rxq) {
2101 		ret = virtio_init_device(dev, hw->req_guest_features);
2102 		if (ret < 0)
2103 			return ret;
2104 	}
2105 
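	/* Do not negotiate VIRTIO_NET_F_MTU if the requested max Rx packet
	 * length exceeds what the device-advertised maximum MTU would allow.
	 */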
2106 	if ((rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) &&
2107 	    (rxmode->max_rx_pkt_len > hw->max_mtu + ether_hdr_len))
2108 		req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
2109 
2110 	if (rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME)
2111 		hw->max_rx_pkt_len = rxmode->max_rx_pkt_len;
2112 	else
2113 		hw->max_rx_pkt_len = ether_hdr_len + dev->data->mtu;
2114 
2115 	if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2116 			   DEV_RX_OFFLOAD_TCP_CKSUM))
2117 		req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
2118 
2119 	if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO)
2120 		req_features |=
2121 			(1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2122 			(1ULL << VIRTIO_NET_F_GUEST_TSO6);
2123 
2124 	if (tx_offloads & (DEV_TX_OFFLOAD_UDP_CKSUM |
2125 			   DEV_TX_OFFLOAD_TCP_CKSUM))
2126 		req_features |= (1ULL << VIRTIO_NET_F_CSUM);
2127 
2128 	if (tx_offloads & DEV_TX_OFFLOAD_TCP_TSO)
2129 		req_features |=
2130 			(1ULL << VIRTIO_NET_F_HOST_TSO4) |
2131 			(1ULL << VIRTIO_NET_F_HOST_TSO6);
2132 
2133 	/* if the requested features changed, reinitialize the device */
2134 	if (req_features != hw->req_guest_features) {
2135 		ret = virtio_init_device(dev, req_features);
2136 		if (ret < 0)
2137 			return ret;
2138 	}
2139 
2140 	if ((rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2141 			    DEV_RX_OFFLOAD_TCP_CKSUM)) &&
2142 		!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
2143 		PMD_DRV_LOG(ERR,
2144 			"rx checksum not available on this host");
2145 		return -ENOTSUP;
2146 	}
2147 
2148 	if ((rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) &&
2149 		(!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2150 		 !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
2151 		PMD_DRV_LOG(ERR,
2152 			"Large Receive Offload not available on this host");
2153 		return -ENOTSUP;
2154 	}
2155 
2156 	/* start control queue */
2157 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
2158 		virtio_dev_cq_start(dev);
2159 
2160 	if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
2161 		hw->vlan_strip = 1;
2162 
2163 	hw->rx_ol_scatter = !!(rx_offloads & DEV_RX_OFFLOAD_SCATTER);
2164 
2165 	if ((rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER) &&
2166 			!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2167 		PMD_DRV_LOG(ERR,
2168 			    "vlan filtering not available on this host");
2169 		return -ENOTSUP;
2170 	}
2171 
2172 	hw->has_tx_offload = tx_offload_enabled(hw);
2173 	hw->has_rx_offload = rx_offload_enabled(hw);
2174 
2175 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2176 		/* Enable vector (0) for Link State Interrupt */
2177 		if (VIRTIO_OPS(hw)->set_config_irq(hw, 0) ==
2178 				VIRTIO_MSI_NO_VECTOR) {
2179 			PMD_DRV_LOG(ERR, "failed to set config vector");
2180 			return -EBUSY;
2181 		}
2182 
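	/* The packed ring vectorized paths require VIRTIO_F_IN_ORDER and
	 * VIRTIO_F_VERSION_1 plus AVX512F (x86-64) or NEON (Arm) with a
	 * sufficient max SIMD bitwidth; otherwise fall back to the scalar paths.
	 */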
2183 	if (virtio_with_packed_queue(hw)) {
2184 #if defined(RTE_ARCH_X86_64) && defined(CC_AVX512_SUPPORT)
2185 		if ((hw->use_vec_rx || hw->use_vec_tx) &&
2186 		    (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
2187 		     !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2188 		     !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2189 		     rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512)) {
2190 			PMD_DRV_LOG(INFO,
2191 				"disabled packed ring vectorized path for requirements not met");
2192 			hw->use_vec_rx = 0;
2193 			hw->use_vec_tx = 0;
2194 		}
2195 #elif defined(RTE_ARCH_ARM)
2196 		if ((hw->use_vec_rx || hw->use_vec_tx) &&
2197 		    (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) ||
2198 		     !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2199 		     !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2200 		     rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)) {
2201 			PMD_DRV_LOG(INFO,
2202 				"disabled packed ring vectorized path for requirements not met");
2203 			hw->use_vec_rx = 0;
2204 			hw->use_vec_tx = 0;
2205 		}
2206 #else
2207 		hw->use_vec_rx = 0;
2208 		hw->use_vec_tx = 0;
2209 #endif
2210 
2211 		if (hw->use_vec_rx) {
2212 			if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2213 				PMD_DRV_LOG(INFO,
2214 					"disabled packed ring vectorized rx for mrg_rxbuf enabled");
2215 				hw->use_vec_rx = 0;
2216 			}
2217 
2218 			if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) {
2219 				PMD_DRV_LOG(INFO,
2220 					"disabled packed ring vectorized rx for TCP_LRO enabled");
2221 				hw->use_vec_rx = 0;
2222 			}
2223 		}
2224 	} else {
2225 		if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER)) {
2226 			hw->use_inorder_tx = 1;
2227 			hw->use_inorder_rx = 1;
2228 			hw->use_vec_rx = 0;
2229 		}
2230 
2231 		if (hw->use_vec_rx) {
2232 #if defined(RTE_ARCH_ARM)
2233 			if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
2234 				PMD_DRV_LOG(INFO,
2235 					"disabled split ring vectorized path for requirements not met");
2236 				hw->use_vec_rx = 0;
2237 			}
2238 #endif
2239 			if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2240 				PMD_DRV_LOG(INFO,
2241 					"disabled split ring vectorized rx for mrg_rxbuf enabled");
2242 				hw->use_vec_rx = 0;
2243 			}
2244 
2245 			if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2246 					   DEV_RX_OFFLOAD_TCP_CKSUM |
2247 					   DEV_RX_OFFLOAD_TCP_LRO |
2248 					   DEV_RX_OFFLOAD_VLAN_STRIP)) {
2249 				PMD_DRV_LOG(INFO,
2250 					"disabled split ring vectorized rx for offloading enabled");
2251 				hw->use_vec_rx = 0;
2252 			}
2253 
2254 			if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
2255 				PMD_DRV_LOG(INFO,
2256 					"disabled split ring vectorized rx, max SIMD bitwidth too low");
2257 				hw->use_vec_rx = 0;
2258 			}
2259 		}
2260 	}
2261 
2262 	return 0;
2263 }
2264 
2265 
2266 static int
2267 virtio_dev_start(struct rte_eth_dev *dev)
2268 {
2269 	uint16_t nb_queues, i;
2270 	struct virtqueue *vq;
2271 	struct virtio_hw *hw = dev->data->dev_private;
2272 	int ret;
2273 
2274 	/* Finish the initialization of the queues */
2275 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2276 		ret = virtio_dev_rx_queue_setup_finish(dev, i);
2277 		if (ret < 0)
2278 			return ret;
2279 	}
2280 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2281 		ret = virtio_dev_tx_queue_setup_finish(dev, i);
2282 		if (ret < 0)
2283 			return ret;
2284 	}
2285 
2286 	/* check if lsc interrupt feature is enabled */
2287 	if (dev->data->dev_conf.intr_conf.lsc) {
2288 		if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2289 			PMD_DRV_LOG(ERR, "link status not supported by host");
2290 			return -ENOTSUP;
2291 		}
2292 	}
2293 
2294 	/* Enable uio/vfio intr/eventfd mapping: although we already did that
2295 	 * at device configure time, it could have been unmapped when the
2296 	 * device was stopped.
2297 	 */
2298 	if (dev->data->dev_conf.intr_conf.lsc ||
2299 	    dev->data->dev_conf.intr_conf.rxq) {
2300 		virtio_intr_disable(dev);
2301 
2302 		/* Setup interrupt callback  */
2303 		/* Set up the interrupt callback */
2304 			rte_intr_callback_register(dev->intr_handle,
2305 						   virtio_interrupt_handler,
2306 						   dev);
2307 
2308 		if (virtio_intr_enable(dev) < 0) {
2309 			PMD_DRV_LOG(ERR, "interrupt enable failed");
2310 			return -EIO;
2311 		}
2312 	}
2313 
2314 	/* Notify the backend.
2315 	 * Otherwise the tap backend might already have stopped its queue due to
2316 	 * fullness, and the vhost backend would have no chance to be woken up.
2317 	 */
2318 	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2319 	if (hw->max_queue_pairs > 1) {
2320 		if (virtio_set_multiple_queues(dev, nb_queues) != 0)
2321 			return -EINVAL;
2322 	}
2323 
2324 	PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
2325 
2326 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2327 		vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2328 		/* Flush the old packets */
2329 		virtqueue_rxvq_flush(vq);
2330 		virtqueue_notify(vq);
2331 	}
2332 
2333 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2334 		vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2335 		virtqueue_notify(vq);
2336 	}
2337 
2338 	PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
2339 
2340 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2341 		vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2342 		VIRTQUEUE_DUMP(vq);
2343 	}
2344 
2345 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2346 		vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2347 		VIRTQUEUE_DUMP(vq);
2348 	}
2349 
2350 	set_rxtx_funcs(dev);
2351 	hw->started = 1;
2352 
2353 	/* Initialize Link state */
2354 	virtio_dev_link_update(dev, 0);
2355 
2356 	return 0;
2357 }
2358 
2359 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
2360 {
2361 	struct virtio_hw *hw = dev->data->dev_private;
2362 	uint16_t nr_vq = virtio_get_nr_vq(hw);
2363 	const char *type __rte_unused;
2364 	unsigned int i, mbuf_num = 0;
2365 	struct virtqueue *vq;
2366 	struct rte_mbuf *buf;
2367 	int queue_type;
2368 
2369 	if (hw->vqs == NULL)
2370 		return;
2371 
2372 	for (i = 0; i < nr_vq; i++) {
2373 		vq = hw->vqs[i];
2374 		if (!vq)
2375 			continue;
2376 
2377 		queue_type = virtio_get_queue_type(hw, i);
2378 		if (queue_type == VTNET_RQ)
2379 			type = "rxq";
2380 		else if (queue_type == VTNET_TQ)
2381 			type = "txq";
2382 		else
2383 			continue;
2384 
2385 		PMD_INIT_LOG(DEBUG,
2386 			"Before freeing %s[%d] used and unused buf",
2387 			type, i);
2388 		VIRTQUEUE_DUMP(vq);
2389 
2390 		while ((buf = virtqueue_detach_unused(vq)) != NULL) {
2391 			rte_pktmbuf_free(buf);
2392 			mbuf_num++;
2393 		}
2394 
2395 		PMD_INIT_LOG(DEBUG,
2396 			"After freeing %s[%d] used and unused buf",
2397 			type, i);
2398 		VIRTQUEUE_DUMP(vq);
2399 	}
2400 
2401 	PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
2402 }
2403 
2404 /*
2405  * Stop device: disable interrupt and mark link down
2406  */
2407 int
2408 virtio_dev_stop(struct rte_eth_dev *dev)
2409 {
2410 	struct virtio_hw *hw = dev->data->dev_private;
2411 	struct rte_eth_link link;
2412 	struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
2413 
2414 	PMD_INIT_LOG(DEBUG, "stop");
2415 	dev->data->dev_started = 0;
2416 
2417 	rte_spinlock_lock(&hw->state_lock);
2418 	if (!hw->started)
2419 		goto out_unlock;
2420 	hw->started = 0;
2421 
2422 	if (intr_conf->lsc || intr_conf->rxq) {
2423 		virtio_intr_disable(dev);
2424 
2425 		/* Unregister the interrupt callback */
2426 		if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
2427 			rte_intr_callback_unregister(dev->intr_handle,
2428 						     virtio_interrupt_handler,
2429 						     dev);
2430 		}
2431 	}
2432 
2433 	memset(&link, 0, sizeof(link));
2434 	rte_eth_linkstatus_set(dev, &link);
2435 out_unlock:
2436 	rte_spinlock_unlock(&hw->state_lock);
2437 
2438 	return 0;
2439 }
2440 
2441 static int
2442 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
2443 {
2444 	struct rte_eth_link link;
2445 	uint16_t status;
2446 	struct virtio_hw *hw = dev->data->dev_private;
2447 
2448 	memset(&link, 0, sizeof(link));
2449 	link.link_duplex = hw->duplex;
2450 	link.link_speed  = hw->speed;
2451 	link.link_autoneg = ETH_LINK_AUTONEG;
2452 
2453 	if (!hw->started) {
2454 		link.link_status = ETH_LINK_DOWN;
2455 		link.link_speed = ETH_SPEED_NUM_NONE;
2456 	} else if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2457 		PMD_INIT_LOG(DEBUG, "Get link status from hw");
2458 		virtio_read_dev_config(hw,
2459 				offsetof(struct virtio_net_config, status),
2460 				&status, sizeof(status));
2461 		if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2462 			link.link_status = ETH_LINK_DOWN;
2463 			link.link_speed = ETH_SPEED_NUM_NONE;
2464 			PMD_INIT_LOG(DEBUG, "Port %d is down",
2465 				     dev->data->port_id);
2466 		} else {
2467 			link.link_status = ETH_LINK_UP;
2468 			PMD_INIT_LOG(DEBUG, "Port %d is up",
2469 				     dev->data->port_id);
2470 		}
2471 	} else {
2472 		link.link_status = ETH_LINK_UP;
2473 	}
2474 
2475 	return rte_eth_linkstatus_set(dev, &link);
2476 }
2477 
2478 static int
2479 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2480 {
2481 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2482 	struct virtio_hw *hw = dev->data->dev_private;
2483 	uint64_t offloads = rxmode->offloads;
2484 
2485 	if (mask & ETH_VLAN_FILTER_MASK) {
2486 		if ((offloads & DEV_RX_OFFLOAD_VLAN_FILTER) &&
2487 				!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2488 
2489 			PMD_DRV_LOG(NOTICE,
2490 				"vlan filtering not available on this host");
2491 
2492 			return -ENOTSUP;
2493 		}
2494 	}
2495 
2496 	if (mask & ETH_VLAN_STRIP_MASK)
2497 		hw->vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
2498 
2499 	return 0;
2500 }
2501 
2502 static int
2503 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2504 {
2505 	uint64_t tso_mask, host_features;
2506 	struct virtio_hw *hw = dev->data->dev_private;
2507 	dev_info->speed_capa = virtio_dev_speed_capa_get(hw->speed);
2508 
2509 	dev_info->max_rx_queues =
2510 		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
2511 	dev_info->max_tx_queues =
2512 		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
2513 	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
2514 	dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
2515 	dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
2516 	dev_info->max_mtu = hw->max_mtu;
2517 
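	/* Advertise offload capabilities based on the features the host offers,
	 * not only on those already negotiated.
	 */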
2518 	host_features = VIRTIO_OPS(hw)->get_features(hw);
2519 	dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2520 	dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME;
2521 	if (host_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
2522 		dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_SCATTER;
2523 	if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
2524 		dev_info->rx_offload_capa |=
2525 			DEV_RX_OFFLOAD_TCP_CKSUM |
2526 			DEV_RX_OFFLOAD_UDP_CKSUM;
2527 	}
2528 	if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
2529 		dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_VLAN_FILTER;
2530 	tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2531 		(1ULL << VIRTIO_NET_F_GUEST_TSO6);
2532 	if ((host_features & tso_mask) == tso_mask)
2533 		dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;
2534 
2535 	dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
2536 				    DEV_TX_OFFLOAD_VLAN_INSERT;
2537 	if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
2538 		dev_info->tx_offload_capa |=
2539 			DEV_TX_OFFLOAD_UDP_CKSUM |
2540 			DEV_TX_OFFLOAD_TCP_CKSUM;
2541 	}
2542 	tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2543 		(1ULL << VIRTIO_NET_F_HOST_TSO6);
2544 	if ((host_features & tso_mask) == tso_mask)
2545 		dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
2546 
2547 	if (host_features & (1ULL << VIRTIO_F_RING_PACKED)) {
2548 		/*
2549 		 * According to 2.7 Packed Virtqueues,
2550 		 * 2.7.10.1 Structure Size and Alignment:
2551 		 * The Queue Size value does not have to be a power of 2.
2552 		 */
2553 		dev_info->rx_desc_lim.nb_max = UINT16_MAX;
2554 	} else {
2555 		/*
2556 		 * According to 2.6 Split Virtqueues:
2557 		 * Queue Size value is always a power of 2. The maximum Queue
2558 		 * Size value is 32768.
2559 		 */
2560 		dev_info->rx_desc_lim.nb_max = 32768;
2561 	}
2562 	/*
2563 	 * The actual minimum differs between virtqueue kinds, but to avoid
2564 	 * tangling the code with separate branches, rely on the default
2565 	 * thresholds, since the descriptor count must be at least their size.
2566 	 */
2567 	dev_info->rx_desc_lim.nb_min = RTE_MAX(DEFAULT_RX_FREE_THRESH,
2568 					       RTE_VIRTIO_VPMD_RX_REARM_THRESH);
2569 	dev_info->rx_desc_lim.nb_align = 1;
2570 
2571 	return 0;
2572 }
2573 
2574 /*
2575  * It lets applications such as testpmd collect per-queue stats; a no-op for virtio.
2576  */
2577 static int
2578 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
2579 		__rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
2580 		__rte_unused uint8_t is_rx)
2581 {
2582 	return 0;
2583 }
2584 
2585 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_init, init, NOTICE);
2586 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_driver, driver, NOTICE);
2587