/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>

#include <ethdev_driver.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_arp.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_cpuflags.h>
#include <rte_vect.h>
#include <rte_memory.h>
#include <rte_eal_paging.h>
#include <rte_eal.h>
#include <rte_dev.h>
#include <rte_cycles.h>
#include <rte_kvargs.h>

#include "virtio_ethdev.h"
#include "virtio.h"
#include "virtio_logs.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"
#include "virtio_rxtx_simple.h"
#include "virtio_user/virtio_user_dev.h"

static int virtio_dev_configure(struct rte_eth_dev *dev);
static int virtio_dev_start(struct rte_eth_dev *dev);
static int virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
static int virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
static int virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
static int virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
static uint32_t virtio_dev_speed_capa_get(uint32_t speed);
static int virtio_dev_devargs_parse(struct rte_devargs *devargs,
	uint32_t *speed,
	int *vectorized);
static int virtio_dev_info_get(struct rte_eth_dev *dev,
				struct rte_eth_dev_info *dev_info);
static int virtio_dev_link_update(struct rte_eth_dev *dev,
	int wait_to_complete);
static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static int virtio_dev_rss_hash_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf);
static int virtio_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf);
static int virtio_dev_rss_reta_update(struct rte_eth_dev *dev,
			 struct rte_eth_rss_reta_entry64 *reta_conf,
			 uint16_t reta_size);
static int virtio_dev_rss_reta_query(struct rte_eth_dev *dev,
			 struct rte_eth_rss_reta_entry64 *reta_conf,
			 uint16_t reta_size);

static void virtio_set_hwaddr(struct virtio_hw *hw);
static void virtio_get_hwaddr(struct virtio_hw *hw);

static int virtio_dev_stats_get(struct rte_eth_dev *dev,
				 struct rte_eth_stats *stats);
static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
				 struct rte_eth_xstat *xstats, unsigned n);
static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
				       struct rte_eth_xstat_name *xstats_names,
				       unsigned limit);
static int virtio_dev_stats_reset(struct rte_eth_dev *dev);
static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
				uint16_t vlan_id, int on);
static int virtio_mac_addr_add(struct rte_eth_dev *dev,
				struct rte_ether_addr *mac_addr,
				uint32_t index, uint32_t vmdq);
static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
static int virtio_mac_addr_set(struct rte_eth_dev *dev,
				struct rte_ether_addr *mac_addr);

static int virtio_intr_disable(struct rte_eth_dev *dev);
static int virtio_get_monitor_addr(void *rx_queue,
				struct rte_power_monitor_cond *pmc);

static int virtio_dev_queue_stats_mapping_set(
	struct rte_eth_dev *eth_dev,
	uint16_t queue_id,
	uint8_t stat_idx,
	uint8_t is_rx);

static void virtio_notify_peers(struct rte_eth_dev *dev);
static void virtio_ack_link_announce(struct rte_eth_dev *dev);

struct rte_virtio_xstats_name_off {
	char name[RTE_ETH_XSTATS_NAME_SIZE];
	unsigned offset;
};

/* [rt]x_qX_ is prepended to the name string here */
static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
	{"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
	{"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
	{"errors",                 offsetof(struct virtnet_rx, stats.errors)},
	{"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
	{"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
	{"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
	{"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
	{"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
	{"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
	{"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
	{"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
	{"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
	{"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
};

/* [rt]x_qX_ is prepended to the name string here */
static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
	{"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
	{"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
	{"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
	{"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
	{"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
	{"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
	{"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
	{"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
	{"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
	{"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
	{"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
	{"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
};

#define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
			    sizeof(rte_virtio_rxq_stat_strings[0]))
#define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
			    sizeof(rte_virtio_txq_stat_strings[0]))

struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];

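/*
 * Post a control command on a packed control virtqueue: one descriptor
 * for the header, one per data argument, and one device-writable
 * descriptor for the ACK status. The head descriptor's flags are stored
 * last so the device sees a complete chain, then the function busy-waits
 * (with usleep) until the device marks the head descriptor as used.
 */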
static struct virtio_pmd_ctrl *
virtio_send_command_packed(struct virtnet_ctl *cvq,
			   struct virtio_pmd_ctrl *ctrl,
			   int *dlen, int pkt_num)
{
	struct virtqueue *vq = virtnet_cq_to_vq(cvq);
	int head;
	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
	struct virtio_pmd_ctrl *result;
	uint16_t flags;
	int sum = 0;
	int nb_descs = 0;
	int k;

	/*
	 * Format is enforced in qemu code:
	 * one descriptor for the header,
	 * at least one descriptor per data argument,
	 * and one device-writable descriptor for the ACK status.
	 */
	head = vq->vq_avail_idx;
	flags = vq->vq_packed.cached_flags;
	desc[head].addr = cvq->virtio_net_hdr_mem;
	desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
	vq->vq_free_cnt--;
	nb_descs++;
	if (++vq->vq_avail_idx >= vq->vq_nentries) {
		vq->vq_avail_idx -= vq->vq_nentries;
		vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
	}

	for (k = 0; k < pkt_num; k++) {
		desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
			+ sizeof(struct virtio_net_ctrl_hdr)
			+ sizeof(ctrl->status) + sizeof(uint8_t) * sum;
		desc[vq->vq_avail_idx].len = dlen[k];
		desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
			vq->vq_packed.cached_flags;
		sum += dlen[k];
		vq->vq_free_cnt--;
		nb_descs++;
		if (++vq->vq_avail_idx >= vq->vq_nentries) {
			vq->vq_avail_idx -= vq->vq_nentries;
			vq->vq_packed.cached_flags ^=
				VRING_PACKED_DESC_F_AVAIL_USED;
		}
	}

	desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
		+ sizeof(struct virtio_net_ctrl_hdr);
	desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
	desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
		vq->vq_packed.cached_flags;
	vq->vq_free_cnt--;
	nb_descs++;
	if (++vq->vq_avail_idx >= vq->vq_nentries) {
		vq->vq_avail_idx -= vq->vq_nentries;
		vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
	}

	virtqueue_store_flags_packed(&desc[head], VRING_DESC_F_NEXT | flags,
			vq->hw->weak_barriers);

	virtio_wmb(vq->hw->weak_barriers);
	virtqueue_notify(vq);

	/* wait for used desc in virtqueue
	 * desc_is_used has a load-acquire or rte_io_rmb inside
	 */
	while (!desc_is_used(&desc[head], vq))
		usleep(100);

	/* now get used descriptors */
	vq->vq_free_cnt += nb_descs;
	vq->vq_used_cons_idx += nb_descs;
	if (vq->vq_used_cons_idx >= vq->vq_nentries) {
		vq->vq_used_cons_idx -= vq->vq_nentries;
		vq->vq_packed.used_wrap_counter ^= 1;
	}

	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
			"vq->vq_avail_idx=%d\n"
			"vq->vq_used_cons_idx=%d\n"
			"vq->vq_packed.cached_flags=0x%x\n"
			"vq->vq_packed.used_wrap_counter=%d",
			vq->vq_free_cnt,
			vq->vq_avail_idx,
			vq->vq_used_cons_idx,
			vq->vq_packed.cached_flags,
			vq->vq_packed.used_wrap_counter);

	result = cvq->virtio_net_hdr_mz->addr;
	return result;
}

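/*
 * Split-ring variant of the control command path: the same
 * header/arguments/ACK descriptor chain, but linked through the split
 * ring's next pointers and reclaimed by walking the used ring.
 */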
static struct virtio_pmd_ctrl *
virtio_send_command_split(struct virtnet_ctl *cvq,
			  struct virtio_pmd_ctrl *ctrl,
			  int *dlen, int pkt_num)
{
	struct virtio_pmd_ctrl *result;
	struct virtqueue *vq = virtnet_cq_to_vq(cvq);
	uint32_t head, i;
	int k, sum = 0;

	head = vq->vq_desc_head_idx;

	/*
	 * Format is enforced in qemu code:
	 * one descriptor for the header,
	 * at least one descriptor per data argument,
	 * and one device-writable descriptor for the ACK status.
	 */
	vq->vq_split.ring.desc[head].flags = VRING_DESC_F_NEXT;
	vq->vq_split.ring.desc[head].addr = cvq->virtio_net_hdr_mem;
	vq->vq_split.ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
	vq->vq_free_cnt--;
	i = vq->vq_split.ring.desc[head].next;

	for (k = 0; k < pkt_num; k++) {
		vq->vq_split.ring.desc[i].flags = VRING_DESC_F_NEXT;
		vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
			+ sizeof(struct virtio_net_ctrl_hdr)
			+ sizeof(ctrl->status) + sizeof(uint8_t) * sum;
		vq->vq_split.ring.desc[i].len = dlen[k];
		sum += dlen[k];
		vq->vq_free_cnt--;
		i = vq->vq_split.ring.desc[i].next;
	}

	vq->vq_split.ring.desc[i].flags = VRING_DESC_F_WRITE;
	vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
			+ sizeof(struct virtio_net_ctrl_hdr);
	vq->vq_split.ring.desc[i].len = sizeof(ctrl->status);
	vq->vq_free_cnt--;

	vq->vq_desc_head_idx = vq->vq_split.ring.desc[i].next;

	vq_update_avail_ring(vq, head);
	vq_update_avail_idx(vq);

	PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);

	virtqueue_notify(vq);

	while (virtqueue_nused(vq) == 0)
		usleep(100);

	while (virtqueue_nused(vq)) {
		uint32_t idx, desc_idx, used_idx;
		struct vring_used_elem *uep;

		used_idx = (uint32_t)(vq->vq_used_cons_idx
				& (vq->vq_nentries - 1));
		uep = &vq->vq_split.ring.used->ring[used_idx];
		idx = (uint32_t) uep->id;
		desc_idx = idx;

		while (vq->vq_split.ring.desc[desc_idx].flags &
				VRING_DESC_F_NEXT) {
			desc_idx = vq->vq_split.ring.desc[desc_idx].next;
			vq->vq_free_cnt++;
		}

		vq->vq_split.ring.desc[desc_idx].next = vq->vq_desc_head_idx;
		vq->vq_desc_head_idx = idx;

		vq->vq_used_cons_idx++;
		vq->vq_free_cnt++;
	}

	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
			vq->vq_free_cnt, vq->vq_desc_head_idx);

	result = cvq->virtio_net_hdr_mz->addr;
	return result;
}

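/*
 * Generic entry point for control virtqueue commands. Copies the command
 * into the control queue's memzone, dispatches to the packed or split
 * implementation, and returns the ACK status written by the device.
 *
 * Illustrative sketch of a caller (not part of the driver; see
 * virtio_set_multiple_queues_auto() below for a real one):
 *
 *	struct virtio_pmd_ctrl ctrl;
 *	int dlen = sizeof(uint16_t);
 *
 *	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
 *	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
 *	memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
 *	if (virtio_send_command(hw->cvq, &ctrl, &dlen, 1) != 0)
 *		... handle the error ...
 */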
static int
virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
		    int *dlen, int pkt_num)
{
	virtio_net_ctrl_ack status = ~0;
	struct virtio_pmd_ctrl *result;
	struct virtqueue *vq;

	ctrl->status = status;

	if (!cvq) {
		PMD_INIT_LOG(ERR, "Control queue is not supported.");
		return -1;
	}

	rte_spinlock_lock(&cvq->lock);
	vq = virtnet_cq_to_vq(cvq);

	PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
		"vq->hw->cvq = %p vq = %p",
		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);

	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
		rte_spinlock_unlock(&cvq->lock);
		return -1;
	}

	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
		sizeof(struct virtio_pmd_ctrl));

	if (virtio_with_packed_queue(vq->hw))
		result = virtio_send_command_packed(cvq, ctrl, dlen, pkt_num);
	else
		result = virtio_send_command_split(cvq, ctrl, dlen, pkt_num);

	rte_spinlock_unlock(&cvq->lock);
	return result->status;
}

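/*
 * Program RSS through VIRTIO_NET_CTRL_MQ_RSS_CONFIG: the hash types,
 * indirection table, number of Tx virtqueues and hash key are packed
 * into a struct virtio_net_ctrl_rss and sent as a single argument.
 */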
static int
virtio_set_multiple_queues_rss(struct rte_eth_dev *dev, uint16_t nb_queues)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtio_pmd_ctrl ctrl;
	struct virtio_net_ctrl_rss rss;
	int dlen, ret;

	rss.hash_types = hw->rss_hash_types & VIRTIO_NET_HASH_TYPE_MASK;
	RTE_BUILD_BUG_ON(!RTE_IS_POWER_OF_2(VIRTIO_NET_RSS_RETA_SIZE));
	rss.indirection_table_mask = VIRTIO_NET_RSS_RETA_SIZE - 1;
	rss.unclassified_queue = 0;
	memcpy(rss.indirection_table, hw->rss_reta, VIRTIO_NET_RSS_RETA_SIZE * sizeof(uint16_t));
	rss.max_tx_vq = nb_queues;
	rss.hash_key_length = VIRTIO_NET_RSS_KEY_SIZE;
	memcpy(rss.hash_key_data, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);

	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_RSS_CONFIG;
	memcpy(ctrl.data, &rss, sizeof(rss));

	dlen = sizeof(rss);

	ret = virtio_send_command(hw->cvq, &ctrl, &dlen, 1);
	if (ret) {
		PMD_INIT_LOG(ERR, "RSS multiqueue configured but send command failed");
		return -EINVAL;
	}

	return 0;
}

static int
virtio_set_multiple_queues_auto(struct rte_eth_dev *dev, uint16_t nb_queues)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtio_pmd_ctrl ctrl;
	int dlen;
	int ret;

	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
	memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));

	dlen = sizeof(uint16_t);

	ret = virtio_send_command(hw->cvq, &ctrl, &dlen, 1);
	if (ret) {
		PMD_INIT_LOG(ERR, "Multiqueue configured but send command failed");
		return -EINVAL;
	}

	return 0;
}

static int
virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
{
	struct virtio_hw *hw = dev->data->dev_private;

	if (virtio_with_feature(hw, VIRTIO_NET_F_RSS))
		return virtio_set_multiple_queues_rss(dev, nb_queues);
	else
		return virtio_set_multiple_queues_auto(dev, nb_queues);
}

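/*
 * Total number of virtqueues: two per queue pair (Rx and Tx), plus one
 * control virtqueue when VIRTIO_NET_F_CTRL_VQ is negotiated.
 */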
static uint16_t
virtio_get_nr_vq(struct virtio_hw *hw)
{
	uint16_t nr_vq = hw->max_queue_pairs * 2;

	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
		nr_vq += 1;

	return nr_vq;
}

static void
virtio_init_vring(struct virtqueue *vq)
{
	int size = vq->vq_nentries;
	uint8_t *ring_mem = vq->vq_ring_virt_mem;

	PMD_INIT_FUNC_TRACE();

	memset(ring_mem, 0, vq->vq_ring_size);

	vq->vq_used_cons_idx = 0;
	vq->vq_desc_head_idx = 0;
	vq->vq_avail_idx = 0;
	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
	vq->vq_free_cnt = vq->vq_nentries;
	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
	if (virtio_with_packed_queue(vq->hw)) {
		vring_init_packed(&vq->vq_packed.ring, ring_mem,
				  VIRTIO_VRING_ALIGN, size);
		vring_desc_init_packed(vq, size);
	} else {
		struct vring *vr = &vq->vq_split.ring;

		vring_init_split(vr, ring_mem, VIRTIO_VRING_ALIGN, size);
		vring_desc_init_split(vr->desc, size);
	}
	/*
	 * Disable the device (host) from interrupting the guest
	 */
	virtqueue_disable_intr(vq);
}

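/*
 * Allocate and initialize one virtqueue: read the queue size from the
 * device, reserve IOVA-contiguous memzones for the vring (and, for Tx
 * and control queues, for the virtio-net headers), then hand the queue
 * to the transport-specific setup_queue() callback.
 */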
static int
virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
{
	char vq_name[VIRTQUEUE_MAX_NAME_SZ];
	char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
	const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
	unsigned int vq_size, size;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtnet_rx *rxvq = NULL;
	struct virtnet_tx *txvq = NULL;
	struct virtnet_ctl *cvq = NULL;
	struct virtqueue *vq;
	size_t sz_hdr_mz = 0;
	void *sw_ring = NULL;
	int queue_type = virtio_get_queue_type(hw, queue_idx);
	int ret;
	int numa_node = dev->device->numa_node;
	struct rte_mbuf *fake_mbuf = NULL;

	PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
			queue_idx, numa_node);

	/*
	 * Read the virtqueue size from the Queue Size field.
	 * It is always a power of 2; if it is 0, the virtqueue does not exist.
	 */
	vq_size = VIRTIO_OPS(hw)->get_queue_num(hw, queue_idx);
	PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
	if (vq_size == 0) {
		PMD_INIT_LOG(ERR, "virtqueue does not exist");
		return -EINVAL;
	}

	if (!virtio_with_packed_queue(hw) && !rte_is_power_of_2(vq_size)) {
		PMD_INIT_LOG(ERR, "split virtqueue size is not power of 2");
		return -EINVAL;
	}

	snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
		 dev->data->port_id, queue_idx);

	size = RTE_ALIGN_CEIL(sizeof(*vq) +
				vq_size * sizeof(struct vq_desc_extra),
				RTE_CACHE_LINE_SIZE);
	if (queue_type == VTNET_TQ) {
		/*
		 * For each xmit packet, allocate a virtio_net_hdr
		 * and indirect ring elements
		 */
		sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
	} else if (queue_type == VTNET_CQ) {
		/* Allocate a page for control vq command, data and status */
		sz_hdr_mz = rte_mem_page_size();
	}

	vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
				numa_node);
	if (vq == NULL) {
		PMD_INIT_LOG(ERR, "cannot allocate vq");
		return -ENOMEM;
	}
	hw->vqs[queue_idx] = vq;

	vq->hw = hw;
	vq->vq_queue_index = queue_idx;
	vq->vq_nentries = vq_size;
	if (virtio_with_packed_queue(hw)) {
		vq->vq_packed.used_wrap_counter = 1;
		vq->vq_packed.cached_flags = VRING_PACKED_DESC_F_AVAIL;
		vq->vq_packed.event_flags_shadow = 0;
		if (queue_type == VTNET_RQ)
			vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE;
	}

	/*
	 * Reserve a memzone for vring elements
	 */
	size = vring_size(hw, vq_size, VIRTIO_VRING_ALIGN);
	vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_VRING_ALIGN);
	PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
		     size, vq->vq_ring_size);

	mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
			numa_node, RTE_MEMZONE_IOVA_CONTIG,
			VIRTIO_VRING_ALIGN);
	if (mz == NULL) {
		if (rte_errno == EEXIST)
			mz = rte_memzone_lookup(vq_name);
		if (mz == NULL) {
			ret = -ENOMEM;
			goto free_vq;
		}
	}

	memset(mz->addr, 0, mz->len);

	if (hw->use_va)
		vq->vq_ring_mem = (uintptr_t)mz->addr;
	else
		vq->vq_ring_mem = mz->iova;

	vq->vq_ring_virt_mem = mz->addr;
	PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64, vq->vq_ring_mem);
	PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: %p", vq->vq_ring_virt_mem);

	virtio_init_vring(vq);

	if (sz_hdr_mz) {
		snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
			 dev->data->port_id, queue_idx);
		hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
				numa_node, RTE_MEMZONE_IOVA_CONTIG,
				RTE_CACHE_LINE_SIZE);
		if (hdr_mz == NULL) {
			if (rte_errno == EEXIST)
				hdr_mz = rte_memzone_lookup(vq_hdr_name);
			if (hdr_mz == NULL) {
				ret = -ENOMEM;
				goto free_mz;
			}
		}
	}

	if (queue_type == VTNET_RQ) {
		size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
			       sizeof(vq->sw_ring[0]);

		sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
				RTE_CACHE_LINE_SIZE, numa_node);
		if (!sw_ring) {
			PMD_INIT_LOG(ERR, "cannot allocate RX soft ring");
			ret = -ENOMEM;
			goto free_hdr_mz;
		}

		fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf),
				RTE_CACHE_LINE_SIZE, numa_node);
		if (!fake_mbuf) {
			PMD_INIT_LOG(ERR, "cannot allocate fake mbuf");
			ret = -ENOMEM;
			goto free_sw_ring;
		}

		vq->sw_ring = sw_ring;
		rxvq = &vq->rxq;
		rxvq->port_id = dev->data->port_id;
		rxvq->mz = mz;
		rxvq->fake_mbuf = fake_mbuf;
	} else if (queue_type == VTNET_TQ) {
		txvq = &vq->txq;
		txvq->port_id = dev->data->port_id;
		txvq->mz = mz;
		txvq->virtio_net_hdr_mz = hdr_mz;
		if (hw->use_va)
			txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
		else
			txvq->virtio_net_hdr_mem = hdr_mz->iova;
	} else if (queue_type == VTNET_CQ) {
		cvq = &vq->cq;
		cvq->mz = mz;
		cvq->virtio_net_hdr_mz = hdr_mz;
		if (hw->use_va)
			cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
		else
			cvq->virtio_net_hdr_mem = hdr_mz->iova;
		memset(cvq->virtio_net_hdr_mz->addr, 0, rte_mem_page_size());

		hw->cvq = cvq;
	}

	if (hw->use_va)
		vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_addr);
	else
		vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_iova);

	if (queue_type == VTNET_TQ) {
		struct virtio_tx_region *txr;
		unsigned int i;

		txr = hdr_mz->addr;
		memset(txr, 0, vq_size * sizeof(*txr));
		for (i = 0; i < vq_size; i++) {
			/* first indirect descriptor is always the tx header */
			if (!virtio_with_packed_queue(hw)) {
				struct vring_desc *start_dp = txr[i].tx_indir;
				vring_desc_init_split(start_dp,
						      RTE_DIM(txr[i].tx_indir));
				start_dp->addr = txvq->virtio_net_hdr_mem
					+ i * sizeof(*txr)
					+ offsetof(struct virtio_tx_region,
						   tx_hdr);
				start_dp->len = hw->vtnet_hdr_size;
				start_dp->flags = VRING_DESC_F_NEXT;
			} else {
				struct vring_packed_desc *start_dp =
					txr[i].tx_packed_indir;
				vring_desc_init_indirect_packed(start_dp,
				      RTE_DIM(txr[i].tx_packed_indir));
				start_dp->addr = txvq->virtio_net_hdr_mem
					+ i * sizeof(*txr)
					+ offsetof(struct virtio_tx_region,
						   tx_hdr);
				start_dp->len = hw->vtnet_hdr_size;
			}
		}
	}

	if (VIRTIO_OPS(hw)->setup_queue(hw, vq) < 0) {
		PMD_INIT_LOG(ERR, "setup_queue failed");
		ret = -EINVAL;
		goto clean_vq;
	}

	return 0;

clean_vq:
	hw->cvq = NULL;
	rte_free(fake_mbuf);
free_sw_ring:
	rte_free(sw_ring);
free_hdr_mz:
	rte_memzone_free(hdr_mz);
free_mz:
	rte_memzone_free(mz);
free_vq:
	rte_free(vq);
	hw->vqs[queue_idx] = NULL;

	return ret;
}

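/*
 * Release all virtqueues: per-queue memzones, the Rx soft ring and fake
 * mbuf, the virtqueue structures themselves, and finally the hw->vqs
 * array.
 */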
static void
virtio_free_queues(struct virtio_hw *hw)
{
	uint16_t nr_vq = virtio_get_nr_vq(hw);
	struct virtqueue *vq;
	int queue_type;
	uint16_t i;

	if (hw->vqs == NULL)
		return;

	for (i = 0; i < nr_vq; i++) {
		vq = hw->vqs[i];
		if (!vq)
			continue;

		queue_type = virtio_get_queue_type(hw, i);
		if (queue_type == VTNET_RQ) {
			rte_free(vq->rxq.fake_mbuf);
			rte_free(vq->sw_ring);
			rte_memzone_free(vq->rxq.mz);
		} else if (queue_type == VTNET_TQ) {
			rte_memzone_free(vq->txq.mz);
			rte_memzone_free(vq->txq.virtio_net_hdr_mz);
		} else {
			rte_memzone_free(vq->cq.mz);
			rte_memzone_free(vq->cq.virtio_net_hdr_mz);
		}

		rte_free(vq);
		hw->vqs[i] = NULL;
	}

	rte_free(hw->vqs);
	hw->vqs = NULL;
}

static int
virtio_alloc_queues(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;
	uint16_t nr_vq = virtio_get_nr_vq(hw);
	uint16_t i;
	int ret;

	hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
	if (!hw->vqs) {
		PMD_INIT_LOG(ERR, "failed to allocate vqs");
		return -ENOMEM;
	}

	for (i = 0; i < nr_vq; i++) {
		ret = virtio_init_queue(dev, i);
		if (ret < 0) {
			virtio_free_queues(hw);
			return ret;
		}
	}

	return 0;
}

static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);

static void
virtio_free_rss(struct virtio_hw *hw)
{
	rte_free(hw->rss_key);
	hw->rss_key = NULL;

	rte_free(hw->rss_reta);
	hw->rss_reta = NULL;
}

int
virtio_dev_close(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct rte_eth_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;

	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	if (!hw->opened)
		return 0;
	hw->opened = 0;

	/* reset the NIC */
	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		VIRTIO_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
	if (intr_conf->rxq)
		virtio_queues_unbind_intr(dev);

	if (intr_conf->lsc || intr_conf->rxq) {
		virtio_intr_disable(dev);
		rte_intr_efd_disable(dev->intr_handle);
		rte_intr_vec_list_free(dev->intr_handle);
	}

	virtio_reset(hw);
	virtio_dev_free_mbufs(dev);
	virtio_free_queues(hw);
	virtio_free_rss(hw);

	return VIRTIO_OPS(hw)->dev_close(hw);
}

static int
virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtio_pmd_ctrl ctrl;
	int dlen[1];
	int ret;

	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
		PMD_INIT_LOG(INFO, "host does not support rx control");
		return -ENOTSUP;
	}

	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
	ctrl.data[0] = 1;
	dlen[0] = 1;

	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to enable promisc");
		return -EAGAIN;
	}

	return 0;
}

static int
virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtio_pmd_ctrl ctrl;
	int dlen[1];
	int ret;

	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
		PMD_INIT_LOG(INFO, "host does not support rx control");
		return -ENOTSUP;
	}

	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
	ctrl.data[0] = 0;
	dlen[0] = 1;

	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to disable promisc");
		return -EAGAIN;
	}

	return 0;
}

static int
virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtio_pmd_ctrl ctrl;
	int dlen[1];
	int ret;

	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
		PMD_INIT_LOG(INFO, "host does not support rx control");
		return -ENOTSUP;
	}

	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
	ctrl.data[0] = 1;
	dlen[0] = 1;

	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
		return -EAGAIN;
	}

	return 0;
}

static int
virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtio_pmd_ctrl ctrl;
	int dlen[1];
	int ret;

	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
		PMD_INIT_LOG(INFO, "host does not support rx control");
		return -ENOTSUP;
	}

	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
	ctrl.data[0] = 0;
	dlen[0] = 1;

	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
		return -EAGAIN;
	}

	return 0;
}

uint16_t
virtio_rx_mem_pool_buf_size(struct rte_mempool *mp)
{
	return rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
}

bool
virtio_rx_check_scatter(uint16_t max_rx_pkt_len, uint16_t rx_buf_size,
			bool rx_scatter_enabled, const char **error)
{
	if (!rx_scatter_enabled && max_rx_pkt_len > rx_buf_size) {
		*error = "Rx scatter is disabled and RxQ mbuf pool object size is too small";
		return false;
	}

	return true;
}

static bool
virtio_check_scatter_on_all_rx_queues(struct rte_eth_dev *dev,
				      uint16_t frame_size)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtnet_rx *rxvq;
	struct virtqueue *vq;
	unsigned int qidx;
	uint16_t buf_size;
	const char *error;

	if (hw->vqs == NULL)
		return true;

	for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) {
		vq = hw->vqs[2 * qidx + VTNET_SQ_RQ_QUEUE_IDX];
		if (vq == NULL)
			continue;

		rxvq = &vq->rxq;
		if (rxvq->mpool == NULL)
			continue;
		buf_size = virtio_rx_mem_pool_buf_size(rxvq->mpool);

		if (!virtio_rx_check_scatter(frame_size, buf_size,
					     hw->rx_ol_scatter, &error)) {
			PMD_INIT_LOG(ERR, "MTU check for RxQ %u failed: %s",
				     qidx, error);
			return false;
		}
	}

	return true;
}

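/*
 * virtio_mtu_set() below derives the maximum frame size as
 * mtu + Ethernet header + VLAN tag + virtio-net header, capped at
 * VIRTIO_MAX_RX_PKTLEN. For example, with the 12-byte mergeable-buffer
 * header, an MTU of 1500 gives a frame size of
 * 1500 + 14 + 4 + 12 = 1530 bytes.
 */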
#define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
static int
virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct virtio_hw *hw = dev->data->dev_private;
	uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
				 hw->vtnet_hdr_size;
	uint32_t frame_size = mtu + ether_hdr_len;
	uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;

	max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);

	if (mtu < RTE_ETHER_MIN_MTU || frame_size > max_frame_size) {
		PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
			RTE_ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
		return -EINVAL;
	}

	if (!virtio_check_scatter_on_all_rx_queues(dev, frame_size)) {
		PMD_INIT_LOG(ERR, "MTU vs Rx scatter and Rx buffers check failed");
		return -EINVAL;
	}

	hw->max_rx_pkt_len = frame_size;

	return 0;
}

static int
virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);

	virtqueue_enable_intr(vq);
	virtio_mb(hw->weak_barriers);
	return 0;
}

static int
virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
{
	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);

	virtqueue_disable_intr(vq);
	return 0;
}

/*
 * dev_ops for virtio, bare necessities for basic operation
 */
static const struct eth_dev_ops virtio_eth_dev_ops = {
	.dev_configure           = virtio_dev_configure,
	.dev_start               = virtio_dev_start,
	.dev_stop                = virtio_dev_stop,
	.dev_close               = virtio_dev_close,
	.promiscuous_enable      = virtio_dev_promiscuous_enable,
	.promiscuous_disable     = virtio_dev_promiscuous_disable,
	.allmulticast_enable     = virtio_dev_allmulticast_enable,
	.allmulticast_disable    = virtio_dev_allmulticast_disable,
	.mtu_set                 = virtio_mtu_set,
	.dev_infos_get           = virtio_dev_info_get,
	.stats_get               = virtio_dev_stats_get,
	.xstats_get              = virtio_dev_xstats_get,
	.xstats_get_names        = virtio_dev_xstats_get_names,
	.stats_reset             = virtio_dev_stats_reset,
	.xstats_reset            = virtio_dev_stats_reset,
	.link_update             = virtio_dev_link_update,
	.vlan_offload_set        = virtio_dev_vlan_offload_set,
	.rx_queue_setup          = virtio_dev_rx_queue_setup,
	.rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
	.rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
	.tx_queue_setup          = virtio_dev_tx_queue_setup,
	.rss_hash_update         = virtio_dev_rss_hash_update,
	.rss_hash_conf_get       = virtio_dev_rss_hash_conf_get,
	.reta_update             = virtio_dev_rss_reta_update,
	.reta_query              = virtio_dev_rss_reta_query,
	/* collect stats per queue */
	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
	.vlan_filter_set         = virtio_vlan_filter_set,
	.mac_addr_add            = virtio_mac_addr_add,
	.mac_addr_remove         = virtio_mac_addr_remove,
	.mac_addr_set            = virtio_mac_addr_set,
	.get_monitor_addr        = virtio_get_monitor_addr,
};

/*
 * dev_ops for virtio-user in secondary processes, where only limited
 * functionality is currently supported.
 */
const struct eth_dev_ops virtio_user_secondary_eth_dev_ops = {
	.dev_infos_get           = virtio_dev_info_get,
	.stats_get               = virtio_dev_stats_get,
	.xstats_get              = virtio_dev_xstats_get,
	.xstats_get_names        = virtio_dev_xstats_get_names,
	.stats_reset             = virtio_dev_stats_reset,
	.xstats_reset            = virtio_dev_stats_reset,
	/* collect stats per queue */
	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
};

static void
virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	unsigned i;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		const struct virtnet_tx *txvq = dev->data->tx_queues[i];
		if (txvq == NULL)
			continue;

		stats->opackets += txvq->stats.packets;
		stats->obytes += txvq->stats.bytes;

		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
			stats->q_opackets[i] = txvq->stats.packets;
			stats->q_obytes[i] = txvq->stats.bytes;
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
		if (rxvq == NULL)
			continue;

		stats->ipackets += rxvq->stats.packets;
		stats->ibytes += rxvq->stats.bytes;
		stats->ierrors += rxvq->stats.errors;

		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
			stats->q_ipackets[i] = rxvq->stats.packets;
			stats->q_ibytes[i] = rxvq->stats.bytes;
		}
	}

	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
}

static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
				       struct rte_eth_xstat_name *xstats_names,
				       __rte_unused unsigned limit)
{
	unsigned i;
	unsigned count = 0;
	unsigned t;

	unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
		dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;

	if (xstats_names != NULL) {
		/* Note: limit checked in rte_eth_xstats_names() */

		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			struct virtnet_rx *rxvq = dev->data->rx_queues[i];
			if (rxvq == NULL)
				continue;
			for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
				snprintf(xstats_names[count].name,
					sizeof(xstats_names[count].name),
					"rx_q%u_%s", i,
					rte_virtio_rxq_stat_strings[t].name);
				count++;
			}
		}

		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			struct virtnet_tx *txvq = dev->data->tx_queues[i];
			if (txvq == NULL)
				continue;
			for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
				snprintf(xstats_names[count].name,
					sizeof(xstats_names[count].name),
					"tx_q%u_%s", i,
					rte_virtio_txq_stat_strings[t].name);
				count++;
			}
		}
		return count;
	}
	return nstats;
}

static int
virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
		      unsigned n)
{
	unsigned i;
	unsigned count = 0;

	unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
		dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;

	if (n < nstats)
		return nstats;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct virtnet_rx *rxvq = dev->data->rx_queues[i];

		if (rxvq == NULL)
			continue;

		unsigned t;

		for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
			xstats[count].value = *(uint64_t *)(((char *)rxvq) +
				rte_virtio_rxq_stat_strings[t].offset);
			xstats[count].id = count;
			count++;
		}
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct virtnet_tx *txvq = dev->data->tx_queues[i];

		if (txvq == NULL)
			continue;

		unsigned t;

		for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
			xstats[count].value = *(uint64_t *)(((char *)txvq) +
				rte_virtio_txq_stat_strings[t].offset);
			xstats[count].id = count;
			count++;
		}
	}

	return count;
}

static int
virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	virtio_update_stats(dev, stats);

	return 0;
}

static int
virtio_dev_stats_reset(struct rte_eth_dev *dev)
{
	unsigned int i;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct virtnet_tx *txvq = dev->data->tx_queues[i];
		if (txvq == NULL)
			continue;

		txvq->stats.packets = 0;
		txvq->stats.bytes = 0;
		txvq->stats.multicast = 0;
		txvq->stats.broadcast = 0;
		memset(txvq->stats.size_bins, 0,
		       sizeof(txvq->stats.size_bins[0]) * 8);
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
		if (rxvq == NULL)
			continue;

		rxvq->stats.packets = 0;
		rxvq->stats.bytes = 0;
		rxvq->stats.errors = 0;
		rxvq->stats.multicast = 0;
		rxvq->stats.broadcast = 0;
		memset(rxvq->stats.size_bins, 0,
		       sizeof(rxvq->stats.size_bins[0]) * 8);
	}

	return 0;
}

static void
virtio_set_hwaddr(struct virtio_hw *hw)
{
	virtio_write_dev_config(hw,
			offsetof(struct virtio_net_config, mac),
			&hw->mac_addr, RTE_ETHER_ADDR_LEN);
}

static void
virtio_get_hwaddr(struct virtio_hw *hw)
{
	if (virtio_with_feature(hw, VIRTIO_NET_F_MAC)) {
		virtio_read_dev_config(hw,
			offsetof(struct virtio_net_config, mac),
			&hw->mac_addr, RTE_ETHER_ADDR_LEN);
	} else {
		rte_eth_random_addr(&hw->mac_addr[0]);
		virtio_set_hwaddr(hw);
	}
}

static int
virtio_mac_table_set(struct virtio_hw *hw,
		     const struct virtio_net_ctrl_mac *uc,
		     const struct virtio_net_ctrl_mac *mc)
{
	struct virtio_pmd_ctrl ctrl;
	int err, len[2];

	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
		PMD_DRV_LOG(INFO, "host does not support mac table");
		return -1;
	}

	ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;

	len[0] = uc->entries * RTE_ETHER_ADDR_LEN + sizeof(uc->entries);
	memcpy(ctrl.data, uc, len[0]);

	len[1] = mc->entries * RTE_ETHER_ADDR_LEN + sizeof(mc->entries);
	memcpy(ctrl.data + len[0], mc, len[1]);

	err = virtio_send_command(hw->cvq, &ctrl, len, 2);
	if (err != 0)
		PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
	return err;
}

static int
virtio_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
		    uint32_t index, uint32_t vmdq __rte_unused)
{
	struct virtio_hw *hw = dev->data->dev_private;
	const struct rte_ether_addr *addrs = dev->data->mac_addrs;
	unsigned int i;
	struct virtio_net_ctrl_mac *uc, *mc;

	if (index >= VIRTIO_MAX_MAC_ADDRS) {
		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
		return -EINVAL;
	}

	uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
		sizeof(uc->entries));
	uc->entries = 0;
	mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
		sizeof(mc->entries));
	mc->entries = 0;

	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
		const struct rte_ether_addr *addr
			= (i == index) ? mac_addr : addrs + i;
		struct virtio_net_ctrl_mac *tbl
			= rte_is_multicast_ether_addr(addr) ? mc : uc;

		memcpy(&tbl->macs[tbl->entries++], addr, RTE_ETHER_ADDR_LEN);
	}

	return virtio_mac_table_set(hw, uc, mc);
}

static void
virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct rte_ether_addr *addrs = dev->data->mac_addrs;
	struct virtio_net_ctrl_mac *uc, *mc;
	unsigned int i;

	if (index >= VIRTIO_MAX_MAC_ADDRS) {
		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
		return;
	}

	uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
		sizeof(uc->entries));
	uc->entries = 0;
	mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
		sizeof(mc->entries));
	mc->entries = 0;

	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
		struct virtio_net_ctrl_mac *tbl;

		if (i == index || rte_is_zero_ether_addr(addrs + i))
			continue;

		tbl = rte_is_multicast_ether_addr(addrs + i) ? mc : uc;
		memcpy(&tbl->macs[tbl->entries++], addrs + i,
			RTE_ETHER_ADDR_LEN);
	}

	virtio_mac_table_set(hw, uc, mc);
}

static int
virtio_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
{
	struct virtio_hw *hw = dev->data->dev_private;

	memcpy(hw->mac_addr, mac_addr, RTE_ETHER_ADDR_LEN);

	/* Use atomic update if available */
	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
		struct virtio_pmd_ctrl ctrl;
		int len = RTE_ETHER_ADDR_LEN;

		ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
		ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;

		memcpy(ctrl.data, mac_addr, RTE_ETHER_ADDR_LEN);
		return virtio_send_command(hw->cvq, &ctrl, &len, 1);
	}

	if (!virtio_with_feature(hw, VIRTIO_NET_F_MAC))
		return -ENOTSUP;

	virtio_set_hwaddr(hw);
	return 0;
}

#define CLB_VAL_IDX 0
#define CLB_MSK_IDX 1
#define CLB_MATCH_IDX 2
static int
virtio_monitor_callback(const uint64_t value,
		const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
{
	const uint64_t m = opaque[CLB_MSK_IDX];
	const uint64_t v = opaque[CLB_VAL_IDX];
	const uint64_t c = opaque[CLB_MATCH_IDX];

	if (c)
		return (value & m) == v ? -1 : 0;
	else
		return (value & m) == v ? 0 : -1;
}

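/*
 * Build the rte_power_monitor condition for an Rx queue. For packed
 * rings, monitor the flags of the next used descriptor and wake when
 * AVAIL and USED both match the current wrap counter; for split rings,
 * monitor the used index and wake when it moves past the consumer index.
 */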
static int
virtio_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
	struct virtio_hw *hw;

	if (vq == NULL)
		return -EINVAL;

	hw = vq->hw;
	if (virtio_with_packed_queue(hw)) {
		struct vring_packed_desc *desc;
		desc = vq->vq_packed.ring.desc;
		pmc->addr = &desc[vq->vq_used_cons_idx].flags;
		if (vq->vq_packed.used_wrap_counter)
			pmc->opaque[CLB_VAL_IDX] =
						VRING_PACKED_DESC_F_AVAIL_USED;
		else
			pmc->opaque[CLB_VAL_IDX] = 0;
		pmc->opaque[CLB_MSK_IDX] = VRING_PACKED_DESC_F_AVAIL_USED;
		pmc->opaque[CLB_MATCH_IDX] = 1;
		pmc->size = sizeof(desc[vq->vq_used_cons_idx].flags);
	} else {
		pmc->addr = &vq->vq_split.ring.used->idx;
		pmc->opaque[CLB_VAL_IDX] = vq->vq_used_cons_idx
					& (vq->vq_nentries - 1);
		pmc->opaque[CLB_MSK_IDX] = vq->vq_nentries - 1;
		pmc->opaque[CLB_MATCH_IDX] = 0;
		pmc->size = sizeof(vq->vq_split.ring.used->idx);
	}
	pmc->fn = virtio_monitor_callback;

	return 0;
}

static int
virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtio_pmd_ctrl ctrl;
	int len;

	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
		return -ENOTSUP;

	ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
	ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
	memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
	len = sizeof(vlan_id);

	return virtio_send_command(hw->cvq, &ctrl, &len, 1);
}

static int
virtio_intr_unmask(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	if (rte_intr_ack(dev->intr_handle) < 0)
		return -1;

	if (VIRTIO_OPS(hw)->intr_detect)
		VIRTIO_OPS(hw)->intr_detect(hw);

	return 0;
}

static int
virtio_intr_enable(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	if (rte_intr_enable(dev->intr_handle) < 0)
		return -1;

	if (VIRTIO_OPS(hw)->intr_detect)
		VIRTIO_OPS(hw)->intr_detect(hw);

	return 0;
}

static int
virtio_intr_disable(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	if (rte_intr_disable(dev->intr_handle) < 0)
		return -1;

	if (VIRTIO_OPS(hw)->intr_detect)
		VIRTIO_OPS(hw)->intr_detect(hw);

	return 0;
}

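/*
 * Negotiate the feature set with the device: intersect the driver's
 * requested features with the device's offered features, validate a
 * device-supplied MTU if one is offered, and (for VERSION_1 devices)
 * confirm the result with the FEATURES_OK status bit.
 */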
static int
virtio_ethdev_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
{
	uint64_t host_features;

	/* Prepare guest_features: features that the driver wants to support */
	PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
		req_features);

	/* Read device (host) feature bits */
	host_features = VIRTIO_OPS(hw)->get_features(hw);
	PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
		host_features);

	/* If supported, ensure MTU value is valid before acknowledging it. */
	if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
		struct virtio_net_config config;

		virtio_read_dev_config(hw,
			offsetof(struct virtio_net_config, mtu),
			&config.mtu, sizeof(config.mtu));

		if (config.mtu < RTE_ETHER_MIN_MTU)
			req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
	}

	/*
	 * Negotiate features: a subset of the device's feature bits is
	 * written back as the guest feature bits.
	 */
	hw->guest_features = req_features;
	hw->guest_features = virtio_negotiate_features(hw, host_features);
	PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
		hw->guest_features);

	if (VIRTIO_OPS(hw)->features_ok(hw) < 0)
		return -1;

	if (virtio_with_feature(hw, VIRTIO_F_VERSION_1)) {
		virtio_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);

		if (!(virtio_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
			PMD_INIT_LOG(ERR, "Failed to set FEATURES_OK status!");
			return -1;
		}
	}

	hw->req_guest_features = req_features;

	return 0;
}

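/*
 * Pause the device: take the state lock and mark the port stopped so the
 * data-path threads stop touching the queues. Returns with the lock held;
 * must be paired with virtio_dev_resume(), which releases it.
 */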
int
virtio_dev_pause(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	rte_spinlock_lock(&hw->state_lock);

	if (hw->started == 0) {
		/* Device is just stopped. */
		rte_spinlock_unlock(&hw->state_lock);
		return -1;
	}
	hw->started = 0;
	/*
	 * Prevent the worker threads from touching queues to avoid contention;
	 * 1 ms should be enough for the ongoing Tx function to finish.
	 */
	rte_delay_ms(1);
	return 0;
}

/*
 * Recover hw state to let the worker threads continue.
 */
void
virtio_dev_resume(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	hw->started = 1;
	rte_spinlock_unlock(&hw->state_lock);
}

/*
 * Should be called only after device is paused.
 */
int
virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
		int nb_pkts)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtnet_tx *txvq = dev->data->tx_queues[0];
	int ret;

	hw->inject_pkts = tx_pkts;
	ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
	hw->inject_pkts = NULL;

	return ret;
}

static void
virtio_notify_peers(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtnet_rx *rxvq;
	struct rte_mbuf *rarp_mbuf;

	if (!dev->data->rx_queues)
		return;

	rxvq = dev->data->rx_queues[0];
	if (!rxvq)
		return;

	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
			(struct rte_ether_addr *)hw->mac_addr);
	if (rarp_mbuf == NULL) {
		PMD_DRV_LOG(ERR, "failed to make RARP packet.");
		return;
	}

	/* If the virtio port was just stopped, there is no need to send RARP */
	if (virtio_dev_pause(dev) < 0) {
		rte_pktmbuf_free(rarp_mbuf);
		return;
	}

	virtio_inject_pkts(dev, &rarp_mbuf, 1);
	virtio_dev_resume(dev);
}

static void
virtio_ack_link_announce(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtio_pmd_ctrl ctrl;

	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;

	virtio_send_command(hw->cvq, &ctrl, NULL, 0);
}

/*
 * Process a virtio config-changed interrupt: invoke the LSC callback if
 * the link state changed, and generate a gratuitous RARP packet if the
 * status indicates an ANNOUNCE.
 */
void
virtio_interrupt_handler(void *param)
{
	struct rte_eth_dev *dev = param;
	struct virtio_hw *hw = dev->data->dev_private;
	uint8_t isr;
	uint16_t status;

	/* Read the interrupt status, which also clears the interrupt */
	isr = virtio_get_isr(hw);
	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);

	if (virtio_intr_unmask(dev) < 0)
		PMD_DRV_LOG(ERR, "interrupt enable failed");

	if (isr & VIRTIO_ISR_CONFIG) {
		if (virtio_dev_link_update(dev, 0) == 0)
			rte_eth_dev_callback_process(dev,
						     RTE_ETH_EVENT_INTR_LSC,
						     NULL);

		if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
			virtio_read_dev_config(hw,
				offsetof(struct virtio_net_config, status),
				&status, sizeof(status));
			if (status & VIRTIO_NET_S_ANNOUNCE) {
				virtio_notify_peers(dev);
				if (hw->cvq)
					virtio_ack_link_announce(dev);
			}
		}
	}
}

/* set rx and tx handlers according to what is supported */
static void
set_rxtx_funcs(struct rte_eth_dev *eth_dev)
{
	struct virtio_hw *hw = eth_dev->data->dev_private;

	eth_dev->tx_pkt_prepare = virtio_xmit_pkts_prepare;
	if (virtio_with_packed_queue(hw)) {
		PMD_INIT_LOG(INFO,
			"virtio: using packed ring %s Tx path on port %u",
			hw->use_vec_tx ? "vectorized" : "standard",
			eth_dev->data->port_id);
		if (hw->use_vec_tx)
			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
		else
			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
	} else {
		if (hw->use_inorder_tx) {
			PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
				eth_dev->data->port_id);
			eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
		} else {
			PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
				eth_dev->data->port_id);
			eth_dev->tx_pkt_burst = virtio_xmit_pkts;
		}
	}

	if (virtio_with_packed_queue(hw)) {
		if (hw->use_vec_rx) {
			PMD_INIT_LOG(INFO,
				"virtio: using packed ring vectorized Rx path on port %u",
				eth_dev->data->port_id);
			eth_dev->rx_pkt_burst =
				&virtio_recv_pkts_packed_vec;
		} else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
			PMD_INIT_LOG(INFO,
				"virtio: using packed ring mergeable buffer Rx path on port %u",
				eth_dev->data->port_id);
			eth_dev->rx_pkt_burst =
				&virtio_recv_mergeable_pkts_packed;
		} else {
			PMD_INIT_LOG(INFO,
				"virtio: using packed ring standard Rx path on port %u",
				eth_dev->data->port_id);
			eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
		}
	} else {
		if (hw->use_vec_rx) {
			PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on port %u",
				eth_dev->data->port_id);
			eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
		} else if (hw->use_inorder_rx) {
			PMD_INIT_LOG(INFO,
				"virtio: using inorder Rx path on port %u",
				eth_dev->data->port_id);
			eth_dev->rx_pkt_burst = &virtio_recv_pkts_inorder;
		} else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
			PMD_INIT_LOG(INFO,
				"virtio: using mergeable buffer Rx path on port %u",
				eth_dev->data->port_id);
			eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
		} else {
			PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
				eth_dev->data->port_id);
			eth_dev->rx_pkt_burst = &virtio_recv_pkts;
		}
	}
}

/* Only support 1:1 queue/interrupt mapping so far.
 * TODO: support n:1 queue/interrupt mapping when there is a limited
 * number of interrupt vectors (<N+1).
 */
static int
virtio_queues_bind_intr(struct rte_eth_dev *dev)
{
	uint32_t i;
	struct virtio_hw *hw = dev->data->dev_private;

	PMD_INIT_LOG(INFO, "queue/interrupt binding");
	for (i = 0; i < dev->data->nb_rx_queues; ++i) {
		if (rte_intr_vec_list_index_set(dev->intr_handle, i,
						       i + 1))
			return -rte_errno;
		if (VIRTIO_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
						 VIRTIO_MSI_NO_VECTOR) {
			PMD_DRV_LOG(ERR, "failed to set queue vector");
			return -EBUSY;
		}
	}

	return 0;
}

static void
virtio_queues_unbind_intr(struct rte_eth_dev *dev)
{
	uint32_t i;
	struct virtio_hw *hw = dev->data->dev_private;

	PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
	for (i = 0; i < dev->data->nb_rx_queues; ++i)
		VIRTIO_OPS(hw)->set_queue_irq(hw,
					     hw->vqs[i * VTNET_CQ],
					     VIRTIO_MSI_NO_VECTOR);
}

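/*
 * Set up MSI-X interrupt resources: one eventfd per Rx queue plus the
 * config vector, re-register the LSC callback so max_intr is updated,
 * enable interrupts (which also enables MSI-X), then bind each queue to
 * its vector.
 */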
1796 static int
1797 virtio_configure_intr(struct rte_eth_dev *dev)
1798 {
1799 	struct virtio_hw *hw = dev->data->dev_private;
1800 
1801 	if (!rte_intr_cap_multiple(dev->intr_handle)) {
1802 		PMD_INIT_LOG(ERR, "Multiple interrupt vectors not supported");
1803 		return -ENOTSUP;
1804 	}
1805 
1806 	if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1807 		PMD_INIT_LOG(ERR, "Failed to create eventfd");
1808 		return -1;
1809 	}
1810 
1811 	if (rte_intr_vec_list_alloc(dev->intr_handle, "intr_vec",
1812 				    hw->max_queue_pairs)) {
1813 		PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1814 			     hw->max_queue_pairs);
1815 		return -ENOMEM;
1816 	}
1817 
1818 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1819 		/* Re-register callback to update max_intr */
1820 		rte_intr_callback_unregister(dev->intr_handle,
1821 					     virtio_interrupt_handler,
1822 					     dev);
1823 		rte_intr_callback_register(dev->intr_handle,
1824 					   virtio_interrupt_handler,
1825 					   dev);
1826 	}
1827 
1828 	/* DO NOT try to remove this! This function enables MSI-X, without
1829 	 * which QEMU will encounter SIGSEGV when DRIVER_OK is sent.
1830 	 * For legacy devices, it must also be done before queue/vector
1831 	 * binding to grow the config size from 20 to 24 bytes, otherwise
1832 	 * the write to VIRTIO_MSI_QUEUE_VECTOR (offset 22) will be ignored.
1833 	 */
1834 	if (virtio_intr_enable(dev) < 0) {
1835 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1836 		return -1;
1837 	}
1838 
1839 	if (virtio_queues_bind_intr(dev) < 0) {
1840 		PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1841 		return -1;
1842 	}
1843 
1844 	return 0;
1845 }
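
/*
 * Rx interrupts are only configured when the application opts in at
 * configure time. A sketch of the application side (assumes a single
 * Rx/Tx queue pair and defaults elsewhere):
 *
 *	struct rte_eth_conf conf = { 0 };
 *
 *	conf.intr_conf.rxq = 1;
 *	rte_eth_dev_configure(port_id, 1, 1, &conf);
 */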
1846 
1847 static void
1848 virtio_get_speed_duplex(struct rte_eth_dev *eth_dev,
1849 			struct rte_eth_link *link)
1850 {
1851 	struct virtio_hw *hw = eth_dev->data->dev_private;
1852 	struct virtio_net_config *config;
1853 	struct virtio_net_config local_config;
1854 
1855 	config = &local_config;
1856 	virtio_read_dev_config(hw,
1857 		offsetof(struct virtio_net_config, speed),
1858 		&config->speed, sizeof(config->speed));
1859 	virtio_read_dev_config(hw,
1860 		offsetof(struct virtio_net_config, duplex),
1861 		&config->duplex, sizeof(config->duplex));
1862 	hw->speed = config->speed;
1863 	hw->duplex = config->duplex;
1864 	if (link != NULL) {
1865 		link->link_duplex = hw->duplex;
1866 		link->link_speed  = hw->speed;
1867 	}
1868 	PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
1869 		     hw->speed, hw->duplex);
1870 }
1871 
1872 static uint64_t
1873 ethdev_to_virtio_rss_offloads(uint64_t ethdev_hash_types)
1874 {
1875 	uint64_t virtio_hash_types = 0;
1876 
1877 	if (ethdev_hash_types & (RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
1878 				RTE_ETH_RSS_NONFRAG_IPV4_OTHER))
1879 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IPV4;
1880 
1881 	if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1882 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCPV4;
1883 
1884 	if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
1885 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDPV4;
1886 
1887 	if (ethdev_hash_types & (RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
1888 				RTE_ETH_RSS_NONFRAG_IPV6_OTHER))
1889 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IPV6;
1890 
1891 	if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1892 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCPV6;
1893 
1894 	if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
1895 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDPV6;
1896 
1897 	if (ethdev_hash_types & RTE_ETH_RSS_IPV6_EX)
1898 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IP_EX;
1899 
1900 	if (ethdev_hash_types & RTE_ETH_RSS_IPV6_TCP_EX)
1901 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCP_EX;
1902 
1903 	if (ethdev_hash_types & RTE_ETH_RSS_IPV6_UDP_EX)
1904 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDP_EX;
1905 
1906 	return virtio_hash_types;
1907 }
1908 
1909 static uint64_t
1910 virtio_to_ethdev_rss_offloads(uint64_t virtio_hash_types)
1911 {
1912 	uint64_t rss_offloads = 0;
1913 
1914 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IPV4)
1915 		rss_offloads |= RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
1916 			RTE_ETH_RSS_NONFRAG_IPV4_OTHER;
1917 
1918 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCPV4)
1919 		rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV4_TCP;
1920 
1921 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDPV4)
1922 		rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV4_UDP;
1923 
1924 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IPV6)
1925 		rss_offloads |= RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
1926 			RTE_ETH_RSS_NONFRAG_IPV6_OTHER;
1927 
1928 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCPV6)
1929 		rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV6_TCP;
1930 
1931 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDPV6)
1932 		rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV6_UDP;
1933 
1934 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IP_EX)
1935 		rss_offloads |= RTE_ETH_RSS_IPV6_EX;
1936 
1937 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCP_EX)
1938 		rss_offloads |= RTE_ETH_RSS_IPV6_TCP_EX;
1939 
1940 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDP_EX)
1941 		rss_offloads |= RTE_ETH_RSS_IPV6_UDP_EX;
1942 
1943 	return rss_offloads;
1944 }
1945 
1946 static int
1947 virtio_dev_get_rss_config(struct virtio_hw *hw, uint32_t *rss_hash_types)
1948 {
1949 	struct virtio_net_config local_config;
1950 	struct virtio_net_config *config = &local_config;
1951 
1952 	virtio_read_dev_config(hw,
1953 			offsetof(struct virtio_net_config, rss_max_key_size),
1954 			&config->rss_max_key_size,
1955 			sizeof(config->rss_max_key_size));
1956 	if (config->rss_max_key_size < VIRTIO_NET_RSS_KEY_SIZE) {
1957 		PMD_INIT_LOG(ERR, "Invalid device RSS max key size (%u)",
1958 				config->rss_max_key_size);
1959 		return -EINVAL;
1960 	}
1961 
1962 	virtio_read_dev_config(hw,
1963 			offsetof(struct virtio_net_config,
1964 				rss_max_indirection_table_length),
1965 			&config->rss_max_indirection_table_length,
1966 			sizeof(config->rss_max_indirection_table_length));
1967 	if (config->rss_max_indirection_table_length < VIRTIO_NET_RSS_RETA_SIZE) {
1968 		PMD_INIT_LOG(ERR, "Invalid device RSS max reta size (%u)",
1969 				config->rss_max_indirection_table_length);
1970 		return -EINVAL;
1971 	}
1972 
1973 	virtio_read_dev_config(hw,
1974 			offsetof(struct virtio_net_config, supported_hash_types),
1975 			&config->supported_hash_types,
1976 			sizeof(config->supported_hash_types));
1977 	if ((config->supported_hash_types & VIRTIO_NET_HASH_TYPE_MASK) == 0) {
1978 		PMD_INIT_LOG(ERR, "Invalid device RSS hash types (0x%x)",
1979 				config->supported_hash_types);
1980 		return -EINVAL;
1981 	}
1982 
1983 	*rss_hash_types = config->supported_hash_types & VIRTIO_NET_HASH_TYPE_MASK;
1984 
1985 	PMD_INIT_LOG(DEBUG, "Device RSS config:");
1986 	PMD_INIT_LOG(DEBUG, "\t-Max key size: %u", config->rss_max_key_size);
1987 	PMD_INIT_LOG(DEBUG, "\t-Max reta size: %u", config->rss_max_indirection_table_length);
1988 	PMD_INIT_LOG(DEBUG, "\t-Supported hash types: 0x%x", *rss_hash_types);
1989 
1990 	return 0;
1991 }
1992 
1993 static int
1994 virtio_dev_rss_hash_update(struct rte_eth_dev *dev,
1995 		struct rte_eth_rss_conf *rss_conf)
1996 {
1997 	struct virtio_hw *hw = dev->data->dev_private;
1998 	char old_rss_key[VIRTIO_NET_RSS_KEY_SIZE];
1999 	uint32_t old_hash_types;
2000 	uint16_t nb_queues;
2001 	int ret;
2002 
2003 	if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2004 		return -ENOTSUP;
2005 
2006 	if (rss_conf->rss_hf & ~virtio_to_ethdev_rss_offloads(VIRTIO_NET_HASH_TYPE_MASK))
2007 		return -EINVAL;
2008 
2009 	old_hash_types = hw->rss_hash_types;
2010 	hw->rss_hash_types = ethdev_to_virtio_rss_offloads(rss_conf->rss_hf);
2011 
2012 	if (rss_conf->rss_key && rss_conf->rss_key_len) {
2013 		if (rss_conf->rss_key_len != VIRTIO_NET_RSS_KEY_SIZE) {
2014 			PMD_INIT_LOG(ERR, "Driver only supports a %u-byte RSS key",
2015 					VIRTIO_NET_RSS_KEY_SIZE);
2016 			ret = -EINVAL;
2017 			goto restore_types;
2018 		}
2019 		memcpy(old_rss_key, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2020 		memcpy(hw->rss_key, rss_conf->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2021 	}
2022 
2023 	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2024 	ret = virtio_set_multiple_queues_rss(dev, nb_queues);
2025 	if (ret < 0) {
2026 		PMD_INIT_LOG(ERR, "Failed to apply new RSS config to the device");
2027 		goto restore_key;
2028 	}
2029 
2030 	return 0;
2031 restore_key:
2032 	if (rss_conf->rss_key && rss_conf->rss_key_len)
2033 		memcpy(hw->rss_key, old_rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2034 restore_types:
2035 	hw->rss_hash_types = old_hash_types;
2036 
2037 	return ret;
2038 }
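
/*
 * Application-side sketch of an update that passes the checks above (key
 * bytes are placeholders; the key must be exactly VIRTIO_NET_RSS_KEY_SIZE,
 * i.e. 40 bytes, long):
 *
 *	uint8_t key[VIRTIO_NET_RSS_KEY_SIZE] = { 0xde, 0xad };
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = key,
 *		.rss_key_len = sizeof(key),
 *		.rss_hf = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
 *	};
 *
 *	rte_eth_dev_rss_hash_update(port_id, &conf);
 */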
2039 
2040 static int
2041 virtio_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2042 		struct rte_eth_rss_conf *rss_conf)
2043 {
2044 	struct virtio_hw *hw = dev->data->dev_private;
2045 
2046 	if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2047 		return -ENOTSUP;
2048 
2049 	if (rss_conf->rss_key && rss_conf->rss_key_len >= VIRTIO_NET_RSS_KEY_SIZE)
2050 		memcpy(rss_conf->rss_key, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2051 	rss_conf->rss_key_len = VIRTIO_NET_RSS_KEY_SIZE;
2052 	rss_conf->rss_hf = virtio_to_ethdev_rss_offloads(hw->rss_hash_types);
2053 
2054 	return 0;
2055 }
2056 
2057 static int virtio_dev_rss_reta_update(struct rte_eth_dev *dev,
2058 			 struct rte_eth_rss_reta_entry64 *reta_conf,
2059 			 uint16_t reta_size)
2060 {
2061 	struct virtio_hw *hw = dev->data->dev_private;
2062 	uint16_t nb_queues;
2063 	uint16_t old_reta[VIRTIO_NET_RSS_RETA_SIZE];
2064 	int idx, pos, i, ret;
2065 
2066 	if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2067 		return -ENOTSUP;
2068 
2069 	if (reta_size != VIRTIO_NET_RSS_RETA_SIZE)
2070 		return -EINVAL;
2071 
2072 	memcpy(old_reta, hw->rss_reta, sizeof(old_reta));
2073 
2074 	for (i = 0; i < reta_size; i++) {
2075 		idx = i / RTE_ETH_RETA_GROUP_SIZE;
2076 		pos = i % RTE_ETH_RETA_GROUP_SIZE;
2077 
2078 		if (((reta_conf[idx].mask >> pos) & 0x1) == 0)
2079 			continue;
2080 
2081 		hw->rss_reta[i] = reta_conf[idx].reta[pos];
2082 	}
2083 
2084 	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2085 	ret = virtio_set_multiple_queues_rss(dev, nb_queues);
2086 	if (ret < 0) {
2087 		PMD_INIT_LOG(ERR, "Failed to apply new RETA to the device");
2088 		memcpy(hw->rss_reta, old_reta, sizeof(old_reta));
2089 	}
2090 
2091 	hw->rss_rx_queues = dev->data->nb_rx_queues;
2092 
2093 	return ret;
2094 }
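
/*
 * Application-side sketch: steer every bucket to Rx queue 0. reta_size
 * must match VIRTIO_NET_RSS_RETA_SIZE, as enforced above:
 *
 *	struct rte_eth_rss_reta_entry64 reta[VIRTIO_NET_RSS_RETA_SIZE /
 *					     RTE_ETH_RETA_GROUP_SIZE] = { 0 };
 *	unsigned int j;
 *
 *	for (j = 0; j < RTE_DIM(reta); j++)
 *		reta[j].mask = UINT64_MAX;
 *	rte_eth_dev_rss_reta_update(port_id, reta, VIRTIO_NET_RSS_RETA_SIZE);
 *
 * The zero-initialized reta[] entries all point at queue 0.
 */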
2095 
2096 static int virtio_dev_rss_reta_query(struct rte_eth_dev *dev,
2097 			 struct rte_eth_rss_reta_entry64 *reta_conf,
2098 			 uint16_t reta_size)
2099 {
2100 	struct virtio_hw *hw = dev->data->dev_private;
2101 	int idx, i;
2102 
2103 	if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2104 		return -ENOTSUP;
2105 
2106 	if (reta_size != VIRTIO_NET_RSS_RETA_SIZE)
2107 		return -EINVAL;
2108 
2109 	for (i = 0; i < reta_size; i++) {
2110 		idx = i / RTE_ETH_RETA_GROUP_SIZE;
2111 		reta_conf[idx].reta[i % RTE_ETH_RETA_GROUP_SIZE] = hw->rss_reta[i];
2112 	}
2113 
2114 	return 0;
2115 }
2116 
2117 /*
2118  * The default RSS hash key is the default key of the Intel
2119  * IXGBE devices. The application can update it with any
2120  * 40-byte key value.
2121  */
2122 static uint8_t rss_intel_key[VIRTIO_NET_RSS_KEY_SIZE] = {
2123 	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2124 	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2125 	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2126 	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2127 	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2128 };
2129 
2130 static int
2131 virtio_dev_rss_init(struct rte_eth_dev *eth_dev)
2132 {
2133 	struct virtio_hw *hw = eth_dev->data->dev_private;
2134 	uint16_t nb_rx_queues = eth_dev->data->nb_rx_queues;
2135 	struct rte_eth_rss_conf *rss_conf;
2136 	int ret, i;
2137 
2138 	if (!nb_rx_queues) {
2139 		PMD_INIT_LOG(ERR, "Cannot init RSS if no Rx queues");
2140 		return -EINVAL;
2141 	}
2142 
2143 	rss_conf = &eth_dev->data->dev_conf.rx_adv_conf.rss_conf;
2144 
2145 	ret = virtio_dev_get_rss_config(hw, &hw->rss_hash_types);
2146 	if (ret)
2147 		return ret;
2148 
2149 	if (rss_conf->rss_hf) {
2150 		/* Ensure requested hash types are supported by the device */
2151 		if (rss_conf->rss_hf & ~virtio_to_ethdev_rss_offloads(hw->rss_hash_types))
2152 			return -EINVAL;
2153 
2154 		hw->rss_hash_types = ethdev_to_virtio_rss_offloads(rss_conf->rss_hf);
2155 	}
2156 
2157 	if (!hw->rss_key) {
2158 		/* Set up a default RSS key if not already provided by the user */
2159 		hw->rss_key = rte_malloc_socket("rss_key",
2160 				VIRTIO_NET_RSS_KEY_SIZE, 0,
2161 				eth_dev->device->numa_node);
2162 		if (!hw->rss_key) {
2163 			PMD_INIT_LOG(ERR, "Failed to allocate RSS key");
2164 			return -1;
2165 		}
2166 	}
2167 
2168 	if (rss_conf->rss_key && rss_conf->rss_key_len) {
2169 		if (rss_conf->rss_key_len != VIRTIO_NET_RSS_KEY_SIZE) {
2170 			PMD_INIT_LOG(ERR, "Driver only supports a %u-byte RSS key",
2171 					VIRTIO_NET_RSS_KEY_SIZE);
2172 			return -EINVAL;
2173 		}
2174 		memcpy(hw->rss_key, rss_conf->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2175 	} else {
2176 		memcpy(hw->rss_key, rss_intel_key, VIRTIO_NET_RSS_KEY_SIZE);
2177 	}
2178 
2179 	if (!hw->rss_reta) {
2180 		/* Setup default RSS reta if not already setup by the user */
2181 		/* Set up a default RSS reta if not already provided by the user */
2182 				VIRTIO_NET_RSS_RETA_SIZE * sizeof(uint16_t), 0,
2183 				eth_dev->device->numa_node);
2184 		if (!hw->rss_reta) {
2185 			PMD_INIT_LOG(ERR, "Failed to allocate RSS reta");
2186 			return -1;
2187 		}
2188 
2189 		hw->rss_rx_queues = 0;
2190 	}
2191 
2192 	/* Re-initialize the RSS reta if the number of RX queues has changed */
2193 	if (hw->rss_rx_queues != nb_rx_queues) {
2194 		for (i = 0; i < VIRTIO_NET_RSS_RETA_SIZE; i++)
2195 			hw->rss_reta[i] = i % nb_rx_queues;
2196 		hw->rss_rx_queues = nb_rx_queues;
2197 	}
2198 
2199 	return 0;
2200 }
2201 
2202 #define DUPLEX_UNKNOWN   0xff
2203 /* reset device and renegotiate features if needed */
2204 static int
2205 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
2206 {
2207 	struct virtio_hw *hw = eth_dev->data->dev_private;
2208 	struct virtio_net_config *config;
2209 	struct virtio_net_config local_config;
2210 	int ret;
2211 
2212 	/* Reset the device, although not strictly necessary at startup */
2213 	virtio_reset(hw);
2214 
2215 	if (hw->vqs) {
2216 		virtio_dev_free_mbufs(eth_dev);
2217 		virtio_free_queues(hw);
2218 	}
2219 
2220 	/* Tell the host we've noticed this device. */
2221 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
2222 
2223 	/* Tell the host we know how to drive the device. */
2224 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
2225 	if (virtio_ethdev_negotiate_features(hw, req_features) < 0)
2226 		return -1;
2227 
2228 	hw->weak_barriers = !virtio_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);
2229 
2230 	/* LSC needs both the status feature and MSI-X; disable it otherwise */
2231 	if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS) && hw->intr_lsc)
2232 		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
2233 	else
2234 		eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
2235 
2236 	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
2237 
2238 	/* Set up the Rx header size for the device */
2239 	if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
2240 	    virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2241 	    virtio_with_packed_queue(hw))
2242 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2243 	else
2244 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
2245 
2246 	/* Copy the permanent MAC address into virtio_hw */
2247 	virtio_get_hwaddr(hw);
2248 	rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
2249 			&eth_dev->data->mac_addrs[0]);
2250 	PMD_INIT_LOG(DEBUG,
2251 		     "PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
2252 		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
2253 		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
2254 
2255 	hw->get_speed_via_feat = hw->speed == RTE_ETH_SPEED_NUM_UNKNOWN &&
2256 			     virtio_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX);
2257 	if (hw->get_speed_via_feat)
2258 		virtio_get_speed_duplex(eth_dev, NULL);
2259 	if (hw->duplex == DUPLEX_UNKNOWN)
2260 		hw->duplex = RTE_ETH_LINK_FULL_DUPLEX;
2261 	PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
2262 		hw->speed, hw->duplex);
2263 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
2264 		config = &local_config;
2265 
2266 		virtio_read_dev_config(hw,
2267 			offsetof(struct virtio_net_config, mac),
2268 			&config->mac, sizeof(config->mac));
2269 
2270 		if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2271 			virtio_read_dev_config(hw,
2272 				offsetof(struct virtio_net_config, status),
2273 				&config->status, sizeof(config->status));
2274 		} else {
2275 			PMD_INIT_LOG(DEBUG,
2276 				     "VIRTIO_NET_F_STATUS is not supported");
2277 			config->status = 0;
2278 		}
2279 
2280 		if (virtio_with_feature(hw, VIRTIO_NET_F_MQ) ||
2281 				virtio_with_feature(hw, VIRTIO_NET_F_RSS)) {
2282 			virtio_read_dev_config(hw,
2283 				offsetof(struct virtio_net_config, max_virtqueue_pairs),
2284 				&config->max_virtqueue_pairs,
2285 				sizeof(config->max_virtqueue_pairs));
2286 		} else {
2287 			PMD_INIT_LOG(DEBUG,
2288 				     "Neither VIRTIO_NET_F_MQ nor VIRTIO_NET_F_RSS is supported");
2289 			config->max_virtqueue_pairs = 1;
2290 		}
2291 
2292 		hw->max_queue_pairs = config->max_virtqueue_pairs;
2293 
2294 		if (virtio_with_feature(hw, VIRTIO_NET_F_MTU)) {
2295 			virtio_read_dev_config(hw,
2296 				offsetof(struct virtio_net_config, mtu),
2297 				&config->mtu,
2298 				sizeof(config->mtu));
2299 
2300 			/*
2301 			 * MTU value has already been checked at negotiation
2302 			 * time, but check again in case it has changed since
2303 			 * then, which should not happen.
2304 			 */
2305 			if (config->mtu < RTE_ETHER_MIN_MTU) {
2306 				PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
2307 						config->mtu);
2308 				return -1;
2309 			}
2310 
2311 			hw->max_mtu = config->mtu;
2312 			/* Set the initial MTU to the maximum supported by the host */
2313 			eth_dev->data->mtu = config->mtu;
2314 
2315 		} else {
2316 			hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
2317 				VLAN_TAG_LEN - hw->vtnet_hdr_size;
2318 		}
2319 
2320 		hw->rss_hash_types = 0;
2321 		if (virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2322 			if (virtio_dev_rss_init(eth_dev))
2323 				return -1;
2324 
2325 		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
2326 				config->max_virtqueue_pairs);
2327 		PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
2328 		PMD_INIT_LOG(DEBUG,
2329 				"PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
2330 				config->mac[0], config->mac[1],
2331 				config->mac[2], config->mac[3],
2332 				config->mac[4], config->mac[5]);
2333 	} else {
2334 		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
2335 		hw->max_queue_pairs = 1;
2336 		hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
2337 			VLAN_TAG_LEN - hw->vtnet_hdr_size;
2338 	}
2339 
2340 	ret = virtio_alloc_queues(eth_dev);
2341 	if (ret < 0)
2342 		return ret;
2343 
2344 	if (eth_dev->data->dev_conf.intr_conf.rxq) {
2345 		if (virtio_configure_intr(eth_dev) < 0) {
2346 			PMD_INIT_LOG(ERR, "failed to configure interrupt");
2347 			virtio_free_queues(hw);
2348 			return -1;
2349 		}
2350 	}
2351 
2352 	virtio_reinit_complete(hw);
2353 
2354 	return 0;
2355 }
2356 
2357 /*
2358  * This function is based on probe() function in virtio_pci.c
2359  * It returns 0 on success.
2360  */
2361 int
2362 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
2363 {
2364 	struct virtio_hw *hw = eth_dev->data->dev_private;
2365 	uint32_t speed = RTE_ETH_SPEED_NUM_UNKNOWN;
2366 	int vectorized = 0;
2367 	int ret;
2368 
2369 	if (sizeof(struct virtio_net_hdr_mrg_rxbuf) > RTE_PKTMBUF_HEADROOM) {
2370 		PMD_INIT_LOG(ERR,
2371 			"Insufficient headroom: required = %d, avail = %d",
2372 			(int)sizeof(struct virtio_net_hdr_mrg_rxbuf),
2373 			RTE_PKTMBUF_HEADROOM);
2374 
2375 		return -1;
2376 	}
2377 
2378 	eth_dev->dev_ops = &virtio_eth_dev_ops;
2379 
2380 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
2381 		set_rxtx_funcs(eth_dev);
2382 		return 0;
2383 	}
2384 
2385 	ret = virtio_dev_devargs_parse(eth_dev->device->devargs, &speed, &vectorized);
2386 	if (ret < 0)
2387 		return ret;
2388 	hw->speed = speed;
2389 	hw->duplex = DUPLEX_UNKNOWN;
2390 
2391 	/* Allocate memory for storing MAC addresses */
2392 	eth_dev->data->mac_addrs = rte_zmalloc("virtio",
2393 				VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN, 0);
2394 	if (eth_dev->data->mac_addrs == NULL) {
2395 		PMD_INIT_LOG(ERR,
2396 			"Failed to allocate %d bytes needed to store MAC addresses",
2397 			VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN);
2398 		return -ENOMEM;
2399 	}
2400 
2401 	rte_spinlock_init(&hw->state_lock);
2402 
2403 	/* reset device and negotiate default features */
2404 	ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
2405 	if (ret < 0)
2406 		goto err_virtio_init;
2407 
2408 	if (vectorized) {
2409 		if (!virtio_with_packed_queue(hw)) {
2410 			hw->use_vec_rx = 1;
2411 		} else {
2412 #if defined(CC_AVX512_SUPPORT) || defined(RTE_ARCH_ARM)
2413 			hw->use_vec_rx = 1;
2414 			hw->use_vec_tx = 1;
2415 #else
2416 			PMD_DRV_LOG(INFO,
2417 				"build environment does not support packed ring vectorized path");
2418 #endif
2419 		}
2420 	}
2421 
2422 	hw->opened = 1;
2423 
2424 	return 0;
2425 
2426 err_virtio_init:
2427 	rte_free(eth_dev->data->mac_addrs);
2428 	eth_dev->data->mac_addrs = NULL;
2429 	return ret;
2430 }
2431 
2432 static uint32_t
2433 virtio_dev_speed_capa_get(uint32_t speed)
2434 {
2435 	switch (speed) {
2436 	case RTE_ETH_SPEED_NUM_10G:
2437 		return RTE_ETH_LINK_SPEED_10G;
2438 	case RTE_ETH_SPEED_NUM_20G:
2439 		return RTE_ETH_LINK_SPEED_20G;
2440 	case RTE_ETH_SPEED_NUM_25G:
2441 		return RTE_ETH_LINK_SPEED_25G;
2442 	case RTE_ETH_SPEED_NUM_40G:
2443 		return RTE_ETH_LINK_SPEED_40G;
2444 	case RTE_ETH_SPEED_NUM_50G:
2445 		return RTE_ETH_LINK_SPEED_50G;
2446 	case RTE_ETH_SPEED_NUM_56G:
2447 		return RTE_ETH_LINK_SPEED_56G;
2448 	case RTE_ETH_SPEED_NUM_100G:
2449 		return RTE_ETH_LINK_SPEED_100G;
2450 	case RTE_ETH_SPEED_NUM_200G:
2451 		return RTE_ETH_LINK_SPEED_200G;
2452 	default:
2453 		return 0;
2454 	}
2455 }
2456 
2457 static int vectorized_check_handler(__rte_unused const char *key,
2458 		const char *value, void *ret_val)
2459 {
2460 	if (strcmp(value, "1") == 0)
2461 		*(int *)ret_val = 1;
2462 	else
2463 		*(int *)ret_val = 0;
2464 
2465 	return 0;
2466 }
2467 
2468 #define VIRTIO_ARG_SPEED      "speed"
2469 #define VIRTIO_ARG_VECTORIZED "vectorized"
2470 
2471 static int
2472 link_speed_handler(const char *key __rte_unused,
2473 		const char *value, void *ret_val)
2474 {
2475 	uint32_t val;
2476 	if (!value || !ret_val)
2477 		return -EINVAL;
2478 	val = strtoul(value, NULL, 0);
2479 	/* validate input */
2480 	if (virtio_dev_speed_capa_get(val) == 0)
2481 		return -EINVAL;
2482 	*(uint32_t *)ret_val = val;
2483 
2484 	return 0;
2485 }
2486 
2487 
2488 static int
2489 virtio_dev_devargs_parse(struct rte_devargs *devargs, uint32_t *speed, int *vectorized)
2490 {
2491 	struct rte_kvargs *kvlist;
2492 	int ret = 0;
2493 
2494 	if (devargs == NULL)
2495 		return 0;
2496 
2497 	kvlist = rte_kvargs_parse(devargs->args, NULL);
2498 	if (kvlist == NULL) {
2499 		PMD_INIT_LOG(ERR, "Failed to parse devargs");
2500 		return 0;
2501 	}
2502 
2503 	if (speed && rte_kvargs_count(kvlist, VIRTIO_ARG_SPEED) == 1) {
2504 		ret = rte_kvargs_process(kvlist,
2505 					VIRTIO_ARG_SPEED,
2506 					link_speed_handler, speed);
2507 		if (ret < 0) {
2508 			PMD_INIT_LOG(ERR, "Failed to parse %s",
2509 					VIRTIO_ARG_SPEED);
2510 			goto exit;
2511 		}
2512 	}
2513 
2514 	if (vectorized &&
2515 		rte_kvargs_count(kvlist, VIRTIO_ARG_VECTORIZED) == 1) {
2516 		ret = rte_kvargs_process(kvlist,
2517 				VIRTIO_ARG_VECTORIZED,
2518 				vectorized_check_handler, vectorized);
2519 		if (ret < 0) {
2520 			PMD_INIT_LOG(ERR, "Failed to parse %s",
2521 					VIRTIO_ARG_VECTORIZED);
2522 			goto exit;
2523 		}
2524 	}
2525 
2526 exit:
2527 	rte_kvargs_free(kvlist);
2528 	return ret;
2529 }
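
/*
 * Both keys are optional and come from the EAL device arguments, e.g.
 * (hypothetical PCI address):
 *
 *	dpdk-testpmd -a 0000:00:04.0,speed=10000,vectorized=1 -- -i
 */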
2530 
2531 static uint8_t
2532 rx_offload_enabled(struct virtio_hw *hw)
2533 {
2534 	return virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
2535 		virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2536 		virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
2537 }
2538 
2539 static uint8_t
2540 tx_offload_enabled(struct virtio_hw *hw)
2541 {
2542 	return virtio_with_feature(hw, VIRTIO_NET_F_CSUM) ||
2543 		virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
2544 		virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
2545 }
2546 
2547 /*
2548  * Configure virtio device
2549  * It returns 0 on success.
2550  */
2551 static int
2552 virtio_dev_configure(struct rte_eth_dev *dev)
2553 {
2554 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2555 	const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
2556 	struct virtio_hw *hw = dev->data->dev_private;
2557 	uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
2558 		hw->vtnet_hdr_size;
2559 	uint64_t rx_offloads = rxmode->offloads;
2560 	uint64_t tx_offloads = txmode->offloads;
2561 	uint64_t req_features;
2562 	int ret;
2563 
2564 	PMD_INIT_LOG(DEBUG, "configure");
2565 	req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
2566 
2567 	if (rxmode->mq_mode != RTE_ETH_MQ_RX_NONE && rxmode->mq_mode != RTE_ETH_MQ_RX_RSS) {
2568 		PMD_DRV_LOG(ERR,
2569 			"Unsupported Rx multi queue mode %d",
2570 			rxmode->mq_mode);
2571 		return -EINVAL;
2572 	}
2573 
2574 	if (txmode->mq_mode != RTE_ETH_MQ_TX_NONE) {
2575 		PMD_DRV_LOG(ERR,
2576 			"Unsupported Tx multi queue mode %d",
2577 			txmode->mq_mode);
2578 		return -EINVAL;
2579 	}
2580 
2581 	if (dev->data->dev_conf.intr_conf.rxq) {
2582 		ret = virtio_init_device(dev, hw->req_guest_features);
2583 		if (ret < 0)
2584 			return ret;
2585 	}
2586 
2587 	if (rxmode->mq_mode == RTE_ETH_MQ_RX_RSS)
2588 		req_features |= (1ULL << VIRTIO_NET_F_RSS);
2589 
2590 	if (rxmode->mtu > hw->max_mtu)
2591 		req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
2592 
2593 	hw->max_rx_pkt_len = ether_hdr_len + rxmode->mtu;
2594 
2595 	if (rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2596 			   RTE_ETH_RX_OFFLOAD_TCP_CKSUM))
2597 		req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
2598 
2599 	if (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)
2600 		req_features |=
2601 			(1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2602 			(1ULL << VIRTIO_NET_F_GUEST_TSO6);
2603 
2604 	if (tx_offloads & (RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
2605 			   RTE_ETH_TX_OFFLOAD_TCP_CKSUM))
2606 		req_features |= (1ULL << VIRTIO_NET_F_CSUM);
2607 
2608 	if (tx_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)
2609 		req_features |=
2610 			(1ULL << VIRTIO_NET_F_HOST_TSO4) |
2611 			(1ULL << VIRTIO_NET_F_HOST_TSO6);
2612 
2613 	/* if the requested features changed, reinit the device */
2614 	if (req_features != hw->req_guest_features) {
2615 		ret = virtio_init_device(dev, req_features);
2616 		if (ret < 0)
2617 			return ret;
2618 	}
2619 
2620 	if ((rxmode->mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) &&
2621 			!virtio_with_feature(hw, VIRTIO_NET_F_RSS)) {
2622 		PMD_DRV_LOG(ERR, "RSS support requested but not supported by the device");
2623 		return -ENOTSUP;
2624 	}
2625 
2626 	if ((rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2627 			    RTE_ETH_RX_OFFLOAD_TCP_CKSUM)) &&
2628 		!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
2629 		PMD_DRV_LOG(ERR,
2630 			"rx checksum not available on this host");
2631 		return -ENOTSUP;
2632 	}
2633 
2634 	if ((rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) &&
2635 		(!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2636 		 !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
2637 		PMD_DRV_LOG(ERR,
2638 			"Large Receive Offload not available on this host");
2639 		return -ENOTSUP;
2640 	}
2641 
2642 	/* start control queue */
2643 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
2644 		virtio_dev_cq_start(dev);
2645 
2646 	if (rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
2647 		hw->vlan_strip = 1;
2648 
2649 	hw->rx_ol_scatter = (rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER);
2650 
2651 	if ((rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER) &&
2652 			!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2653 		PMD_DRV_LOG(ERR,
2654 			    "vlan filtering not available on this host");
2655 		return -ENOTSUP;
2656 	}
2657 
2658 	hw->has_tx_offload = tx_offload_enabled(hw);
2659 	hw->has_rx_offload = rx_offload_enabled(hw);
2660 
2661 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2662 		/* Enable vector (0) for Link State Interrupt */
2663 		if (VIRTIO_OPS(hw)->set_config_irq(hw, 0) ==
2664 				VIRTIO_MSI_NO_VECTOR) {
2665 			PMD_DRV_LOG(ERR, "failed to set config vector");
2666 			return -EBUSY;
2667 		}
2668 
2669 	if (virtio_with_packed_queue(hw)) {
2670 #if defined(RTE_ARCH_X86_64) && defined(CC_AVX512_SUPPORT)
2671 		if ((hw->use_vec_rx || hw->use_vec_tx) &&
2672 		    (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
2673 		     !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2674 		     !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2675 		     rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512)) {
2676 			PMD_DRV_LOG(INFO,
2677 				"disabled packed ring vectorized path: requirements not met");
2678 			hw->use_vec_rx = 0;
2679 			hw->use_vec_tx = 0;
2680 		}
2681 #elif defined(RTE_ARCH_ARM)
2682 		if ((hw->use_vec_rx || hw->use_vec_tx) &&
2683 		    (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) ||
2684 		     !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2685 		     !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2686 		     rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)) {
2687 			PMD_DRV_LOG(INFO,
2688 				"disabled packed ring vectorized path: requirements not met");
2689 			hw->use_vec_rx = 0;
2690 			hw->use_vec_tx = 0;
2691 		}
2692 #else
2693 		hw->use_vec_rx = 0;
2694 		hw->use_vec_tx = 0;
2695 #endif
2696 
2697 		if (hw->use_vec_rx) {
2698 			if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2699 				PMD_DRV_LOG(INFO,
2700 					"disabled packed ring vectorized Rx: mrg_rxbuf is enabled");
2701 				hw->use_vec_rx = 0;
2702 			}
2703 
2704 			if (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) {
2705 				PMD_DRV_LOG(INFO,
2706 					"disabled packed ring vectorized Rx: TCP_LRO is enabled");
2707 				hw->use_vec_rx = 0;
2708 			}
2709 		}
2710 	} else {
2711 		if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER)) {
2712 			hw->use_inorder_tx = 1;
2713 			hw->use_inorder_rx = 1;
2714 			hw->use_vec_rx = 0;
2715 		}
2716 
2717 		if (hw->use_vec_rx) {
2718 #if defined RTE_ARCH_ARM
2719 			if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
2720 				PMD_DRV_LOG(INFO,
2721 					"disabled split ring vectorized path: requirements not met");
2722 				hw->use_vec_rx = 0;
2723 			}
2724 #endif
2725 			if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2726 				PMD_DRV_LOG(INFO,
2727 					"disabled split ring vectorized Rx: mrg_rxbuf is enabled");
2728 				hw->use_vec_rx = 0;
2729 			}
2730 
2731 			if (rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2732 					   RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
2733 					   RTE_ETH_RX_OFFLOAD_TCP_LRO |
2734 					   RTE_ETH_RX_OFFLOAD_VLAN_STRIP)) {
2735 				PMD_DRV_LOG(INFO,
2736 					"disabled split ring vectorized Rx: Rx offloads are enabled");
2737 				hw->use_vec_rx = 0;
2738 			}
2739 
2740 			if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
2741 				PMD_DRV_LOG(INFO,
2742 					"disabled split ring vectorized Rx: max SIMD bitwidth too low");
2743 				hw->use_vec_rx = 0;
2744 			}
2745 		}
2746 	}
2747 
2748 	return 0;
2749 }
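
/*
 * A configuration that exercises the offload checks above, as a sketch
 * (assumes the host negotiated the corresponding features; port and
 * queue counts are placeholders):
 *
 *	struct rte_eth_conf conf = { 0 };
 *
 *	conf.rxmode.offloads = RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
 *			       RTE_ETH_RX_OFFLOAD_UDP_CKSUM;
 *	conf.txmode.offloads = RTE_ETH_TX_OFFLOAD_TCP_TSO;
 *	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 */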
2750 
2751 
2752 static int
2753 virtio_dev_start(struct rte_eth_dev *dev)
2754 {
2755 	uint16_t nb_queues, i;
2756 	struct virtqueue *vq;
2757 	struct virtio_hw *hw = dev->data->dev_private;
2758 	int ret;
2759 
2760 	/* Finish the initialization of the queues */
2761 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2762 		ret = virtio_dev_rx_queue_setup_finish(dev, i);
2763 		if (ret < 0)
2764 			return ret;
2765 	}
2766 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2767 		ret = virtio_dev_tx_queue_setup_finish(dev, i);
2768 		if (ret < 0)
2769 			return ret;
2770 	}
2771 
2772 	/* check if lsc interrupt feature is enabled */
2773 	if (dev->data->dev_conf.intr_conf.lsc) {
2774 		if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2775 			PMD_DRV_LOG(ERR, "link status not supported by host");
2776 			return -ENOTSUP;
2777 		}
2778 	}
2779 
2780 	/* Enable uio/vfio intr/eventfd mapping: we already did that in
2781 	 * device configure, but it could have been unmapped when the
2782 	 * device was stopped.
2783 	 */
2784 	if (dev->data->dev_conf.intr_conf.lsc ||
2785 	    dev->data->dev_conf.intr_conf.rxq) {
2786 		virtio_intr_disable(dev);
2787 
2788 		/* Set up the interrupt callback */
2789 		if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2790 			rte_intr_callback_register(dev->intr_handle,
2791 						   virtio_interrupt_handler,
2792 						   dev);
2793 
2794 		if (virtio_intr_enable(dev) < 0) {
2795 			PMD_DRV_LOG(ERR, "interrupt enable failed");
2796 			return -EIO;
2797 		}
2798 	}
2799 
2800 	/* Notify the backend.
2801 	 * Otherwise the tap backend might already have stopped its queue
2802 	 * due to fullness and the vhost backend would never be woken up.
2803 	 */
2804 	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2805 	if (hw->max_queue_pairs > 1) {
2806 		if (virtio_set_multiple_queues(dev, nb_queues) != 0)
2807 			return -EINVAL;
2808 	}
2809 
2810 	PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
2811 
2812 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2813 		vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2814 		/* Flush the old packets */
2815 		virtqueue_rxvq_flush(vq);
2816 		virtqueue_notify(vq);
2817 	}
2818 
2819 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2820 		vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2821 		virtqueue_notify(vq);
2822 	}
2823 
2824 	PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
2825 
2826 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2827 		vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2828 		VIRTQUEUE_DUMP(vq);
2829 	}
2830 
2831 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2832 		vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2833 		VIRTQUEUE_DUMP(vq);
2834 	}
2835 
2836 	set_rxtx_funcs(dev);
2837 	hw->started = 1;
2838 
2839 	/* Initialize Link state */
2840 	virtio_dev_link_update(dev, 0);
2841 
2842 	return 0;
2843 }
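
/*
 * From the application, the whole sequence above is driven by a single
 * call (sketch):
 *
 *	if (rte_eth_dev_start(port_id) < 0)
 *		rte_exit(EXIT_FAILURE, "cannot start port %u\n", port_id);
 */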
2844 
2845 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
2846 {
2847 	struct virtio_hw *hw = dev->data->dev_private;
2848 	uint16_t nr_vq = virtio_get_nr_vq(hw);
2849 	const char *type __rte_unused;
2850 	unsigned int i, mbuf_num = 0;
2851 	struct virtqueue *vq;
2852 	struct rte_mbuf *buf;
2853 	int queue_type;
2854 
2855 	if (hw->vqs == NULL)
2856 		return;
2857 
2858 	for (i = 0; i < nr_vq; i++) {
2859 		vq = hw->vqs[i];
2860 		if (!vq)
2861 			continue;
2862 
2863 		queue_type = virtio_get_queue_type(hw, i);
2864 		if (queue_type == VTNET_RQ)
2865 			type = "rxq";
2866 		else if (queue_type == VTNET_TQ)
2867 			type = "txq";
2868 		else
2869 			continue;
2870 
2871 		PMD_INIT_LOG(DEBUG,
2872 			"Before freeing %s[%d] used and unused bufs",
2873 			type, i);
2874 		VIRTQUEUE_DUMP(vq);
2875 
2876 		while ((buf = virtqueue_detach_unused(vq)) != NULL) {
2877 			rte_pktmbuf_free(buf);
2878 			mbuf_num++;
2879 		}
2880 
2881 		PMD_INIT_LOG(DEBUG,
2882 			"After freeing %s[%d] used and unused bufs",
2883 			type, i);
2884 		VIRTQUEUE_DUMP(vq);
2885 	}
2886 
2887 	PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
2888 }
2889 
2890 static void
2891 virtio_tx_completed_cleanup(struct rte_eth_dev *dev)
2892 {
2893 	struct virtio_hw *hw = dev->data->dev_private;
2894 	struct virtqueue *vq;
2895 	int qidx;
2896 	void (*xmit_cleanup)(struct virtqueue *vq, uint16_t nb_used);
2897 
2898 	if (virtio_with_packed_queue(hw)) {
2899 		if (hw->use_vec_tx)
2900 			xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2901 		else if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER))
2902 			xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2903 		else
2904 			xmit_cleanup = &virtio_xmit_cleanup_normal_packed;
2905 	} else {
2906 		if (hw->use_inorder_tx)
2907 			xmit_cleanup = &virtio_xmit_cleanup_inorder;
2908 		else
2909 			xmit_cleanup = &virtio_xmit_cleanup;
2910 	}
2911 
2912 	for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) {
2913 		vq = hw->vqs[2 * qidx + VTNET_SQ_TQ_QUEUE_IDX];
2914 		if (vq != NULL)
2915 			xmit_cleanup(vq, virtqueue_nused(vq));
2916 	}
2917 }
2918 
2919 /*
2920  * Stop device: disable interrupt and mark link down
2921  */
2922 int
2923 virtio_dev_stop(struct rte_eth_dev *dev)
2924 {
2925 	struct virtio_hw *hw = dev->data->dev_private;
2926 	struct rte_eth_link link;
2927 	struct rte_eth_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
2928 
2929 	PMD_INIT_LOG(DEBUG, "stop");
2930 	dev->data->dev_started = 0;
2931 
2932 	rte_spinlock_lock(&hw->state_lock);
2933 	if (!hw->started)
2934 		goto out_unlock;
2935 	hw->started = 0;
2936 
2937 	virtio_tx_completed_cleanup(dev);
2938 
2939 	if (intr_conf->lsc || intr_conf->rxq) {
2940 		virtio_intr_disable(dev);
2941 
2942 		/* Reset the interrupt callback */
2943 		if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
2944 			rte_intr_callback_unregister(dev->intr_handle,
2945 						     virtio_interrupt_handler,
2946 						     dev);
2947 		}
2948 	}
2949 
2950 	memset(&link, 0, sizeof(link));
2951 	rte_eth_linkstatus_set(dev, &link);
2952 out_unlock:
2953 	rte_spinlock_unlock(&hw->state_lock);
2954 
2955 	return 0;
2956 }
2957 
2958 static int
2959 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
2960 {
2961 	struct rte_eth_link link;
2962 	uint16_t status;
2963 	struct virtio_hw *hw = dev->data->dev_private;
2964 
2965 	memset(&link, 0, sizeof(link));
2966 	link.link_duplex = hw->duplex;
2967 	link.link_speed  = hw->speed;
2968 	link.link_autoneg = RTE_ETH_LINK_AUTONEG;
2969 
2970 	if (!hw->started) {
2971 		link.link_status = RTE_ETH_LINK_DOWN;
2972 		link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2973 	} else if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2974 		PMD_INIT_LOG(DEBUG, "Get link status from hw");
2975 		virtio_read_dev_config(hw,
2976 				offsetof(struct virtio_net_config, status),
2977 				&status, sizeof(status));
2978 		if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2979 			link.link_status = RTE_ETH_LINK_DOWN;
2980 			link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2981 			PMD_INIT_LOG(DEBUG, "Port %d is down",
2982 				     dev->data->port_id);
2983 		} else {
2984 			link.link_status = RTE_ETH_LINK_UP;
2985 			if (hw->get_speed_via_feat)
2986 				virtio_get_speed_duplex(dev, &link);
2987 			PMD_INIT_LOG(DEBUG, "Port %d is up",
2988 				     dev->data->port_id);
2989 		}
2990 	} else {
2991 		link.link_status = RTE_ETH_LINK_UP;
2992 		if (hw->get_speed_via_feat)
2993 			virtio_get_speed_duplex(dev, &link);
2994 	}
2995 
2996 	return rte_eth_linkstatus_set(dev, &link);
2997 }
2998 
2999 static int
3000 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
3001 {
3002 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
3003 	struct virtio_hw *hw = dev->data->dev_private;
3004 	uint64_t offloads = rxmode->offloads;
3005 
3006 	if (mask & RTE_ETH_VLAN_FILTER_MASK) {
3007 		if ((offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER) &&
3008 				!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
3009 
3010 			PMD_DRV_LOG(NOTICE,
3011 				"vlan filtering not available on this host");
3012 
3013 			return -ENOTSUP;
3014 		}
3015 	}
3016 
3017 	if (mask & RTE_ETH_VLAN_STRIP_MASK)
3018 		hw->vlan_strip = !!(offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
3019 
3020 	return 0;
3021 }
3022 
3023 static int
3024 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
3025 {
3026 	uint64_t tso_mask, host_features;
3027 	uint32_t rss_hash_types = 0;
3028 	struct virtio_hw *hw = dev->data->dev_private;
3029 	dev_info->speed_capa = virtio_dev_speed_capa_get(hw->speed);
3030 
3031 	dev_info->max_rx_queues =
3032 		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
3033 	dev_info->max_tx_queues =
3034 		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
3035 	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
3036 	dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
3037 	dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
3038 	dev_info->max_mtu = hw->max_mtu;
3039 
3040 	host_features = VIRTIO_OPS(hw)->get_features(hw);
3041 	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
3042 	if (host_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
3043 		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_SCATTER;
3044 	if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
3045 		dev_info->rx_offload_capa |=
3046 			RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
3047 			RTE_ETH_RX_OFFLOAD_UDP_CKSUM;
3048 	}
3049 	if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
3050 		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
3051 	tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
3052 		(1ULL << VIRTIO_NET_F_GUEST_TSO6);
3053 	if ((host_features & tso_mask) == tso_mask)
3054 		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
3055 
3056 	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
3057 				    RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
3058 	if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
3059 		dev_info->tx_offload_capa |=
3060 			RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
3061 			RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
3062 	}
3063 	tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
3064 		(1ULL << VIRTIO_NET_F_HOST_TSO6);
3065 	if ((host_features & tso_mask) == tso_mask)
3066 		dev_info->tx_offload_capa |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
3067 
3068 	if (host_features & (1ULL << VIRTIO_NET_F_RSS)) {
3069 		virtio_dev_get_rss_config(hw, &rss_hash_types);
3070 		dev_info->hash_key_size = VIRTIO_NET_RSS_KEY_SIZE;
3071 		dev_info->reta_size = VIRTIO_NET_RSS_RETA_SIZE;
3072 		dev_info->flow_type_rss_offloads =
3073 			virtio_to_ethdev_rss_offloads(rss_hash_types);
3074 	} else {
3075 		dev_info->hash_key_size = 0;
3076 		dev_info->reta_size = 0;
3077 		dev_info->flow_type_rss_offloads = 0;
3078 	}
3079 
3080 	if (host_features & (1ULL << VIRTIO_F_RING_PACKED)) {
3081 		/*
3082 		 * According to 2.7 Packed Virtqueues,
3083 		 * 2.7.10.1 Structure Size and Alignment:
3084 		 * The Queue Size value does not have to be a power of 2.
3085 		 */
3086 		dev_info->rx_desc_lim.nb_max = UINT16_MAX;
3087 		dev_info->tx_desc_lim.nb_max = UINT16_MAX;
3088 	} else {
3089 		/*
3090 		 * According to 2.6 Split Virtqueues:
3091 		 * Queue Size value is always a power of 2. The maximum Queue
3092 		 * Size value is 32768.
3093 		 */
3094 		dev_info->rx_desc_lim.nb_max = 32768;
3095 		dev_info->tx_desc_lim.nb_max = 32768;
3096 	}
3097 	/*
3098 	 * The actual minimum differs between virtqueue kinds, but to avoid
3099 	 * tangling the code with separate branches, rely on the default
3100 	 * thresholds: the descriptor count must be at least their size.
3101 	 */
3102 	dev_info->rx_desc_lim.nb_min = RTE_MAX(DEFAULT_RX_FREE_THRESH,
3103 					       RTE_VIRTIO_VPMD_RX_REARM_THRESH);
3104 	dev_info->tx_desc_lim.nb_min = DEFAULT_TX_FREE_THRESH;
3105 	dev_info->rx_desc_lim.nb_align = 1;
3106 	dev_info->tx_desc_lim.nb_align = 1;
3107 
3108 	return 0;
3109 }
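
/*
 * Applications retrieve these limits with rte_eth_dev_info_get(), e.g.
 * to clamp their ring sizes (sketch; nb_rxd is an assumed variable):
 *
 *	struct rte_eth_dev_info info;
 *
 *	if (rte_eth_dev_info_get(port_id, &info) == 0)
 *		nb_rxd = RTE_MIN(nb_rxd, info.rx_desc_lim.nb_max);
 */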
3110 
3111 /*
3112  * This no-op enables testpmd to collect per-queue stats.
3113  */
3114 static int
3115 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
3116 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
3117 __rte_unused uint8_t is_rx)
3118 {
3119 	return 0;
3120 }
3121 
3122 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_init, init, NOTICE);
3123 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_driver, driver, NOTICE);
3124