1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <unistd.h>
10 
11 #include <ethdev_driver.h>
12 #include <rte_memcpy.h>
13 #include <rte_string_fns.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_branch_prediction.h>
17 #include <rte_ether.h>
18 #include <rte_ip.h>
19 #include <rte_arp.h>
20 #include <rte_common.h>
21 #include <rte_errno.h>
22 #include <rte_cpuflags.h>
23 #include <rte_vect.h>
24 #include <rte_memory.h>
25 #include <rte_eal_paging.h>
26 #include <rte_eal.h>
27 #include <rte_dev.h>
28 #include <rte_cycles.h>
29 #include <rte_kvargs.h>
30 
31 #include "virtio_ethdev.h"
32 #include "virtio.h"
33 #include "virtio_logs.h"
34 #include "virtqueue.h"
35 #include "virtio_rxtx.h"
36 #include "virtio_rxtx_simple.h"
37 #include "virtio_user/virtio_user_dev.h"
38 
39 static int  virtio_dev_configure(struct rte_eth_dev *dev);
40 static int  virtio_dev_start(struct rte_eth_dev *dev);
41 static int virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
42 static int virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
43 static int virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
44 static int virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
45 static uint32_t virtio_dev_speed_capa_get(uint32_t speed);
46 static int virtio_dev_devargs_parse(struct rte_devargs *devargs,
47 	uint32_t *speed,
48 	int *vectorized);
49 static int virtio_dev_info_get(struct rte_eth_dev *dev,
50 				struct rte_eth_dev_info *dev_info);
51 static int virtio_dev_link_update(struct rte_eth_dev *dev,
52 	int wait_to_complete);
53 static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
54 static int virtio_dev_rss_hash_update(struct rte_eth_dev *dev,
55 		struct rte_eth_rss_conf *rss_conf);
56 static int virtio_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
57 		struct rte_eth_rss_conf *rss_conf);
58 static int virtio_dev_rss_reta_update(struct rte_eth_dev *dev,
59 			 struct rte_eth_rss_reta_entry64 *reta_conf,
60 			 uint16_t reta_size);
61 static int virtio_dev_rss_reta_query(struct rte_eth_dev *dev,
62 			 struct rte_eth_rss_reta_entry64 *reta_conf,
63 			 uint16_t reta_size);
64 
65 static void virtio_set_hwaddr(struct virtio_hw *hw);
66 static void virtio_get_hwaddr(struct virtio_hw *hw);
67 
68 static int virtio_dev_stats_get(struct rte_eth_dev *dev,
69 				 struct rte_eth_stats *stats);
70 static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
71 				 struct rte_eth_xstat *xstats, unsigned n);
72 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
73 				       struct rte_eth_xstat_name *xstats_names,
74 				       unsigned limit);
75 static int virtio_dev_stats_reset(struct rte_eth_dev *dev);
76 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
77 static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
78 				uint16_t vlan_id, int on);
79 static int virtio_mac_addr_add(struct rte_eth_dev *dev,
80 				struct rte_ether_addr *mac_addr,
81 				uint32_t index, uint32_t vmdq);
82 static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
83 static int virtio_mac_addr_set(struct rte_eth_dev *dev,
84 				struct rte_ether_addr *mac_addr);
85 
86 static int virtio_intr_disable(struct rte_eth_dev *dev);
87 static int virtio_get_monitor_addr(void *rx_queue,
88 				struct rte_power_monitor_cond *pmc);
89 
90 static int virtio_dev_queue_stats_mapping_set(
91 	struct rte_eth_dev *eth_dev,
92 	uint16_t queue_id,
93 	uint8_t stat_idx,
94 	uint8_t is_rx);
95 
96 static void virtio_notify_peers(struct rte_eth_dev *dev);
97 static void virtio_ack_link_announce(struct rte_eth_dev *dev);
98 
99 struct rte_virtio_xstats_name_off {
100 	char name[RTE_ETH_XSTATS_NAME_SIZE];
101 	unsigned offset;
102 };
103 
104 /* [rt]x_qX_ is prepended to the name string here */
105 static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
106 	{"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
107 	{"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
108 	{"errors",                 offsetof(struct virtnet_rx, stats.errors)},
109 	{"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
110 	{"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
111 	{"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
112 	{"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
113 	{"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
114 	{"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
115 	{"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
116 	{"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
117 	{"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
118 	{"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
119 };
120 
121 /* [rt]x_qX_ is prepended to the name string here */
122 static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
123 	{"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
124 	{"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
125 	{"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
126 	{"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
127 	{"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
128 	{"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
129 	{"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
130 	{"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
131 	{"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
132 	{"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
133 	{"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
134 	{"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
135 };
136 
137 #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
138 			    sizeof(rte_virtio_rxq_stat_strings[0]))
139 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
140 			    sizeof(rte_virtio_txq_stat_strings[0]))
141 
142 struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
143 
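/*
 * Send a control command on a packed control virtqueue: one descriptor for
 * the control header, one per data argument and one device-writable
 * descriptor for the status byte. Busy-waits (usleep) until the device marks
 * the head descriptor as used, then reclaims the descriptors.
 */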
144 static struct virtio_pmd_ctrl *
145 virtio_send_command_packed(struct virtnet_ctl *cvq,
146 			   struct virtio_pmd_ctrl *ctrl,
147 			   int *dlen, int pkt_num)
148 {
149 	struct virtqueue *vq = virtnet_cq_to_vq(cvq);
150 	int head;
151 	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
152 	struct virtio_pmd_ctrl *result;
153 	uint16_t flags;
154 	int sum = 0;
155 	int nb_descs = 0;
156 	int k;
157 
158 	/*
159 	 * Layout enforced by the QEMU implementation:
160 	 * one descriptor for the control header;
161 	 * at least one descriptor per data argument;
162 	 * one device-writable descriptor for the status (ack) byte.
163 	 */
164 	head = vq->vq_avail_idx;
165 	flags = vq->vq_packed.cached_flags;
166 	desc[head].addr = cvq->virtio_net_hdr_mem;
167 	desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
168 	vq->vq_free_cnt--;
169 	nb_descs++;
170 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
171 		vq->vq_avail_idx -= vq->vq_nentries;
172 		vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
173 	}
174 
175 	for (k = 0; k < pkt_num; k++) {
176 		desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
177 			+ sizeof(struct virtio_net_ctrl_hdr)
178 			+ sizeof(ctrl->status) + sizeof(uint8_t) * sum;
179 		desc[vq->vq_avail_idx].len = dlen[k];
180 		desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
181 			vq->vq_packed.cached_flags;
182 		sum += dlen[k];
183 		vq->vq_free_cnt--;
184 		nb_descs++;
185 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
186 			vq->vq_avail_idx -= vq->vq_nentries;
187 			vq->vq_packed.cached_flags ^=
188 				VRING_PACKED_DESC_F_AVAIL_USED;
189 		}
190 	}
191 
192 	desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
193 		+ sizeof(struct virtio_net_ctrl_hdr);
194 	desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
195 	desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
196 		vq->vq_packed.cached_flags;
197 	vq->vq_free_cnt--;
198 	nb_descs++;
199 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
200 		vq->vq_avail_idx -= vq->vq_nentries;
201 		vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
202 	}
203 
204 	virtqueue_store_flags_packed(&desc[head], VRING_DESC_F_NEXT | flags,
205 			vq->hw->weak_barriers);
206 
207 	virtio_wmb(vq->hw->weak_barriers);
208 	virtqueue_notify(vq);
209 
210 	/* wait for used desc in virtqueue
211 	 * desc_is_used has a load-acquire or rte_io_rmb inside
212 	 */
213 	while (!desc_is_used(&desc[head], vq))
214 		usleep(100);
215 
216 	/* now get used descriptors */
217 	vq->vq_free_cnt += nb_descs;
218 	vq->vq_used_cons_idx += nb_descs;
219 	if (vq->vq_used_cons_idx >= vq->vq_nentries) {
220 		vq->vq_used_cons_idx -= vq->vq_nentries;
221 		vq->vq_packed.used_wrap_counter ^= 1;
222 	}
223 
224 	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
225 			"vq->vq_avail_idx=%d\n"
226 			"vq->vq_used_cons_idx=%d\n"
227 			"vq->vq_packed.cached_flags=0x%x\n"
228 			"vq->vq_packed.used_wrap_counter=%d",
229 			vq->vq_free_cnt,
230 			vq->vq_avail_idx,
231 			vq->vq_used_cons_idx,
232 			vq->vq_packed.cached_flags,
233 			vq->vq_packed.used_wrap_counter);
234 
235 	result = cvq->virtio_net_hdr_mz->addr;
236 	return result;
237 }
238 
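/*
 * Split ring counterpart of virtio_send_command_packed(): chains the header,
 * argument and status descriptors, notifies the device and busy-waits until
 * the used ring reports completion before recycling the descriptor chain.
 */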
239 static struct virtio_pmd_ctrl *
240 virtio_send_command_split(struct virtnet_ctl *cvq,
241 			  struct virtio_pmd_ctrl *ctrl,
242 			  int *dlen, int pkt_num)
243 {
244 	struct virtio_pmd_ctrl *result;
245 	struct virtqueue *vq = virtnet_cq_to_vq(cvq);
246 	uint32_t head, i;
247 	int k, sum = 0;
248 
249 	head = vq->vq_desc_head_idx;
250 
251 	/*
252 	 * Layout enforced by the QEMU implementation:
253 	 * one descriptor for the control header;
254 	 * at least one descriptor per data argument;
255 	 * one device-writable descriptor for the status (ack) byte.
256 	 */
257 	vq->vq_split.ring.desc[head].flags = VRING_DESC_F_NEXT;
258 	vq->vq_split.ring.desc[head].addr = cvq->virtio_net_hdr_mem;
259 	vq->vq_split.ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
260 	vq->vq_free_cnt--;
261 	i = vq->vq_split.ring.desc[head].next;
262 
263 	for (k = 0; k < pkt_num; k++) {
264 		vq->vq_split.ring.desc[i].flags = VRING_DESC_F_NEXT;
265 		vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
266 			+ sizeof(struct virtio_net_ctrl_hdr)
267 			+ sizeof(ctrl->status) + sizeof(uint8_t)*sum;
268 		vq->vq_split.ring.desc[i].len = dlen[k];
269 		sum += dlen[k];
270 		vq->vq_free_cnt--;
271 		i = vq->vq_split.ring.desc[i].next;
272 	}
273 
274 	vq->vq_split.ring.desc[i].flags = VRING_DESC_F_WRITE;
275 	vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
276 			+ sizeof(struct virtio_net_ctrl_hdr);
277 	vq->vq_split.ring.desc[i].len = sizeof(ctrl->status);
278 	vq->vq_free_cnt--;
279 
280 	vq->vq_desc_head_idx = vq->vq_split.ring.desc[i].next;
281 
282 	vq_update_avail_ring(vq, head);
283 	vq_update_avail_idx(vq);
284 
285 	PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
286 
287 	virtqueue_notify(vq);
288 
289 	while (virtqueue_nused(vq) == 0)
290 		usleep(100);
291 
292 	while (virtqueue_nused(vq)) {
293 		uint32_t idx, desc_idx, used_idx;
294 		struct vring_used_elem *uep;
295 
296 		used_idx = (uint32_t)(vq->vq_used_cons_idx
297 				& (vq->vq_nentries - 1));
298 		uep = &vq->vq_split.ring.used->ring[used_idx];
299 		idx = (uint32_t) uep->id;
300 		desc_idx = idx;
301 
302 		while (vq->vq_split.ring.desc[desc_idx].flags &
303 				VRING_DESC_F_NEXT) {
304 			desc_idx = vq->vq_split.ring.desc[desc_idx].next;
305 			vq->vq_free_cnt++;
306 		}
307 
308 		vq->vq_split.ring.desc[desc_idx].next = vq->vq_desc_head_idx;
309 		vq->vq_desc_head_idx = idx;
310 
311 		vq->vq_used_cons_idx++;
312 		vq->vq_free_cnt++;
313 	}
314 
315 	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
316 			vq->vq_free_cnt, vq->vq_desc_head_idx);
317 
318 	result = cvq->virtio_net_hdr_mz->addr;
319 	return result;
320 }
321 
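/*
 * Copy the command into the control queue header memzone and dispatch it to
 * the packed or split implementation under the control queue lock. Returns
 * the status byte written back by the device, or -1 if the control queue is
 * missing or lacks free descriptors.
 */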
322 static int
323 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
324 		    int *dlen, int pkt_num)
325 {
326 	virtio_net_ctrl_ack status = ~0;
327 	struct virtio_pmd_ctrl *result;
328 	struct virtqueue *vq;
329 
330 	ctrl->status = status;
331 
332 	if (!cvq) {
333 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
334 		return -1;
335 	}
336 
337 	rte_spinlock_lock(&cvq->lock);
338 	vq = virtnet_cq_to_vq(cvq);
339 
340 	PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
341 		"vq->hw->cvq = %p vq = %p",
342 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
343 
344 	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
345 		rte_spinlock_unlock(&cvq->lock);
346 		return -1;
347 	}
348 
349 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
350 		sizeof(struct virtio_pmd_ctrl));
351 
352 	if (virtio_with_packed_queue(vq->hw))
353 		result = virtio_send_command_packed(cvq, ctrl, dlen, pkt_num);
354 	else
355 		result = virtio_send_command_split(cvq, ctrl, dlen, pkt_num);
356 
357 	rte_spinlock_unlock(&cvq->lock);
358 	return result->status;
359 }
360 
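/*
 * Program the device RSS configuration (hash types, indirection table, key
 * and max Tx VQ count) through a VIRTIO_NET_CTRL_MQ_RSS_CONFIG control
 * command.
 */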
361 static int
362 virtio_set_multiple_queues_rss(struct rte_eth_dev *dev, uint16_t nb_queues)
363 {
364 	struct virtio_hw *hw = dev->data->dev_private;
365 	struct virtio_pmd_ctrl ctrl;
366 	struct virtio_net_ctrl_rss rss;
367 	int dlen, ret;
368 
369 	rss.hash_types = hw->rss_hash_types & VIRTIO_NET_HASH_TYPE_MASK;
370 	RTE_BUILD_BUG_ON(!RTE_IS_POWER_OF_2(VIRTIO_NET_RSS_RETA_SIZE));
371 	rss.indirection_table_mask = VIRTIO_NET_RSS_RETA_SIZE - 1;
372 	rss.unclassified_queue = 0;
373 	memcpy(rss.indirection_table, hw->rss_reta, VIRTIO_NET_RSS_RETA_SIZE * sizeof(uint16_t));
374 	rss.max_tx_vq = nb_queues;
375 	rss.hash_key_length = VIRTIO_NET_RSS_KEY_SIZE;
376 	memcpy(rss.hash_key_data, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
377 
378 	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
379 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_RSS_CONFIG;
380 	memcpy(ctrl.data, &rss, sizeof(rss));
381 
382 	dlen = sizeof(rss);
383 
384 	ret = virtio_send_command(hw->cvq, &ctrl, &dlen, 1);
385 	if (ret) {
386 		PMD_INIT_LOG(ERR, "RSS multiqueue configured but send command failed");
387 		return -EINVAL;
388 	}
389 
390 	return 0;
391 }
392 
393 static int
394 virtio_set_multiple_queues_auto(struct rte_eth_dev *dev, uint16_t nb_queues)
395 {
396 	struct virtio_hw *hw = dev->data->dev_private;
397 	struct virtio_pmd_ctrl ctrl;
398 	int dlen;
399 	int ret;
400 
401 	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
402 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
403 	memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
404 
405 	dlen = sizeof(uint16_t);
406 
407 	ret = virtio_send_command(hw->cvq, &ctrl, &dlen, 1);
408 	if (ret) {
409 		PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
410 			  "failed");
411 		return -EINVAL;
412 	}
413 
414 	return 0;
415 }
416 
417 static int
418 virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
419 {
420 	struct virtio_hw *hw = dev->data->dev_private;
421 
422 	if (virtio_with_feature(hw, VIRTIO_NET_F_RSS))
423 		return virtio_set_multiple_queues_rss(dev, nb_queues);
424 	else
425 		return virtio_set_multiple_queues_auto(dev, nb_queues);
426 }
427 
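/* Total number of virtqueues: two per queue pair, plus one control queue
 * when VIRTIO_NET_F_CTRL_VQ has been negotiated.
 */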
428 static uint16_t
429 virtio_get_nr_vq(struct virtio_hw *hw)
430 {
431 	uint16_t nr_vq = hw->max_queue_pairs * 2;
432 
433 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
434 		nr_vq += 1;
435 
436 	return nr_vq;
437 }
438 
439 static void
440 virtio_init_vring(struct virtqueue *vq)
441 {
442 	int size = vq->vq_nentries;
443 	uint8_t *ring_mem = vq->vq_ring_virt_mem;
444 
445 	PMD_INIT_FUNC_TRACE();
446 
447 	memset(ring_mem, 0, vq->vq_ring_size);
448 
449 	vq->vq_used_cons_idx = 0;
450 	vq->vq_desc_head_idx = 0;
451 	vq->vq_avail_idx = 0;
452 	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
453 	vq->vq_free_cnt = vq->vq_nentries;
454 	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
455 	if (virtio_with_packed_queue(vq->hw)) {
456 		vring_init_packed(&vq->vq_packed.ring, ring_mem,
457 				  VIRTIO_VRING_ALIGN, size);
458 		vring_desc_init_packed(vq, size);
459 	} else {
460 		struct vring *vr = &vq->vq_split.ring;
461 
462 		vring_init_split(vr, ring_mem, VIRTIO_VRING_ALIGN, size);
463 		vring_desc_init_split(vr->desc, size);
464 	}
465 	/*
466 	 * Disable device (host) to guest interrupt notifications.
467 	 */
468 	virtqueue_disable_intr(vq);
469 }
470 
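/*
 * Allocate and initialize virtqueue 'queue_idx': reserve the vring memzone,
 * a header memzone for Tx/control queues, and the sw ring plus fake mbuf for
 * Rx queues, then hand the queue to the VIRTIO_OPS setup_queue() callback.
 */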
471 static int
472 virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
473 {
474 	char vq_name[VIRTQUEUE_MAX_NAME_SZ];
475 	char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
476 	const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
477 	unsigned int vq_size, size;
478 	struct virtio_hw *hw = dev->data->dev_private;
479 	struct virtnet_rx *rxvq = NULL;
480 	struct virtnet_tx *txvq = NULL;
481 	struct virtnet_ctl *cvq = NULL;
482 	struct virtqueue *vq;
483 	size_t sz_hdr_mz = 0;
484 	void *sw_ring = NULL;
485 	int queue_type = virtio_get_queue_type(hw, queue_idx);
486 	int ret;
487 	int numa_node = dev->device->numa_node;
488 	struct rte_mbuf *fake_mbuf = NULL;
489 
490 	PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
491 			queue_idx, numa_node);
492 
493 	/*
494 	 * Read the virtqueue size from the Queue Size field. A value of 0
495 	 * means the virtqueue does not exist; a split ring size must be a power of 2.
496 	 */
497 	vq_size = VIRTIO_OPS(hw)->get_queue_num(hw, queue_idx);
498 	PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
499 	if (vq_size == 0) {
500 		PMD_INIT_LOG(ERR, "virtqueue does not exist");
501 		return -EINVAL;
502 	}
503 
504 	if (!virtio_with_packed_queue(hw) && !rte_is_power_of_2(vq_size)) {
505 		PMD_INIT_LOG(ERR, "split virtqueue size is not a power of 2");
506 		return -EINVAL;
507 	}
508 
509 	snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
510 		 dev->data->port_id, queue_idx);
511 
512 	size = RTE_ALIGN_CEIL(sizeof(*vq) +
513 				vq_size * sizeof(struct vq_desc_extra),
514 				RTE_CACHE_LINE_SIZE);
515 	if (queue_type == VTNET_TQ) {
516 		/*
517 		 * For each xmit packet, allocate a virtio_net_hdr
518 		 * and indirect ring elements
519 		 */
520 		sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
521 	} else if (queue_type == VTNET_CQ) {
522 		/* Allocate a page for control vq command, data and status */
523 		sz_hdr_mz = rte_mem_page_size();
524 	}
525 
526 	vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
527 				numa_node);
528 	if (vq == NULL) {
529 		PMD_INIT_LOG(ERR, "can not allocate vq");
530 		return -ENOMEM;
531 	}
532 	hw->vqs[queue_idx] = vq;
533 
534 	vq->hw = hw;
535 	vq->vq_queue_index = queue_idx;
536 	vq->vq_nentries = vq_size;
537 	if (virtio_with_packed_queue(hw)) {
538 		vq->vq_packed.used_wrap_counter = 1;
539 		vq->vq_packed.cached_flags = VRING_PACKED_DESC_F_AVAIL;
540 		vq->vq_packed.event_flags_shadow = 0;
541 		if (queue_type == VTNET_RQ)
542 			vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE;
543 	}
544 
545 	/*
546 	 * Reserve a memzone for vring elements
547 	 */
548 	size = vring_size(hw, vq_size, VIRTIO_VRING_ALIGN);
549 	vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_VRING_ALIGN);
550 	PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
551 		     size, vq->vq_ring_size);
552 
553 	mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
554 			numa_node, RTE_MEMZONE_IOVA_CONTIG,
555 			VIRTIO_VRING_ALIGN);
556 	if (mz == NULL) {
557 		if (rte_errno == EEXIST)
558 			mz = rte_memzone_lookup(vq_name);
559 		if (mz == NULL) {
560 			ret = -ENOMEM;
561 			goto free_vq;
562 		}
563 	}
564 
565 	memset(mz->addr, 0, mz->len);
566 
567 	if (hw->use_va)
568 		vq->vq_ring_mem = (uintptr_t)mz->addr;
569 	else
570 		vq->vq_ring_mem = mz->iova;
571 
572 	vq->vq_ring_virt_mem = mz->addr;
573 	PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64, vq->vq_ring_mem);
574 	PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: %p", vq->vq_ring_virt_mem);
575 
576 	virtio_init_vring(vq);
577 
578 	if (sz_hdr_mz) {
579 		snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
580 			 dev->data->port_id, queue_idx);
581 		hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
582 				numa_node, RTE_MEMZONE_IOVA_CONTIG,
583 				RTE_CACHE_LINE_SIZE);
584 		if (hdr_mz == NULL) {
585 			if (rte_errno == EEXIST)
586 				hdr_mz = rte_memzone_lookup(vq_hdr_name);
587 			if (hdr_mz == NULL) {
588 				ret = -ENOMEM;
589 				goto free_mz;
590 			}
591 		}
592 	}
593 
594 	if (queue_type == VTNET_RQ) {
595 		size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
596 			       sizeof(vq->sw_ring[0]);
597 
598 		sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
599 				RTE_CACHE_LINE_SIZE, numa_node);
600 		if (!sw_ring) {
601 			PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
602 			ret = -ENOMEM;
603 			goto free_hdr_mz;
604 		}
605 
606 		fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf),
607 				RTE_CACHE_LINE_SIZE, numa_node);
608 		if (!fake_mbuf) {
609 			PMD_INIT_LOG(ERR, "can not allocate fake mbuf");
610 			ret = -ENOMEM;
611 			goto free_sw_ring;
612 		}
613 
614 		vq->sw_ring = sw_ring;
615 		rxvq = &vq->rxq;
616 		rxvq->port_id = dev->data->port_id;
617 		rxvq->mz = mz;
618 		rxvq->fake_mbuf = fake_mbuf;
619 	} else if (queue_type == VTNET_TQ) {
620 		txvq = &vq->txq;
621 		txvq->port_id = dev->data->port_id;
622 		txvq->mz = mz;
623 		txvq->virtio_net_hdr_mz = hdr_mz;
624 		if (hw->use_va)
625 			txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
626 		else
627 			txvq->virtio_net_hdr_mem = hdr_mz->iova;
628 	} else if (queue_type == VTNET_CQ) {
629 		cvq = &vq->cq;
630 		cvq->mz = mz;
631 		cvq->virtio_net_hdr_mz = hdr_mz;
632 		if (hw->use_va)
633 			cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
634 		else
635 			cvq->virtio_net_hdr_mem = hdr_mz->iova;
636 		memset(cvq->virtio_net_hdr_mz->addr, 0, rte_mem_page_size());
637 
638 		hw->cvq = cvq;
639 	}
640 
641 	if (hw->use_va)
642 		vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_addr);
643 	else
644 		vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_iova);
645 
646 	if (queue_type == VTNET_TQ) {
647 		struct virtio_tx_region *txr;
648 		unsigned int i;
649 
650 		txr = hdr_mz->addr;
651 		memset(txr, 0, vq_size * sizeof(*txr));
652 		for (i = 0; i < vq_size; i++) {
653 			/* first indirect descriptor is always the tx header */
654 			if (!virtio_with_packed_queue(hw)) {
655 				struct vring_desc *start_dp = txr[i].tx_indir;
656 				vring_desc_init_split(start_dp,
657 						      RTE_DIM(txr[i].tx_indir));
658 				start_dp->addr = txvq->virtio_net_hdr_mem
659 					+ i * sizeof(*txr)
660 					+ offsetof(struct virtio_tx_region,
661 						   tx_hdr);
662 				start_dp->len = hw->vtnet_hdr_size;
663 				start_dp->flags = VRING_DESC_F_NEXT;
664 			} else {
665 				struct vring_packed_desc *start_dp =
666 					txr[i].tx_packed_indir;
667 				vring_desc_init_indirect_packed(start_dp,
668 				      RTE_DIM(txr[i].tx_packed_indir));
669 				start_dp->addr = txvq->virtio_net_hdr_mem
670 					+ i * sizeof(*txr)
671 					+ offsetof(struct virtio_tx_region,
672 						   tx_hdr);
673 				start_dp->len = hw->vtnet_hdr_size;
674 			}
675 		}
676 	}
677 
678 	if (VIRTIO_OPS(hw)->setup_queue(hw, vq) < 0) {
679 		PMD_INIT_LOG(ERR, "setup_queue failed");
680 		ret = -EINVAL;
681 		goto clean_vq;
682 	}
683 
684 	return 0;
685 
686 clean_vq:
687 	hw->cvq = NULL;
688 	rte_free(fake_mbuf);
689 free_sw_ring:
690 	rte_free(sw_ring);
691 free_hdr_mz:
692 	rte_memzone_free(hdr_mz);
693 free_mz:
694 	rte_memzone_free(mz);
695 free_vq:
696 	rte_free(vq);
697 	hw->vqs[queue_idx] = NULL;
698 
699 	return ret;
700 }
701 
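/* Release all virtqueues together with their memzones and Rx sw rings. */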
702 static void
703 virtio_free_queues(struct virtio_hw *hw)
704 {
705 	uint16_t nr_vq = virtio_get_nr_vq(hw);
706 	struct virtqueue *vq;
707 	int queue_type;
708 	uint16_t i;
709 
710 	if (hw->vqs == NULL)
711 		return;
712 
713 	for (i = 0; i < nr_vq; i++) {
714 		vq = hw->vqs[i];
715 		if (!vq)
716 			continue;
717 
718 		queue_type = virtio_get_queue_type(hw, i);
719 		if (queue_type == VTNET_RQ) {
720 			rte_free(vq->rxq.fake_mbuf);
721 			rte_free(vq->sw_ring);
722 			rte_memzone_free(vq->rxq.mz);
723 		} else if (queue_type == VTNET_TQ) {
724 			rte_memzone_free(vq->txq.mz);
725 			rte_memzone_free(vq->txq.virtio_net_hdr_mz);
726 		} else {
727 			rte_memzone_free(vq->cq.mz);
728 			rte_memzone_free(vq->cq.virtio_net_hdr_mz);
729 		}
730 
731 		rte_free(vq);
732 		hw->vqs[i] = NULL;
733 	}
734 
735 	rte_free(hw->vqs);
736 	hw->vqs = NULL;
737 }
738 
739 static int
740 virtio_alloc_queues(struct rte_eth_dev *dev)
741 {
742 	struct virtio_hw *hw = dev->data->dev_private;
743 	uint16_t nr_vq = virtio_get_nr_vq(hw);
744 	uint16_t i;
745 	int ret;
746 
747 	hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
748 	if (!hw->vqs) {
749 		PMD_INIT_LOG(ERR, "failed to allocate vqs");
750 		return -ENOMEM;
751 	}
752 
753 	for (i = 0; i < nr_vq; i++) {
754 		ret = virtio_init_queue(dev, i);
755 		if (ret < 0) {
756 			virtio_free_queues(hw);
757 			return ret;
758 		}
759 	}
760 
761 	return 0;
762 }
763 
764 static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);
765 
766 static void
767 virtio_free_rss(struct virtio_hw *hw)
768 {
769 	rte_free(hw->rss_key);
770 	hw->rss_key = NULL;
771 
772 	rte_free(hw->rss_reta);
773 	hw->rss_reta = NULL;
774 }
775 
776 int
777 virtio_dev_close(struct rte_eth_dev *dev)
778 {
779 	struct virtio_hw *hw = dev->data->dev_private;
780 	struct rte_eth_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
781 
782 	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
783 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
784 		return 0;
785 
786 	if (!hw->opened)
787 		return 0;
788 	hw->opened = 0;
789 
790 	/* reset the NIC */
791 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
792 		VIRTIO_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
793 	if (intr_conf->rxq)
794 		virtio_queues_unbind_intr(dev);
795 
796 	if (intr_conf->lsc || intr_conf->rxq) {
797 		virtio_intr_disable(dev);
798 		rte_intr_efd_disable(dev->intr_handle);
799 		rte_intr_vec_list_free(dev->intr_handle);
800 	}
801 
802 	virtio_reset(hw);
803 	virtio_dev_free_mbufs(dev);
804 	virtio_free_queues(hw);
805 	virtio_free_rss(hw);
806 
807 	return VIRTIO_OPS(hw)->dev_close(hw);
808 }
809 
810 static int
811 virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
812 {
813 	struct virtio_hw *hw = dev->data->dev_private;
814 	struct virtio_pmd_ctrl ctrl;
815 	int dlen[1];
816 	int ret;
817 
818 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
819 		PMD_INIT_LOG(INFO, "host does not support rx control");
820 		return -ENOTSUP;
821 	}
822 
823 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
824 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
825 	ctrl.data[0] = 1;
826 	dlen[0] = 1;
827 
828 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
829 	if (ret) {
830 		PMD_INIT_LOG(ERR, "Failed to enable promisc");
831 		return -EAGAIN;
832 	}
833 
834 	return 0;
835 }
836 
837 static int
838 virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
839 {
840 	struct virtio_hw *hw = dev->data->dev_private;
841 	struct virtio_pmd_ctrl ctrl;
842 	int dlen[1];
843 	int ret;
844 
845 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
846 		PMD_INIT_LOG(INFO, "host does not support rx control");
847 		return -ENOTSUP;
848 	}
849 
850 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
851 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
852 	ctrl.data[0] = 0;
853 	dlen[0] = 1;
854 
855 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
856 	if (ret) {
857 		PMD_INIT_LOG(ERR, "Failed to disable promisc");
858 		return -EAGAIN;
859 	}
860 
861 	return 0;
862 }
863 
864 static int
865 virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
866 {
867 	struct virtio_hw *hw = dev->data->dev_private;
868 	struct virtio_pmd_ctrl ctrl;
869 	int dlen[1];
870 	int ret;
871 
872 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
873 		PMD_INIT_LOG(INFO, "host does not support rx control");
874 		return -ENOTSUP;
875 	}
876 
877 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
878 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
879 	ctrl.data[0] = 1;
880 	dlen[0] = 1;
881 
882 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
883 	if (ret) {
884 		PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
885 		return -EAGAIN;
886 	}
887 
888 	return 0;
889 }
890 
891 static int
892 virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
893 {
894 	struct virtio_hw *hw = dev->data->dev_private;
895 	struct virtio_pmd_ctrl ctrl;
896 	int dlen[1];
897 	int ret;
898 
899 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
900 		PMD_INIT_LOG(INFO, "host does not support rx control");
901 		return -ENOTSUP;
902 	}
903 
904 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
905 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
906 	ctrl.data[0] = 0;
907 	dlen[0] = 1;
908 
909 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
910 	if (ret) {
911 		PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
912 		return -EAGAIN;
913 	}
914 
915 	return 0;
916 }
917 
918 uint16_t
919 virtio_rx_mem_pool_buf_size(struct rte_mempool *mp)
920 {
921 	return rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
922 }
923 
924 bool
925 virtio_rx_check_scatter(uint16_t max_rx_pkt_len, uint16_t rx_buf_size,
926 			bool rx_scatter_enabled, const char **error)
927 {
928 	if (!rx_scatter_enabled && max_rx_pkt_len > rx_buf_size) {
929 		*error = "Rx scatter is disabled and RxQ mbuf pool object size is too small";
930 		return false;
931 	}
932 
933 	return true;
934 }
935 
936 static bool
937 virtio_check_scatter_on_all_rx_queues(struct rte_eth_dev *dev,
938 				      uint16_t frame_size)
939 {
940 	struct virtio_hw *hw = dev->data->dev_private;
941 	struct virtnet_rx *rxvq;
942 	struct virtqueue *vq;
943 	unsigned int qidx;
944 	uint16_t buf_size;
945 	const char *error;
946 
947 	if (hw->vqs == NULL)
948 		return true;
949 
950 	for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) {
951 		vq = hw->vqs[2 * qidx + VTNET_SQ_RQ_QUEUE_IDX];
952 		if (vq == NULL)
953 			continue;
954 
955 		rxvq = &vq->rxq;
956 		if (rxvq->mpool == NULL)
957 			continue;
958 		buf_size = virtio_rx_mem_pool_buf_size(rxvq->mpool);
959 
960 		if (!virtio_rx_check_scatter(frame_size, buf_size,
961 					     hw->rx_ol_scatter, &error)) {
962 			PMD_INIT_LOG(ERR, "MTU check for RxQ %u failed: %s",
963 				     qidx, error);
964 			return false;
965 		}
966 	}
967 
968 	return true;
969 }
970 
971 #define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
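/*
 * Validate the requested MTU against the device maximum and the Rx scatter
 * configuration of all Rx queues before recording the new frame size.
 */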
972 static int
973 virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
974 {
975 	struct virtio_hw *hw = dev->data->dev_private;
976 	uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
977 				 hw->vtnet_hdr_size;
978 	uint32_t frame_size = mtu + ether_hdr_len;
979 	uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;
980 
981 	max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);
982 
983 	if (mtu < RTE_ETHER_MIN_MTU || frame_size > max_frame_size) {
984 		PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
985 			RTE_ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
986 		return -EINVAL;
987 	}
988 
989 	if (!virtio_check_scatter_on_all_rx_queues(dev, frame_size)) {
990 		PMD_INIT_LOG(ERR, "MTU vs Rx scatter and Rx buffers check failed");
991 		return -EINVAL;
992 	}
993 
994 	hw->max_rx_pkt_len = frame_size;
995 
996 	return 0;
997 }
998 
999 static int
1000 virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
1001 {
1002 	struct virtio_hw *hw = dev->data->dev_private;
1003 	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
1004 	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
1005 
1006 	virtqueue_enable_intr(vq);
1007 	virtio_mb(hw->weak_barriers);
1008 	return 0;
1009 }
1010 
1011 static int
1012 virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
1013 {
1014 	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
1015 	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
1016 
1017 	virtqueue_disable_intr(vq);
1018 	return 0;
1019 }
1020 
1021 /*
1022  * dev_ops for virtio, bare necessities for basic operation
1023  */
1024 static const struct eth_dev_ops virtio_eth_dev_ops = {
1025 	.dev_configure           = virtio_dev_configure,
1026 	.dev_start               = virtio_dev_start,
1027 	.dev_stop                = virtio_dev_stop,
1028 	.dev_close               = virtio_dev_close,
1029 	.promiscuous_enable      = virtio_dev_promiscuous_enable,
1030 	.promiscuous_disable     = virtio_dev_promiscuous_disable,
1031 	.allmulticast_enable     = virtio_dev_allmulticast_enable,
1032 	.allmulticast_disable    = virtio_dev_allmulticast_disable,
1033 	.mtu_set                 = virtio_mtu_set,
1034 	.dev_infos_get           = virtio_dev_info_get,
1035 	.stats_get               = virtio_dev_stats_get,
1036 	.xstats_get              = virtio_dev_xstats_get,
1037 	.xstats_get_names        = virtio_dev_xstats_get_names,
1038 	.stats_reset             = virtio_dev_stats_reset,
1039 	.xstats_reset            = virtio_dev_stats_reset,
1040 	.link_update             = virtio_dev_link_update,
1041 	.vlan_offload_set        = virtio_dev_vlan_offload_set,
1042 	.rx_queue_setup          = virtio_dev_rx_queue_setup,
1043 	.rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
1044 	.rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
1045 	.tx_queue_setup          = virtio_dev_tx_queue_setup,
1046 	.rss_hash_update         = virtio_dev_rss_hash_update,
1047 	.rss_hash_conf_get       = virtio_dev_rss_hash_conf_get,
1048 	.reta_update             = virtio_dev_rss_reta_update,
1049 	.reta_query              = virtio_dev_rss_reta_query,
1050 	/* collect stats per queue */
1051 	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
1052 	.vlan_filter_set         = virtio_vlan_filter_set,
1053 	.mac_addr_add            = virtio_mac_addr_add,
1054 	.mac_addr_remove         = virtio_mac_addr_remove,
1055 	.mac_addr_set            = virtio_mac_addr_set,
1056 	.get_monitor_addr        = virtio_get_monitor_addr,
1057 };
1058 
1059 /*
1060  * dev_ops for virtio-user in secondary processes, as we currently have
1061  * only limited support.
1062  */
1063 const struct eth_dev_ops virtio_user_secondary_eth_dev_ops = {
1064 	.dev_infos_get           = virtio_dev_info_get,
1065 	.stats_get               = virtio_dev_stats_get,
1066 	.xstats_get              = virtio_dev_xstats_get,
1067 	.xstats_get_names        = virtio_dev_xstats_get_names,
1068 	.stats_reset             = virtio_dev_stats_reset,
1069 	.xstats_reset            = virtio_dev_stats_reset,
1070 	/* collect stats per queue */
1071 	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
1072 };
1073 
1074 static void
1075 virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1076 {
1077 	unsigned i;
1078 
1079 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1080 		const struct virtnet_tx *txvq = dev->data->tx_queues[i];
1081 		if (txvq == NULL)
1082 			continue;
1083 
1084 		stats->opackets += txvq->stats.packets;
1085 		stats->obytes += txvq->stats.bytes;
1086 
1087 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
1088 			stats->q_opackets[i] = txvq->stats.packets;
1089 			stats->q_obytes[i] = txvq->stats.bytes;
1090 		}
1091 	}
1092 
1093 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1094 		const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1095 		if (rxvq == NULL)
1096 			continue;
1097 
1098 		stats->ipackets += rxvq->stats.packets;
1099 		stats->ibytes += rxvq->stats.bytes;
1100 		stats->ierrors += rxvq->stats.errors;
1101 
1102 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
1103 			stats->q_ipackets[i] = rxvq->stats.packets;
1104 			stats->q_ibytes[i] = rxvq->stats.bytes;
1105 		}
1106 	}
1107 
1108 	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
1109 }
1110 
1111 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
1112 				       struct rte_eth_xstat_name *xstats_names,
1113 				       __rte_unused unsigned limit)
1114 {
1115 	unsigned i;
1116 	unsigned count = 0;
1117 	unsigned t;
1118 
1119 	unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1120 		dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1121 
1122 	if (xstats_names != NULL) {
1123 		/* Note: limit checked in rte_eth_xstats_get_names() */
1124 
1125 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
1126 			struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1127 			if (rxvq == NULL)
1128 				continue;
1129 			for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1130 				snprintf(xstats_names[count].name,
1131 					sizeof(xstats_names[count].name),
1132 					"rx_q%u_%s", i,
1133 					rte_virtio_rxq_stat_strings[t].name);
1134 				count++;
1135 			}
1136 		}
1137 
1138 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
1139 			struct virtnet_tx *txvq = dev->data->tx_queues[i];
1140 			if (txvq == NULL)
1141 				continue;
1142 			for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1143 				snprintf(xstats_names[count].name,
1144 					sizeof(xstats_names[count].name),
1145 					"tx_q%u_%s", i,
1146 					rte_virtio_txq_stat_strings[t].name);
1147 				count++;
1148 			}
1149 		}
1150 		return count;
1151 	}
1152 	return nstats;
1153 }
1154 
1155 static int
1156 virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1157 		      unsigned n)
1158 {
1159 	unsigned i;
1160 	unsigned count = 0;
1161 
1162 	unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1163 		dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1164 
1165 	if (n < nstats)
1166 		return nstats;
1167 
1168 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1169 		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1170 
1171 		if (rxvq == NULL)
1172 			continue;
1173 
1174 		unsigned t;
1175 
1176 		for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1177 			xstats[count].value = *(uint64_t *)(((char *)rxvq) +
1178 				rte_virtio_rxq_stat_strings[t].offset);
1179 			xstats[count].id = count;
1180 			count++;
1181 		}
1182 	}
1183 
1184 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1185 		struct virtnet_tx *txvq = dev->data->tx_queues[i];
1186 
1187 		if (txvq == NULL)
1188 			continue;
1189 
1190 		unsigned t;
1191 
1192 		for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1193 			xstats[count].value = *(uint64_t *)(((char *)txvq) +
1194 				rte_virtio_txq_stat_strings[t].offset);
1195 			xstats[count].id = count;
1196 			count++;
1197 		}
1198 	}
1199 
1200 	return count;
1201 }
1202 
1203 static int
1204 virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1205 {
1206 	virtio_update_stats(dev, stats);
1207 
1208 	return 0;
1209 }
1210 
1211 static int
1212 virtio_dev_stats_reset(struct rte_eth_dev *dev)
1213 {
1214 	unsigned int i;
1215 
1216 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1217 		struct virtnet_tx *txvq = dev->data->tx_queues[i];
1218 		if (txvq == NULL)
1219 			continue;
1220 
1221 		txvq->stats.packets = 0;
1222 		txvq->stats.bytes = 0;
1223 		txvq->stats.multicast = 0;
1224 		txvq->stats.broadcast = 0;
1225 		memset(txvq->stats.size_bins, 0,
1226 		       sizeof(txvq->stats.size_bins[0]) * 8);
1227 	}
1228 
1229 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1230 		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1231 		if (rxvq == NULL)
1232 			continue;
1233 
1234 		rxvq->stats.packets = 0;
1235 		rxvq->stats.bytes = 0;
1236 		rxvq->stats.errors = 0;
1237 		rxvq->stats.multicast = 0;
1238 		rxvq->stats.broadcast = 0;
1239 		memset(rxvq->stats.size_bins, 0,
1240 		       sizeof(rxvq->stats.size_bins[0]) * 8);
1241 	}
1242 
1243 	return 0;
1244 }
1245 
1246 static void
1247 virtio_set_hwaddr(struct virtio_hw *hw)
1248 {
1249 	virtio_write_dev_config(hw,
1250 			offsetof(struct virtio_net_config, mac),
1251 			&hw->mac_addr, RTE_ETHER_ADDR_LEN);
1252 }
1253 
1254 static void
1255 virtio_get_hwaddr(struct virtio_hw *hw)
1256 {
1257 	if (virtio_with_feature(hw, VIRTIO_NET_F_MAC)) {
1258 		virtio_read_dev_config(hw,
1259 			offsetof(struct virtio_net_config, mac),
1260 			&hw->mac_addr, RTE_ETHER_ADDR_LEN);
1261 	} else {
1262 		rte_eth_random_addr(&hw->mac_addr[0]);
1263 		virtio_set_hwaddr(hw);
1264 	}
1265 }
1266 
1267 static int
1268 virtio_mac_table_set(struct virtio_hw *hw,
1269 		     const struct virtio_net_ctrl_mac *uc,
1270 		     const struct virtio_net_ctrl_mac *mc)
1271 {
1272 	struct virtio_pmd_ctrl ctrl;
1273 	int err, len[2];
1274 
1275 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1276 		PMD_DRV_LOG(INFO, "host does not support mac table");
1277 		return -1;
1278 	}
1279 
1280 	ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1281 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1282 
1283 	len[0] = uc->entries * RTE_ETHER_ADDR_LEN + sizeof(uc->entries);
1284 	memcpy(ctrl.data, uc, len[0]);
1285 
1286 	len[1] = mc->entries * RTE_ETHER_ADDR_LEN + sizeof(mc->entries);
1287 	memcpy(ctrl.data + len[0], mc, len[1]);
1288 
1289 	err = virtio_send_command(hw->cvq, &ctrl, len, 2);
1290 	if (err != 0)
1291 		PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
1292 	return err;
1293 }
1294 
1295 static int
1296 virtio_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
1297 		    uint32_t index, uint32_t vmdq __rte_unused)
1298 {
1299 	struct virtio_hw *hw = dev->data->dev_private;
1300 	const struct rte_ether_addr *addrs = dev->data->mac_addrs;
1301 	unsigned int i;
1302 	struct virtio_net_ctrl_mac *uc, *mc;
1303 
1304 	if (index >= VIRTIO_MAX_MAC_ADDRS) {
1305 		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1306 		return -EINVAL;
1307 	}
1308 
1309 	uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1310 		sizeof(uc->entries));
1311 	uc->entries = 0;
1312 	mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1313 		sizeof(mc->entries));
1314 	mc->entries = 0;
1315 
1316 	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1317 		const struct rte_ether_addr *addr
1318 			= (i == index) ? mac_addr : addrs + i;
1319 		struct virtio_net_ctrl_mac *tbl
1320 			= rte_is_multicast_ether_addr(addr) ? mc : uc;
1321 
1322 		memcpy(&tbl->macs[tbl->entries++], addr, RTE_ETHER_ADDR_LEN);
1323 	}
1324 
1325 	return virtio_mac_table_set(hw, uc, mc);
1326 }
1327 
1328 static void
1329 virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
1330 {
1331 	struct virtio_hw *hw = dev->data->dev_private;
1332 	struct rte_ether_addr *addrs = dev->data->mac_addrs;
1333 	struct virtio_net_ctrl_mac *uc, *mc;
1334 	unsigned int i;
1335 
1336 	if (index >= VIRTIO_MAX_MAC_ADDRS) {
1337 		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1338 		return;
1339 	}
1340 
1341 	uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1342 		sizeof(uc->entries));
1343 	uc->entries = 0;
1344 	mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1345 		sizeof(mc->entries));
1346 	mc->entries = 0;
1347 
1348 	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1349 		struct virtio_net_ctrl_mac *tbl;
1350 
1351 		if (i == index || rte_is_zero_ether_addr(addrs + i))
1352 			continue;
1353 
1354 		tbl = rte_is_multicast_ether_addr(addrs + i) ? mc : uc;
1355 		memcpy(&tbl->macs[tbl->entries++], addrs + i,
1356 			RTE_ETHER_ADDR_LEN);
1357 	}
1358 
1359 	virtio_mac_table_set(hw, uc, mc);
1360 }
1361 
1362 static int
1363 virtio_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
1364 {
1365 	struct virtio_hw *hw = dev->data->dev_private;
1366 
1367 	memcpy(hw->mac_addr, mac_addr, RTE_ETHER_ADDR_LEN);
1368 
1369 	/* Use atomic update if available */
1370 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1371 		struct virtio_pmd_ctrl ctrl;
1372 		int len = RTE_ETHER_ADDR_LEN;
1373 
1374 		ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1375 		ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
1376 
1377 		memcpy(ctrl.data, mac_addr, RTE_ETHER_ADDR_LEN);
1378 		return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1379 	}
1380 
1381 	if (!virtio_with_feature(hw, VIRTIO_NET_F_MAC))
1382 		return -ENOTSUP;
1383 
1384 	virtio_set_hwaddr(hw);
1385 	return 0;
1386 }
1387 
1388 #define CLB_VAL_IDX 0
1389 #define CLB_MSK_IDX 1
1390 #define CLB_MATCH_IDX 2
1391 static int
1392 virtio_monitor_callback(const uint64_t value,
1393 		const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
1394 {
1395 	const uint64_t m = opaque[CLB_MSK_IDX];
1396 	const uint64_t v = opaque[CLB_VAL_IDX];
1397 	const uint64_t c = opaque[CLB_MATCH_IDX];
1398 
1399 	if (c)
1400 		return (value & m) == v ? -1 : 0;
1401 	else
1402 		return (value & m) == v ? 0 : -1;
1403 }
1404 
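/*
 * Fill the power-monitor condition for an Rx queue: watch the flags of the
 * next used descriptor (packed ring) or the used index (split ring), with
 * the value/mask/match words consumed by virtio_monitor_callback().
 */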
1405 static int
1406 virtio_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
1407 {
1408 	struct virtnet_rx *rxvq = rx_queue;
1409 	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
1410 	struct virtio_hw *hw;
1411 
1412 	if (vq == NULL)
1413 		return -EINVAL;
1414 
1415 	hw = vq->hw;
1416 	if (virtio_with_packed_queue(hw)) {
1417 		struct vring_packed_desc *desc;
1418 		desc = vq->vq_packed.ring.desc;
1419 		pmc->addr = &desc[vq->vq_used_cons_idx].flags;
1420 		if (vq->vq_packed.used_wrap_counter)
1421 			pmc->opaque[CLB_VAL_IDX] =
1422 						VRING_PACKED_DESC_F_AVAIL_USED;
1423 		else
1424 			pmc->opaque[CLB_VAL_IDX] = 0;
1425 		pmc->opaque[CLB_MSK_IDX] = VRING_PACKED_DESC_F_AVAIL_USED;
1426 		pmc->opaque[CLB_MATCH_IDX] = 1;
1427 		pmc->size = sizeof(desc[vq->vq_used_cons_idx].flags);
1428 	} else {
1429 		pmc->addr = &vq->vq_split.ring.used->idx;
1430 		pmc->opaque[CLB_VAL_IDX] = vq->vq_used_cons_idx
1431 					& (vq->vq_nentries - 1);
1432 		pmc->opaque[CLB_MSK_IDX] = vq->vq_nentries - 1;
1433 		pmc->opaque[CLB_MATCH_IDX] = 0;
1434 		pmc->size = sizeof(vq->vq_split.ring.used->idx);
1435 	}
1436 	pmc->fn = virtio_monitor_callback;
1437 
1438 	return 0;
1439 }
1440 
1441 static int
1442 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1443 {
1444 	struct virtio_hw *hw = dev->data->dev_private;
1445 	struct virtio_pmd_ctrl ctrl;
1446 	int len;
1447 
1448 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
1449 		return -ENOTSUP;
1450 
1451 	ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
1452 	ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
1453 	memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
1454 	len = sizeof(vlan_id);
1455 
1456 	return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1457 }
1458 
1459 static int
1460 virtio_intr_unmask(struct rte_eth_dev *dev)
1461 {
1462 	struct virtio_hw *hw = dev->data->dev_private;
1463 
1464 	if (rte_intr_ack(dev->intr_handle) < 0)
1465 		return -1;
1466 
1467 	if (VIRTIO_OPS(hw)->intr_detect)
1468 		VIRTIO_OPS(hw)->intr_detect(hw);
1469 
1470 	return 0;
1471 }
1472 
1473 static int
1474 virtio_intr_enable(struct rte_eth_dev *dev)
1475 {
1476 	struct virtio_hw *hw = dev->data->dev_private;
1477 
1478 	if (rte_intr_enable(dev->intr_handle) < 0)
1479 		return -1;
1480 
1481 	if (VIRTIO_OPS(hw)->intr_detect)
1482 		VIRTIO_OPS(hw)->intr_detect(hw);
1483 
1484 	return 0;
1485 }
1486 
1487 static int
1488 virtio_intr_disable(struct rte_eth_dev *dev)
1489 {
1490 	struct virtio_hw *hw = dev->data->dev_private;
1491 
1492 	if (rte_intr_disable(dev->intr_handle) < 0)
1493 		return -1;
1494 
1495 	if (VIRTIO_OPS(hw)->intr_detect)
1496 		VIRTIO_OPS(hw)->intr_detect(hw);
1497 
1498 	return 0;
1499 }
1500 
1501 static int
1502 virtio_ethdev_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
1503 {
1504 	uint64_t host_features;
1505 
1506 	/* Prepare guest_features: feature that driver wants to support */
1507 	PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
1508 		req_features);
1509 
1510 	/* Read device(host) feature bits */
1511 	host_features = VIRTIO_OPS(hw)->get_features(hw);
1512 	PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
1513 		host_features);
1514 
1515 	/* If supported, ensure MTU value is valid before acknowledging it. */
1516 	if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
1517 		struct virtio_net_config config;
1518 
1519 		virtio_read_dev_config(hw,
1520 			offsetof(struct virtio_net_config, mtu),
1521 			&config.mtu, sizeof(config.mtu));
1522 
1523 		if (config.mtu < RTE_ETHER_MIN_MTU)
1524 			req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
1525 	}
1526 
1527 	/*
1528 	 * Negotiate features: a subset of the device feature bits is written
1529 	 * back as the guest feature bits.
1530 	 */
1531 	hw->guest_features = req_features;
1532 	hw->guest_features = virtio_negotiate_features(hw, host_features);
1533 	PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
1534 		hw->guest_features);
1535 
1536 	if (VIRTIO_OPS(hw)->features_ok(hw) < 0)
1537 		return -1;
1538 
1539 	if (virtio_with_feature(hw, VIRTIO_F_VERSION_1)) {
1540 		virtio_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1541 
1542 		if (!(virtio_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
1543 			PMD_INIT_LOG(ERR, "Failed to set FEATURES_OK status!");
1544 			return -1;
1545 		}
1546 	}
1547 
1548 	hw->req_guest_features = req_features;
1549 
1550 	return 0;
1551 }
1552 
1553 int
1554 virtio_dev_pause(struct rte_eth_dev *dev)
1555 {
1556 	struct virtio_hw *hw = dev->data->dev_private;
1557 
1558 	rte_spinlock_lock(&hw->state_lock);
1559 
1560 	if (hw->started == 0) {
1561 		/* Device is just stopped. */
1562 		rte_spinlock_unlock(&hw->state_lock);
1563 		return -1;
1564 	}
1565 	hw->started = 0;
1566 	/*
1567 	 * Prevent the worker threads from touching queues to avoid contention;
1568 	 * 1 ms should be enough for any ongoing Tx function to finish.
1569 	 */
1570 	rte_delay_ms(1);
1571 	return 0;
1572 }
1573 
1574 /*
1575  * Recover hw state to let the worker threads continue.
1576  */
1577 void
1578 virtio_dev_resume(struct rte_eth_dev *dev)
1579 {
1580 	struct virtio_hw *hw = dev->data->dev_private;
1581 
1582 	hw->started = 1;
1583 	rte_spinlock_unlock(&hw->state_lock);
1584 }
1585 
1586 /*
1587  * Should be called only after device is paused.
1588  */
1589 int
1590 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
1591 		int nb_pkts)
1592 {
1593 	struct virtio_hw *hw = dev->data->dev_private;
1594 	struct virtnet_tx *txvq = dev->data->tx_queues[0];
1595 	int ret;
1596 
1597 	hw->inject_pkts = tx_pkts;
1598 	ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
1599 	hw->inject_pkts = NULL;
1600 
1601 	return ret;
1602 }
1603 
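/*
 * Build a RARP packet for the port MAC address and inject it on Tx queue 0
 * while the datapath is paused, so that peers can learn the port's MAC
 * address after a link announcement.
 */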
1604 static void
1605 virtio_notify_peers(struct rte_eth_dev *dev)
1606 {
1607 	struct virtio_hw *hw = dev->data->dev_private;
1608 	struct virtnet_rx *rxvq;
1609 	struct rte_mbuf *rarp_mbuf;
1610 
1611 	if (!dev->data->rx_queues)
1612 		return;
1613 
1614 	rxvq = dev->data->rx_queues[0];
1615 	if (!rxvq)
1616 		return;
1617 
1618 	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
1619 			(struct rte_ether_addr *)hw->mac_addr);
1620 	if (rarp_mbuf == NULL) {
1621 		PMD_DRV_LOG(ERR, "failed to make RARP packet.");
1622 		return;
1623 	}
1624 
1625 	/* If virtio port just stopped, no need to send RARP */
1626 	if (virtio_dev_pause(dev) < 0) {
1627 		rte_pktmbuf_free(rarp_mbuf);
1628 		return;
1629 	}
1630 
1631 	virtio_inject_pkts(dev, &rarp_mbuf, 1);
1632 	virtio_dev_resume(dev);
1633 }
1634 
1635 static void
1636 virtio_ack_link_announce(struct rte_eth_dev *dev)
1637 {
1638 	struct virtio_hw *hw = dev->data->dev_private;
1639 	struct virtio_pmd_ctrl ctrl;
1640 
1641 	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
1642 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
1643 
1644 	virtio_send_command(hw->cvq, &ctrl, NULL, 0);
1645 }
1646 
1647 /*
1648  * Process the virtio config changed interrupt. Call the LSC callback
1649  * if the link state changed, and generate a gratuitous RARP packet
1650  * if the status indicates an ANNOUNCE.
1651  */
1652 void
1653 virtio_interrupt_handler(void *param)
1654 {
1655 	struct rte_eth_dev *dev = param;
1656 	struct virtio_hw *hw = dev->data->dev_private;
1657 	uint8_t isr;
1658 	uint16_t status;
1659 
1660 	/* Read interrupt status which clears interrupt */
1661 	isr = virtio_get_isr(hw);
1662 	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
1663 
1664 	if (virtio_intr_unmask(dev) < 0)
1665 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1666 
1667 	if (isr & VIRTIO_ISR_CONFIG) {
1668 		if (virtio_dev_link_update(dev, 0) == 0)
1669 			rte_eth_dev_callback_process(dev,
1670 						     RTE_ETH_EVENT_INTR_LSC,
1671 						     NULL);
1672 
1673 		if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1674 			virtio_read_dev_config(hw,
1675 				offsetof(struct virtio_net_config, status),
1676 				&status, sizeof(status));
1677 			if (status & VIRTIO_NET_S_ANNOUNCE) {
1678 				virtio_notify_peers(dev);
1679 				if (hw->cvq)
1680 					virtio_ack_link_announce(dev);
1681 			}
1682 		}
1683 	}
1684 }
1685 
1686 /* set rx and tx handlers according to what is supported */
1687 static void
1688 set_rxtx_funcs(struct rte_eth_dev *eth_dev)
1689 {
1690 	struct virtio_hw *hw = eth_dev->data->dev_private;
1691 
1692 	eth_dev->tx_pkt_prepare = virtio_xmit_pkts_prepare;
1693 	if (virtio_with_packed_queue(hw)) {
1694 		PMD_INIT_LOG(INFO,
1695 			"virtio: using packed ring %s Tx path on port %u",
1696 			hw->use_vec_tx ? "vectorized" : "standard",
1697 			eth_dev->data->port_id);
1698 		if (hw->use_vec_tx)
1699 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
1700 		else
1701 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
1702 	} else {
1703 		if (hw->use_inorder_tx) {
1704 			PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
1705 				eth_dev->data->port_id);
1706 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
1707 		} else {
1708 			PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
1709 				eth_dev->data->port_id);
1710 			eth_dev->tx_pkt_burst = virtio_xmit_pkts;
1711 		}
1712 	}
1713 
1714 	if (virtio_with_packed_queue(hw)) {
1715 		if (hw->use_vec_rx) {
1716 			PMD_INIT_LOG(INFO,
1717 				"virtio: using packed ring vectorized Rx path on port %u",
1718 				eth_dev->data->port_id);
1719 			eth_dev->rx_pkt_burst =
1720 				&virtio_recv_pkts_packed_vec;
1721 		} else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1722 			PMD_INIT_LOG(INFO,
1723 				"virtio: using packed ring mergeable buffer Rx path on port %u",
1724 				eth_dev->data->port_id);
1725 			eth_dev->rx_pkt_burst =
1726 				&virtio_recv_mergeable_pkts_packed;
1727 		} else {
1728 			PMD_INIT_LOG(INFO,
1729 				"virtio: using packed ring standard Rx path on port %u",
1730 				eth_dev->data->port_id);
1731 			eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
1732 		}
1733 	} else {
1734 		if (hw->use_vec_rx) {
1735 			PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on port %u",
1736 				eth_dev->data->port_id);
1737 			eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
1738 		} else if (hw->use_inorder_rx) {
1739 			PMD_INIT_LOG(INFO,
1740 				"virtio: using inorder Rx path on port %u",
1741 				eth_dev->data->port_id);
1742 			eth_dev->rx_pkt_burst = &virtio_recv_pkts_inorder;
1743 		} else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1744 			PMD_INIT_LOG(INFO,
1745 				"virtio: using mergeable buffer Rx path on port %u",
1746 				eth_dev->data->port_id);
1747 			eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
1748 		} else {
1749 			PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
1750 				eth_dev->data->port_id);
1751 			eth_dev->rx_pkt_burst = &virtio_recv_pkts;
1752 		}
1753 	}
1754 
1755 }
1756 
1757 /* Only 1:1 queue/interrupt mapping is supported so far.
1758  * TODO: support n:1 queue/interrupt mapping when the number of available
1759  * interrupt vectors is limited (< N + 1).
1760  */
1761 static int
1762 virtio_queues_bind_intr(struct rte_eth_dev *dev)
1763 {
1764 	uint32_t i;
1765 	struct virtio_hw *hw = dev->data->dev_private;
1766 
1767 	PMD_INIT_LOG(INFO, "queue/interrupt binding");
1768 	for (i = 0; i < dev->data->nb_rx_queues; ++i) {
1769 		if (rte_intr_vec_list_index_set(dev->intr_handle, i,
1770 						       i + 1))
1771 			return -rte_errno;
1772 		if (VIRTIO_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
1773 						 VIRTIO_MSI_NO_VECTOR) {
1774 			PMD_DRV_LOG(ERR, "failed to set queue vector");
1775 			return -EBUSY;
1776 		}
1777 	}
1778 
1779 	return 0;
1780 }
1781 
1782 static void
1783 virtio_queues_unbind_intr(struct rte_eth_dev *dev)
1784 {
1785 	uint32_t i;
1786 	struct virtio_hw *hw = dev->data->dev_private;
1787 
1788 	PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
1789 	for (i = 0; i < dev->data->nb_rx_queues; ++i)
1790 		VIRTIO_OPS(hw)->set_queue_irq(hw,
1791 					     hw->vqs[i * VTNET_CQ],
1792 					     VIRTIO_MSI_NO_VECTOR);
1793 }
1794 
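/*
 * Set up Rx queue interrupts: create the eventfds and interrupt vector list,
 * re-register the config-change callback if LSC is enabled, enable MSI-X and
 * bind each Rx queue to its interrupt vector.
 */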
1795 static int
1796 virtio_configure_intr(struct rte_eth_dev *dev)
1797 {
1798 	struct virtio_hw *hw = dev->data->dev_private;
1799 
1800 	if (!rte_intr_cap_multiple(dev->intr_handle)) {
1801 		PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
1802 		return -ENOTSUP;
1803 	}
1804 
1805 	if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1806 		PMD_INIT_LOG(ERR, "Fail to create eventfd");
1807 		return -1;
1808 	}
1809 
1810 	if (rte_intr_vec_list_alloc(dev->intr_handle, "intr_vec",
1811 				    hw->max_queue_pairs)) {
1812 		PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1813 			     hw->max_queue_pairs);
1814 		return -ENOMEM;
1815 	}
1816 
1817 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1818 		/* Re-register callback to update max_intr */
1819 		rte_intr_callback_unregister(dev->intr_handle,
1820 					     virtio_interrupt_handler,
1821 					     dev);
1822 		rte_intr_callback_register(dev->intr_handle,
1823 					   virtio_interrupt_handler,
1824 					   dev);
1825 	}
1826 
1827 	/* DO NOT try to remove this! This call enables MSI-X; without it, QEMU
1828 	 * hits a SIGSEGV when DRIVER_OK is sent.
1829 	 * For legacy devices, it must also be done before queue/vector binding
1830 	 * so that the config size grows from 20 to 24 bytes; otherwise writes to
1831 	 * VIRTIO_MSI_QUEUE_VECTOR (offset 22) are ignored.
1832 	 */
1833 	if (virtio_intr_enable(dev) < 0) {
1834 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1835 		return -1;
1836 	}
1837 
1838 	if (virtio_queues_bind_intr(dev) < 0) {
1839 		PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1840 		return -1;
1841 	}
1842 
1843 	return 0;
1844 }
1845 
1846 static void
1847 virtio_get_speed_duplex(struct rte_eth_dev *eth_dev,
1848 			struct rte_eth_link *link)
1849 {
1850 	struct virtio_hw *hw = eth_dev->data->dev_private;
1851 	struct virtio_net_config *config;
1852 	struct virtio_net_config local_config;
1853 
1854 	config = &local_config;
1855 	virtio_read_dev_config(hw,
1856 		offsetof(struct virtio_net_config, speed),
1857 		&config->speed, sizeof(config->speed));
1858 	virtio_read_dev_config(hw,
1859 		offsetof(struct virtio_net_config, duplex),
1860 		&config->duplex, sizeof(config->duplex));
1861 	hw->speed = config->speed;
1862 	hw->duplex = config->duplex;
1863 	if (link != NULL) {
1864 		link->link_duplex = hw->duplex;
1865 		link->link_speed  = hw->speed;
1866 	}
1867 	PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
1868 		     hw->speed, hw->duplex);
1869 }
1870 
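/*
 * Helpers translating between ethdev RSS hash flags and virtio hash types.
 * Note that the IPv4/IPv6 "fragment" and "other" ethdev flags all map onto
 * the single VIRTIO_NET_HASH_TYPE_IPV4/IPV6 bit.
 */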
1871 static uint64_t
1872 ethdev_to_virtio_rss_offloads(uint64_t ethdev_hash_types)
1873 {
1874 	uint64_t virtio_hash_types = 0;
1875 
1876 	if (ethdev_hash_types & (RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
1877 				RTE_ETH_RSS_NONFRAG_IPV4_OTHER))
1878 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IPV4;
1879 
1880 	if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1881 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCPV4;
1882 
1883 	if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
1884 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDPV4;
1885 
1886 	if (ethdev_hash_types & (RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
1887 				RTE_ETH_RSS_NONFRAG_IPV6_OTHER))
1888 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IPV6;
1889 
1890 	if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1891 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCPV6;
1892 
1893 	if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
1894 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDPV6;
1895 
1896 	if (ethdev_hash_types & RTE_ETH_RSS_IPV6_EX)
1897 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IP_EX;
1898 
1899 	if (ethdev_hash_types & RTE_ETH_RSS_IPV6_TCP_EX)
1900 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCP_EX;
1901 
1902 	if (ethdev_hash_types & RTE_ETH_RSS_IPV6_UDP_EX)
1903 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDP_EX;
1904 
1905 	return virtio_hash_types;
1906 }
1907 
1908 static uint64_t
1909 virtio_to_ethdev_rss_offloads(uint64_t virtio_hash_types)
1910 {
1911 	uint64_t rss_offloads = 0;
1912 
1913 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IPV4)
1914 		rss_offloads |= RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
1915 			RTE_ETH_RSS_NONFRAG_IPV4_OTHER;
1916 
1917 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCPV4)
1918 		rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV4_TCP;
1919 
1920 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDPV4)
1921 		rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV4_UDP;
1922 
1923 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IPV6)
1924 		rss_offloads |= RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
1925 			RTE_ETH_RSS_NONFRAG_IPV6_OTHER;
1926 
1927 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCPV6)
1928 		rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV6_TCP;
1929 
1930 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDPV6)
1931 		rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV6_UDP;
1932 
1933 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IP_EX)
1934 		rss_offloads |= RTE_ETH_RSS_IPV6_EX;
1935 
1936 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCP_EX)
1937 		rss_offloads |= RTE_ETH_RSS_IPV6_TCP_EX;
1938 
1939 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDP_EX)
1940 		rss_offloads |= RTE_ETH_RSS_IPV6_UDP_EX;
1941 
1942 	return rss_offloads;
1943 }
1944 
1945 static int
1946 virtio_dev_get_rss_config(struct virtio_hw *hw, uint32_t *rss_hash_types)
1947 {
1948 	struct virtio_net_config local_config;
1949 	struct virtio_net_config *config = &local_config;
1950 
1951 	virtio_read_dev_config(hw,
1952 			offsetof(struct virtio_net_config, rss_max_key_size),
1953 			&config->rss_max_key_size,
1954 			sizeof(config->rss_max_key_size));
1955 	if (config->rss_max_key_size < VIRTIO_NET_RSS_KEY_SIZE) {
1956 		PMD_INIT_LOG(ERR, "Invalid device RSS max key size (%u)",
1957 				config->rss_max_key_size);
1958 		return -EINVAL;
1959 	}
1960 
1961 	virtio_read_dev_config(hw,
1962 			offsetof(struct virtio_net_config,
1963 				rss_max_indirection_table_length),
1964 			&config->rss_max_indirection_table_length,
1965 			sizeof(config->rss_max_indirection_table_length));
1966 	if (config->rss_max_indirection_table_length < VIRTIO_NET_RSS_RETA_SIZE) {
1967 		PMD_INIT_LOG(ERR, "Invalid device RSS max reta size (%u)",
1968 				config->rss_max_indirection_table_length);
1969 		return -EINVAL;
1970 	}
1971 
1972 	virtio_read_dev_config(hw,
1973 			offsetof(struct virtio_net_config, supported_hash_types),
1974 			&config->supported_hash_types,
1975 			sizeof(config->supported_hash_types));
1976 	if ((config->supported_hash_types & VIRTIO_NET_HASH_TYPE_MASK) == 0) {
1977 		PMD_INIT_LOG(ERR, "Invalid device RSS hash types (0x%x)",
1978 				config->supported_hash_types);
1979 		return -EINVAL;
1980 	}
1981 
1982 	*rss_hash_types = config->supported_hash_types & VIRTIO_NET_HASH_TYPE_MASK;
1983 
1984 	PMD_INIT_LOG(DEBUG, "Device RSS config:");
1985 	PMD_INIT_LOG(DEBUG, "\t-Max key size: %u", config->rss_max_key_size);
1986 	PMD_INIT_LOG(DEBUG, "\t-Max reta size: %u", config->rss_max_indirection_table_length);
1987 	PMD_INIT_LOG(DEBUG, "\t-Supported hash types: 0x%x", *rss_hash_types);
1988 
1989 	return 0;
1990 }
1991 
1992 static int
1993 virtio_dev_rss_hash_update(struct rte_eth_dev *dev,
1994 		struct rte_eth_rss_conf *rss_conf)
1995 {
1996 	struct virtio_hw *hw = dev->data->dev_private;
1997 	char old_rss_key[VIRTIO_NET_RSS_KEY_SIZE];
1998 	uint32_t old_hash_types;
1999 	uint16_t nb_queues;
2000 	int ret;
2001 
2002 	if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2003 		return -ENOTSUP;
2004 
2005 	if (rss_conf->rss_hf & ~virtio_to_ethdev_rss_offloads(VIRTIO_NET_HASH_TYPE_MASK))
2006 		return -EINVAL;
2007 
2008 	old_hash_types = hw->rss_hash_types;
2009 	hw->rss_hash_types = ethdev_to_virtio_rss_offloads(rss_conf->rss_hf);
2010 
2011 	if (rss_conf->rss_key && rss_conf->rss_key_len) {
2012 		if (rss_conf->rss_key_len != VIRTIO_NET_RSS_KEY_SIZE) {
2013 			PMD_INIT_LOG(ERR, "Driver only supports %u RSS key length",
2014 					VIRTIO_NET_RSS_KEY_SIZE);
2015 			ret = -EINVAL;
2016 			goto restore_types;
2017 		}
2018 		memcpy(old_rss_key, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2019 		memcpy(hw->rss_key, rss_conf->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2020 	}
2021 
2022 	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2023 	ret = virtio_set_multiple_queues_rss(dev, nb_queues);
2024 	if (ret < 0) {
2025 		PMD_INIT_LOG(ERR, "Failed to apply new RSS config to the device");
2026 		goto restore_key;
2027 	}
2028 
2029 	return 0;
2030 restore_key:
2031 	if (rss_conf->rss_key && rss_conf->rss_key_len)
2032 		memcpy(hw->rss_key, old_rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2033 restore_types:
2034 	hw->rss_hash_types = old_hash_types;
2035 
2036 	return ret;
2037 }
2038 
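/*
 * The hash-update callback above is reached through the ethdev API. A minimal
 * application-side sketch, assuming port 0 and a prepared 40-byte key[]:
 *
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = key,
 *		.rss_key_len = 40,	(VIRTIO_NET_RSS_KEY_SIZE)
 *		.rss_hf = RTE_ETH_RSS_IP | RTE_ETH_RSS_TCP,
 *	};
 *	rte_eth_dev_rss_hash_update(0, &conf);
 */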
2039 static int
2040 virtio_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2041 		struct rte_eth_rss_conf *rss_conf)
2042 {
2043 	struct virtio_hw *hw = dev->data->dev_private;
2044 
2045 	if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2046 		return -ENOTSUP;
2047 
2048 	if (rss_conf->rss_key && rss_conf->rss_key_len >= VIRTIO_NET_RSS_KEY_SIZE)
2049 		memcpy(rss_conf->rss_key, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2050 	rss_conf->rss_key_len = VIRTIO_NET_RSS_KEY_SIZE;
2051 	rss_conf->rss_hf = virtio_to_ethdev_rss_offloads(hw->rss_hash_types);
2052 
2053 	return 0;
2054 }
2055 
2056 static int virtio_dev_rss_reta_update(struct rte_eth_dev *dev,
2057 			 struct rte_eth_rss_reta_entry64 *reta_conf,
2058 			 uint16_t reta_size)
2059 {
2060 	struct virtio_hw *hw = dev->data->dev_private;
2061 	uint16_t nb_queues;
2062 	uint16_t old_reta[VIRTIO_NET_RSS_RETA_SIZE];
2063 	int idx, pos, i, ret;
2064 
2065 	if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2066 		return -ENOTSUP;
2067 
2068 	if (reta_size != VIRTIO_NET_RSS_RETA_SIZE)
2069 		return -EINVAL;
2070 
2071 	memcpy(old_reta, hw->rss_reta, sizeof(old_reta));
2072 
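	/*
	 * Each rte_eth_rss_reta_entry64 covers RTE_ETH_RETA_GROUP_SIZE (64)
	 * entries: idx selects the group, pos the entry within it, and the
	 * group mask bit tells whether that entry must be updated.
	 */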
2073 	for (i = 0; i < reta_size; i++) {
2074 		idx = i / RTE_ETH_RETA_GROUP_SIZE;
2075 		pos = i % RTE_ETH_RETA_GROUP_SIZE;
2076 
2077 		if (((reta_conf[idx].mask >> pos) & 0x1) == 0)
2078 			continue;
2079 
2080 		hw->rss_reta[i] = reta_conf[idx].reta[pos];
2081 	}
2082 
2083 	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2084 	ret = virtio_set_multiple_queues_rss(dev, nb_queues);
2085 	if (ret < 0) {
2086 		PMD_INIT_LOG(ERR, "Failed to apply new RETA to the device");
2087 		memcpy(hw->rss_reta, old_reta, sizeof(old_reta));
2088 	}
2089 
2090 	hw->rss_rx_queues = dev->data->nb_rx_queues;
2091 
2092 	return ret;
2093 }
2094 
2095 static int virtio_dev_rss_reta_query(struct rte_eth_dev *dev,
2096 			 struct rte_eth_rss_reta_entry64 *reta_conf,
2097 			 uint16_t reta_size)
2098 {
2099 	struct virtio_hw *hw = dev->data->dev_private;
2100 	int idx, i;
2101 
2102 	if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2103 		return -ENOTSUP;
2104 
2105 	if (reta_size != VIRTIO_NET_RSS_RETA_SIZE)
2106 		return -EINVAL;
2107 
2108 	for (i = 0; i < reta_size; i++) {
2109 		idx = i / RTE_ETH_RETA_GROUP_SIZE;
2110 		reta_conf[idx].reta[i % RTE_ETH_RETA_GROUP_SIZE] = hw->rss_reta[i];
2111 	}
2112 
2113 	return 0;
2114 }
2115 
2116 /*
2117  * The default RSS hash key is the default key used by the
2118  * Intel IXGBE devices. The application can replace it with
2119  * any 40-byte key value.
2120  */
2121 static uint8_t rss_intel_key[VIRTIO_NET_RSS_KEY_SIZE] = {
2122 	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2123 	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2124 	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2125 	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2126 	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2127 };
2128 
2129 static int
2130 virtio_dev_rss_init(struct rte_eth_dev *eth_dev)
2131 {
2132 	struct virtio_hw *hw = eth_dev->data->dev_private;
2133 	uint16_t nb_rx_queues = eth_dev->data->nb_rx_queues;
2134 	struct rte_eth_rss_conf *rss_conf;
2135 	int ret, i;
2136 
2137 	if (!nb_rx_queues) {
2138 		PMD_INIT_LOG(ERR, "Cannot init RSS if no Rx queues");
2139 		return -EINVAL;
2140 	}
2141 
2142 	rss_conf = &eth_dev->data->dev_conf.rx_adv_conf.rss_conf;
2143 
2144 	ret = virtio_dev_get_rss_config(hw, &hw->rss_hash_types);
2145 	if (ret)
2146 		return ret;
2147 
2148 	if (rss_conf->rss_hf) {
2149 		/* Ensure the requested hash types are supported by the device */
2150 		if (rss_conf->rss_hf & ~virtio_to_ethdev_rss_offloads(hw->rss_hash_types))
2151 			return -EINVAL;
2152 
2153 		hw->rss_hash_types = ethdev_to_virtio_rss_offloads(rss_conf->rss_hf);
2154 	}
2155 
2156 	if (!hw->rss_key) {
2157 		/* Set up a default RSS key if not already set by the user */
2158 		hw->rss_key = rte_malloc_socket("rss_key",
2159 				VIRTIO_NET_RSS_KEY_SIZE, 0,
2160 				eth_dev->device->numa_node);
2161 		if (!hw->rss_key) {
2162 			PMD_INIT_LOG(ERR, "Failed to allocate RSS key");
2163 			return -1;
2164 		}
2165 	}
2166 
2167 	if (rss_conf->rss_key && rss_conf->rss_key_len) {
2168 		if (rss_conf->rss_key_len != VIRTIO_NET_RSS_KEY_SIZE) {
2169 			PMD_INIT_LOG(ERR, "Driver only supports %u RSS key length",
2170 					VIRTIO_NET_RSS_KEY_SIZE);
2171 			return -EINVAL;
2172 		}
2173 		memcpy(hw->rss_key, rss_conf->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2174 	} else {
2175 		memcpy(hw->rss_key, rss_intel_key, VIRTIO_NET_RSS_KEY_SIZE);
2176 	}
2177 
2178 	if (!hw->rss_reta) {
2179 		/* Set up a default RSS reta if not already set by the user */
2180 		hw->rss_reta = rte_zmalloc_socket("rss_reta",
2181 				VIRTIO_NET_RSS_RETA_SIZE * sizeof(uint16_t), 0,
2182 				eth_dev->device->numa_node);
2183 		if (!hw->rss_reta) {
2184 			PMD_INIT_LOG(ERR, "Failed to allocate RSS reta");
2185 			return -1;
2186 		}
2187 
2188 		hw->rss_rx_queues = 0;
2189 	}
2190 
2191 	/* Re-initialize the RSS reta if the number of RX queues has changed */
2192 	if (hw->rss_rx_queues != nb_rx_queues) {
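		/* Spread the reta entries across the Rx queues in round-robin order. */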
2193 		for (i = 0; i < VIRTIO_NET_RSS_RETA_SIZE; i++)
2194 			hw->rss_reta[i] = i % nb_rx_queues;
2195 		hw->rss_rx_queues = nb_rx_queues;
2196 	}
2197 
2198 	return 0;
2199 }
2200 
2201 #define DUPLEX_UNKNOWN   0xff
2202 /* reset device and renegotiate features if needed */
2203 static int
2204 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
2205 {
2206 	struct virtio_hw *hw = eth_dev->data->dev_private;
2207 	struct virtio_net_config *config;
2208 	struct virtio_net_config local_config;
2209 	int ret;
2210 
2211 	/* Reset the device, although this is not strictly necessary at startup */
2212 	virtio_reset(hw);
2213 
2214 	if (hw->vqs) {
2215 		virtio_dev_free_mbufs(eth_dev);
2216 		virtio_free_queues(hw);
2217 	}
2218 
2219 	/* Tell the host we've noticed this device. */
2220 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
2221 
2222 	/* Tell the host we know how to drive the device. */
2223 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
2224 	if (virtio_ethdev_negotiate_features(hw, req_features) < 0)
2225 		return -1;
2226 
2227 	hw->weak_barriers = !virtio_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);
2228 
2229 	/* If the host does not support both the status feature and MSI-X, disable LSC */
2230 	if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS) && hw->intr_lsc)
2231 		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
2232 	else
2233 		eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
2234 
2235 	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
2236 
2237 	/* Set up the Rx header size for the device */
2238 	if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
2239 	    virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2240 	    virtio_with_packed_queue(hw))
2241 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2242 	else
2243 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
2244 
2245 	/* Copy the permanent MAC address into virtio_hw */
2246 	virtio_get_hwaddr(hw);
2247 	rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
2248 			&eth_dev->data->mac_addrs[0]);
2249 	PMD_INIT_LOG(DEBUG,
2250 		     "PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
2251 		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
2252 		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
2253 
2254 	hw->get_speed_via_feat = hw->speed == RTE_ETH_SPEED_NUM_UNKNOWN &&
2255 			     virtio_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX);
2256 	if (hw->get_speed_via_feat)
2257 		virtio_get_speed_duplex(eth_dev, NULL);
2258 	if (hw->duplex == DUPLEX_UNKNOWN)
2259 		hw->duplex = RTE_ETH_LINK_FULL_DUPLEX;
2260 	PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
2261 		hw->speed, hw->duplex);
2262 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
2263 		config = &local_config;
2264 
2265 		virtio_read_dev_config(hw,
2266 			offsetof(struct virtio_net_config, mac),
2267 			&config->mac, sizeof(config->mac));
2268 
2269 		if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2270 			virtio_read_dev_config(hw,
2271 				offsetof(struct virtio_net_config, status),
2272 				&config->status, sizeof(config->status));
2273 		} else {
2274 			PMD_INIT_LOG(DEBUG,
2275 				     "VIRTIO_NET_F_STATUS is not supported");
2276 			config->status = 0;
2277 		}
2278 
2279 		if (virtio_with_feature(hw, VIRTIO_NET_F_MQ) ||
2280 				virtio_with_feature(hw, VIRTIO_NET_F_RSS)) {
2281 			virtio_read_dev_config(hw,
2282 				offsetof(struct virtio_net_config, max_virtqueue_pairs),
2283 				&config->max_virtqueue_pairs,
2284 				sizeof(config->max_virtqueue_pairs));
2285 		} else {
2286 			PMD_INIT_LOG(DEBUG,
2287 				     "Neither VIRTIO_NET_F_MQ nor VIRTIO_NET_F_RSS are supported");
2288 			config->max_virtqueue_pairs = 1;
2289 		}
2290 
2291 		hw->max_queue_pairs = config->max_virtqueue_pairs;
2292 
2293 		if (virtio_with_feature(hw, VIRTIO_NET_F_MTU)) {
2294 			virtio_read_dev_config(hw,
2295 				offsetof(struct virtio_net_config, mtu),
2296 				&config->mtu,
2297 				sizeof(config->mtu));
2298 
2299 			/*
2300 			 * MTU value has already been checked at negotiation
2301 			 * time, but check again in case it has changed since
2302 			 * then, which should not happen.
2303 			 */
2304 			if (config->mtu < RTE_ETHER_MIN_MTU) {
2305 				PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
2306 						config->mtu);
2307 				return -1;
2308 			}
2309 
2310 			hw->max_mtu = config->mtu;
2311 			/* Set the initial MTU to the maximum one supported by vhost */
2312 			eth_dev->data->mtu = config->mtu;
2313 
2314 		} else {
2315 			hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
2316 				VLAN_TAG_LEN - hw->vtnet_hdr_size;
2317 		}
2318 
2319 		hw->rss_hash_types = 0;
2320 		if (virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2321 			if (virtio_dev_rss_init(eth_dev))
2322 				return -1;
2323 
2324 		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
2325 				config->max_virtqueue_pairs);
2326 		PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
2327 		PMD_INIT_LOG(DEBUG,
2328 				"PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
2329 				config->mac[0], config->mac[1],
2330 				config->mac[2], config->mac[3],
2331 				config->mac[4], config->mac[5]);
2332 	} else {
2333 		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
2334 		hw->max_queue_pairs = 1;
2335 		hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
2336 			VLAN_TAG_LEN - hw->vtnet_hdr_size;
2337 	}
2338 
2339 	ret = virtio_alloc_queues(eth_dev);
2340 	if (ret < 0)
2341 		return ret;
2342 
2343 	if (eth_dev->data->dev_conf.intr_conf.rxq) {
2344 		if (virtio_configure_intr(eth_dev) < 0) {
2345 			PMD_INIT_LOG(ERR, "failed to configure interrupt");
2346 			virtio_free_queues(hw);
2347 			return -1;
2348 		}
2349 	}
2350 
2351 	virtio_reinit_complete(hw);
2352 
2353 	return 0;
2354 }
2355 
2356 /*
2357  * This function is based on probe() function in virtio_pci.c
2358  * It returns 0 on success.
2359  */
2360 int
2361 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
2362 {
2363 	struct virtio_hw *hw = eth_dev->data->dev_private;
2364 	uint32_t speed = RTE_ETH_SPEED_NUM_UNKNOWN;
2365 	int vectorized = 0;
2366 	int ret;
2367 
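	/*
	 * The virtio-net header is built in the mbuf headroom on the Tx path,
	 * so the headroom must be able to hold the largest header format.
	 */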
2368 	if (sizeof(struct virtio_net_hdr_mrg_rxbuf) > RTE_PKTMBUF_HEADROOM) {
2369 		PMD_INIT_LOG(ERR,
2370 			"Insufficient mbuf headroom: required = %d, avail = %d",
2371 			(int)sizeof(struct virtio_net_hdr_mrg_rxbuf),
2372 			RTE_PKTMBUF_HEADROOM);
2373 
2374 		return -1;
2375 	}
2376 
2377 	eth_dev->dev_ops = &virtio_eth_dev_ops;
2378 
2379 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
2380 		set_rxtx_funcs(eth_dev);
2381 		return 0;
2382 	}
2383 
2384 	ret = virtio_dev_devargs_parse(eth_dev->device->devargs, &speed, &vectorized);
2385 	if (ret < 0)
2386 		return ret;
2387 	hw->speed = speed;
2388 	hw->duplex = DUPLEX_UNKNOWN;
2389 
2390 	/* Allocate memory for storing MAC addresses */
2391 	eth_dev->data->mac_addrs = rte_zmalloc("virtio",
2392 				VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN, 0);
2393 	if (eth_dev->data->mac_addrs == NULL) {
2394 		PMD_INIT_LOG(ERR,
2395 			"Failed to allocate %d bytes needed to store MAC addresses",
2396 			VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN);
2397 		return -ENOMEM;
2398 	}
2399 
2400 	rte_spinlock_init(&hw->state_lock);
2401 
2402 	/* reset device and negotiate default features */
2403 	ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
2404 	if (ret < 0)
2405 		goto err_virtio_init;
2406 
2407 	if (vectorized) {
2408 		if (!virtio_with_packed_queue(hw)) {
2409 			hw->use_vec_rx = 1;
2410 		} else {
2411 #if defined(CC_AVX512_SUPPORT) || defined(RTE_ARCH_ARM)
2412 			hw->use_vec_rx = 1;
2413 			hw->use_vec_tx = 1;
2414 #else
2415 			PMD_DRV_LOG(INFO,
2416 				"build environment does not support packed ring vectorized path");
2417 #endif
2418 		}
2419 	}
2420 
2421 	hw->opened = 1;
2422 
2423 	return 0;
2424 
2425 err_virtio_init:
2426 	rte_free(eth_dev->data->mac_addrs);
2427 	eth_dev->data->mac_addrs = NULL;
2428 	return ret;
2429 }
2430 
2431 static uint32_t
2432 virtio_dev_speed_capa_get(uint32_t speed)
2433 {
2434 	switch (speed) {
2435 	case RTE_ETH_SPEED_NUM_10G:
2436 		return RTE_ETH_LINK_SPEED_10G;
2437 	case RTE_ETH_SPEED_NUM_20G:
2438 		return RTE_ETH_LINK_SPEED_20G;
2439 	case RTE_ETH_SPEED_NUM_25G:
2440 		return RTE_ETH_LINK_SPEED_25G;
2441 	case RTE_ETH_SPEED_NUM_40G:
2442 		return RTE_ETH_LINK_SPEED_40G;
2443 	case RTE_ETH_SPEED_NUM_50G:
2444 		return RTE_ETH_LINK_SPEED_50G;
2445 	case RTE_ETH_SPEED_NUM_56G:
2446 		return RTE_ETH_LINK_SPEED_56G;
2447 	case RTE_ETH_SPEED_NUM_100G:
2448 		return RTE_ETH_LINK_SPEED_100G;
2449 	case RTE_ETH_SPEED_NUM_200G:
2450 		return RTE_ETH_LINK_SPEED_200G;
2451 	default:
2452 		return 0;
2453 	}
2454 }
2455 
2456 static int vectorized_check_handler(__rte_unused const char *key,
2457 		const char *value, void *ret_val)
2458 {
2459 	if (strcmp(value, "1") == 0)
2460 		*(int *)ret_val = 1;
2461 	else
2462 		*(int *)ret_val = 0;
2463 
2464 	return 0;
2465 }
2466 
2467 #define VIRTIO_ARG_SPEED      "speed"
2468 #define VIRTIO_ARG_VECTORIZED "vectorized"
2469 
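/*
 * Devargs sketch (the PCI address is only an illustration):
 *   -a 0000:00:04.0,speed=10000,vectorized=1
 * "speed" overrides the link speed reported by the port; "vectorized"
 * requests the vectorized Rx/Tx paths, subject to the checks done in
 * virtio_dev_configure().
 */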
2470 static int
2471 link_speed_handler(const char *key __rte_unused,
2472 		const char *value, void *ret_val)
2473 {
2474 	uint32_t val;
2475 	if (!value || !ret_val)
2476 		return -EINVAL;
2477 	val = strtoul(value, NULL, 0);
2478 	/* validate input */
2479 	if (virtio_dev_speed_capa_get(val) == 0)
2480 		return -EINVAL;
2481 	*(uint32_t *)ret_val = val;
2482 
2483 	return 0;
2484 }
2485 
2486 
2487 static int
2488 virtio_dev_devargs_parse(struct rte_devargs *devargs, uint32_t *speed, int *vectorized)
2489 {
2490 	struct rte_kvargs *kvlist;
2491 	int ret = 0;
2492 
2493 	if (devargs == NULL)
2494 		return 0;
2495 
2496 	kvlist = rte_kvargs_parse(devargs->args, NULL);
2497 	if (kvlist == NULL) {
2498 		PMD_INIT_LOG(ERR, "error when parsing param");
2499 		return 0;
2500 	}
2501 
2502 	if (speed && rte_kvargs_count(kvlist, VIRTIO_ARG_SPEED) == 1) {
2503 		ret = rte_kvargs_process(kvlist,
2504 					VIRTIO_ARG_SPEED,
2505 					link_speed_handler, speed);
2506 		if (ret < 0) {
2507 			PMD_INIT_LOG(ERR, "Failed to parse %s",
2508 					VIRTIO_ARG_SPEED);
2509 			goto exit;
2510 		}
2511 	}
2512 
2513 	if (vectorized &&
2514 		rte_kvargs_count(kvlist, VIRTIO_ARG_VECTORIZED) == 1) {
2515 		ret = rte_kvargs_process(kvlist,
2516 				VIRTIO_ARG_VECTORIZED,
2517 				vectorized_check_handler, vectorized);
2518 		if (ret < 0) {
2519 			PMD_INIT_LOG(ERR, "Failed to parse %s",
2520 					VIRTIO_ARG_VECTORIZED);
2521 			goto exit;
2522 		}
2523 	}
2524 
2525 exit:
2526 	rte_kvargs_free(kvlist);
2527 	return ret;
2528 }
2529 
2530 static uint8_t
2531 rx_offload_enabled(struct virtio_hw *hw)
2532 {
2533 	return virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
2534 		virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2535 		virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
2536 }
2537 
2538 static uint8_t
2539 tx_offload_enabled(struct virtio_hw *hw)
2540 {
2541 	return virtio_with_feature(hw, VIRTIO_NET_F_CSUM) ||
2542 		virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
2543 		virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
2544 }
2545 
2546 /*
2547  * Configure virtio device
2548  * It returns 0 on success.
2549  */
2550 static int
2551 virtio_dev_configure(struct rte_eth_dev *dev)
2552 {
2553 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2554 	const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
2555 	struct virtio_hw *hw = dev->data->dev_private;
2556 	uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
2557 		hw->vtnet_hdr_size;
2558 	uint64_t rx_offloads = rxmode->offloads;
2559 	uint64_t tx_offloads = txmode->offloads;
2560 	uint64_t req_features;
2561 	int ret;
2562 
2563 	PMD_INIT_LOG(DEBUG, "configure");
2564 	req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
2565 
2566 	if (rxmode->mq_mode != RTE_ETH_MQ_RX_NONE && rxmode->mq_mode != RTE_ETH_MQ_RX_RSS) {
2567 		PMD_DRV_LOG(ERR,
2568 			"Unsupported Rx multi queue mode %d",
2569 			rxmode->mq_mode);
2570 		return -EINVAL;
2571 	}
2572 
2573 	if (txmode->mq_mode != RTE_ETH_MQ_TX_NONE) {
2574 		PMD_DRV_LOG(ERR,
2575 			"Unsupported Tx multi queue mode %d",
2576 			txmode->mq_mode);
2577 		return -EINVAL;
2578 	}
2579 
2580 	if (dev->data->dev_conf.intr_conf.rxq) {
2581 		ret = virtio_init_device(dev, hw->req_guest_features);
2582 		if (ret < 0)
2583 			return ret;
2584 	}
2585 
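	/*
	 * Translate the requested ethdev offloads into virtio feature bits;
	 * if the resulting set differs from what was negotiated at init time,
	 * the device is re-initialized further down.
	 */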
2586 	if (rxmode->mq_mode == RTE_ETH_MQ_RX_RSS)
2587 		req_features |= (1ULL << VIRTIO_NET_F_RSS);
2588 
2589 	if (rxmode->mtu > hw->max_mtu)
2590 		req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
2591 
2592 	hw->max_rx_pkt_len = ether_hdr_len + rxmode->mtu;
2593 
2594 	if (rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2595 			   RTE_ETH_RX_OFFLOAD_TCP_CKSUM))
2596 		req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
2597 
2598 	if (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)
2599 		req_features |=
2600 			(1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2601 			(1ULL << VIRTIO_NET_F_GUEST_TSO6);
2602 
2603 	if (tx_offloads & (RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
2604 			   RTE_ETH_TX_OFFLOAD_TCP_CKSUM))
2605 		req_features |= (1ULL << VIRTIO_NET_F_CSUM);
2606 
2607 	if (tx_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)
2608 		req_features |=
2609 			(1ULL << VIRTIO_NET_F_HOST_TSO4) |
2610 			(1ULL << VIRTIO_NET_F_HOST_TSO6);
2611 
2612 	/* if the requested features changed, reinit the device */
2613 	if (req_features != hw->req_guest_features) {
2614 		ret = virtio_init_device(dev, req_features);
2615 		if (ret < 0)
2616 			return ret;
2617 	}
2618 
2619 	if ((rxmode->mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) &&
2620 			!virtio_with_feature(hw, VIRTIO_NET_F_RSS)) {
2621 		PMD_DRV_LOG(ERR, "RSS support requested but not supported by the device");
2622 		return -ENOTSUP;
2623 	}
2624 
2625 	if ((rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2626 			    RTE_ETH_RX_OFFLOAD_TCP_CKSUM)) &&
2627 		!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
2628 		PMD_DRV_LOG(ERR,
2629 			"rx checksum not available on this host");
2630 		return -ENOTSUP;
2631 	}
2632 
2633 	if ((rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) &&
2634 		(!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2635 		 !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
2636 		PMD_DRV_LOG(ERR,
2637 			"Large Receive Offload not available on this host");
2638 		return -ENOTSUP;
2639 	}
2640 
2641 	/* start control queue */
2642 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
2643 		virtio_dev_cq_start(dev);
2644 
2645 	if (rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
2646 		hw->vlan_strip = 1;
2647 
2648 	hw->rx_ol_scatter = (rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER);
2649 
2650 	if ((rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER) &&
2651 			!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2652 		PMD_DRV_LOG(ERR,
2653 			    "vlan filtering not available on this host");
2654 		return -ENOTSUP;
2655 	}
2656 
2657 	hw->has_tx_offload = tx_offload_enabled(hw);
2658 	hw->has_rx_offload = rx_offload_enabled(hw);
2659 
2660 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2661 		/* Enable vector (0) for Link State Interrupt */
2662 		if (VIRTIO_OPS(hw)->set_config_irq(hw, 0) ==
2663 				VIRTIO_MSI_NO_VECTOR) {
2664 			PMD_DRV_LOG(ERR, "failed to set config vector");
2665 			return -EBUSY;
2666 		}
2667 
2668 	if (virtio_with_packed_queue(hw)) {
2669 #if defined(RTE_ARCH_X86_64) && defined(CC_AVX512_SUPPORT)
2670 		if ((hw->use_vec_rx || hw->use_vec_tx) &&
2671 		    (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
2672 		     !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2673 		     !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2674 		     rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512)) {
2675 			PMD_DRV_LOG(INFO,
2676 				"disabled packed ring vectorized path for requirements not met");
2677 			hw->use_vec_rx = 0;
2678 			hw->use_vec_tx = 0;
2679 		}
2680 #elif defined(RTE_ARCH_ARM)
2681 		if ((hw->use_vec_rx || hw->use_vec_tx) &&
2682 		    (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) ||
2683 		     !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2684 		     !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2685 		     rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)) {
2686 			PMD_DRV_LOG(INFO,
2687 				"disabled packed ring vectorized path for requirements not met");
2688 			hw->use_vec_rx = 0;
2689 			hw->use_vec_tx = 0;
2690 		}
2691 #else
2692 		hw->use_vec_rx = 0;
2693 		hw->use_vec_tx = 0;
2694 #endif
2695 
2696 		if (hw->use_vec_rx) {
2697 			if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2698 				PMD_DRV_LOG(INFO,
2699 					"disabled packed ring vectorized rx for mrg_rxbuf enabled");
2700 				hw->use_vec_rx = 0;
2701 			}
2702 
2703 			if (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) {
2704 				PMD_DRV_LOG(INFO,
2705 					"disabled packed ring vectorized rx for TCP_LRO enabled");
2706 				hw->use_vec_rx = 0;
2707 			}
2708 		}
2709 	} else {
2710 		if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER)) {
2711 			hw->use_inorder_tx = 1;
2712 			hw->use_inorder_rx = 1;
2713 			hw->use_vec_rx = 0;
2714 		}
2715 
2716 		if (hw->use_vec_rx) {
2717 #if defined RTE_ARCH_ARM
2718 			if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
2719 				PMD_DRV_LOG(INFO,
2720 					"disabled split ring vectorized path for requirements not met");
2721 				hw->use_vec_rx = 0;
2722 			}
2723 #endif
2724 			if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2725 				PMD_DRV_LOG(INFO,
2726 					"disabled split ring vectorized rx for mrg_rxbuf enabled");
2727 				hw->use_vec_rx = 0;
2728 			}
2729 
2730 			if (rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2731 					   RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
2732 					   RTE_ETH_RX_OFFLOAD_TCP_LRO |
2733 					   RTE_ETH_RX_OFFLOAD_VLAN_STRIP)) {
2734 				PMD_DRV_LOG(INFO,
2735 					"disabled split ring vectorized rx for offloading enabled");
2736 				hw->use_vec_rx = 0;
2737 			}
2738 
2739 			if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
2740 				PMD_DRV_LOG(INFO,
2741 					"disabled split ring vectorized rx, max SIMD bitwidth too low");
2742 				hw->use_vec_rx = 0;
2743 			}
2744 		}
2745 	}
2746 
2747 	return 0;
2748 }
2749 
2750 
2751 static int
2752 virtio_dev_start(struct rte_eth_dev *dev)
2753 {
2754 	uint16_t nb_queues, i;
2755 	struct virtqueue *vq;
2756 	struct virtio_hw *hw = dev->data->dev_private;
2757 	int ret;
2758 
2759 	/* Finish the initialization of the queues */
2760 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2761 		ret = virtio_dev_rx_queue_setup_finish(dev, i);
2762 		if (ret < 0)
2763 			return ret;
2764 	}
2765 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2766 		ret = virtio_dev_tx_queue_setup_finish(dev, i);
2767 		if (ret < 0)
2768 			return ret;
2769 	}
2770 
2771 	/* check if lsc interrupt feature is enabled */
2772 	if (dev->data->dev_conf.intr_conf.lsc) {
2773 		if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2774 			PMD_DRV_LOG(ERR, "link status not supported by host");
2775 			return -ENOTSUP;
2776 		}
2777 	}
2778 
2779 	/* Enable uio/vfio intr/eventfd mapping: we already did this in
2780 	 * device configure, but it could have been unmapped when the device
2781 	 * was stopped.
2782 	 */
2783 	if (dev->data->dev_conf.intr_conf.lsc ||
2784 	    dev->data->dev_conf.intr_conf.rxq) {
2785 		virtio_intr_disable(dev);
2786 
2787 		/* Set up the interrupt callback */
2788 		if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2789 			rte_intr_callback_register(dev->intr_handle,
2790 						   virtio_interrupt_handler,
2791 						   dev);
2792 
2793 		if (virtio_intr_enable(dev) < 0) {
2794 			PMD_DRV_LOG(ERR, "interrupt enable failed");
2795 			return -EIO;
2796 		}
2797 	}
2798 
2799 	/* Notify the backend.
2800 	 * Otherwise the tap backend might already have stopped its queue due to
2801 	 * fullness and the vhost backend would have no chance to be woken up.
2802 	 */
2803 	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2804 	if (hw->max_queue_pairs > 1) {
2805 		if (virtio_set_multiple_queues(dev, nb_queues) != 0)
2806 			return -EINVAL;
2807 	}
2808 
2809 	PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
2810 
2811 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2812 		vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2813 		/* Flush the old packets */
2814 		virtqueue_rxvq_flush(vq);
2815 		virtqueue_notify(vq);
2816 	}
2817 
2818 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2819 		vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2820 		virtqueue_notify(vq);
2821 	}
2822 
2823 	PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
2824 
2825 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2826 		vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2827 		VIRTQUEUE_DUMP(vq);
2828 	}
2829 
2830 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2831 		vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2832 		VIRTQUEUE_DUMP(vq);
2833 	}
2834 
2835 	set_rxtx_funcs(dev);
2836 	hw->started = 1;
2837 
2838 	/* Initialize Link state */
2839 	virtio_dev_link_update(dev, 0);
2840 
2841 	return 0;
2842 }
2843 
2844 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
2845 {
2846 	struct virtio_hw *hw = dev->data->dev_private;
2847 	uint16_t nr_vq = virtio_get_nr_vq(hw);
2848 	const char *type __rte_unused;
2849 	unsigned int i, mbuf_num = 0;
2850 	struct virtqueue *vq;
2851 	struct rte_mbuf *buf;
2852 	int queue_type;
2853 
2854 	if (hw->vqs == NULL)
2855 		return;
2856 
2857 	for (i = 0; i < nr_vq; i++) {
2858 		vq = hw->vqs[i];
2859 		if (!vq)
2860 			continue;
2861 
2862 		queue_type = virtio_get_queue_type(hw, i);
2863 		if (queue_type == VTNET_RQ)
2864 			type = "rxq";
2865 		else if (queue_type == VTNET_TQ)
2866 			type = "txq";
2867 		else
2868 			continue;
2869 
2870 		PMD_INIT_LOG(DEBUG,
2871 			"Before freeing %s[%d] used and unused buf",
2872 			type, i);
2873 		VIRTQUEUE_DUMP(vq);
2874 
2875 		while ((buf = virtqueue_detach_unused(vq)) != NULL) {
2876 			rte_pktmbuf_free(buf);
2877 			mbuf_num++;
2878 		}
2879 
2880 		PMD_INIT_LOG(DEBUG,
2881 			"After freeing %s[%d] used and unused buf",
2882 			type, i);
2883 		VIRTQUEUE_DUMP(vq);
2884 	}
2885 
2886 	PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
2887 }
2888 
2889 static void
2890 virtio_tx_completed_cleanup(struct rte_eth_dev *dev)
2891 {
2892 	struct virtio_hw *hw = dev->data->dev_private;
2893 	struct virtqueue *vq;
2894 	int qidx;
2895 	void (*xmit_cleanup)(struct virtqueue *vq, uint16_t nb_used);
2896 
2897 	if (virtio_with_packed_queue(hw)) {
2898 		if (hw->use_vec_tx)
2899 			xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2900 		else if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER))
2901 			xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2902 		else
2903 			xmit_cleanup = &virtio_xmit_cleanup_normal_packed;
2904 	} else {
2905 		if (hw->use_inorder_tx)
2906 			xmit_cleanup = &virtio_xmit_cleanup_inorder;
2907 		else
2908 			xmit_cleanup = &virtio_xmit_cleanup;
2909 	}
2910 
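	/*
	 * Drain completed Tx descriptors on every queue pair so the mbufs still
	 * held by the device are released before the port is stopped. The Tx
	 * virtqueue of pair qidx sits at index 2 * qidx + VTNET_SQ_TQ_QUEUE_IDX.
	 */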
2911 	for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) {
2912 		vq = hw->vqs[2 * qidx + VTNET_SQ_TQ_QUEUE_IDX];
2913 		if (vq != NULL)
2914 			xmit_cleanup(vq, virtqueue_nused(vq));
2915 	}
2916 }
2917 
2918 /*
2919  * Stop device: disable interrupt and mark link down
2920  */
2921 int
2922 virtio_dev_stop(struct rte_eth_dev *dev)
2923 {
2924 	struct virtio_hw *hw = dev->data->dev_private;
2925 	struct rte_eth_link link;
2926 	struct rte_eth_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
2927 
2928 	PMD_INIT_LOG(DEBUG, "stop");
2929 	dev->data->dev_started = 0;
2930 
2931 	rte_spinlock_lock(&hw->state_lock);
2932 	if (!hw->started)
2933 		goto out_unlock;
2934 	hw->started = 0;
2935 
2936 	virtio_tx_completed_cleanup(dev);
2937 
2938 	if (intr_conf->lsc || intr_conf->rxq) {
2939 		virtio_intr_disable(dev);
2940 
2941 		/* Reset the interrupt callback */
2942 		if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
2943 			rte_intr_callback_unregister(dev->intr_handle,
2944 						     virtio_interrupt_handler,
2945 						     dev);
2946 		}
2947 	}
2948 
2949 	memset(&link, 0, sizeof(link));
2950 	rte_eth_linkstatus_set(dev, &link);
2951 out_unlock:
2952 	rte_spinlock_unlock(&hw->state_lock);
2953 
2954 	return 0;
2955 }
2956 
2957 static int
2958 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
2959 {
2960 	struct rte_eth_link link;
2961 	uint16_t status;
2962 	struct virtio_hw *hw = dev->data->dev_private;
2963 
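	/*
	 * Without VIRTIO_NET_F_STATUS there is no way to query the link state,
	 * so a started port is always reported as up.
	 */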
2964 	memset(&link, 0, sizeof(link));
2965 	link.link_duplex = hw->duplex;
2966 	link.link_speed  = hw->speed;
2967 	link.link_autoneg = RTE_ETH_LINK_AUTONEG;
2968 
2969 	if (!hw->started) {
2970 		link.link_status = RTE_ETH_LINK_DOWN;
2971 		link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2972 	} else if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2973 		PMD_INIT_LOG(DEBUG, "Get link status from hw");
2974 		virtio_read_dev_config(hw,
2975 				offsetof(struct virtio_net_config, status),
2976 				&status, sizeof(status));
2977 		if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2978 			link.link_status = RTE_ETH_LINK_DOWN;
2979 			link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2980 			PMD_INIT_LOG(DEBUG, "Port %d is down",
2981 				     dev->data->port_id);
2982 		} else {
2983 			link.link_status = RTE_ETH_LINK_UP;
2984 			if (hw->get_speed_via_feat)
2985 				virtio_get_speed_duplex(dev, &link);
2986 			PMD_INIT_LOG(DEBUG, "Port %d is up",
2987 				     dev->data->port_id);
2988 		}
2989 	} else {
2990 		link.link_status = RTE_ETH_LINK_UP;
2991 		if (hw->get_speed_via_feat)
2992 			virtio_get_speed_duplex(dev, &link);
2993 	}
2994 
2995 	return rte_eth_linkstatus_set(dev, &link);
2996 }
2997 
2998 static int
2999 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
3000 {
3001 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
3002 	struct virtio_hw *hw = dev->data->dev_private;
3003 	uint64_t offloads = rxmode->offloads;
3004 
3005 	if (mask & RTE_ETH_VLAN_FILTER_MASK) {
3006 		if ((offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER) &&
3007 				!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
3008 
3009 			PMD_DRV_LOG(NOTICE,
3010 				"vlan filtering not available on this host");
3011 
3012 			return -ENOTSUP;
3013 		}
3014 	}
3015 
3016 	if (mask & RTE_ETH_VLAN_STRIP_MASK)
3017 		hw->vlan_strip = !!(offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
3018 
3019 	return 0;
3020 }
3021 
3022 static int
3023 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
3024 {
3025 	uint64_t tso_mask, host_features;
3026 	uint32_t rss_hash_types = 0;
3027 	struct virtio_hw *hw = dev->data->dev_private;
3028 	dev_info->speed_capa = virtio_dev_speed_capa_get(hw->speed);
3029 
3030 	dev_info->max_rx_queues =
3031 		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
3032 	dev_info->max_tx_queues =
3033 		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
3034 	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
3035 	dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
3036 	dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
3037 	dev_info->max_mtu = hw->max_mtu;
3038 
3039 	host_features = VIRTIO_OPS(hw)->get_features(hw);
3040 	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
3041 	if (host_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
3042 		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_SCATTER;
3043 	if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
3044 		dev_info->rx_offload_capa |=
3045 			RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
3046 			RTE_ETH_RX_OFFLOAD_UDP_CKSUM;
3047 	}
3048 	if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
3049 		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
3050 	tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
3051 		(1ULL << VIRTIO_NET_F_GUEST_TSO6);
3052 	if ((host_features & tso_mask) == tso_mask)
3053 		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
3054 
3055 	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
3056 				    RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
3057 	if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
3058 		dev_info->tx_offload_capa |=
3059 			RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
3060 			RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
3061 	}
3062 	tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
3063 		(1ULL << VIRTIO_NET_F_HOST_TSO6);
3064 	if ((host_features & tso_mask) == tso_mask)
3065 		dev_info->tx_offload_capa |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
3066 
3067 	if (host_features & (1ULL << VIRTIO_NET_F_RSS)) {
3068 		virtio_dev_get_rss_config(hw, &rss_hash_types);
3069 		dev_info->hash_key_size = VIRTIO_NET_RSS_KEY_SIZE;
3070 		dev_info->reta_size = VIRTIO_NET_RSS_RETA_SIZE;
3071 		dev_info->flow_type_rss_offloads =
3072 			virtio_to_ethdev_rss_offloads(rss_hash_types);
3073 	} else {
3074 		dev_info->hash_key_size = 0;
3075 		dev_info->reta_size = 0;
3076 		dev_info->flow_type_rss_offloads = 0;
3077 	}
3078 
3079 	if (host_features & (1ULL << VIRTIO_F_RING_PACKED)) {
3080 		/*
3081 		 * According to 2.7 Packed Virtqueues,
3082 		 * 2.7.10.1 Structure Size and Alignment:
3083 		 * The Queue Size value does not have to be a power of 2.
3084 		 */
3085 		dev_info->rx_desc_lim.nb_max = UINT16_MAX;
3086 		dev_info->tx_desc_lim.nb_max = UINT16_MAX;
3087 	} else {
3088 		/*
3089 		 * According to 2.6 Split Virtqueues:
3090 		 * Queue Size value is always a power of 2. The maximum Queue
3091 		 * Size value is 32768.
3092 		 */
3093 		dev_info->rx_desc_lim.nb_max = 32768;
3094 		dev_info->tx_desc_lim.nb_max = 32768;
3095 	}
3096 	/*
3097 	 * The actual minimum is not the same for virtqueues of different kinds,
3098 	 * but to avoid tangling the code with separate branches, rely on the
3099 	 * default thresholds, since the descriptor count must be at least that large.
3100 	 */
3101 	dev_info->rx_desc_lim.nb_min = RTE_MAX(DEFAULT_RX_FREE_THRESH,
3102 					       RTE_VIRTIO_VPMD_RX_REARM_THRESH);
3103 	dev_info->tx_desc_lim.nb_min = DEFAULT_TX_FREE_THRESH;
3104 	dev_info->rx_desc_lim.nb_align = 1;
3105 	dev_info->tx_desc_lim.nb_align = 1;
3106 
3107 	return 0;
3108 }
3109 
3110 /*
3111  * It enables testpmd to collect per-queue stats.
3112  */
3113 static int
3114 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
3115 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
3116 __rte_unused uint8_t is_rx)
3117 {
3118 	return 0;
3119 }
3120 
3121 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_init, init, NOTICE);
3122 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_driver, driver, NOTICE);
3123