1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <unistd.h>
10 
11 #include <ethdev_driver.h>
12 #include <rte_memcpy.h>
13 #include <rte_string_fns.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_branch_prediction.h>
17 #include <rte_ether.h>
18 #include <rte_ip.h>
19 #include <rte_arp.h>
20 #include <rte_common.h>
21 #include <rte_errno.h>
22 #include <rte_cpuflags.h>
23 #include <rte_vect.h>
24 #include <rte_memory.h>
25 #include <rte_eal_paging.h>
26 #include <rte_eal.h>
27 #include <rte_dev.h>
28 #include <rte_cycles.h>
29 #include <rte_kvargs.h>
30 
31 #include "virtio_ethdev.h"
32 #include "virtio.h"
33 #include "virtio_logs.h"
34 #include "virtqueue.h"
35 #include "virtio_rxtx.h"
36 #include "virtio_rxtx_simple.h"
37 #include "virtio_user/virtio_user_dev.h"
38 
39 static int  virtio_dev_configure(struct rte_eth_dev *dev);
40 static int  virtio_dev_start(struct rte_eth_dev *dev);
41 static int virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
42 static int virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
43 static int virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
44 static int virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
45 static uint32_t virtio_dev_speed_capa_get(uint32_t speed);
46 static int virtio_dev_devargs_parse(struct rte_devargs *devargs,
47 	uint32_t *speed,
48 	int *vectorized);
49 static int virtio_dev_info_get(struct rte_eth_dev *dev,
50 				struct rte_eth_dev_info *dev_info);
51 static int virtio_dev_link_update(struct rte_eth_dev *dev,
52 	int wait_to_complete);
53 static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
54 static int virtio_dev_rss_hash_update(struct rte_eth_dev *dev,
55 		struct rte_eth_rss_conf *rss_conf);
56 static int virtio_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
57 		struct rte_eth_rss_conf *rss_conf);
58 static int virtio_dev_rss_reta_update(struct rte_eth_dev *dev,
59 			 struct rte_eth_rss_reta_entry64 *reta_conf,
60 			 uint16_t reta_size);
61 static int virtio_dev_rss_reta_query(struct rte_eth_dev *dev,
62 			 struct rte_eth_rss_reta_entry64 *reta_conf,
63 			 uint16_t reta_size);
64 
65 static void virtio_set_hwaddr(struct virtio_hw *hw);
66 static void virtio_get_hwaddr(struct virtio_hw *hw);
67 
68 static int virtio_dev_stats_get(struct rte_eth_dev *dev,
69 				 struct rte_eth_stats *stats);
70 static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
71 				 struct rte_eth_xstat *xstats, unsigned n);
72 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
73 				       struct rte_eth_xstat_name *xstats_names,
74 				       unsigned limit);
75 static int virtio_dev_stats_reset(struct rte_eth_dev *dev);
76 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
77 static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
78 				uint16_t vlan_id, int on);
79 static int virtio_mac_addr_add(struct rte_eth_dev *dev,
80 				struct rte_ether_addr *mac_addr,
81 				uint32_t index, uint32_t vmdq);
82 static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
83 static int virtio_mac_addr_set(struct rte_eth_dev *dev,
84 				struct rte_ether_addr *mac_addr);
85 
86 static int virtio_intr_disable(struct rte_eth_dev *dev);
87 static int virtio_get_monitor_addr(void *rx_queue,
88 				struct rte_power_monitor_cond *pmc);
89 
90 static int virtio_dev_queue_stats_mapping_set(
91 	struct rte_eth_dev *eth_dev,
92 	uint16_t queue_id,
93 	uint8_t stat_idx,
94 	uint8_t is_rx);
95 
96 static void virtio_notify_peers(struct rte_eth_dev *dev);
97 static void virtio_ack_link_announce(struct rte_eth_dev *dev);
98 
99 struct rte_virtio_xstats_name_off {
100 	char name[RTE_ETH_XSTATS_NAME_SIZE];
101 	unsigned offset;
102 };
103 
104 /* [rt]x_qX_ is prepended to the name string here */
105 static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
106 	{"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
107 	{"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
108 	{"errors",                 offsetof(struct virtnet_rx, stats.errors)},
109 	{"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
110 	{"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
111 	{"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
112 	{"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
113 	{"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
114 	{"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
115 	{"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
116 	{"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
117 	{"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
118 	{"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
119 };
120 
121 /* [rt]x_qX_ is prepended to the name string here */
122 static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
123 	{"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
124 	{"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
125 	{"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
126 	{"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
127 	{"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
128 	{"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
129 	{"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
130 	{"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
131 	{"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
132 	{"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
133 	{"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
134 	{"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
135 };
136 
137 #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
138 			    sizeof(rte_virtio_rxq_stat_strings[0]))
139 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
140 			    sizeof(rte_virtio_txq_stat_strings[0]))
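
/*
 * Usage sketch (illustrative, not part of the driver): the tables above let
 * the xstats callbacks read every counter generically, by adding the stored
 * offset to the queue structure base, e.g.:
 *
 *	const struct virtnet_rx *rxvq = dev->data->rx_queues[qid];
 *	uint64_t val = *(const uint64_t *)((const char *)rxvq +
 *			rte_virtio_rxq_stat_strings[t].offset);
 *
 * virtio_dev_xstats_get() below does exactly this for both Rx and Tx queues.
 */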
141 
142 struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
143 
144 static struct virtio_pmd_ctrl *
145 virtio_send_command_packed(struct virtnet_ctl *cvq,
146 			   struct virtio_pmd_ctrl *ctrl,
147 			   int *dlen, int pkt_num)
148 {
149 	struct virtqueue *vq = virtnet_cq_to_vq(cvq);
150 	int head;
151 	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
152 	struct virtio_pmd_ctrl *result;
153 	uint16_t flags;
154 	int sum = 0;
155 	int nb_descs = 0;
156 	int k;
157 
158 	/*
159 	 * Format is enforced by the QEMU code:
160 	 * one device-readable descriptor for the header;
161 	 * at least one device-readable descriptor per data argument;
162 	 * one device-writable descriptor for the ACK status.
163 	 */
164 	head = vq->vq_avail_idx;
165 	flags = vq->vq_packed.cached_flags;
166 	desc[head].addr = cvq->virtio_net_hdr_mem;
167 	desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
168 	vq->vq_free_cnt--;
169 	nb_descs++;
170 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
171 		vq->vq_avail_idx -= vq->vq_nentries;
172 		vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
173 	}
174 
175 	for (k = 0; k < pkt_num; k++) {
176 		desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
177 			+ sizeof(struct virtio_net_ctrl_hdr)
178 			+ sizeof(ctrl->status) + sizeof(uint8_t) * sum;
179 		desc[vq->vq_avail_idx].len = dlen[k];
180 		desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
181 			vq->vq_packed.cached_flags;
182 		sum += dlen[k];
183 		vq->vq_free_cnt--;
184 		nb_descs++;
185 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
186 			vq->vq_avail_idx -= vq->vq_nentries;
187 			vq->vq_packed.cached_flags ^=
188 				VRING_PACKED_DESC_F_AVAIL_USED;
189 		}
190 	}
191 
192 	desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
193 		+ sizeof(struct virtio_net_ctrl_hdr);
194 	desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
195 	desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
196 		vq->vq_packed.cached_flags;
197 	vq->vq_free_cnt--;
198 	nb_descs++;
199 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
200 		vq->vq_avail_idx -= vq->vq_nentries;
201 		vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
202 	}
203 
204 	virtqueue_store_flags_packed(&desc[head], VRING_DESC_F_NEXT | flags,
205 			vq->hw->weak_barriers);
206 
207 	virtio_wmb(vq->hw->weak_barriers);
208 	virtqueue_notify(vq);
209 
210 	/* wait for used desc in virtqueue
211 	 * desc_is_used has a load-acquire or rte_io_rmb inside
212 	 */
213 	while (!desc_is_used(&desc[head], vq))
214 		usleep(100);
215 
216 	/* now get used descriptors */
217 	vq->vq_free_cnt += nb_descs;
218 	vq->vq_used_cons_idx += nb_descs;
219 	if (vq->vq_used_cons_idx >= vq->vq_nentries) {
220 		vq->vq_used_cons_idx -= vq->vq_nentries;
221 		vq->vq_packed.used_wrap_counter ^= 1;
222 	}
223 
224 	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d "
225 			"vq->vq_avail_idx=%d "
226 			"vq->vq_used_cons_idx=%d "
227 			"vq->vq_packed.cached_flags=0x%x "
228 			"vq->vq_packed.used_wrap_counter=%d",
229 			vq->vq_free_cnt,
230 			vq->vq_avail_idx,
231 			vq->vq_used_cons_idx,
232 			vq->vq_packed.cached_flags,
233 			vq->vq_packed.used_wrap_counter);
234 
235 	result = cvq->virtio_net_hdr_mz->addr;
236 	return result;
237 }
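
/*
 * Layout of the control-queue header memzone referenced above (sketch, per
 * the offsets used in both send paths):
 *
 *	virtio_net_hdr_mem: [virtio_net_ctrl_hdr][ack status][arg 0]...[arg N-1]
 *
 * The first descriptor points at the header, one descriptor per argument
 * follows (offset past the header and status byte, advancing by dlen[k]),
 * and the final device-writable descriptor points at the status byte.
 */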
238 
239 static struct virtio_pmd_ctrl *
240 virtio_send_command_split(struct virtnet_ctl *cvq,
241 			  struct virtio_pmd_ctrl *ctrl,
242 			  int *dlen, int pkt_num)
243 {
244 	struct virtio_pmd_ctrl *result;
245 	struct virtqueue *vq = virtnet_cq_to_vq(cvq);
246 	uint32_t head, i;
247 	int k, sum = 0;
248 
249 	head = vq->vq_desc_head_idx;
250 
251 	/*
252 	 * Format is enforced by the QEMU code:
253 	 * one device-readable descriptor for the header;
254 	 * at least one device-readable descriptor per data argument;
255 	 * one device-writable descriptor for the ACK status.
256 	 */
257 	vq->vq_split.ring.desc[head].flags = VRING_DESC_F_NEXT;
258 	vq->vq_split.ring.desc[head].addr = cvq->virtio_net_hdr_mem;
259 	vq->vq_split.ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
260 	vq->vq_free_cnt--;
261 	i = vq->vq_split.ring.desc[head].next;
262 
263 	for (k = 0; k < pkt_num; k++) {
264 		vq->vq_split.ring.desc[i].flags = VRING_DESC_F_NEXT;
265 		vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
266 			+ sizeof(struct virtio_net_ctrl_hdr)
267 			+ sizeof(ctrl->status) + sizeof(uint8_t)*sum;
268 		vq->vq_split.ring.desc[i].len = dlen[k];
269 		sum += dlen[k];
270 		vq->vq_free_cnt--;
271 		i = vq->vq_split.ring.desc[i].next;
272 	}
273 
274 	vq->vq_split.ring.desc[i].flags = VRING_DESC_F_WRITE;
275 	vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
276 			+ sizeof(struct virtio_net_ctrl_hdr);
277 	vq->vq_split.ring.desc[i].len = sizeof(ctrl->status);
278 	vq->vq_free_cnt--;
279 
280 	vq->vq_desc_head_idx = vq->vq_split.ring.desc[i].next;
281 
282 	vq_update_avail_ring(vq, head);
283 	vq_update_avail_idx(vq);
284 
285 	PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
286 
287 	virtqueue_notify(vq);
288 
289 	while (virtqueue_nused(vq) == 0)
290 		usleep(100);
291 
292 	while (virtqueue_nused(vq)) {
293 		uint32_t idx, desc_idx, used_idx;
294 		struct vring_used_elem *uep;
295 
296 		used_idx = (uint32_t)(vq->vq_used_cons_idx
297 				& (vq->vq_nentries - 1));
298 		uep = &vq->vq_split.ring.used->ring[used_idx];
299 		idx = (uint32_t) uep->id;
300 		desc_idx = idx;
301 
302 		while (vq->vq_split.ring.desc[desc_idx].flags &
303 				VRING_DESC_F_NEXT) {
304 			desc_idx = vq->vq_split.ring.desc[desc_idx].next;
305 			vq->vq_free_cnt++;
306 		}
307 
308 		vq->vq_split.ring.desc[desc_idx].next = vq->vq_desc_head_idx;
309 		vq->vq_desc_head_idx = idx;
310 
311 		vq->vq_used_cons_idx++;
312 		vq->vq_free_cnt++;
313 	}
314 
315 	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
316 			vq->vq_free_cnt, vq->vq_desc_head_idx);
317 
318 	result = cvq->virtio_net_hdr_mz->addr;
319 	return result;
320 }
321 
322 static int
323 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
324 		    int *dlen, int pkt_num)
325 {
326 	virtio_net_ctrl_ack status = ~0;
327 	struct virtio_pmd_ctrl *result;
328 	struct virtqueue *vq;
329 
330 	ctrl->status = status;
331 
332 	if (!cvq) {
333 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
334 		return -1;
335 	}
336 
337 	rte_spinlock_lock(&cvq->lock);
338 	vq = virtnet_cq_to_vq(cvq);
339 
340 	PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
341 		"vq->hw->cvq = %p vq = %p",
342 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
343 
344 	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
345 		rte_spinlock_unlock(&cvq->lock);
346 		return -1;
347 	}
348 
349 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
350 		sizeof(struct virtio_pmd_ctrl));
351 
352 	if (virtio_with_packed_queue(vq->hw))
353 		result = virtio_send_command_packed(cvq, ctrl, dlen, pkt_num);
354 	else
355 		result = virtio_send_command_split(cvq, ctrl, dlen, pkt_num);
356 
357 	rte_spinlock_unlock(&cvq->lock);
358 	return result->status;
359 }
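
/*
 * Typical caller pattern (sketch; mirrors the RX-mode helpers further down):
 *
 *	struct virtio_pmd_ctrl ctrl;
 *	int dlen = 1;
 *
 *	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
 *	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
 *	ctrl.data[0] = 1;
 *	if (virtio_send_command(hw->cvq, &ctrl, &dlen, 1) != 0)
 *		return -EAGAIN;
 *
 * The return value is the device status byte (0 on VIRTIO_NET_OK), or -1 if
 * the command could not be submitted.
 */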
360 
361 static int
362 virtio_set_multiple_queues_rss(struct rte_eth_dev *dev, uint16_t nb_queues)
363 {
364 	struct virtio_hw *hw = dev->data->dev_private;
365 	struct virtio_pmd_ctrl ctrl;
366 	struct virtio_net_ctrl_rss rss;
367 	int dlen, ret;
368 
369 	rss.hash_types = hw->rss_hash_types & VIRTIO_NET_HASH_TYPE_MASK;
370 	RTE_BUILD_BUG_ON(!RTE_IS_POWER_OF_2(VIRTIO_NET_RSS_RETA_SIZE));
371 	rss.indirection_table_mask = VIRTIO_NET_RSS_RETA_SIZE - 1;
372 	rss.unclassified_queue = 0;
373 	memcpy(rss.indirection_table, hw->rss_reta, VIRTIO_NET_RSS_RETA_SIZE * sizeof(uint16_t));
374 	rss.max_tx_vq = nb_queues;
375 	rss.hash_key_length = VIRTIO_NET_RSS_KEY_SIZE;
376 	memcpy(rss.hash_key_data, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
377 
378 	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
379 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_RSS_CONFIG;
380 	memcpy(ctrl.data, &rss, sizeof(rss));
381 
382 	dlen = sizeof(rss);
383 
384 	ret = virtio_send_command(hw->cvq, &ctrl, &dlen, 1);
385 	if (ret) {
386 		PMD_INIT_LOG(ERR, "RSS multiqueue configured but send command failed");
387 		return -EINVAL;
388 	}
389 
390 	return 0;
391 }
392 
393 static int
394 virtio_set_multiple_queues_auto(struct rte_eth_dev *dev, uint16_t nb_queues)
395 {
396 	struct virtio_hw *hw = dev->data->dev_private;
397 	struct virtio_pmd_ctrl ctrl;
398 	int dlen;
399 	int ret;
400 
401 	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
402 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
403 	memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
404 
405 	dlen = sizeof(uint16_t);
406 
407 	ret = virtio_send_command(hw->cvq, &ctrl, &dlen, 1);
408 	if (ret) {
409 		PMD_INIT_LOG(ERR,
410 			"Multiqueue configured but send command failed");
411 		return -EINVAL;
412 	}
413 
414 	return 0;
415 }
416 
417 static int
418 virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
419 {
420 	struct virtio_hw *hw = dev->data->dev_private;
421 
422 	if (virtio_with_feature(hw, VIRTIO_NET_F_RSS))
423 		return virtio_set_multiple_queues_rss(dev, nb_queues);
424 	else
425 		return virtio_set_multiple_queues_auto(dev, nb_queues);
426 }
427 
428 static uint16_t
429 virtio_get_nr_vq(struct virtio_hw *hw)
430 {
431 	uint16_t nr_vq = hw->max_queue_pairs * 2;
432 
433 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
434 		nr_vq += 1;
435 
436 	return nr_vq;
437 }
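
/*
 * Example (illustrative): with max_queue_pairs == 4 and VIRTIO_NET_F_CTRL_VQ
 * negotiated, nr_vq = 4 * 2 + 1 = 9. Virtqueue indexes alternate Rx (even)
 * and Tx (odd) per pair, with the control queue occupying the last index.
 */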
438 
439 static void
440 virtio_init_vring(struct virtqueue *vq)
441 {
442 	int size = vq->vq_nentries;
443 	uint8_t *ring_mem = vq->vq_ring_virt_mem;
444 
445 	PMD_INIT_FUNC_TRACE();
446 
447 	memset(ring_mem, 0, vq->vq_ring_size);
448 
449 	vq->vq_used_cons_idx = 0;
450 	vq->vq_desc_head_idx = 0;
451 	vq->vq_avail_idx = 0;
452 	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
453 	vq->vq_free_cnt = vq->vq_nentries;
454 	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
455 	if (virtio_with_packed_queue(vq->hw)) {
456 		vring_init_packed(&vq->vq_packed.ring, ring_mem,
457 				  VIRTIO_VRING_ALIGN, size);
458 		vring_desc_init_packed(vq, size);
459 	} else {
460 		struct vring *vr = &vq->vq_split.ring;
461 
462 		vring_init_split(vr, ring_mem, VIRTIO_VRING_ALIGN, size);
463 		vring_desc_init_split(vr->desc, size);
464 	}
465 	/*
466 	 * Disable interrupts from the device (host) to the guest.
467 	 */
468 	virtqueue_disable_intr(vq);
469 }
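
/*
 * Background note (not driver code): in a packed ring, every descriptor
 * carries AVAIL and USED flag bits whose expected polarity flips on each
 * ring wrap. vq_packed.cached_flags seeds the AVAIL bit for driver-written
 * descriptors and is XORed with VRING_PACKED_DESC_F_AVAIL_USED whenever
 * vq_avail_idx wraps, as virtio_send_command_packed() above illustrates.
 */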
470 
471 static int
472 virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
473 {
474 	char vq_name[VIRTQUEUE_MAX_NAME_SZ];
475 	char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
476 	const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
477 	unsigned int vq_size, size;
478 	struct virtio_hw *hw = dev->data->dev_private;
479 	struct virtnet_rx *rxvq = NULL;
480 	struct virtnet_tx *txvq = NULL;
481 	struct virtnet_ctl *cvq = NULL;
482 	struct virtqueue *vq;
483 	size_t sz_hdr_mz = 0;
484 	void *sw_ring = NULL;
485 	int queue_type = virtio_get_queue_type(hw, queue_idx);
486 	int ret;
487 	int numa_node = dev->device->numa_node;
488 	struct rte_mbuf *fake_mbuf = NULL;
489 
490 	PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
491 			queue_idx, numa_node);
492 
493 	/*
494 	 * Read the virtqueue size from the Queue Size field. A size of 0 means
495 	 * the virtqueue does not exist; for split rings it must be a power of 2.
496 	 */
497 	vq_size = VIRTIO_OPS(hw)->get_queue_num(hw, queue_idx);
498 	PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
499 	if (vq_size == 0) {
500 		PMD_INIT_LOG(ERR, "virtqueue does not exist");
501 		return -EINVAL;
502 	}
503 
504 	if (!virtio_with_packed_queue(hw) && !rte_is_power_of_2(vq_size)) {
505 		PMD_INIT_LOG(ERR, "split virtqueue size is not power of 2");
506 		return -EINVAL;
507 	}
508 
509 	snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
510 		 dev->data->port_id, queue_idx);
511 
512 	size = RTE_ALIGN_CEIL(sizeof(*vq) +
513 				vq_size * sizeof(struct vq_desc_extra),
514 				RTE_CACHE_LINE_SIZE);
515 	if (queue_type == VTNET_TQ) {
516 		/*
517 		 * For each xmit packet, allocate a virtio_net_hdr
518 		 * and indirect ring elements
519 		 */
520 		sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
521 	} else if (queue_type == VTNET_CQ) {
522 		/* Allocate a page for control vq command, data and status */
523 		sz_hdr_mz = rte_mem_page_size();
524 	}
525 
526 	vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
527 				numa_node);
528 	if (vq == NULL) {
529 		PMD_INIT_LOG(ERR, "cannot allocate vq");
530 		return -ENOMEM;
531 	}
532 	hw->vqs[queue_idx] = vq;
533 
534 	vq->hw = hw;
535 	vq->vq_queue_index = queue_idx;
536 	vq->vq_nentries = vq_size;
537 	if (virtio_with_packed_queue(hw)) {
538 		vq->vq_packed.used_wrap_counter = 1;
539 		vq->vq_packed.cached_flags = VRING_PACKED_DESC_F_AVAIL;
540 		vq->vq_packed.event_flags_shadow = 0;
541 		if (queue_type == VTNET_RQ)
542 			vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE;
543 	}
544 
545 	/*
546 	 * Reserve a memzone for vring elements
547 	 */
548 	size = vring_size(hw, vq_size, VIRTIO_VRING_ALIGN);
549 	vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_VRING_ALIGN);
550 	PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
551 		     size, vq->vq_ring_size);
552 
553 	mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
554 			numa_node, RTE_MEMZONE_IOVA_CONTIG,
555 			VIRTIO_VRING_ALIGN);
556 	if (mz == NULL) {
557 		if (rte_errno == EEXIST)
558 			mz = rte_memzone_lookup(vq_name);
559 		if (mz == NULL) {
560 			ret = -ENOMEM;
561 			goto free_vq;
562 		}
563 	}
564 
565 	memset(mz->addr, 0, mz->len);
566 
567 	if (hw->use_va)
568 		vq->vq_ring_mem = (uintptr_t)mz->addr;
569 	else
570 		vq->vq_ring_mem = mz->iova;
571 
572 	vq->vq_ring_virt_mem = mz->addr;
573 	PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64, vq->vq_ring_mem);
574 	PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: %p", vq->vq_ring_virt_mem);
575 
576 	virtio_init_vring(vq);
577 
578 	if (sz_hdr_mz) {
579 		snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
580 			 dev->data->port_id, queue_idx);
581 		hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
582 				numa_node, RTE_MEMZONE_IOVA_CONTIG,
583 				RTE_CACHE_LINE_SIZE);
584 		if (hdr_mz == NULL) {
585 			if (rte_errno == EEXIST)
586 				hdr_mz = rte_memzone_lookup(vq_hdr_name);
587 			if (hdr_mz == NULL) {
588 				ret = -ENOMEM;
589 				goto free_mz;
590 			}
591 		}
592 	}
593 
594 	if (queue_type == VTNET_RQ) {
595 		size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
596 			       sizeof(vq->sw_ring[0]);
597 
598 		sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
599 				RTE_CACHE_LINE_SIZE, numa_node);
600 		if (!sw_ring) {
601 			PMD_INIT_LOG(ERR, "cannot allocate RX soft ring");
602 			ret = -ENOMEM;
603 			goto free_hdr_mz;
604 		}
605 
606 		fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf),
607 				RTE_CACHE_LINE_SIZE, numa_node);
608 		if (!fake_mbuf) {
609 			PMD_INIT_LOG(ERR, "cannot allocate fake mbuf");
610 			ret = -ENOMEM;
611 			goto free_sw_ring;
612 		}
613 
614 		vq->sw_ring = sw_ring;
615 		rxvq = &vq->rxq;
616 		rxvq->port_id = dev->data->port_id;
617 		rxvq->mz = mz;
618 		rxvq->fake_mbuf = fake_mbuf;
619 	} else if (queue_type == VTNET_TQ) {
620 		txvq = &vq->txq;
621 		txvq->port_id = dev->data->port_id;
622 		txvq->mz = mz;
623 		txvq->virtio_net_hdr_mz = hdr_mz;
624 		if (hw->use_va)
625 			txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
626 		else
627 			txvq->virtio_net_hdr_mem = hdr_mz->iova;
628 	} else if (queue_type == VTNET_CQ) {
629 		cvq = &vq->cq;
630 		cvq->mz = mz;
631 		cvq->virtio_net_hdr_mz = hdr_mz;
632 		if (hw->use_va)
633 			cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
634 		else
635 			cvq->virtio_net_hdr_mem = hdr_mz->iova;
636 		memset(cvq->virtio_net_hdr_mz->addr, 0, rte_mem_page_size());
637 
638 		hw->cvq = cvq;
639 	}
640 
641 	if (hw->use_va)
642 		vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_addr);
643 	else
644 		vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_iova);
645 
646 	if (queue_type == VTNET_TQ) {
647 		struct virtio_tx_region *txr;
648 		unsigned int i;
649 
650 		txr = hdr_mz->addr;
651 		memset(txr, 0, vq_size * sizeof(*txr));
652 		for (i = 0; i < vq_size; i++) {
653 			/* first indirect descriptor is always the tx header */
654 			if (!virtio_with_packed_queue(hw)) {
655 				struct vring_desc *start_dp = txr[i].tx_indir;
656 				vring_desc_init_split(start_dp,
657 						      RTE_DIM(txr[i].tx_indir));
658 				start_dp->addr = txvq->virtio_net_hdr_mem
659 					+ i * sizeof(*txr)
660 					+ offsetof(struct virtio_tx_region,
661 						   tx_hdr);
662 				start_dp->len = hw->vtnet_hdr_size;
663 				start_dp->flags = VRING_DESC_F_NEXT;
664 			} else {
665 				struct vring_packed_desc *start_dp =
666 					txr[i].tx_packed_indir;
667 				vring_desc_init_indirect_packed(start_dp,
668 				      RTE_DIM(txr[i].tx_packed_indir));
669 				start_dp->addr = txvq->virtio_net_hdr_mem
670 					+ i * sizeof(*txr)
671 					+ offsetof(struct virtio_tx_region,
672 						   tx_hdr);
673 				start_dp->len = hw->vtnet_hdr_size;
674 			}
675 		}
676 	}
677 
678 	if (VIRTIO_OPS(hw)->setup_queue(hw, vq) < 0) {
679 		PMD_INIT_LOG(ERR, "setup_queue failed");
680 		ret = -EINVAL;
681 		goto clean_vq;
682 	}
683 
684 	return 0;
685 
686 clean_vq:
687 	hw->cvq = NULL;
688 	rte_free(fake_mbuf);
689 free_sw_ring:
690 	rte_free(sw_ring);
691 free_hdr_mz:
692 	rte_memzone_free(hdr_mz);
693 free_mz:
694 	rte_memzone_free(mz);
695 free_vq:
696 	rte_free(vq);
697 	hw->vqs[queue_idx] = NULL;
698 
699 	return ret;
700 }
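
/*
 * Summary of the per-virtqueue allocations made above (illustrative):
 *
 *	type      vring memzone   header memzone                 extra
 *	VTNET_RQ  yes             none                           sw_ring + fake mbuf
 *	VTNET_TQ  yes             vq_size * virtio_tx_region     -
 *	VTNET_CQ  yes             one page (cmd/data/status)     -
 */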
701 
702 static void
703 virtio_free_queues(struct virtio_hw *hw)
704 {
705 	uint16_t nr_vq = virtio_get_nr_vq(hw);
706 	struct virtqueue *vq;
707 	int queue_type;
708 	uint16_t i;
709 
710 	if (hw->vqs == NULL)
711 		return;
712 
713 	for (i = 0; i < nr_vq; i++) {
714 		vq = hw->vqs[i];
715 		if (!vq)
716 			continue;
717 
718 		queue_type = virtio_get_queue_type(hw, i);
719 		if (queue_type == VTNET_RQ) {
720 			rte_free(vq->rxq.fake_mbuf);
721 			rte_free(vq->sw_ring);
722 			rte_memzone_free(vq->rxq.mz);
723 		} else if (queue_type == VTNET_TQ) {
724 			rte_memzone_free(vq->txq.mz);
725 			rte_memzone_free(vq->txq.virtio_net_hdr_mz);
726 		} else {
727 			rte_memzone_free(vq->cq.mz);
728 			rte_memzone_free(vq->cq.virtio_net_hdr_mz);
729 		}
730 
731 		rte_free(vq);
732 		hw->vqs[i] = NULL;
733 	}
734 
735 	rte_free(hw->vqs);
736 	hw->vqs = NULL;
737 }
738 
739 static int
740 virtio_alloc_queues(struct rte_eth_dev *dev)
741 {
742 	struct virtio_hw *hw = dev->data->dev_private;
743 	uint16_t nr_vq = virtio_get_nr_vq(hw);
744 	uint16_t i;
745 	int ret;
746 
747 	hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
748 	if (!hw->vqs) {
749 		PMD_INIT_LOG(ERR, "failed to allocate vqs");
750 		return -ENOMEM;
751 	}
752 
753 	for (i = 0; i < nr_vq; i++) {
754 		ret = virtio_init_queue(dev, i);
755 		if (ret < 0) {
756 			virtio_free_queues(hw);
757 			return ret;
758 		}
759 	}
760 
761 	return 0;
762 }
763 
764 static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);
765 
766 static void
767 virtio_free_rss(struct virtio_hw *hw)
768 {
769 	rte_free(hw->rss_key);
770 	hw->rss_key = NULL;
771 
772 	rte_free(hw->rss_reta);
773 	hw->rss_reta = NULL;
774 }
775 
776 int
777 virtio_dev_close(struct rte_eth_dev *dev)
778 {
779 	struct virtio_hw *hw = dev->data->dev_private;
780 	struct rte_eth_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
781 
782 	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
783 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
784 		return 0;
785 
786 	if (!hw->opened)
787 		return 0;
788 	hw->opened = 0;
789 
790 	/* reset the NIC */
791 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
792 		VIRTIO_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
793 	if (intr_conf->rxq)
794 		virtio_queues_unbind_intr(dev);
795 
796 	if (intr_conf->lsc || intr_conf->rxq) {
797 		virtio_intr_disable(dev);
798 		rte_intr_efd_disable(dev->intr_handle);
799 		rte_intr_vec_list_free(dev->intr_handle);
800 	}
801 
802 	virtio_reset(hw);
803 	virtio_dev_free_mbufs(dev);
804 	virtio_free_queues(hw);
805 	virtio_free_rss(hw);
806 
807 	return VIRTIO_OPS(hw)->dev_close(hw);
808 }
809 
810 static int
811 virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
812 {
813 	struct virtio_hw *hw = dev->data->dev_private;
814 	struct virtio_pmd_ctrl ctrl;
815 	int dlen[1];
816 	int ret;
817 
818 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
819 		PMD_INIT_LOG(INFO, "host does not support rx control");
820 		return -ENOTSUP;
821 	}
822 
823 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
824 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
825 	ctrl.data[0] = 1;
826 	dlen[0] = 1;
827 
828 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
829 	if (ret) {
830 		PMD_INIT_LOG(ERR, "Failed to enable promisc");
831 		return -EAGAIN;
832 	}
833 
834 	return 0;
835 }
836 
837 static int
838 virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
839 {
840 	struct virtio_hw *hw = dev->data->dev_private;
841 	struct virtio_pmd_ctrl ctrl;
842 	int dlen[1];
843 	int ret;
844 
845 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
846 		PMD_INIT_LOG(INFO, "host does not support rx control");
847 		return -ENOTSUP;
848 	}
849 
850 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
851 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
852 	ctrl.data[0] = 0;
853 	dlen[0] = 1;
854 
855 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
856 	if (ret) {
857 		PMD_INIT_LOG(ERR, "Failed to disable promisc");
858 		return -EAGAIN;
859 	}
860 
861 	return 0;
862 }
863 
864 static int
865 virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
866 {
867 	struct virtio_hw *hw = dev->data->dev_private;
868 	struct virtio_pmd_ctrl ctrl;
869 	int dlen[1];
870 	int ret;
871 
872 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
873 		PMD_INIT_LOG(INFO, "host does not support rx control");
874 		return -ENOTSUP;
875 	}
876 
877 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
878 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
879 	ctrl.data[0] = 1;
880 	dlen[0] = 1;
881 
882 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
883 	if (ret) {
884 		PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
885 		return -EAGAIN;
886 	}
887 
888 	return 0;
889 }
890 
891 static int
892 virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
893 {
894 	struct virtio_hw *hw = dev->data->dev_private;
895 	struct virtio_pmd_ctrl ctrl;
896 	int dlen[1];
897 	int ret;
898 
899 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
900 		PMD_INIT_LOG(INFO, "host does not support rx control");
901 		return -ENOTSUP;
902 	}
903 
904 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
905 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
906 	ctrl.data[0] = 0;
907 	dlen[0] = 1;
908 
909 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
910 	if (ret) {
911 		PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
912 		return -EAGAIN;
913 	}
914 
915 	return 0;
916 }
917 
918 uint16_t
919 virtio_rx_mem_pool_buf_size(struct rte_mempool *mp)
920 {
921 	return rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
922 }
923 
924 bool
925 virtio_rx_check_scatter(uint16_t max_rx_pkt_len, uint16_t rx_buf_size,
926 			bool rx_scatter_enabled, const char **error)
927 {
928 	if (!rx_scatter_enabled && max_rx_pkt_len > rx_buf_size) {
929 		*error = "Rx scatter is disabled and RxQ mbuf pool object size is too small";
930 		return false;
931 	}
932 
933 	return true;
934 }
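
/*
 * Example (assuming the default RTE_PKTMBUF_HEADROOM of 128 bytes): a mempool
 * created with a 2048-byte data room yields rx_buf_size = 2048 - 128 = 1920,
 * so any max_rx_pkt_len above 1920 requires Rx scatter to be enabled.
 */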
935 
936 static bool
937 virtio_check_scatter_on_all_rx_queues(struct rte_eth_dev *dev,
938 				      uint16_t frame_size)
939 {
940 	struct virtio_hw *hw = dev->data->dev_private;
941 	struct virtnet_rx *rxvq;
942 	struct virtqueue *vq;
943 	unsigned int qidx;
944 	uint16_t buf_size;
945 	const char *error;
946 
947 	if (hw->vqs == NULL)
948 		return true;
949 
950 	for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) {
951 		vq = hw->vqs[2 * qidx + VTNET_SQ_RQ_QUEUE_IDX];
952 		if (vq == NULL)
953 			continue;
954 
955 		rxvq = &vq->rxq;
956 		if (rxvq->mpool == NULL)
957 			continue;
958 		buf_size = virtio_rx_mem_pool_buf_size(rxvq->mpool);
959 
960 		if (!virtio_rx_check_scatter(frame_size, buf_size,
961 					     hw->rx_ol_scatter, &error)) {
962 			PMD_INIT_LOG(ERR, "MTU check for RxQ %u failed: %s",
963 				     qidx, error);
964 			return false;
965 		}
966 	}
967 
968 	return true;
969 }
970 
971 #define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
972 static int
973 virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
974 {
975 	struct virtio_hw *hw = dev->data->dev_private;
976 	uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
977 				 hw->vtnet_hdr_size;
978 	uint32_t frame_size = mtu + ether_hdr_len;
979 	uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;
980 
981 	max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);
982 
983 	if (mtu < RTE_ETHER_MIN_MTU || frame_size > max_frame_size) {
984 		PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
985 			RTE_ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
986 		return -EINVAL;
987 	}
988 
989 	if (!virtio_check_scatter_on_all_rx_queues(dev, frame_size)) {
990 		PMD_INIT_LOG(ERR, "MTU vs Rx scatter and Rx buffers check failed");
991 		return -EINVAL;
992 	}
993 
994 	hw->max_rx_pkt_len = frame_size;
995 
996 	return 0;
997 }
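
/*
 * Worked example (assuming a 12-byte virtio-net header, i.e. a modern device
 * or mergeable buffers): for mtu = 1500, frame_size = 1500 + 14 (Ethernet)
 * + 4 (VLAN) + 12 = 1530 bytes, which must not exceed max_frame_size.
 */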
998 
999 static int
1000 virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
1001 {
1002 	struct virtio_hw *hw = dev->data->dev_private;
1003 	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
1004 	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
1005 
1006 	virtqueue_enable_intr(vq);
1007 	virtio_mb(hw->weak_barriers);
1008 	return 0;
1009 }
1010 
1011 static int
1012 virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
1013 {
1014 	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
1015 	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
1016 
1017 	virtqueue_disable_intr(vq);
1018 	return 0;
1019 }
1020 
1021 /*
1022  * dev_ops for virtio, bare necessities for basic operation
1023  */
1024 static const struct eth_dev_ops virtio_eth_dev_ops = {
1025 	.dev_configure           = virtio_dev_configure,
1026 	.dev_start               = virtio_dev_start,
1027 	.dev_stop                = virtio_dev_stop,
1028 	.dev_close               = virtio_dev_close,
1029 	.promiscuous_enable      = virtio_dev_promiscuous_enable,
1030 	.promiscuous_disable     = virtio_dev_promiscuous_disable,
1031 	.allmulticast_enable     = virtio_dev_allmulticast_enable,
1032 	.allmulticast_disable    = virtio_dev_allmulticast_disable,
1033 	.mtu_set                 = virtio_mtu_set,
1034 	.dev_infos_get           = virtio_dev_info_get,
1035 	.stats_get               = virtio_dev_stats_get,
1036 	.xstats_get              = virtio_dev_xstats_get,
1037 	.xstats_get_names        = virtio_dev_xstats_get_names,
1038 	.stats_reset             = virtio_dev_stats_reset,
1039 	.xstats_reset            = virtio_dev_stats_reset,
1040 	.link_update             = virtio_dev_link_update,
1041 	.vlan_offload_set        = virtio_dev_vlan_offload_set,
1042 	.rx_queue_setup          = virtio_dev_rx_queue_setup,
1043 	.rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
1044 	.rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
1045 	.tx_queue_setup          = virtio_dev_tx_queue_setup,
1046 	.rss_hash_update         = virtio_dev_rss_hash_update,
1047 	.rss_hash_conf_get       = virtio_dev_rss_hash_conf_get,
1048 	.reta_update             = virtio_dev_rss_reta_update,
1049 	.reta_query              = virtio_dev_rss_reta_query,
1050 	/* collect stats per queue */
1051 	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
1052 	.vlan_filter_set         = virtio_vlan_filter_set,
1053 	.mac_addr_add            = virtio_mac_addr_add,
1054 	.mac_addr_remove         = virtio_mac_addr_remove,
1055 	.mac_addr_set            = virtio_mac_addr_set,
1056 	.get_monitor_addr        = virtio_get_monitor_addr,
1057 };
1058 
1059 /*
1060  * dev_ops for virtio-user in secondary processes; only limited
1061  * functionality is supported currently.
1062  */
1063 const struct eth_dev_ops virtio_user_secondary_eth_dev_ops = {
1064 	.dev_infos_get           = virtio_dev_info_get,
1065 	.stats_get               = virtio_dev_stats_get,
1066 	.xstats_get              = virtio_dev_xstats_get,
1067 	.xstats_get_names        = virtio_dev_xstats_get_names,
1068 	.stats_reset             = virtio_dev_stats_reset,
1069 	.xstats_reset            = virtio_dev_stats_reset,
1070 	/* collect stats per queue */
1071 	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
1072 };
1073 
1074 static void
1075 virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1076 {
1077 	unsigned i;
1078 
1079 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1080 		const struct virtnet_tx *txvq = dev->data->tx_queues[i];
1081 		if (txvq == NULL)
1082 			continue;
1083 
1084 		stats->opackets += txvq->stats.packets;
1085 		stats->obytes += txvq->stats.bytes;
1086 
1087 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
1088 			stats->q_opackets[i] = txvq->stats.packets;
1089 			stats->q_obytes[i] = txvq->stats.bytes;
1090 		}
1091 	}
1092 
1093 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1094 		const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1095 		if (rxvq == NULL)
1096 			continue;
1097 
1098 		stats->ipackets += rxvq->stats.packets;
1099 		stats->ibytes += rxvq->stats.bytes;
1100 		stats->ierrors += rxvq->stats.errors;
1101 
1102 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
1103 			stats->q_ipackets[i] = rxvq->stats.packets;
1104 			stats->q_ibytes[i] = rxvq->stats.bytes;
1105 		}
1106 	}
1107 
1108 	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
1109 }
1110 
1111 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
1112 				       struct rte_eth_xstat_name *xstats_names,
1113 				       __rte_unused unsigned limit)
1114 {
1115 	unsigned i;
1116 	unsigned count = 0;
1117 	unsigned t;
1118 
1119 	unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1120 		dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1121 
1122 	if (xstats_names != NULL) {
1123 		/* Note: limit checked in rte_eth_xstats_get_names() */
1124 
1125 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
1126 			struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1127 			if (rxvq == NULL)
1128 				continue;
1129 			for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1130 				snprintf(xstats_names[count].name,
1131 					sizeof(xstats_names[count].name),
1132 					"rx_q%u_%s", i,
1133 					rte_virtio_rxq_stat_strings[t].name);
1134 				count++;
1135 			}
1136 		}
1137 
1138 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
1139 			struct virtnet_tx *txvq = dev->data->tx_queues[i];
1140 			if (txvq == NULL)
1141 				continue;
1142 			for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1143 				snprintf(xstats_names[count].name,
1144 					sizeof(xstats_names[count].name),
1145 					"tx_q%u_%s", i,
1146 					rte_virtio_txq_stat_strings[t].name);
1147 				count++;
1148 			}
1149 		}
1150 		return count;
1151 	}
1152 	return nstats;
1153 }
1154 
1155 static int
1156 virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1157 		      unsigned n)
1158 {
1159 	unsigned i;
1160 	unsigned count = 0;
1161 
1162 	unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1163 		dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1164 
1165 	if (n < nstats)
1166 		return nstats;
1167 
1168 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1169 		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1170 
1171 		if (rxvq == NULL)
1172 			continue;
1173 
1174 		unsigned t;
1175 
1176 		for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1177 			xstats[count].value = *(uint64_t *)(((char *)rxvq) +
1178 				rte_virtio_rxq_stat_strings[t].offset);
1179 			xstats[count].id = count;
1180 			count++;
1181 		}
1182 	}
1183 
1184 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1185 		struct virtnet_tx *txvq = dev->data->tx_queues[i];
1186 
1187 		if (txvq == NULL)
1188 			continue;
1189 
1190 		unsigned t;
1191 
1192 		for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1193 			xstats[count].value = *(uint64_t *)(((char *)txvq) +
1194 				rte_virtio_txq_stat_strings[t].offset);
1195 			xstats[count].id = count;
1196 			count++;
1197 		}
1198 	}
1199 
1200 	return count;
1201 }
1202 
1203 static int
1204 virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1205 {
1206 	virtio_update_stats(dev, stats);
1207 
1208 	return 0;
1209 }
1210 
1211 static int
1212 virtio_dev_stats_reset(struct rte_eth_dev *dev)
1213 {
1214 	unsigned int i;
1215 
1216 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1217 		struct virtnet_tx *txvq = dev->data->tx_queues[i];
1218 		if (txvq == NULL)
1219 			continue;
1220 
1221 		txvq->stats.packets = 0;
1222 		txvq->stats.bytes = 0;
1223 		txvq->stats.multicast = 0;
1224 		txvq->stats.broadcast = 0;
1225 		memset(txvq->stats.size_bins, 0,
1226 		       sizeof(txvq->stats.size_bins[0]) * 8);
1227 	}
1228 
1229 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1230 		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1231 		if (rxvq == NULL)
1232 			continue;
1233 
1234 		rxvq->stats.packets = 0;
1235 		rxvq->stats.bytes = 0;
1236 		rxvq->stats.errors = 0;
1237 		rxvq->stats.multicast = 0;
1238 		rxvq->stats.broadcast = 0;
1239 		memset(rxvq->stats.size_bins, 0,
1240 		       sizeof(rxvq->stats.size_bins[0]) * 8);
1241 	}
1242 
1243 	return 0;
1244 }
1245 
1246 static void
1247 virtio_set_hwaddr(struct virtio_hw *hw)
1248 {
1249 	virtio_write_dev_config(hw,
1250 			offsetof(struct virtio_net_config, mac),
1251 			&hw->mac_addr, RTE_ETHER_ADDR_LEN);
1252 }
1253 
1254 static void
1255 virtio_get_hwaddr(struct virtio_hw *hw)
1256 {
1257 	if (virtio_with_feature(hw, VIRTIO_NET_F_MAC)) {
1258 		virtio_read_dev_config(hw,
1259 			offsetof(struct virtio_net_config, mac),
1260 			&hw->mac_addr, RTE_ETHER_ADDR_LEN);
1261 	} else {
1262 		rte_eth_random_addr(&hw->mac_addr[0]);
1263 		virtio_set_hwaddr(hw);
1264 	}
1265 }
1266 
1267 static int
1268 virtio_mac_table_set(struct virtio_hw *hw,
1269 		     const struct virtio_net_ctrl_mac *uc,
1270 		     const struct virtio_net_ctrl_mac *mc)
1271 {
1272 	struct virtio_pmd_ctrl ctrl;
1273 	int err, len[2];
1274 
1275 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1276 		PMD_DRV_LOG(INFO, "host does not support mac table");
1277 		return -1;
1278 	}
1279 
1280 	ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1281 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1282 
1283 	len[0] = uc->entries * RTE_ETHER_ADDR_LEN + sizeof(uc->entries);
1284 	memcpy(ctrl.data, uc, len[0]);
1285 
1286 	len[1] = mc->entries * RTE_ETHER_ADDR_LEN + sizeof(mc->entries);
1287 	memcpy(ctrl.data + len[0], mc, len[1]);
1288 
1289 	err = virtio_send_command(hw->cvq, &ctrl, len, 2);
1290 	if (err != 0)
1291 		PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
1292 	return err;
1293 }
1294 
1295 static int
1296 virtio_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
1297 		    uint32_t index, uint32_t vmdq __rte_unused)
1298 {
1299 	struct virtio_hw *hw = dev->data->dev_private;
1300 	const struct rte_ether_addr *addrs = dev->data->mac_addrs;
1301 	unsigned int i;
1302 	struct virtio_net_ctrl_mac *uc, *mc;
1303 
1304 	if (index >= VIRTIO_MAX_MAC_ADDRS) {
1305 		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1306 		return -EINVAL;
1307 	}
1308 
1309 	uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1310 		sizeof(uc->entries));
1311 	uc->entries = 0;
1312 	mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1313 		sizeof(mc->entries));
1314 	mc->entries = 0;
1315 
1316 	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1317 		const struct rte_ether_addr *addr
1318 			= (i == index) ? mac_addr : addrs + i;
1319 		struct virtio_net_ctrl_mac *tbl
1320 			= rte_is_multicast_ether_addr(addr) ? mc : uc;
1321 
1322 		memcpy(&tbl->macs[tbl->entries++], addr, RTE_ETHER_ADDR_LEN);
1323 	}
1324 
1325 	return virtio_mac_table_set(hw, uc, mc);
1326 }
1327 
1328 static void
1329 virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
1330 {
1331 	struct virtio_hw *hw = dev->data->dev_private;
1332 	struct rte_ether_addr *addrs = dev->data->mac_addrs;
1333 	struct virtio_net_ctrl_mac *uc, *mc;
1334 	unsigned int i;
1335 
1336 	if (index >= VIRTIO_MAX_MAC_ADDRS) {
1337 		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1338 		return;
1339 	}
1340 
1341 	uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1342 		sizeof(uc->entries));
1343 	uc->entries = 0;
1344 	mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1345 		sizeof(mc->entries));
1346 	mc->entries = 0;
1347 
1348 	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1349 		struct virtio_net_ctrl_mac *tbl;
1350 
1351 		if (i == index || rte_is_zero_ether_addr(addrs + i))
1352 			continue;
1353 
1354 		tbl = rte_is_multicast_ether_addr(addrs + i) ? mc : uc;
1355 		memcpy(&tbl->macs[tbl->entries++], addrs + i,
1356 			RTE_ETHER_ADDR_LEN);
1357 	}
1358 
1359 	virtio_mac_table_set(hw, uc, mc);
1360 }
1361 
1362 static int
1363 virtio_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
1364 {
1365 	struct virtio_hw *hw = dev->data->dev_private;
1366 
1367 	memcpy(hw->mac_addr, mac_addr, RTE_ETHER_ADDR_LEN);
1368 
1369 	/* Use atomic update if available */
1370 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1371 		struct virtio_pmd_ctrl ctrl;
1372 		int len = RTE_ETHER_ADDR_LEN;
1373 
1374 		ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1375 		ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
1376 
1377 		memcpy(ctrl.data, mac_addr, RTE_ETHER_ADDR_LEN);
1378 		return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1379 	}
1380 
1381 	if (!virtio_with_feature(hw, VIRTIO_NET_F_MAC))
1382 		return -ENOTSUP;
1383 
1384 	virtio_set_hwaddr(hw);
1385 	return 0;
1386 }
1387 
1388 #define CLB_VAL_IDX 0
1389 #define CLB_MSK_IDX 1
1390 #define CLB_MATCH_IDX 2
1391 static int
1392 virtio_monitor_callback(const uint64_t value,
1393 		const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
1394 {
1395 	const uint64_t m = opaque[CLB_MSK_IDX];
1396 	const uint64_t v = opaque[CLB_VAL_IDX];
1397 	const uint64_t c = opaque[CLB_MATCH_IDX];
1398 
1399 	if (c)
1400 		return (value & m) == v ? -1 : 0;
1401 	else
1402 		return (value & m) == v ? 0 : -1;
1403 }
1404 
1405 static int
1406 virtio_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
1407 {
1408 	struct virtnet_rx *rxvq = rx_queue;
1409 	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
1410 	struct virtio_hw *hw;
1411 
1412 	if (vq == NULL)
1413 		return -EINVAL;
1414 
1415 	hw = vq->hw;
1416 	if (virtio_with_packed_queue(hw)) {
1417 		struct vring_packed_desc *desc;
1418 		desc = vq->vq_packed.ring.desc;
1419 		pmc->addr = &desc[vq->vq_used_cons_idx].flags;
1420 		if (vq->vq_packed.used_wrap_counter)
1421 			pmc->opaque[CLB_VAL_IDX] =
1422 						VRING_PACKED_DESC_F_AVAIL_USED;
1423 		else
1424 			pmc->opaque[CLB_VAL_IDX] = 0;
1425 		pmc->opaque[CLB_MSK_IDX] = VRING_PACKED_DESC_F_AVAIL_USED;
1426 		pmc->opaque[CLB_MATCH_IDX] = 1;
1427 		pmc->size = sizeof(desc[vq->vq_used_cons_idx].flags);
1428 	} else {
1429 		pmc->addr = &vq->vq_split.ring.used->idx;
1430 		pmc->opaque[CLB_VAL_IDX] = vq->vq_used_cons_idx
1431 					& (vq->vq_nentries - 1);
1432 		pmc->opaque[CLB_MSK_IDX] = vq->vq_nentries - 1;
1433 		pmc->opaque[CLB_MATCH_IDX] = 0;
1434 		pmc->size = sizeof(vq->vq_split.ring.used->idx);
1435 	}
1436 	pmc->fn = virtio_monitor_callback;
1437 
1438 	return 0;
1439 }
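
/*
 * Semantics of the opaque[] words above (sketch): the callback tells the
 * power library whether sleeping is still safe. With CLB_MATCH_IDX set
 * (packed ring), it aborts the sleep once the masked descriptor flags equal
 * the expected avail/used value; with it clear (split ring), it sleeps only
 * while used->idx still equals the cached consumer index.
 */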
1440 
1441 static int
1442 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1443 {
1444 	struct virtio_hw *hw = dev->data->dev_private;
1445 	struct virtio_pmd_ctrl ctrl;
1446 	int len;
1447 
1448 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
1449 		return -ENOTSUP;
1450 
1451 	ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
1452 	ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
1453 	memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
1454 	len = sizeof(vlan_id);
1455 
1456 	return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1457 }
1458 
1459 static int
1460 virtio_intr_unmask(struct rte_eth_dev *dev)
1461 {
1462 	struct virtio_hw *hw = dev->data->dev_private;
1463 
1464 	if (rte_intr_ack(dev->intr_handle) < 0)
1465 		return -1;
1466 
1467 	if (VIRTIO_OPS(hw)->intr_detect)
1468 		VIRTIO_OPS(hw)->intr_detect(hw);
1469 
1470 	return 0;
1471 }
1472 
1473 static int
1474 virtio_intr_enable(struct rte_eth_dev *dev)
1475 {
1476 	struct virtio_hw *hw = dev->data->dev_private;
1477 
1478 	if (rte_intr_enable(dev->intr_handle) < 0)
1479 		return -1;
1480 
1481 	if (VIRTIO_OPS(hw)->intr_detect)
1482 		VIRTIO_OPS(hw)->intr_detect(hw);
1483 
1484 	return 0;
1485 }
1486 
1487 static int
1488 virtio_intr_disable(struct rte_eth_dev *dev)
1489 {
1490 	struct virtio_hw *hw = dev->data->dev_private;
1491 
1492 	if (rte_intr_disable(dev->intr_handle) < 0)
1493 		return -1;
1494 
1495 	if (VIRTIO_OPS(hw)->intr_detect)
1496 		VIRTIO_OPS(hw)->intr_detect(hw);
1497 
1498 	return 0;
1499 }
1500 
1501 static int
1502 virtio_ethdev_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
1503 {
1504 	uint64_t host_features;
1505 
1506 	/* Prepare guest_features: feature that driver wants to support */
1507 	PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
1508 		req_features);
1509 
1510 	/* Read device(host) feature bits */
1511 	host_features = VIRTIO_OPS(hw)->get_features(hw);
1512 	PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
1513 		host_features);
1514 
1515 	/* If supported, ensure MTU value is valid before acknowledging it. */
1516 	if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
1517 		struct virtio_net_config config;
1518 
1519 		virtio_read_dev_config(hw,
1520 			offsetof(struct virtio_net_config, mtu),
1521 			&config.mtu, sizeof(config.mtu));
1522 
1523 		if (config.mtu < RTE_ETHER_MIN_MTU)
1524 			req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
1525 	}
1526 
1527 	/*
1528 	 * Negotiate features: a subset of the device's feature bits is written
1529 	 * back as the guest feature bits.
1530 	 */
1531 	hw->guest_features = req_features;
1532 	hw->guest_features = virtio_negotiate_features(hw, host_features);
1533 	PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
1534 		hw->guest_features);
1535 
1536 	if (VIRTIO_OPS(hw)->features_ok(hw) < 0)
1537 		return -1;
1538 
1539 	if (virtio_with_feature(hw, VIRTIO_F_VERSION_1)) {
1540 		virtio_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1541 
1542 		if (!(virtio_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
1543 			PMD_INIT_LOG(ERR, "Failed to set FEATURES_OK status!");
1544 			return -1;
1545 		}
1546 	}
1547 
1548 	hw->req_guest_features = req_features;
1549 
1550 	return 0;
1551 }
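
/*
 * Net effect (sketch): the negotiated set is a subset of both sides,
 * roughly
 *
 *	hw->guest_features == req_features & host_features
 *
 * modulo any bits virtio_negotiate_features() itself filters out, with
 * FEATURES_OK acknowledged for VIRTIO_F_VERSION_1 devices.
 */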
1552 
1553 int
1554 virtio_dev_pause(struct rte_eth_dev *dev)
1555 {
1556 	struct virtio_hw *hw = dev->data->dev_private;
1557 
1558 	rte_spinlock_lock(&hw->state_lock);
1559 
1560 	if (hw->started == 0) {
1561 		/* Device has already been stopped. */
1562 		rte_spinlock_unlock(&hw->state_lock);
1563 		return -1;
1564 	}
1565 	hw->started = 0;
1566 	/*
1567 	 * Prevent the worker threads from touching queues to avoid contention;
1568 	 * 1 ms should be enough for any ongoing Tx function to finish.
1569 	 */
1570 	rte_delay_ms(1);
1571 	return 0;
1572 }
1573 
1574 /*
1575  * Recover hw state to let the worker threads continue.
1576  */
1577 void
1578 virtio_dev_resume(struct rte_eth_dev *dev)
1579 {
1580 	struct virtio_hw *hw = dev->data->dev_private;
1581 
1582 	hw->started = 1;
1583 	rte_spinlock_unlock(&hw->state_lock);
1584 }
1585 
1586 /*
1587  * Should be called only after device is paused.
1588  */
1589 int
1590 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
1591 		int nb_pkts)
1592 {
1593 	struct virtio_hw *hw = dev->data->dev_private;
1594 	struct virtnet_tx *txvq = dev->data->tx_queues[0];
1595 	int ret;
1596 
1597 	hw->inject_pkts = tx_pkts;
1598 	ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
1599 	hw->inject_pkts = NULL;
1600 
1601 	return ret;
1602 }
1603 
1604 static void
1605 virtio_notify_peers(struct rte_eth_dev *dev)
1606 {
1607 	struct virtio_hw *hw = dev->data->dev_private;
1608 	struct virtnet_rx *rxvq;
1609 	struct rte_mbuf *rarp_mbuf;
1610 
1611 	if (!dev->data->rx_queues)
1612 		return;
1613 
1614 	rxvq = dev->data->rx_queues[0];
1615 	if (!rxvq)
1616 		return;
1617 
1618 	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
1619 			(struct rte_ether_addr *)hw->mac_addr);
1620 	if (rarp_mbuf == NULL) {
1621 		PMD_DRV_LOG(ERR, "failed to make RARP packet.");
1622 		return;
1623 	}
1624 
1625 	/* If virtio port just stopped, no need to send RARP */
1626 	if (virtio_dev_pause(dev) < 0) {
1627 		rte_pktmbuf_free(rarp_mbuf);
1628 		return;
1629 	}
1630 
1631 	virtio_inject_pkts(dev, &rarp_mbuf, 1);
1632 	virtio_dev_resume(dev);
1633 }
1634 
1635 static void
1636 virtio_ack_link_announce(struct rte_eth_dev *dev)
1637 {
1638 	struct virtio_hw *hw = dev->data->dev_private;
1639 	struct virtio_pmd_ctrl ctrl;
1640 
1641 	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
1642 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
1643 
1644 	virtio_send_command(hw->cvq, &ctrl, NULL, 0);
1645 }
1646 
1647 /*
1648  * Process the virtio config-changed interrupt: invoke the LSC callback
1649  * if the link state changed, and generate a gratuitous RARP packet if
1650  * the status indicates an ANNOUNCE.
1651  */
1652 void
1653 virtio_interrupt_handler(void *param)
1654 {
1655 	struct rte_eth_dev *dev = param;
1656 	struct virtio_hw *hw = dev->data->dev_private;
1657 	uint8_t isr;
1658 	uint16_t status;
1659 
1660 	/* Read interrupt status which clears interrupt */
1661 	isr = virtio_get_isr(hw);
1662 	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
1663 
1664 	if (virtio_intr_unmask(dev) < 0)
1665 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1666 
1667 	if (isr & VIRTIO_ISR_CONFIG) {
1668 		if (virtio_dev_link_update(dev, 0) == 0)
1669 			rte_eth_dev_callback_process(dev,
1670 						     RTE_ETH_EVENT_INTR_LSC,
1671 						     NULL);
1672 
1673 		if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1674 			virtio_read_dev_config(hw,
1675 				offsetof(struct virtio_net_config, status),
1676 				&status, sizeof(status));
1677 			if (status & VIRTIO_NET_S_ANNOUNCE) {
1678 				virtio_notify_peers(dev);
1679 				if (hw->cvq)
1680 					virtio_ack_link_announce(dev);
1681 			}
1682 		}
1683 	}
1684 }
1685 
1686 /* set rx and tx handlers according to what is supported */
1687 static void
1688 set_rxtx_funcs(struct rte_eth_dev *eth_dev)
1689 {
1690 	struct virtio_hw *hw = eth_dev->data->dev_private;
1691 
1692 	eth_dev->tx_pkt_prepare = virtio_xmit_pkts_prepare;
1693 	if (virtio_with_packed_queue(hw)) {
1694 		PMD_INIT_LOG(INFO,
1695 			"virtio: using packed ring %s Tx path on port %u",
1696 			hw->use_vec_tx ? "vectorized" : "standard",
1697 			eth_dev->data->port_id);
1698 		if (hw->use_vec_tx)
1699 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
1700 		else
1701 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
1702 	} else {
1703 		if (hw->use_inorder_tx) {
1704 			PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
1705 				eth_dev->data->port_id);
1706 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
1707 		} else {
1708 			PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
1709 				eth_dev->data->port_id);
1710 			eth_dev->tx_pkt_burst = virtio_xmit_pkts;
1711 		}
1712 	}
1713 
1714 	if (virtio_with_packed_queue(hw)) {
1715 		if (hw->use_vec_rx) {
1716 			PMD_INIT_LOG(INFO,
1717 				"virtio: using packed ring vectorized Rx path on port %u",
1718 				eth_dev->data->port_id);
1719 			eth_dev->rx_pkt_burst =
1720 				&virtio_recv_pkts_packed_vec;
1721 		} else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1722 			PMD_INIT_LOG(INFO,
1723 				"virtio: using packed ring mergeable buffer Rx path on port %u",
1724 				eth_dev->data->port_id);
1725 			eth_dev->rx_pkt_burst =
1726 				&virtio_recv_mergeable_pkts_packed;
1727 		} else {
1728 			PMD_INIT_LOG(INFO,
1729 				"virtio: using packed ring standard Rx path on port %u",
1730 				eth_dev->data->port_id);
1731 			eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
1732 		}
1733 	} else {
1734 		if (hw->use_vec_rx) {
1735 			PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on port %u",
1736 				eth_dev->data->port_id);
1737 			eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
1738 		} else if (hw->use_inorder_rx) {
1739 			PMD_INIT_LOG(INFO,
1740 				"virtio: using inorder Rx path on port %u",
1741 				eth_dev->data->port_id);
1742 			eth_dev->rx_pkt_burst = &virtio_recv_pkts_inorder;
1743 		} else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1744 			PMD_INIT_LOG(INFO,
1745 				"virtio: using mergeable buffer Rx path on port %u",
1746 				eth_dev->data->port_id);
1747 			eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
1748 		} else {
1749 			PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
1750 				eth_dev->data->port_id);
1751 			eth_dev->rx_pkt_burst = &virtio_recv_pkts;
1752 		}
1753 	}
1754 
1755 }
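
/*
 * Selection summary for the dispatch above (illustrative):
 *
 *	packed ring:  vectorized > mergeable > standard Rx;
 *	              vectorized > standard Tx
 *	split ring:   vectorized > in-order > mergeable > standard Rx;
 *	              in-order > standard Tx
 *
 * "Vectorized" and "in-order" come from hw->use_vec_* / hw->use_inorder_*,
 * which are set elsewhere from devargs and the negotiated features.
 */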
1756 
1757 /* Only support 1:1 queue/interrupt mapping so far.
1758  * TODO: support n:1 queue/interrupt mapping when there is a limited number
1759  * of interrupt vectors (< N + 1).
1760  */
1761 static int
1762 virtio_queues_bind_intr(struct rte_eth_dev *dev)
1763 {
1764 	uint32_t i;
1765 	struct virtio_hw *hw = dev->data->dev_private;
1766 
1767 	PMD_INIT_LOG(INFO, "queue/interrupt binding");
1768 	for (i = 0; i < dev->data->nb_rx_queues; ++i) {
1769 		if (rte_intr_vec_list_index_set(dev->intr_handle, i,
1770 						       i + 1))
1771 			return -rte_errno;
1772 		if (VIRTIO_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
1773 						 VIRTIO_MSI_NO_VECTOR) {
1774 			PMD_DRV_LOG(ERR, "failed to set queue vector");
1775 			return -EBUSY;
1776 		}
1777 	}
1778 
1779 	return 0;
1780 }
1781 
1782 static void
1783 virtio_queues_unbind_intr(struct rte_eth_dev *dev)
1784 {
1785 	uint32_t i;
1786 	struct virtio_hw *hw = dev->data->dev_private;
1787 
1788 	PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
1789 	for (i = 0; i < dev->data->nb_rx_queues; ++i)
1790 		VIRTIO_OPS(hw)->set_queue_irq(hw,
1791 					     hw->vqs[i * 2],
1792 					     VIRTIO_MSI_NO_VECTOR);
1793 }
1794 
1795 static int
1796 virtio_configure_intr(struct rte_eth_dev *dev)
1797 {
1798 	struct virtio_hw *hw = dev->data->dev_private;
1799 
1800 	if (!rte_intr_cap_multiple(dev->intr_handle)) {
1801 		PMD_INIT_LOG(ERR, "Multiple interrupt vectors not supported");
1802 		return -ENOTSUP;
1803 	}
1804 
1805 	if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1806 		PMD_INIT_LOG(ERR, "Failed to create eventfd");
1807 		return -1;
1808 	}
1809 
1810 	if (rte_intr_vec_list_alloc(dev->intr_handle, "intr_vec",
1811 				    hw->max_queue_pairs)) {
1812 		PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1813 			     hw->max_queue_pairs);
1814 		return -ENOMEM;
1815 	}
1816 
1817 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1818 		/* Re-register callback to update max_intr */
1819 		rte_intr_callback_unregister(dev->intr_handle,
1820 					     virtio_interrupt_handler,
1821 					     dev);
1822 		rte_intr_callback_register(dev->intr_handle,
1823 					   virtio_interrupt_handler,
1824 					   dev);
1825 	}
1826 
1827 	/* DO NOT try to remove this! This call enables MSI-X; without it,
1828 	 * QEMU hits a SIGSEGV when DRIVER_OK is sent.
1829 	 * For legacy devices, it must also happen before queue/vector binding
1830 	 * to grow the config size from 20 to 24 bytes; otherwise the write to
1831 	 * VIRTIO_MSI_QUEUE_VECTOR (offset 22) is ignored.
1832 	 */
1833 	if (virtio_intr_enable(dev) < 0) {
1834 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1835 		return -1;
1836 	}
1837 
1838 	if (virtio_queues_bind_intr(dev) < 0) {
1839 		PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1840 		return -1;
1841 	}
1842 
1843 	return 0;
1844 }
1845 
1846 static void
1847 virtio_get_speed_duplex(struct rte_eth_dev *eth_dev,
1848 			struct rte_eth_link *link)
1849 {
1850 	struct virtio_hw *hw = eth_dev->data->dev_private;
1851 	struct virtio_net_config *config;
1852 	struct virtio_net_config local_config;
1853 
1854 	config = &local_config;
1855 	virtio_read_dev_config(hw,
1856 		offsetof(struct virtio_net_config, speed),
1857 		&config->speed, sizeof(config->speed));
1858 	virtio_read_dev_config(hw,
1859 		offsetof(struct virtio_net_config, duplex),
1860 		&config->duplex, sizeof(config->duplex));
1861 	hw->speed = config->speed;
1862 	hw->duplex = config->duplex;
1863 	if (link != NULL) {
1864 		link->link_duplex = hw->duplex;
1865 		link->link_speed  = hw->speed;
1866 	}
1867 	PMD_INIT_LOG(DEBUG, "link speed = %u, duplex = %d",
1868 		     hw->speed, hw->duplex);
1869 }
1870 
1871 static uint64_t
1872 ethdev_to_virtio_rss_offloads(uint64_t ethdev_hash_types)
1873 {
1874 	uint64_t virtio_hash_types = 0;
1875 
1876 	if (ethdev_hash_types & (RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
1877 				RTE_ETH_RSS_NONFRAG_IPV4_OTHER))
1878 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IPV4;
1879 
1880 	if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1881 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCPV4;
1882 
1883 	if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
1884 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDPV4;
1885 
1886 	if (ethdev_hash_types & (RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
1887 				RTE_ETH_RSS_NONFRAG_IPV6_OTHER))
1888 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IPV6;
1889 
1890 	if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1891 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCPV6;
1892 
1893 	if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
1894 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDPV6;
1895 
1896 	if (ethdev_hash_types & RTE_ETH_RSS_IPV6_EX)
1897 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IP_EX;
1898 
1899 	if (ethdev_hash_types & RTE_ETH_RSS_IPV6_TCP_EX)
1900 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCP_EX;
1901 
1902 	if (ethdev_hash_types & RTE_ETH_RSS_IPV6_UDP_EX)
1903 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDP_EX;
1904 
1905 	return virtio_hash_types;
1906 }
1907 
1908 static uint64_t
1909 virtio_to_ethdev_rss_offloads(uint64_t virtio_hash_types)
1910 {
1911 	uint64_t rss_offloads = 0;
1912 
1913 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IPV4)
1914 		rss_offloads |= RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
1915 			RTE_ETH_RSS_NONFRAG_IPV4_OTHER;
1916 
1917 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCPV4)
1918 		rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV4_TCP;
1919 
1920 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDPV4)
1921 		rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV4_UDP;
1922 
1923 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IPV6)
1924 		rss_offloads |= RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
1925 			RTE_ETH_RSS_NONFRAG_IPV6_OTHER;
1926 
1927 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCPV6)
1928 		rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV6_TCP;
1929 
1930 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDPV6)
1931 		rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV6_UDP;
1932 
1933 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IP_EX)
1934 		rss_offloads |= RTE_ETH_RSS_IPV6_EX;
1935 
1936 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCP_EX)
1937 		rss_offloads |= RTE_ETH_RSS_IPV6_TCP_EX;
1938 
1939 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDP_EX)
1940 		rss_offloads |= RTE_ETH_RSS_IPV6_UDP_EX;
1941 
1942 	return rss_offloads;
1943 }
1944 
1945 static int
1946 virtio_dev_get_rss_config(struct virtio_hw *hw, uint32_t *rss_hash_types)
1947 {
1948 	struct virtio_net_config local_config;
1949 	struct virtio_net_config *config = &local_config;
1950 
1951 	virtio_read_dev_config(hw,
1952 			offsetof(struct virtio_net_config, rss_max_key_size),
1953 			&config->rss_max_key_size,
1954 			sizeof(config->rss_max_key_size));
1955 	if (config->rss_max_key_size < VIRTIO_NET_RSS_KEY_SIZE) {
1956 		PMD_INIT_LOG(ERR, "Invalid device RSS max key size (%u)",
1957 				config->rss_max_key_size);
1958 		return -EINVAL;
1959 	}
1960 
1961 	virtio_read_dev_config(hw,
1962 			offsetof(struct virtio_net_config,
1963 				rss_max_indirection_table_length),
1964 			&config->rss_max_indirection_table_length,
1965 			sizeof(config->rss_max_indirection_table_length));
1966 	if (config->rss_max_indirection_table_length < VIRTIO_NET_RSS_RETA_SIZE) {
1967 		PMD_INIT_LOG(ERR, "Invalid device RSS max reta size (%u)",
1968 				config->rss_max_indirection_table_length);
1969 		return -EINVAL;
1970 	}
1971 
1972 	virtio_read_dev_config(hw,
1973 			offsetof(struct virtio_net_config, supported_hash_types),
1974 			&config->supported_hash_types,
1975 			sizeof(config->supported_hash_types));
1976 	if ((config->supported_hash_types & VIRTIO_NET_HASH_TYPE_MASK) == 0) {
1977 		PMD_INIT_LOG(ERR, "Invalid device RSS hash types (0x%x)",
1978 				config->supported_hash_types);
1979 		return -EINVAL;
1980 	}
1981 
1982 	*rss_hash_types = config->supported_hash_types & VIRTIO_NET_HASH_TYPE_MASK;
1983 
1984 	PMD_INIT_LOG(DEBUG, "Device RSS config:");
1985 	PMD_INIT_LOG(DEBUG, "\t-Max key size: %u", config->rss_max_key_size);
1986 	PMD_INIT_LOG(DEBUG, "\t-Max reta size: %u", config->rss_max_indirection_table_length);
1987 	PMD_INIT_LOG(DEBUG, "\t-Supported hash types: 0x%x", *rss_hash_types);
1988 
1989 	return 0;
1990 }
1991 
1992 static int
1993 virtio_dev_rss_hash_update(struct rte_eth_dev *dev,
1994 		struct rte_eth_rss_conf *rss_conf)
1995 {
1996 	struct virtio_hw *hw = dev->data->dev_private;
1997 	char old_rss_key[VIRTIO_NET_RSS_KEY_SIZE];
1998 	uint32_t old_hash_types;
1999 	uint16_t nb_queues;
2000 	int ret;
2001 
2002 	if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2003 		return -ENOTSUP;
2004 
2005 	if (rss_conf->rss_hf & ~virtio_to_ethdev_rss_offloads(VIRTIO_NET_HASH_TYPE_MASK))
2006 		return -EINVAL;
2007 
2008 	old_hash_types = hw->rss_hash_types;
2009 	hw->rss_hash_types = ethdev_to_virtio_rss_offloads(rss_conf->rss_hf);
2010 
2011 	if (rss_conf->rss_key && rss_conf->rss_key_len) {
2012 		if (rss_conf->rss_key_len != VIRTIO_NET_RSS_KEY_SIZE) {
2013 			PMD_INIT_LOG(ERR, "Driver only supports RSS keys of %u bytes",
2014 					VIRTIO_NET_RSS_KEY_SIZE);
2015 			ret = -EINVAL;
2016 			goto restore_types;
2017 		}
2018 		memcpy(old_rss_key, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2019 		memcpy(hw->rss_key, rss_conf->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2020 	}
2021 
2022 	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2023 	ret = virtio_set_multiple_queues_rss(dev, nb_queues);
2024 	if (ret < 0) {
2025 		PMD_INIT_LOG(ERR, "Failed to apply new RSS config to the device");
2026 		goto restore_key;
2027 	}
2028 
2029 	return 0;
2030 restore_key:
2031 	memcpy(hw->rss_key, old_rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2032 restore_types:
2033 	hw->rss_hash_types = old_hash_types;
2034 
2035 	return ret;
2036 }
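
/*
 * Illustrative sketch (not part of the driver): the handler above is reached
 * through the generic ethdev API.  Assuming "key" is a 40-byte
 * (VIRTIO_NET_RSS_KEY_SIZE) buffer, a runtime update could look like:
 *
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = key,
 *		.rss_key_len = VIRTIO_NET_RSS_KEY_SIZE,
 *		.rss_hf = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_TCP,
 *	};
 *	ret = rte_eth_dev_rss_hash_update(port_id, &conf);
 *
 * On failure, the handler rolls back to the previous key and hash types.
 */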
2037 
2038 static int
2039 virtio_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2040 		struct rte_eth_rss_conf *rss_conf)
2041 {
2042 	struct virtio_hw *hw = dev->data->dev_private;
2043 
2044 	if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2045 		return -ENOTSUP;
2046 
2047 	if (rss_conf->rss_key && rss_conf->rss_key_len >= VIRTIO_NET_RSS_KEY_SIZE)
2048 		memcpy(rss_conf->rss_key, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2049 	rss_conf->rss_key_len = VIRTIO_NET_RSS_KEY_SIZE;
2050 	rss_conf->rss_hf = virtio_to_ethdev_rss_offloads(hw->rss_hash_types);
2051 
2052 	return 0;
2053 }
2054 
2055 static int virtio_dev_rss_reta_update(struct rte_eth_dev *dev,
2056 			 struct rte_eth_rss_reta_entry64 *reta_conf,
2057 			 uint16_t reta_size)
2058 {
2059 	struct virtio_hw *hw = dev->data->dev_private;
2060 	uint16_t nb_queues;
2061 	uint16_t old_reta[VIRTIO_NET_RSS_RETA_SIZE];
2062 	int idx, pos, i, ret;
2063 
2064 	if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2065 		return -ENOTSUP;
2066 
2067 	if (reta_size != VIRTIO_NET_RSS_RETA_SIZE)
2068 		return -EINVAL;
2069 
2070 	memcpy(old_reta, hw->rss_reta, sizeof(old_reta));
2071 
2072 	for (i = 0; i < reta_size; i++) {
2073 		idx = i / RTE_ETH_RETA_GROUP_SIZE;
2074 		pos = i % RTE_ETH_RETA_GROUP_SIZE;
2075 
2076 		if (((reta_conf[idx].mask >> pos) & 0x1) == 0)
2077 			continue;
2078 
2079 		hw->rss_reta[i] = reta_conf[idx].reta[pos];
2080 	}
2081 
2082 	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2083 	ret = virtio_set_multiple_queues_rss(dev, nb_queues);
2084 	if (ret < 0) {
2085 		PMD_INIT_LOG(ERR, "Failed to apply new RETA to the device");
2086 		memcpy(hw->rss_reta, old_reta, sizeof(old_reta));
2087 	}
2088 
2089 	hw->rss_rx_queues = dev->data->nb_rx_queues;
2090 
2091 	return ret;
2092 }
2093 
2094 static int virtio_dev_rss_reta_query(struct rte_eth_dev *dev,
2095 			 struct rte_eth_rss_reta_entry64 *reta_conf,
2096 			 uint16_t reta_size)
2097 {
2098 	struct virtio_hw *hw = dev->data->dev_private;
2099 	int idx, i;
2100 
2101 	if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2102 		return -ENOTSUP;
2103 
2104 	if (reta_size != VIRTIO_NET_RSS_RETA_SIZE)
2105 		return -EINVAL;
2106 
2107 	for (i = 0; i < reta_size; i++) {
2108 		idx = i / RTE_ETH_RETA_GROUP_SIZE;
2109 		reta_conf[idx].reta[i % RTE_ETH_RETA_GROUP_SIZE] = hw->rss_reta[i];
2110 	}
2111 
2112 	return 0;
2113 }
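
/*
 * Illustrative sketch (not part of the driver): both RETA handlers are
 * reached through rte_eth_dev_rss_reta_update()/_query(), which must be
 * called with exactly VIRTIO_NET_RSS_RETA_SIZE entries.  E.g., spreading
 * traffic across two Rx queues:
 *
 *	struct rte_eth_rss_reta_entry64 reta[VIRTIO_NET_RSS_RETA_SIZE /
 *					     RTE_ETH_RETA_GROUP_SIZE];
 *	int i;
 *
 *	memset(reta, 0, sizeof(reta));
 *	for (i = 0; i < VIRTIO_NET_RSS_RETA_SIZE; i++) {
 *		reta[i / RTE_ETH_RETA_GROUP_SIZE].mask |=
 *			1ULL << (i % RTE_ETH_RETA_GROUP_SIZE);
 *		reta[i / RTE_ETH_RETA_GROUP_SIZE].reta[i %
 *				RTE_ETH_RETA_GROUP_SIZE] = i % 2;
 *	}
 *	ret = rte_eth_dev_rss_reta_update(port_id, reta,
 *					  VIRTIO_NET_RSS_RETA_SIZE);
 */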
2114 
2115 /*
2116  * The default RSS hash key is the default key used by
2117  * Intel IXGBE devices. The application can update it
2118  * with any 40-byte key value.
2119  */
2120 static uint8_t rss_intel_key[VIRTIO_NET_RSS_KEY_SIZE] = {
2121 	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2122 	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2123 	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2124 	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2125 	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2126 };
2127 
2128 static int
2129 virtio_dev_rss_init(struct rte_eth_dev *eth_dev)
2130 {
2131 	struct virtio_hw *hw = eth_dev->data->dev_private;
2132 	uint16_t nb_rx_queues = eth_dev->data->nb_rx_queues;
2133 	struct rte_eth_rss_conf *rss_conf;
2134 	int ret, i;
2135 
2136 	if (!nb_rx_queues) {
2137 		PMD_INIT_LOG(ERR, "Cannot init RSS if no Rx queues");
2138 		return -EINVAL;
2139 	}
2140 
2141 	rss_conf = &eth_dev->data->dev_conf.rx_adv_conf.rss_conf;
2142 
2143 	ret = virtio_dev_get_rss_config(hw, &hw->rss_hash_types);
2144 	if (ret)
2145 		return ret;
2146 
2147 	if (rss_conf->rss_hf) {
2148 		/* Ensure requested hash types are supported by the device */
2149 		if (rss_conf->rss_hf & ~virtio_to_ethdev_rss_offloads(hw->rss_hash_types))
2150 			return -EINVAL;
2151 
2152 		hw->rss_hash_types = ethdev_to_virtio_rss_offloads(rss_conf->rss_hf);
2153 	}
2154 
2155 	if (!hw->rss_key) {
2156 		/* Set up the default RSS key if not already set by the user */
2157 		hw->rss_key = rte_malloc_socket("rss_key",
2158 				VIRTIO_NET_RSS_KEY_SIZE, 0,
2159 				eth_dev->device->numa_node);
2160 		if (!hw->rss_key) {
2161 			PMD_INIT_LOG(ERR, "Failed to allocate RSS key");
2162 			return -1;
2163 		}
2164 	}
2165 
2166 	if (rss_conf->rss_key && rss_conf->rss_key_len) {
2167 		if (rss_conf->rss_key_len != VIRTIO_NET_RSS_KEY_SIZE) {
2168 			PMD_INIT_LOG(ERR, "Driver only supports RSS keys of %u bytes",
2169 					VIRTIO_NET_RSS_KEY_SIZE);
2170 			return -EINVAL;
2171 		}
2172 		memcpy(hw->rss_key, rss_conf->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2173 	} else {
2174 		memcpy(hw->rss_key, rss_intel_key, VIRTIO_NET_RSS_KEY_SIZE);
2175 	}
2176 
2177 	if (!hw->rss_reta) {
2178 		/* Setup default RSS reta if not already setup by the user */
2179 		/* Set up the default RSS reta if not already set by the user */
2180 				VIRTIO_NET_RSS_RETA_SIZE * sizeof(uint16_t), 0,
2181 				eth_dev->device->numa_node);
2182 		if (!hw->rss_reta) {
2183 			PMD_INIT_LOG(ERR, "Failed to allocate RSS reta");
2184 			return -1;
2185 		}
2186 
2187 		hw->rss_rx_queues = 0;
2188 	}
2189 
2190 	/* Re-initialize the RSS reta if the number of RX queues has changed */
2191 	if (hw->rss_rx_queues != nb_rx_queues) {
2192 		for (i = 0; i < VIRTIO_NET_RSS_RETA_SIZE; i++)
2193 			hw->rss_reta[i] = i % nb_rx_queues;
2194 		hw->rss_rx_queues = nb_rx_queues;
2195 	}
2196 
2197 	return 0;
2198 }
2199 
2200 #define DUPLEX_UNKNOWN   0xff
2201 /* reset device and renegotiate features if needed */
2202 static int
2203 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
2204 {
2205 	struct virtio_hw *hw = eth_dev->data->dev_private;
2206 	struct virtio_net_config *config;
2207 	struct virtio_net_config local_config;
2208 	int ret;
2209 
2210 	/* Reset the device, although not strictly necessary at startup */
2211 	virtio_reset(hw);
2212 
2213 	if (hw->vqs) {
2214 		virtio_dev_free_mbufs(eth_dev);
2215 		virtio_free_queues(hw);
2216 	}
2217 
2218 	/* Tell the host we've noticed this device. */
2219 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
2220 
2221 	/* Tell the host we know how to drive the device. */
2222 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
2223 	if (virtio_ethdev_negotiate_features(hw, req_features) < 0)
2224 		return -1;
2225 
2226 	hw->weak_barriers = !virtio_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);
2227 
2228 	/* If the host does not support both status and MSI-X, disable LSC */
2229 	if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS) && hw->intr_lsc)
2230 		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
2231 	else
2232 		eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
2233 
2234 	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
2235 
2236 	/* Set up the Rx header size for the device */
2237 	if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
2238 	    virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2239 	    virtio_with_packed_queue(hw))
2240 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2241 	else
2242 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
2243 
2244 	/* Copy the permanent MAC address into virtio_hw */
2245 	virtio_get_hwaddr(hw);
2246 	rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
2247 			&eth_dev->data->mac_addrs[0]);
2248 	PMD_INIT_LOG(DEBUG,
2249 		     "PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
2250 		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
2251 		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
2252 
2253 	hw->get_speed_via_feat = hw->speed == RTE_ETH_SPEED_NUM_UNKNOWN &&
2254 			     virtio_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX);
2255 	if (hw->get_speed_via_feat)
2256 		virtio_get_speed_duplex(eth_dev, NULL);
2257 	if (hw->duplex == DUPLEX_UNKNOWN)
2258 		hw->duplex = RTE_ETH_LINK_FULL_DUPLEX;
2259 	PMD_INIT_LOG(DEBUG, "link speed = %u, duplex = %d",
2260 		hw->speed, hw->duplex);
2261 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
2262 		config = &local_config;
2263 
2264 		virtio_read_dev_config(hw,
2265 			offsetof(struct virtio_net_config, mac),
2266 			&config->mac, sizeof(config->mac));
2267 
2268 		if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2269 			virtio_read_dev_config(hw,
2270 				offsetof(struct virtio_net_config, status),
2271 				&config->status, sizeof(config->status));
2272 		} else {
2273 			PMD_INIT_LOG(DEBUG,
2274 				     "VIRTIO_NET_F_STATUS is not supported");
2275 			config->status = 0;
2276 		}
2277 
2278 		if (virtio_with_feature(hw, VIRTIO_NET_F_MQ) ||
2279 				virtio_with_feature(hw, VIRTIO_NET_F_RSS)) {
2280 			virtio_read_dev_config(hw,
2281 				offsetof(struct virtio_net_config, max_virtqueue_pairs),
2282 				&config->max_virtqueue_pairs,
2283 				sizeof(config->max_virtqueue_pairs));
2284 		} else {
2285 			PMD_INIT_LOG(DEBUG,
2286 				     "Neither VIRTIO_NET_F_MQ nor VIRTIO_NET_F_RSS are supported");
2287 			config->max_virtqueue_pairs = 1;
2288 		}
2289 
2290 		hw->max_queue_pairs = config->max_virtqueue_pairs;
2291 
2292 		if (virtio_with_feature(hw, VIRTIO_NET_F_MTU)) {
2293 			virtio_read_dev_config(hw,
2294 				offsetof(struct virtio_net_config, mtu),
2295 				&config->mtu,
2296 				sizeof(config->mtu));
2297 
2298 			/*
2299 			 * MTU value has already been checked at negotiation
2300 			 * time, but check again in case it has changed since
2301 			 * then, which should not happen.
2302 			 */
2303 			if (config->mtu < RTE_ETHER_MIN_MTU) {
2304 				PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
2305 						config->mtu);
2306 				return -1;
2307 			}
2308 
2309 			hw->max_mtu = config->mtu;
2310 			/* Set initial MTU to the maximum supported by the host */
2311 			eth_dev->data->mtu = config->mtu;
2312 
2313 		} else {
2314 			hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
2315 				VLAN_TAG_LEN - hw->vtnet_hdr_size;
2316 		}
2317 
2318 		hw->rss_hash_types = 0;
2319 		if (virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2320 			if (virtio_dev_rss_init(eth_dev))
2321 				return -1;
2322 
2323 		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
2324 				config->max_virtqueue_pairs);
2325 		PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
2326 		PMD_INIT_LOG(DEBUG,
2327 				"PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
2328 				config->mac[0], config->mac[1],
2329 				config->mac[2], config->mac[3],
2330 				config->mac[4], config->mac[5]);
2331 	} else {
2332 		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
2333 		hw->max_queue_pairs = 1;
2334 		hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
2335 			VLAN_TAG_LEN - hw->vtnet_hdr_size;
2336 	}
2337 
2338 	ret = virtio_alloc_queues(eth_dev);
2339 	if (ret < 0)
2340 		return ret;
2341 
2342 	if (eth_dev->data->dev_conf.intr_conf.rxq) {
2343 		if (virtio_configure_intr(eth_dev) < 0) {
2344 			PMD_INIT_LOG(ERR, "failed to configure interrupt");
2345 			virtio_free_queues(hw);
2346 			return -1;
2347 		}
2348 	}
2349 
2350 	virtio_reinit_complete(hw);
2351 
2352 	return 0;
2353 }
2354 
2355 /*
2356  * This function is based on probe() function in virtio_pci.c
2357  * It returns 0 on success.
2358  */
2359 int
2360 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
2361 {
2362 	struct virtio_hw *hw = eth_dev->data->dev_private;
2363 	uint32_t speed = RTE_ETH_SPEED_NUM_UNKNOWN;
2364 	int vectorized = 0;
2365 	int ret;
2366 
2367 	if (sizeof(struct virtio_net_hdr_mrg_rxbuf) > RTE_PKTMBUF_HEADROOM) {
2368 		PMD_INIT_LOG(ERR,
2369 			"Insufficient headroom: required = %d, avail = %d",
2370 			(int)sizeof(struct virtio_net_hdr_mrg_rxbuf),
2371 			RTE_PKTMBUF_HEADROOM);
2372 
2373 		return -1;
2374 	}
2375 
2376 	eth_dev->dev_ops = &virtio_eth_dev_ops;
2377 
2378 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
2379 		set_rxtx_funcs(eth_dev);
2380 		return 0;
2381 	}
2382 
2383 	ret = virtio_dev_devargs_parse(eth_dev->device->devargs, &speed, &vectorized);
2384 	if (ret < 0)
2385 		return ret;
2386 	hw->speed = speed;
2387 	hw->duplex = DUPLEX_UNKNOWN;
2388 
2389 	/* Allocate memory for storing MAC addresses */
2390 	eth_dev->data->mac_addrs = rte_zmalloc("virtio",
2391 				VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN, 0);
2392 	if (eth_dev->data->mac_addrs == NULL) {
2393 		PMD_INIT_LOG(ERR,
2394 			"Failed to allocate %d bytes needed to store MAC addresses",
2395 			VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN);
2396 		return -ENOMEM;
2397 	}
2398 
2399 	rte_spinlock_init(&hw->state_lock);
2400 
2401 	/* reset device and negotiate default features */
2402 	ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
2403 	if (ret < 0)
2404 		goto err_virtio_init;
2405 
2406 	if (vectorized) {
2407 		if (!virtio_with_packed_queue(hw)) {
2408 			hw->use_vec_rx = 1;
2409 		} else {
2410 #if defined(CC_AVX512_SUPPORT) || defined(RTE_ARCH_ARM)
2411 			hw->use_vec_rx = 1;
2412 			hw->use_vec_tx = 1;
2413 #else
2414 			PMD_DRV_LOG(INFO,
2415 				"build environment does not support packed ring vectorized path");
2416 #endif
2417 		}
2418 	}
2419 
2420 	hw->opened = 1;
2421 
2422 	return 0;
2423 
2424 err_virtio_init:
2425 	rte_free(eth_dev->data->mac_addrs);
2426 	eth_dev->data->mac_addrs = NULL;
2427 	return ret;
2428 }
2429 
2430 static uint32_t
2431 virtio_dev_speed_capa_get(uint32_t speed)
2432 {
2433 	switch (speed) {
2434 	case RTE_ETH_SPEED_NUM_10G:
2435 		return RTE_ETH_LINK_SPEED_10G;
2436 	case RTE_ETH_SPEED_NUM_20G:
2437 		return RTE_ETH_LINK_SPEED_20G;
2438 	case RTE_ETH_SPEED_NUM_25G:
2439 		return RTE_ETH_LINK_SPEED_25G;
2440 	case RTE_ETH_SPEED_NUM_40G:
2441 		return RTE_ETH_LINK_SPEED_40G;
2442 	case RTE_ETH_SPEED_NUM_50G:
2443 		return RTE_ETH_LINK_SPEED_50G;
2444 	case RTE_ETH_SPEED_NUM_56G:
2445 		return RTE_ETH_LINK_SPEED_56G;
2446 	case RTE_ETH_SPEED_NUM_100G:
2447 		return RTE_ETH_LINK_SPEED_100G;
2448 	case RTE_ETH_SPEED_NUM_200G:
2449 		return RTE_ETH_LINK_SPEED_200G;
2450 	default:
2451 		return 0;
2452 	}
2453 }
2454 
2455 static int vectorized_check_handler(__rte_unused const char *key,
2456 		const char *value, void *ret_val)
2457 {
2458 	if (strcmp(value, "1") == 0)
2459 		*(int *)ret_val = 1;
2460 	else
2461 		*(int *)ret_val = 0;
2462 
2463 	return 0;
2464 }
2465 
2466 #define VIRTIO_ARG_SPEED      "speed"
2467 #define VIRTIO_ARG_VECTORIZED "vectorized"
2468 
2469 static int
2470 link_speed_handler(const char *key __rte_unused,
2471 		const char *value, void *ret_val)
2472 {
2473 	uint32_t val;
2474 	if (!value || !ret_val)
2475 		return -EINVAL;
2476 	val = strtoul(value, NULL, 0);
2477 	/* validate input */
2478 	if (virtio_dev_speed_capa_get(val) == 0)
2479 		return -EINVAL;
2480 	*(uint32_t *)ret_val = val;
2481 
2482 	return 0;
2483 }
2484 
2485 
2486 static int
2487 virtio_dev_devargs_parse(struct rte_devargs *devargs, uint32_t *speed, int *vectorized)
2488 {
2489 	struct rte_kvargs *kvlist;
2490 	int ret = 0;
2491 
2492 	if (devargs == NULL)
2493 		return 0;
2494 
2495 	kvlist = rte_kvargs_parse(devargs->args, NULL);
2496 	if (kvlist == NULL) {
2497 		PMD_INIT_LOG(ERR, "error when parsing param");
2498 		return 0;
2499 	}
2500 
2501 	if (speed && rte_kvargs_count(kvlist, VIRTIO_ARG_SPEED) == 1) {
2502 		ret = rte_kvargs_process(kvlist,
2503 					VIRTIO_ARG_SPEED,
2504 					link_speed_handler, speed);
2505 		if (ret < 0) {
2506 			PMD_INIT_LOG(ERR, "Failed to parse %s",
2507 					VIRTIO_ARG_SPEED);
2508 			goto exit;
2509 		}
2510 	}
2511 
2512 	if (vectorized &&
2513 		rte_kvargs_count(kvlist, VIRTIO_ARG_VECTORIZED) == 1) {
2514 		ret = rte_kvargs_process(kvlist,
2515 				VIRTIO_ARG_VECTORIZED,
2516 				vectorized_check_handler, vectorized);
2517 		if (ret < 0) {
2518 			PMD_INIT_LOG(ERR, "Failed to parse %s",
2519 					VIRTIO_ARG_VECTORIZED);
2520 			goto exit;
2521 		}
2522 	}
2523 
2524 exit:
2525 	rte_kvargs_free(kvlist);
2526 	return ret;
2527 }
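
/*
 * Illustrative usage (the device address is an assumption): both devargs
 * parsed above can be passed on the EAL command line, e.g.:
 *
 *	dpdk-testpmd -a 0000:00:04.0,speed=10000,vectorized=1 -- -i
 *
 * "speed" must map to a non-zero capability in virtio_dev_speed_capa_get();
 * "vectorized" is a boolean and only the literal "1" enables it.
 */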
2528 
2529 static uint8_t
2530 rx_offload_enabled(struct virtio_hw *hw)
2531 {
2532 	return virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
2533 		virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2534 		virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
2535 }
2536 
2537 static uint8_t
2538 tx_offload_enabled(struct virtio_hw *hw)
2539 {
2540 	return virtio_with_feature(hw, VIRTIO_NET_F_CSUM) ||
2541 		virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
2542 		virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
2543 }
2544 
2545 /*
2546  * Configure virtio device
2547  * It returns 0 on success.
2548  */
2549 static int
2550 virtio_dev_configure(struct rte_eth_dev *dev)
2551 {
2552 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2553 	const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
2554 	struct virtio_hw *hw = dev->data->dev_private;
2555 	uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
2556 		hw->vtnet_hdr_size;
2557 	uint64_t rx_offloads = rxmode->offloads;
2558 	uint64_t tx_offloads = txmode->offloads;
2559 	uint64_t req_features;
2560 	int ret;
2561 
2562 	PMD_INIT_LOG(DEBUG, "configure");
2563 	req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
2564 
2565 	if (rxmode->mq_mode != RTE_ETH_MQ_RX_NONE && rxmode->mq_mode != RTE_ETH_MQ_RX_RSS) {
2566 		PMD_DRV_LOG(ERR,
2567 			"Unsupported Rx multi-queue mode %d",
2568 			rxmode->mq_mode);
2569 		return -EINVAL;
2570 	}
2571 
2572 	if (txmode->mq_mode != RTE_ETH_MQ_TX_NONE) {
2573 		PMD_DRV_LOG(ERR,
2574 			"Unsupported Tx multi-queue mode %d",
2575 			txmode->mq_mode);
2576 		return -EINVAL;
2577 	}
2578 
2579 	if (dev->data->dev_conf.intr_conf.rxq) {
2580 		ret = virtio_init_device(dev, hw->req_guest_features);
2581 		if (ret < 0)
2582 			return ret;
2583 	}
2584 
2585 	if (rxmode->mq_mode == RTE_ETH_MQ_RX_RSS)
2586 		req_features |= (1ULL << VIRTIO_NET_F_RSS);
2587 
2588 	if (rxmode->mtu > hw->max_mtu)
2589 		req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
2590 
2591 	hw->max_rx_pkt_len = ether_hdr_len + rxmode->mtu;
2592 
2593 	if (rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2594 			   RTE_ETH_RX_OFFLOAD_TCP_CKSUM))
2595 		req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
2596 
2597 	if (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)
2598 		req_features |=
2599 			(1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2600 			(1ULL << VIRTIO_NET_F_GUEST_TSO6);
2601 
2602 	if (tx_offloads & (RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
2603 			   RTE_ETH_TX_OFFLOAD_TCP_CKSUM))
2604 		req_features |= (1ULL << VIRTIO_NET_F_CSUM);
2605 
2606 	if (tx_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)
2607 		req_features |=
2608 			(1ULL << VIRTIO_NET_F_HOST_TSO4) |
2609 			(1ULL << VIRTIO_NET_F_HOST_TSO6);
2610 
2611 	/* if request features changed, reinit the device */
2612 	if (req_features != hw->req_guest_features) {
2613 		ret = virtio_init_device(dev, req_features);
2614 		if (ret < 0)
2615 			return ret;
2616 	}
2617 
2618 	if ((rxmode->mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) &&
2619 			!virtio_with_feature(hw, VIRTIO_NET_F_RSS)) {
2620 		PMD_DRV_LOG(ERR, "RSS support requested but not supported by the device");
2621 		return -ENOTSUP;
2622 	}
2623 
2624 	if ((rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2625 			    RTE_ETH_RX_OFFLOAD_TCP_CKSUM)) &&
2626 		!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
2627 		PMD_DRV_LOG(ERR,
2628 			"rx checksum not available on this host");
2629 		return -ENOTSUP;
2630 	}
2631 
2632 	if ((rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) &&
2633 		(!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2634 		 !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
2635 		PMD_DRV_LOG(ERR,
2636 			"Large Receive Offload not available on this host");
2637 		return -ENOTSUP;
2638 	}
2639 
2640 	/* start control queue */
2641 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
2642 		virtio_dev_cq_start(dev);
2643 
2644 	if (rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
2645 		hw->vlan_strip = 1;
2646 
2647 	hw->rx_ol_scatter = !!(rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER);
2648 
2649 	if ((rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER) &&
2650 			!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2651 		PMD_DRV_LOG(ERR,
2652 			    "vlan filtering not available on this host");
2653 		return -ENOTSUP;
2654 	}
2655 
2656 	hw->has_tx_offload = tx_offload_enabled(hw);
2657 	hw->has_rx_offload = rx_offload_enabled(hw);
2658 
2659 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2660 		/* Enable vector (0) for Link State Interrupt */
2661 		if (VIRTIO_OPS(hw)->set_config_irq(hw, 0) ==
2662 				VIRTIO_MSI_NO_VECTOR) {
2663 			PMD_DRV_LOG(ERR, "failed to set config vector");
2664 			return -EBUSY;
2665 		}
2666 
2667 	if (virtio_with_packed_queue(hw)) {
2668 #if defined(RTE_ARCH_X86_64) && defined(CC_AVX512_SUPPORT)
2669 		if ((hw->use_vec_rx || hw->use_vec_tx) &&
2670 		    (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
2671 		     !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2672 		     !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2673 		     rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512)) {
2674 			PMD_DRV_LOG(INFO,
2675 				"disabled packed ring vectorized path: requirements not met");
2676 			hw->use_vec_rx = 0;
2677 			hw->use_vec_tx = 0;
2678 		}
2679 #elif defined(RTE_ARCH_ARM)
2680 		if ((hw->use_vec_rx || hw->use_vec_tx) &&
2681 		    (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) ||
2682 		     !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2683 		     !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2684 		     rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)) {
2685 			PMD_DRV_LOG(INFO,
2686 				"disabled packed ring vectorized path: requirements not met");
2687 			hw->use_vec_rx = 0;
2688 			hw->use_vec_tx = 0;
2689 		}
2690 #else
2691 		hw->use_vec_rx = 0;
2692 		hw->use_vec_tx = 0;
2693 #endif
2694 
2695 		if (hw->use_vec_rx) {
2696 			if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2697 				PMD_DRV_LOG(INFO,
2698 					"disabled packed ring vectorized Rx: mrg_rxbuf enabled");
2699 				hw->use_vec_rx = 0;
2700 			}
2701 
2702 			if (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) {
2703 				PMD_DRV_LOG(INFO,
2704 					"disabled packed ring vectorized Rx: TCP_LRO enabled");
2705 				hw->use_vec_rx = 0;
2706 			}
2707 		}
2708 	} else {
2709 		if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER)) {
2710 			hw->use_inorder_tx = 1;
2711 			hw->use_inorder_rx = 1;
2712 			hw->use_vec_rx = 0;
2713 		}
2714 
2715 		if (hw->use_vec_rx) {
2716 #if defined RTE_ARCH_ARM
2717 			if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
2718 				PMD_DRV_LOG(INFO,
2719 					"disabled split ring vectorized path: requirements not met");
2720 				hw->use_vec_rx = 0;
2721 			}
2722 #endif
2723 			if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2724 				PMD_DRV_LOG(INFO,
2725 					"disabled split ring vectorized Rx: mrg_rxbuf enabled");
2726 				hw->use_vec_rx = 0;
2727 			}
2728 
2729 			if (rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2730 					   RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
2731 					   RTE_ETH_RX_OFFLOAD_TCP_LRO |
2732 					   RTE_ETH_RX_OFFLOAD_VLAN_STRIP)) {
2733 				PMD_DRV_LOG(INFO,
2734 					"disabled split ring vectorized Rx: offloads enabled");
2735 				hw->use_vec_rx = 0;
2736 			}
2737 
2738 			if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
2739 				PMD_DRV_LOG(INFO,
2740 					"disabled split ring vectorized Rx: max SIMD bitwidth too low");
2741 				hw->use_vec_rx = 0;
2742 			}
2743 		}
2744 	}
2745 
2746 	return 0;
2747 }
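
/*
 * Illustrative sketch (not part of the driver): the feature (re)negotiation
 * above is driven by the offloads the application requests, e.g.:
 *
 *	struct rte_eth_conf conf = {0};
 *
 *	conf.rxmode.offloads = RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
 *			       RTE_ETH_RX_OFFLOAD_TCP_LRO;
 *	ret = rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *
 * Requesting RTE_ETH_RX_OFFLOAD_TCP_LRO makes the PMD ask the host for
 * VIRTIO_NET_F_GUEST_TSO4/6; configure fails with -ENOTSUP if the host
 * does not offer them.
 */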
2748 
2749 
2750 static int
2751 virtio_dev_start(struct rte_eth_dev *dev)
2752 {
2753 	uint16_t nb_queues, i;
2754 	struct virtqueue *vq;
2755 	struct virtio_hw *hw = dev->data->dev_private;
2756 	int ret;
2757 
2758 	/* Finish the initialization of the queues */
2759 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2760 		ret = virtio_dev_rx_queue_setup_finish(dev, i);
2761 		if (ret < 0)
2762 			return ret;
2763 	}
2764 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2765 		ret = virtio_dev_tx_queue_setup_finish(dev, i);
2766 		if (ret < 0)
2767 			return ret;
2768 	}
2769 
2770 	/* check if lsc interrupt feature is enabled */
2771 	if (dev->data->dev_conf.intr_conf.lsc) {
2772 		if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2773 			PMD_DRV_LOG(ERR, "link status not supported by host");
2774 			return -ENOTSUP;
2775 		}
2776 	}
2777 
2778 	/* Enable uio/vfio intr/eventfd mapping: although we already did that
2779 	 * in device configure, it could have been unmapped when the device
2780 	 * was stopped.
2781 	 */
2782 	if (dev->data->dev_conf.intr_conf.lsc ||
2783 	    dev->data->dev_conf.intr_conf.rxq) {
2784 		virtio_intr_disable(dev);
2785 
2786 		/* Setup interrupt callback  */
2787 		if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2788 			rte_intr_callback_register(dev->intr_handle,
2789 						   virtio_interrupt_handler,
2790 						   dev);
2791 
2792 		if (virtio_intr_enable(dev) < 0) {
2793 			PMD_DRV_LOG(ERR, "interrupt enable failed");
2794 			return -EIO;
2795 		}
2796 	}
2797 
2798 	/* Notify the backend.
2799 	 * Otherwise the tap backend might already have stopped its queue due to
2800 	 * fullness, and the vhost backend would have no chance to be woken up.
2801 	 */
2802 	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2803 	if (hw->max_queue_pairs > 1) {
2804 		if (virtio_set_multiple_queues(dev, nb_queues) != 0)
2805 			return -EINVAL;
2806 	}
2807 
2808 	PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
2809 
2810 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2811 		vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2812 		/* Flush the old packets */
2813 		virtqueue_rxvq_flush(vq);
2814 		virtqueue_notify(vq);
2815 	}
2816 
2817 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2818 		vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2819 		virtqueue_notify(vq);
2820 	}
2821 
2822 	PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
2823 
2824 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2825 		vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2826 		VIRTQUEUE_DUMP(vq);
2827 	}
2828 
2829 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2830 		vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2831 		VIRTQUEUE_DUMP(vq);
2832 	}
2833 
2834 	set_rxtx_funcs(dev);
2835 	hw->started = 1;
2836 
2837 	/* Initialize Link state */
2838 	virtio_dev_link_update(dev, 0);
2839 
2840 	return 0;
2841 }
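
/*
 * Illustrative call order from an application (not part of the driver):
 *
 *	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *	rte_eth_rx_queue_setup(port_id, 0, nb_rxd, socket_id, NULL, mbuf_pool);
 *	rte_eth_tx_queue_setup(port_id, 0, nb_txd, socket_id, NULL);
 *	rte_eth_dev_start(port_id);	// reaches virtio_dev_start() via dev_ops
 */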
2842 
2843 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
2844 {
2845 	struct virtio_hw *hw = dev->data->dev_private;
2846 	uint16_t nr_vq = virtio_get_nr_vq(hw);
2847 	const char *type __rte_unused;
2848 	unsigned int i, mbuf_num = 0;
2849 	struct virtqueue *vq;
2850 	struct rte_mbuf *buf;
2851 	int queue_type;
2852 
2853 	if (hw->vqs == NULL)
2854 		return;
2855 
2856 	for (i = 0; i < nr_vq; i++) {
2857 		vq = hw->vqs[i];
2858 		if (!vq)
2859 			continue;
2860 
2861 		queue_type = virtio_get_queue_type(hw, i);
2862 		if (queue_type == VTNET_RQ)
2863 			type = "rxq";
2864 		else if (queue_type == VTNET_TQ)
2865 			type = "txq";
2866 		else
2867 			continue;
2868 
2869 		PMD_INIT_LOG(DEBUG,
2870 			"Before freeing %s[%d] used and unused buf",
2871 			type, i);
2872 		VIRTQUEUE_DUMP(vq);
2873 
2874 		while ((buf = virtqueue_detach_unused(vq)) != NULL) {
2875 			rte_pktmbuf_free(buf);
2876 			mbuf_num++;
2877 		}
2878 
2879 		PMD_INIT_LOG(DEBUG,
2880 			"After freeing %s[%d] used and unused buf",
2881 			type, i);
2882 		VIRTQUEUE_DUMP(vq);
2883 	}
2884 
2885 	PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
2886 }
2887 
2888 static void
2889 virtio_tx_completed_cleanup(struct rte_eth_dev *dev)
2890 {
2891 	struct virtio_hw *hw = dev->data->dev_private;
2892 	struct virtqueue *vq;
2893 	int qidx;
2894 	void (*xmit_cleanup)(struct virtqueue *vq, uint16_t nb_used);
2895 
2896 	if (virtio_with_packed_queue(hw)) {
2897 		if (hw->use_vec_tx)
2898 			xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2899 		else if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER))
2900 			xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2901 		else
2902 			xmit_cleanup = &virtio_xmit_cleanup_normal_packed;
2903 	} else {
2904 		if (hw->use_inorder_tx)
2905 			xmit_cleanup = &virtio_xmit_cleanup_inorder;
2906 		else
2907 			xmit_cleanup = &virtio_xmit_cleanup;
2908 	}
2909 
2910 	for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) {
2911 		vq = hw->vqs[2 * qidx + VTNET_SQ_TQ_QUEUE_IDX];
2912 		if (vq != NULL)
2913 			xmit_cleanup(vq, virtqueue_nused(vq));
2914 	}
2915 }
2916 
2917 /*
2918  * Stop device: disable interrupt and mark link down
2919  */
2920 int
2921 virtio_dev_stop(struct rte_eth_dev *dev)
2922 {
2923 	struct virtio_hw *hw = dev->data->dev_private;
2924 	struct rte_eth_link link;
2925 	struct rte_eth_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
2926 
2927 	PMD_INIT_LOG(DEBUG, "stop");
2928 	dev->data->dev_started = 0;
2929 
2930 	rte_spinlock_lock(&hw->state_lock);
2931 	if (!hw->started)
2932 		goto out_unlock;
2933 	hw->started = 0;
2934 
2935 	virtio_tx_completed_cleanup(dev);
2936 
2937 	if (intr_conf->lsc || intr_conf->rxq) {
2938 		virtio_intr_disable(dev);
2939 
2940 		/* Reset interrupt callback  */
2941 		if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
2942 			rte_intr_callback_unregister(dev->intr_handle,
2943 						     virtio_interrupt_handler,
2944 						     dev);
2945 		}
2946 	}
2947 
2948 	memset(&link, 0, sizeof(link));
2949 	rte_eth_linkstatus_set(dev, &link);
2950 out_unlock:
2951 	rte_spinlock_unlock(&hw->state_lock);
2952 
2953 	return 0;
2954 }
2955 
2956 static int
2957 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
2958 {
2959 	struct rte_eth_link link;
2960 	uint16_t status;
2961 	struct virtio_hw *hw = dev->data->dev_private;
2962 
2963 	memset(&link, 0, sizeof(link));
2964 	link.link_duplex = hw->duplex;
2965 	link.link_speed  = hw->speed;
2966 	link.link_autoneg = RTE_ETH_LINK_AUTONEG;
2967 
2968 	if (!hw->started) {
2969 		link.link_status = RTE_ETH_LINK_DOWN;
2970 		link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2971 	} else if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2972 		PMD_INIT_LOG(DEBUG, "Get link status from hw");
2973 		virtio_read_dev_config(hw,
2974 				offsetof(struct virtio_net_config, status),
2975 				&status, sizeof(status));
2976 		if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2977 			link.link_status = RTE_ETH_LINK_DOWN;
2978 			link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2979 			PMD_INIT_LOG(DEBUG, "Port %d is down",
2980 				     dev->data->port_id);
2981 		} else {
2982 			link.link_status = RTE_ETH_LINK_UP;
2983 			if (hw->get_speed_via_feat)
2984 				virtio_get_speed_duplex(dev, &link);
2985 			PMD_INIT_LOG(DEBUG, "Port %d is up",
2986 				     dev->data->port_id);
2987 		}
2988 	} else {
2989 		link.link_status = RTE_ETH_LINK_UP;
2990 		if (hw->get_speed_via_feat)
2991 			virtio_get_speed_duplex(dev, &link);
2992 	}
2993 
2994 	return rte_eth_linkstatus_set(dev, &link);
2995 }
2996 
2997 static int
2998 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2999 {
3000 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
3001 	struct virtio_hw *hw = dev->data->dev_private;
3002 	uint64_t offloads = rxmode->offloads;
3003 
3004 	if (mask & RTE_ETH_VLAN_FILTER_MASK) {
3005 		if ((offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER) &&
3006 				!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
3007 
3008 			PMD_DRV_LOG(NOTICE,
3009 				"vlan filtering not available on this host");
3010 
3011 			return -ENOTSUP;
3012 		}
3013 	}
3014 
3015 	if (mask & RTE_ETH_VLAN_STRIP_MASK)
3016 		hw->vlan_strip = !!(offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
3017 
3018 	return 0;
3019 }
3020 
3021 static int
3022 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
3023 {
3024 	uint64_t tso_mask, host_features;
3025 	uint32_t rss_hash_types = 0;
3026 	struct virtio_hw *hw = dev->data->dev_private;
3027 	dev_info->speed_capa = virtio_dev_speed_capa_get(hw->speed);
3028 
3029 	dev_info->max_rx_queues =
3030 		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
3031 	dev_info->max_tx_queues =
3032 		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
3033 	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
3034 	dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
3035 	dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
3036 	dev_info->max_mtu = hw->max_mtu;
3037 
3038 	host_features = VIRTIO_OPS(hw)->get_features(hw);
3039 	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
3040 	if (host_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
3041 		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_SCATTER;
3042 	if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
3043 		dev_info->rx_offload_capa |=
3044 			RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
3045 			RTE_ETH_RX_OFFLOAD_UDP_CKSUM;
3046 	}
3047 	if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
3048 		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
3049 	tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
3050 		(1ULL << VIRTIO_NET_F_GUEST_TSO6);
3051 	if ((host_features & tso_mask) == tso_mask)
3052 		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
3053 
3054 	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
3055 				    RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
3056 	if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
3057 		dev_info->tx_offload_capa |=
3058 			RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
3059 			RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
3060 	}
3061 	tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
3062 		(1ULL << VIRTIO_NET_F_HOST_TSO6);
3063 	if ((host_features & tso_mask) == tso_mask)
3064 		dev_info->tx_offload_capa |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
3065 
3066 	if (host_features & (1ULL << VIRTIO_NET_F_RSS)) {
3067 		virtio_dev_get_rss_config(hw, &rss_hash_types);
3068 		dev_info->hash_key_size = VIRTIO_NET_RSS_KEY_SIZE;
3069 		dev_info->reta_size = VIRTIO_NET_RSS_RETA_SIZE;
3070 		dev_info->flow_type_rss_offloads =
3071 			virtio_to_ethdev_rss_offloads(rss_hash_types);
3072 	} else {
3073 		dev_info->hash_key_size = 0;
3074 		dev_info->reta_size = 0;
3075 		dev_info->flow_type_rss_offloads = 0;
3076 	}
3077 
3078 	if (host_features & (1ULL << VIRTIO_F_RING_PACKED)) {
3079 		/*
3080 		 * According to 2.7 Packed Virtqueues,
3081 		 * 2.7.10.1 Structure Size and Alignment:
3082 		 * The Queue Size value does not have to be a power of 2.
3083 		 */
3084 		dev_info->rx_desc_lim.nb_max = UINT16_MAX;
3085 		dev_info->tx_desc_lim.nb_max = UINT16_MAX;
3086 	} else {
3087 		/*
3088 		 * According to 2.6 Split Virtqueues:
3089 		 * Queue Size value is always a power of 2. The maximum Queue
3090 		 * Size value is 32768.
3091 		 */
3092 		dev_info->rx_desc_lim.nb_max = 32768;
3093 		dev_info->tx_desc_lim.nb_max = 32768;
3094 	}
3095 	/*
3096 	 * Actual minimum is not the same for virtqueues of different kinds,
3097 	 * but to avoid tangling the code with separate branches, rely on the
3098 	 * default thresholds, since the descriptor count must be at least their size.
3099 	 */
3100 	dev_info->rx_desc_lim.nb_min = RTE_MAX(DEFAULT_RX_FREE_THRESH,
3101 					       RTE_VIRTIO_VPMD_RX_REARM_THRESH);
3102 	dev_info->tx_desc_lim.nb_min = DEFAULT_TX_FREE_THRESH;
3103 	dev_info->rx_desc_lim.nb_align = 1;
3104 	dev_info->tx_desc_lim.nb_align = 1;
3105 
3106 	return 0;
3107 }
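
/*
 * Illustrative sketch (not part of the driver): applications discover the
 * limits filled in above before setting up queues, e.g.:
 *
 *	struct rte_eth_dev_info info;
 *
 *	ret = rte_eth_dev_info_get(port_id, &info);
 *	// info.rx_desc_lim.nb_max is 32768 for split rings and
 *	// UINT16_MAX for packed rings, per the logic above.
 */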
3108 
3109 /*
3110  * It enables testpmd to collect per-queue stats.
3111  */
3112 static int
3113 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
3114 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
3115 __rte_unused uint8_t is_rx)
3116 {
3117 	return 0;
3118 }
3119 
3120 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_init, init, NOTICE);
3121 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_driver, driver, NOTICE);
3122