xref: /dpdk/drivers/net/virtio/virtio_ethdev.c (revision 09442498ef736d0a96632cf8b8c15d8ca78a6468)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <errno.h>
10 #include <unistd.h>
11 
12 #include <ethdev_driver.h>
13 #include <rte_memcpy.h>
14 #include <rte_string_fns.h>
15 #include <rte_memzone.h>
16 #include <rte_malloc.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_ether.h>
19 #include <rte_ip.h>
20 #include <rte_arp.h>
21 #include <rte_common.h>
22 #include <rte_errno.h>
23 #include <rte_cpuflags.h>
24 #include <rte_vect.h>
25 #include <rte_memory.h>
26 #include <rte_eal_paging.h>
27 #include <rte_eal.h>
28 #include <dev_driver.h>
29 #include <rte_cycles.h>
30 #include <rte_kvargs.h>
31 
32 #include "virtio_ethdev.h"
33 #include "virtio.h"
34 #include "virtio_logs.h"
35 #include "virtqueue.h"
36 #include "virtio_rxtx.h"
37 #include "virtio_rxtx_simple.h"
38 #include "virtio_user/virtio_user_dev.h"
39 
40 static int  virtio_dev_configure(struct rte_eth_dev *dev);
41 static int  virtio_dev_start(struct rte_eth_dev *dev);
42 static int virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
43 static int virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
44 static int virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
45 static int virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
46 static uint32_t virtio_dev_speed_capa_get(uint32_t speed);
47 static int virtio_dev_devargs_parse(struct rte_devargs *devargs,
48 	uint32_t *speed,
49 	int *vectorized);
50 static int virtio_dev_info_get(struct rte_eth_dev *dev,
51 				struct rte_eth_dev_info *dev_info);
52 static int virtio_dev_link_update(struct rte_eth_dev *dev,
53 	int wait_to_complete);
54 static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
55 static int virtio_dev_rss_hash_update(struct rte_eth_dev *dev,
56 		struct rte_eth_rss_conf *rss_conf);
57 static int virtio_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
58 		struct rte_eth_rss_conf *rss_conf);
59 static int virtio_dev_rss_reta_update(struct rte_eth_dev *dev,
60 			 struct rte_eth_rss_reta_entry64 *reta_conf,
61 			 uint16_t reta_size);
62 static int virtio_dev_rss_reta_query(struct rte_eth_dev *dev,
63 			 struct rte_eth_rss_reta_entry64 *reta_conf,
64 			 uint16_t reta_size);
65 
66 static void virtio_set_hwaddr(struct virtio_hw *hw);
67 static void virtio_get_hwaddr(struct virtio_hw *hw);
68 
69 static int virtio_dev_stats_get(struct rte_eth_dev *dev,
70 				 struct rte_eth_stats *stats);
71 static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
72 				 struct rte_eth_xstat *xstats, unsigned n);
73 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
74 				       struct rte_eth_xstat_name *xstats_names,
75 				       unsigned limit);
76 static int virtio_dev_stats_reset(struct rte_eth_dev *dev);
77 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
78 static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
79 				uint16_t vlan_id, int on);
80 static int virtio_mac_addr_add(struct rte_eth_dev *dev,
81 				struct rte_ether_addr *mac_addr,
82 				uint32_t index, uint32_t vmdq);
83 static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
84 static int virtio_mac_addr_set(struct rte_eth_dev *dev,
85 				struct rte_ether_addr *mac_addr);
86 
87 static int virtio_intr_disable(struct rte_eth_dev *dev);
88 static int virtio_get_monitor_addr(void *rx_queue,
89 				struct rte_power_monitor_cond *pmc);
90 
91 static int virtio_dev_queue_stats_mapping_set(
92 	struct rte_eth_dev *eth_dev,
93 	uint16_t queue_id,
94 	uint8_t stat_idx,
95 	uint8_t is_rx);
96 
97 static void virtio_notify_peers(struct rte_eth_dev *dev);
98 static void virtio_ack_link_announce(struct rte_eth_dev *dev);
99 
100 struct rte_virtio_xstats_name_off {
101 	char name[RTE_ETH_XSTATS_NAME_SIZE];
102 	unsigned offset;
103 };
104 
105 /* [rt]x_qX_ is prepended to the name string here */
106 static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
107 	{"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
108 	{"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
109 	{"errors",                 offsetof(struct virtnet_rx, stats.errors)},
110 	{"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
111 	{"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
112 	{"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
113 	{"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
114 	{"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
115 	{"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
116 	{"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
117 	{"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
118 	{"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
119 	{"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
120 };
121 
122 /* [rt]x_qX_ is prepended to the name string here */
123 static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
124 	{"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
125 	{"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
126 	{"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
127 	{"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
128 	{"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
129 	{"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
130 	{"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
131 	{"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
132 	{"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
133 	{"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
134 	{"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
135 	{"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
136 };
137 
138 #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
139 			    sizeof(rte_virtio_rxq_stat_strings[0]))
140 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
141 			    sizeof(rte_virtio_txq_stat_strings[0]))
142 
143 struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
144 
145 static struct virtio_pmd_ctrl *
146 virtio_send_command_packed(struct virtnet_ctl *cvq,
147 			   struct virtio_pmd_ctrl *ctrl,
148 			   int *dlen, int pkt_num)
149 {
150 	struct virtqueue *vq = virtnet_cq_to_vq(cvq);
151 	int head;
152 	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
153 	struct virtio_pmd_ctrl *result;
154 	uint16_t flags;
155 	int sum = 0;
156 	int nb_descs = 0;
157 	int k;
158 
159 	/*
160 	 * Format is enforced in qemu code:
161 	 * One TX packet for header;
162 	 * At least one TX packet per argument;
163 	 * One RX packet for ACK.
164 	 */
165 	head = vq->vq_avail_idx;
166 	flags = vq->vq_packed.cached_flags;
167 	desc[head].addr = cvq->virtio_net_hdr_mem;
168 	desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
169 	vq->vq_free_cnt--;
170 	nb_descs++;
171 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
172 		vq->vq_avail_idx -= vq->vq_nentries;
173 		vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
174 	}
175 
176 	for (k = 0; k < pkt_num; k++) {
177 		desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
178 			+ sizeof(struct virtio_net_ctrl_hdr)
179 			+ sizeof(ctrl->status) + sizeof(uint8_t) * sum;
180 		desc[vq->vq_avail_idx].len = dlen[k];
181 		desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
182 			vq->vq_packed.cached_flags;
183 		sum += dlen[k];
184 		vq->vq_free_cnt--;
185 		nb_descs++;
186 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
187 			vq->vq_avail_idx -= vq->vq_nentries;
188 			vq->vq_packed.cached_flags ^=
189 				VRING_PACKED_DESC_F_AVAIL_USED;
190 		}
191 	}
192 
193 	desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
194 		+ sizeof(struct virtio_net_ctrl_hdr);
195 	desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
196 	desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
197 		vq->vq_packed.cached_flags;
198 	vq->vq_free_cnt--;
199 	nb_descs++;
200 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
201 		vq->vq_avail_idx -= vq->vq_nentries;
202 		vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
203 	}
204 
205 	virtqueue_store_flags_packed(&desc[head], VRING_DESC_F_NEXT | flags,
206 			vq->hw->weak_barriers);
207 
208 	virtio_wmb(vq->hw->weak_barriers);
209 	virtqueue_notify(vq);
210 
211 	/* wait for used desc in virtqueue
212 	 * desc_is_used has a load-acquire or rte_io_rmb inside
213 	 */
214 	while (!desc_is_used(&desc[head], vq))
215 		usleep(100);
216 
217 	/* now get used descriptors */
218 	vq->vq_free_cnt += nb_descs;
219 	vq->vq_used_cons_idx += nb_descs;
220 	if (vq->vq_used_cons_idx >= vq->vq_nentries) {
221 		vq->vq_used_cons_idx -= vq->vq_nentries;
222 		vq->vq_packed.used_wrap_counter ^= 1;
223 	}
224 
225 	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
226 			"vq->vq_avail_idx=%d\n"
227 			"vq->vq_used_cons_idx=%d\n"
228 			"vq->vq_packed.cached_flags=0x%x\n"
229 			"vq->vq_packed.used_wrap_counter=%d",
230 			vq->vq_free_cnt,
231 			vq->vq_avail_idx,
232 			vq->vq_used_cons_idx,
233 			vq->vq_packed.cached_flags,
234 			vq->vq_packed.used_wrap_counter);
235 
236 	result = cvq->virtio_net_hdr_mz->addr;
237 	return result;
238 }
239 
240 static struct virtio_pmd_ctrl *
241 virtio_send_command_split(struct virtnet_ctl *cvq,
242 			  struct virtio_pmd_ctrl *ctrl,
243 			  int *dlen, int pkt_num)
244 {
245 	struct virtio_pmd_ctrl *result;
246 	struct virtqueue *vq = virtnet_cq_to_vq(cvq);
247 	uint32_t head, i;
248 	int k, sum = 0;
249 
250 	head = vq->vq_desc_head_idx;
251 
252 	/*
253 	 * Format is enforced in qemu code:
254 	 * One TX packet for header;
255 	 * At least one TX packet per argument;
256 	 * One RX packet for ACK.
257 	 */
258 	vq->vq_split.ring.desc[head].flags = VRING_DESC_F_NEXT;
259 	vq->vq_split.ring.desc[head].addr = cvq->virtio_net_hdr_mem;
260 	vq->vq_split.ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
261 	vq->vq_free_cnt--;
262 	i = vq->vq_split.ring.desc[head].next;
263 
264 	for (k = 0; k < pkt_num; k++) {
265 		vq->vq_split.ring.desc[i].flags = VRING_DESC_F_NEXT;
266 		vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
267 			+ sizeof(struct virtio_net_ctrl_hdr)
268 			+ sizeof(ctrl->status) + sizeof(uint8_t)*sum;
269 		vq->vq_split.ring.desc[i].len = dlen[k];
270 		sum += dlen[k];
271 		vq->vq_free_cnt--;
272 		i = vq->vq_split.ring.desc[i].next;
273 	}
274 
275 	vq->vq_split.ring.desc[i].flags = VRING_DESC_F_WRITE;
276 	vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
277 			+ sizeof(struct virtio_net_ctrl_hdr);
278 	vq->vq_split.ring.desc[i].len = sizeof(ctrl->status);
279 	vq->vq_free_cnt--;
280 
281 	vq->vq_desc_head_idx = vq->vq_split.ring.desc[i].next;
282 
283 	vq_update_avail_ring(vq, head);
284 	vq_update_avail_idx(vq);
285 
286 	PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
287 
288 	virtqueue_notify(vq);
289 
290 	while (virtqueue_nused(vq) == 0)
291 		usleep(100);
292 
293 	while (virtqueue_nused(vq)) {
294 		uint32_t idx, desc_idx, used_idx;
295 		struct vring_used_elem *uep;
296 
297 		used_idx = (uint32_t)(vq->vq_used_cons_idx
298 				& (vq->vq_nentries - 1));
299 		uep = &vq->vq_split.ring.used->ring[used_idx];
300 		idx = (uint32_t) uep->id;
301 		desc_idx = idx;
302 
303 		while (vq->vq_split.ring.desc[desc_idx].flags &
304 				VRING_DESC_F_NEXT) {
305 			desc_idx = vq->vq_split.ring.desc[desc_idx].next;
306 			vq->vq_free_cnt++;
307 		}
308 
309 		vq->vq_split.ring.desc[desc_idx].next = vq->vq_desc_head_idx;
310 		vq->vq_desc_head_idx = idx;
311 
312 		vq->vq_used_cons_idx++;
313 		vq->vq_free_cnt++;
314 	}
315 
316 	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
317 			vq->vq_free_cnt, vq->vq_desc_head_idx);
318 
319 	result = cvq->virtio_net_hdr_mz->addr;
320 	return result;
321 }
322 
323 static int
324 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
325 		    int *dlen, int pkt_num)
326 {
327 	virtio_net_ctrl_ack status = ~0;
328 	struct virtio_pmd_ctrl *result;
329 	struct virtqueue *vq;
330 
331 	ctrl->status = status;
332 
333 	if (!cvq) {
334 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
335 		return -1;
336 	}
337 
338 	rte_spinlock_lock(&cvq->lock);
339 	vq = virtnet_cq_to_vq(cvq);
340 
341 	PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
342 		"vq->hw->cvq = %p vq = %p",
343 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
344 
345 	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
346 		rte_spinlock_unlock(&cvq->lock);
347 		return -1;
348 	}
349 
350 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
351 		sizeof(struct virtio_pmd_ctrl));
352 
353 	if (virtio_with_packed_queue(vq->hw))
354 		result = virtio_send_command_packed(cvq, ctrl, dlen, pkt_num);
355 	else
356 		result = virtio_send_command_split(cvq, ctrl, dlen, pkt_num);
357 
358 	rte_spinlock_unlock(&cvq->lock);
359 	return result->status;
360 }
361 
362 static int
363 virtio_set_multiple_queues_rss(struct rte_eth_dev *dev, uint16_t nb_queues)
364 {
365 	struct virtio_hw *hw = dev->data->dev_private;
366 	struct virtio_pmd_ctrl ctrl;
367 	struct virtio_net_ctrl_rss rss;
368 	int dlen, ret;
369 
370 	rss.hash_types = hw->rss_hash_types & VIRTIO_NET_HASH_TYPE_MASK;
371 	RTE_BUILD_BUG_ON(!RTE_IS_POWER_OF_2(VIRTIO_NET_RSS_RETA_SIZE));
372 	rss.indirection_table_mask = VIRTIO_NET_RSS_RETA_SIZE - 1;
373 	rss.unclassified_queue = 0;
374 	memcpy(rss.indirection_table, hw->rss_reta, VIRTIO_NET_RSS_RETA_SIZE * sizeof(uint16_t));
375 	rss.max_tx_vq = nb_queues;
376 	rss.hash_key_length = VIRTIO_NET_RSS_KEY_SIZE;
377 	memcpy(rss.hash_key_data, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
378 
379 	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
380 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_RSS_CONFIG;
381 	memcpy(ctrl.data, &rss, sizeof(rss));
382 
383 	dlen = sizeof(rss);
384 
385 	ret = virtio_send_command(hw->cvq, &ctrl, &dlen, 1);
386 	if (ret) {
387 		PMD_INIT_LOG(ERR, "RSS multiqueue configured but send command failed");
388 		return -EINVAL;
389 	}
390 
391 	return 0;
392 }
393 
394 static int
395 virtio_set_multiple_queues_auto(struct rte_eth_dev *dev, uint16_t nb_queues)
396 {
397 	struct virtio_hw *hw = dev->data->dev_private;
398 	struct virtio_pmd_ctrl ctrl;
399 	int dlen;
400 	int ret;
401 
402 	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
403 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
404 	memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
405 
406 	dlen = sizeof(uint16_t);
407 
408 	ret = virtio_send_command(hw->cvq, &ctrl, &dlen, 1);
409 	if (ret) {
410 		PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
411 			  "failed, this is too late now...");
412 		return -EINVAL;
413 	}
414 
415 	return 0;
416 }
417 
418 static int
419 virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
420 {
421 	struct virtio_hw *hw = dev->data->dev_private;
422 
423 	if (virtio_with_feature(hw, VIRTIO_NET_F_RSS))
424 		return virtio_set_multiple_queues_rss(dev, nb_queues);
425 	else
426 		return virtio_set_multiple_queues_auto(dev, nb_queues);
427 }
428 
429 static uint16_t
430 virtio_get_nr_vq(struct virtio_hw *hw)
431 {
432 	uint16_t nr_vq = hw->max_queue_pairs * 2;
433 
434 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
435 		nr_vq += 1;
436 
437 	return nr_vq;
438 }
439 
440 static void
441 virtio_init_vring(struct virtqueue *vq)
442 {
443 	int size = vq->vq_nentries;
444 	uint8_t *ring_mem = vq->vq_ring_virt_mem;
445 
446 	PMD_INIT_FUNC_TRACE();
447 
448 	memset(ring_mem, 0, vq->vq_ring_size);
449 
450 	vq->vq_used_cons_idx = 0;
451 	vq->vq_desc_head_idx = 0;
452 	vq->vq_avail_idx = 0;
453 	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
454 	vq->vq_free_cnt = vq->vq_nentries;
455 	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
456 	if (virtio_with_packed_queue(vq->hw)) {
457 		vring_init_packed(&vq->vq_packed.ring, ring_mem,
458 				  VIRTIO_VRING_ALIGN, size);
459 		vring_desc_init_packed(vq, size);
460 	} else {
461 		struct vring *vr = &vq->vq_split.ring;
462 
463 		vring_init_split(vr, ring_mem, VIRTIO_VRING_ALIGN, size);
464 		vring_desc_init_split(vr->desc, size);
465 	}
466 	/*
467 	 * Disable device(host) interrupting guest
468 	 */
469 	virtqueue_disable_intr(vq);
470 }
471 
472 static int
473 virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
474 {
475 	char vq_name[VIRTQUEUE_MAX_NAME_SZ];
476 	char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
477 	const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
478 	unsigned int vq_size, size;
479 	struct virtio_hw *hw = dev->data->dev_private;
480 	struct virtnet_rx *rxvq = NULL;
481 	struct virtnet_tx *txvq = NULL;
482 	struct virtnet_ctl *cvq = NULL;
483 	struct virtqueue *vq;
484 	size_t sz_hdr_mz = 0;
485 	void *sw_ring = NULL;
486 	int queue_type = virtio_get_queue_type(hw, queue_idx);
487 	int ret;
488 	int numa_node = dev->device->numa_node;
489 	struct rte_mbuf *fake_mbuf = NULL;
490 
491 	PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
492 			queue_idx, numa_node);
493 
494 	/*
495 	 * Read the virtqueue size from the Queue Size field
496 	 * Always power of 2 and if 0 virtqueue does not exist
497 	 */
498 	vq_size = VIRTIO_OPS(hw)->get_queue_num(hw, queue_idx);
499 	PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
500 	if (vq_size == 0) {
501 		PMD_INIT_LOG(ERR, "virtqueue does not exist");
502 		return -EINVAL;
503 	}
504 
505 	if (!virtio_with_packed_queue(hw) && !rte_is_power_of_2(vq_size)) {
506 		PMD_INIT_LOG(ERR, "split virtqueue size is not power of 2");
507 		return -EINVAL;
508 	}
509 
510 	snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
511 		 dev->data->port_id, queue_idx);
512 
513 	size = RTE_ALIGN_CEIL(sizeof(*vq) +
514 				vq_size * sizeof(struct vq_desc_extra),
515 				RTE_CACHE_LINE_SIZE);
516 	if (queue_type == VTNET_TQ) {
517 		/*
518 		 * For each xmit packet, allocate a virtio_net_hdr
519 		 * and indirect ring elements
520 		 */
521 		sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
522 	} else if (queue_type == VTNET_CQ) {
523 		/* Allocate a page for control vq command, data and status */
524 		sz_hdr_mz = rte_mem_page_size();
525 	}
526 
527 	vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
528 				numa_node);
529 	if (vq == NULL) {
530 		PMD_INIT_LOG(ERR, "can not allocate vq");
531 		return -ENOMEM;
532 	}
533 	hw->vqs[queue_idx] = vq;
534 
535 	vq->hw = hw;
536 	vq->vq_queue_index = queue_idx;
537 	vq->vq_nentries = vq_size;
538 	if (virtio_with_packed_queue(hw)) {
539 		vq->vq_packed.used_wrap_counter = 1;
540 		vq->vq_packed.cached_flags = VRING_PACKED_DESC_F_AVAIL;
541 		vq->vq_packed.event_flags_shadow = 0;
542 		if (queue_type == VTNET_RQ)
543 			vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE;
544 	}
545 
546 	/*
547 	 * Reserve a memzone for vring elements
548 	 */
549 	size = vring_size(hw, vq_size, VIRTIO_VRING_ALIGN);
550 	vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_VRING_ALIGN);
551 	PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
552 		     size, vq->vq_ring_size);
553 
554 	mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
555 			numa_node, RTE_MEMZONE_IOVA_CONTIG,
556 			VIRTIO_VRING_ALIGN);
557 	if (mz == NULL) {
558 		if (rte_errno == EEXIST)
559 			mz = rte_memzone_lookup(vq_name);
560 		if (mz == NULL) {
561 			ret = -ENOMEM;
562 			goto free_vq;
563 		}
564 	}
565 
566 	memset(mz->addr, 0, mz->len);
567 
568 	if (hw->use_va)
569 		vq->vq_ring_mem = (uintptr_t)mz->addr;
570 	else
571 		vq->vq_ring_mem = mz->iova;
572 
573 	vq->vq_ring_virt_mem = mz->addr;
574 	PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64, vq->vq_ring_mem);
575 	PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: %p", vq->vq_ring_virt_mem);
576 
577 	virtio_init_vring(vq);
578 
579 	if (sz_hdr_mz) {
580 		snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
581 			 dev->data->port_id, queue_idx);
582 		hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
583 				numa_node, RTE_MEMZONE_IOVA_CONTIG,
584 				RTE_CACHE_LINE_SIZE);
585 		if (hdr_mz == NULL) {
586 			if (rte_errno == EEXIST)
587 				hdr_mz = rte_memzone_lookup(vq_hdr_name);
588 			if (hdr_mz == NULL) {
589 				ret = -ENOMEM;
590 				goto free_mz;
591 			}
592 		}
593 	}
594 
595 	if (queue_type == VTNET_RQ) {
596 		size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
597 			       sizeof(vq->sw_ring[0]);
598 
599 		sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
600 				RTE_CACHE_LINE_SIZE, numa_node);
601 		if (!sw_ring) {
602 			PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
603 			ret = -ENOMEM;
604 			goto free_hdr_mz;
605 		}
606 
607 		fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf),
608 				RTE_CACHE_LINE_SIZE, numa_node);
609 		if (!fake_mbuf) {
610 			PMD_INIT_LOG(ERR, "can not allocate fake mbuf");
611 			ret = -ENOMEM;
612 			goto free_sw_ring;
613 		}
614 
615 		vq->sw_ring = sw_ring;
616 		rxvq = &vq->rxq;
617 		rxvq->port_id = dev->data->port_id;
618 		rxvq->mz = mz;
619 		rxvq->fake_mbuf = fake_mbuf;
620 	} else if (queue_type == VTNET_TQ) {
621 		txvq = &vq->txq;
622 		txvq->port_id = dev->data->port_id;
623 		txvq->mz = mz;
624 		txvq->virtio_net_hdr_mz = hdr_mz;
625 		if (hw->use_va)
626 			txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
627 		else
628 			txvq->virtio_net_hdr_mem = hdr_mz->iova;
629 	} else if (queue_type == VTNET_CQ) {
630 		cvq = &vq->cq;
631 		cvq->mz = mz;
632 		cvq->virtio_net_hdr_mz = hdr_mz;
633 		if (hw->use_va)
634 			cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
635 		else
636 			cvq->virtio_net_hdr_mem = hdr_mz->iova;
637 		memset(cvq->virtio_net_hdr_mz->addr, 0, rte_mem_page_size());
638 
639 		hw->cvq = cvq;
640 	}
641 
642 	if (hw->use_va)
643 		vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_addr);
644 	else
645 		vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_iova);
646 
647 	if (queue_type == VTNET_TQ) {
648 		struct virtio_tx_region *txr;
649 		unsigned int i;
650 
651 		txr = hdr_mz->addr;
652 		memset(txr, 0, vq_size * sizeof(*txr));
653 		for (i = 0; i < vq_size; i++) {
654 			/* first indirect descriptor is always the tx header */
655 			if (!virtio_with_packed_queue(hw)) {
656 				struct vring_desc *start_dp = txr[i].tx_indir;
657 				vring_desc_init_split(start_dp,
658 						      RTE_DIM(txr[i].tx_indir));
659 				start_dp->addr = txvq->virtio_net_hdr_mem
660 					+ i * sizeof(*txr)
661 					+ offsetof(struct virtio_tx_region,
662 						   tx_hdr);
663 				start_dp->len = hw->vtnet_hdr_size;
664 				start_dp->flags = VRING_DESC_F_NEXT;
665 			} else {
666 				struct vring_packed_desc *start_dp =
667 					txr[i].tx_packed_indir;
668 				vring_desc_init_indirect_packed(start_dp,
669 				      RTE_DIM(txr[i].tx_packed_indir));
670 				start_dp->addr = txvq->virtio_net_hdr_mem
671 					+ i * sizeof(*txr)
672 					+ offsetof(struct virtio_tx_region,
673 						   tx_hdr);
674 				start_dp->len = hw->vtnet_hdr_size;
675 			}
676 		}
677 	}
678 
679 	if (VIRTIO_OPS(hw)->setup_queue(hw, vq) < 0) {
680 		PMD_INIT_LOG(ERR, "setup_queue failed");
681 		ret = -EINVAL;
682 		goto clean_vq;
683 	}
684 
685 	return 0;
686 
687 clean_vq:
688 	hw->cvq = NULL;
689 	rte_free(fake_mbuf);
690 free_sw_ring:
691 	rte_free(sw_ring);
692 free_hdr_mz:
693 	rte_memzone_free(hdr_mz);
694 free_mz:
695 	rte_memzone_free(mz);
696 free_vq:
697 	rte_free(vq);
698 	hw->vqs[queue_idx] = NULL;
699 
700 	return ret;
701 }
702 
703 static void
704 virtio_free_queues(struct virtio_hw *hw)
705 {
706 	uint16_t nr_vq = virtio_get_nr_vq(hw);
707 	struct virtqueue *vq;
708 	int queue_type;
709 	uint16_t i;
710 
711 	if (hw->vqs == NULL)
712 		return;
713 
714 	for (i = 0; i < nr_vq; i++) {
715 		vq = hw->vqs[i];
716 		if (!vq)
717 			continue;
718 
719 		queue_type = virtio_get_queue_type(hw, i);
720 		if (queue_type == VTNET_RQ) {
721 			rte_free(vq->rxq.fake_mbuf);
722 			rte_free(vq->sw_ring);
723 			rte_memzone_free(vq->rxq.mz);
724 		} else if (queue_type == VTNET_TQ) {
725 			rte_memzone_free(vq->txq.mz);
726 			rte_memzone_free(vq->txq.virtio_net_hdr_mz);
727 		} else {
728 			rte_memzone_free(vq->cq.mz);
729 			rte_memzone_free(vq->cq.virtio_net_hdr_mz);
730 		}
731 
732 		rte_free(vq);
733 		hw->vqs[i] = NULL;
734 	}
735 
736 	rte_free(hw->vqs);
737 	hw->vqs = NULL;
738 }
739 
740 static int
741 virtio_alloc_queues(struct rte_eth_dev *dev)
742 {
743 	struct virtio_hw *hw = dev->data->dev_private;
744 	uint16_t nr_vq = virtio_get_nr_vq(hw);
745 	uint16_t i;
746 	int ret;
747 
748 	hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
749 	if (!hw->vqs) {
750 		PMD_INIT_LOG(ERR, "failed to allocate vqs");
751 		return -ENOMEM;
752 	}
753 
754 	for (i = 0; i < nr_vq; i++) {
755 		ret = virtio_init_queue(dev, i);
756 		if (ret < 0) {
757 			virtio_free_queues(hw);
758 			return ret;
759 		}
760 	}
761 
762 	return 0;
763 }
764 
765 static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);
766 
767 static void
768 virtio_free_rss(struct virtio_hw *hw)
769 {
770 	rte_free(hw->rss_key);
771 	hw->rss_key = NULL;
772 
773 	rte_free(hw->rss_reta);
774 	hw->rss_reta = NULL;
775 }
776 
777 int
778 virtio_dev_close(struct rte_eth_dev *dev)
779 {
780 	struct virtio_hw *hw = dev->data->dev_private;
781 	struct rte_eth_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
782 
783 	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
784 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
785 		return 0;
786 
787 	if (!hw->opened)
788 		return 0;
789 	hw->opened = 0;
790 
791 	/* reset the NIC */
792 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
793 		VIRTIO_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
794 	if (intr_conf->rxq)
795 		virtio_queues_unbind_intr(dev);
796 
797 	if (intr_conf->lsc || intr_conf->rxq) {
798 		virtio_intr_disable(dev);
799 		rte_intr_efd_disable(dev->intr_handle);
800 		rte_intr_vec_list_free(dev->intr_handle);
801 	}
802 
803 	virtio_reset(hw);
804 	virtio_dev_free_mbufs(dev);
805 	virtio_free_queues(hw);
806 	virtio_free_rss(hw);
807 
808 	return VIRTIO_OPS(hw)->dev_close(hw);
809 }
810 
811 static int
812 virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
813 {
814 	struct virtio_hw *hw = dev->data->dev_private;
815 	struct virtio_pmd_ctrl ctrl;
816 	int dlen[1];
817 	int ret;
818 
819 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
820 		PMD_INIT_LOG(INFO, "host does not support rx control");
821 		return -ENOTSUP;
822 	}
823 
824 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
825 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
826 	ctrl.data[0] = 1;
827 	dlen[0] = 1;
828 
829 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
830 	if (ret) {
831 		PMD_INIT_LOG(ERR, "Failed to enable promisc");
832 		return -EAGAIN;
833 	}
834 
835 	return 0;
836 }
837 
838 static int
839 virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
840 {
841 	struct virtio_hw *hw = dev->data->dev_private;
842 	struct virtio_pmd_ctrl ctrl;
843 	int dlen[1];
844 	int ret;
845 
846 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
847 		PMD_INIT_LOG(INFO, "host does not support rx control");
848 		return -ENOTSUP;
849 	}
850 
851 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
852 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
853 	ctrl.data[0] = 0;
854 	dlen[0] = 1;
855 
856 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
857 	if (ret) {
858 		PMD_INIT_LOG(ERR, "Failed to disable promisc");
859 		return -EAGAIN;
860 	}
861 
862 	return 0;
863 }
864 
865 static int
866 virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
867 {
868 	struct virtio_hw *hw = dev->data->dev_private;
869 	struct virtio_pmd_ctrl ctrl;
870 	int dlen[1];
871 	int ret;
872 
873 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
874 		PMD_INIT_LOG(INFO, "host does not support rx control");
875 		return -ENOTSUP;
876 	}
877 
878 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
879 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
880 	ctrl.data[0] = 1;
881 	dlen[0] = 1;
882 
883 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
884 	if (ret) {
885 		PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
886 		return -EAGAIN;
887 	}
888 
889 	return 0;
890 }
891 
892 static int
893 virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
894 {
895 	struct virtio_hw *hw = dev->data->dev_private;
896 	struct virtio_pmd_ctrl ctrl;
897 	int dlen[1];
898 	int ret;
899 
900 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
901 		PMD_INIT_LOG(INFO, "host does not support rx control");
902 		return -ENOTSUP;
903 	}
904 
905 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
906 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
907 	ctrl.data[0] = 0;
908 	dlen[0] = 1;
909 
910 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
911 	if (ret) {
912 		PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
913 		return -EAGAIN;
914 	}
915 
916 	return 0;
917 }
918 
919 uint16_t
920 virtio_rx_mem_pool_buf_size(struct rte_mempool *mp)
921 {
922 	return rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
923 }
924 
925 bool
926 virtio_rx_check_scatter(uint16_t max_rx_pkt_len, uint16_t rx_buf_size,
927 			bool rx_scatter_enabled, const char **error)
928 {
929 	if (!rx_scatter_enabled && max_rx_pkt_len > rx_buf_size) {
930 		*error = "Rx scatter is disabled and RxQ mbuf pool object size is too small";
931 		return false;
932 	}
933 
934 	return true;
935 }
936 
937 static bool
938 virtio_check_scatter_on_all_rx_queues(struct rte_eth_dev *dev,
939 				      uint16_t frame_size)
940 {
941 	struct virtio_hw *hw = dev->data->dev_private;
942 	struct virtnet_rx *rxvq;
943 	struct virtqueue *vq;
944 	unsigned int qidx;
945 	uint16_t buf_size;
946 	const char *error;
947 
948 	if (hw->vqs == NULL)
949 		return true;
950 
951 	for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) {
952 		vq = hw->vqs[2 * qidx + VTNET_SQ_RQ_QUEUE_IDX];
953 		if (vq == NULL)
954 			continue;
955 
956 		rxvq = &vq->rxq;
957 		if (rxvq->mpool == NULL)
958 			continue;
959 		buf_size = virtio_rx_mem_pool_buf_size(rxvq->mpool);
960 
961 		if (!virtio_rx_check_scatter(frame_size, buf_size,
962 					     hw->rx_ol_scatter, &error)) {
963 			PMD_INIT_LOG(ERR, "MTU check for RxQ %u failed: %s",
964 				     qidx, error);
965 			return false;
966 		}
967 	}
968 
969 	return true;
970 }
971 
972 #define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
973 static int
974 virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
975 {
976 	struct virtio_hw *hw = dev->data->dev_private;
977 	uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
978 				 hw->vtnet_hdr_size;
979 	uint32_t frame_size = mtu + ether_hdr_len;
980 	uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;
981 
982 	max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);
983 
984 	if (mtu < RTE_ETHER_MIN_MTU || frame_size > max_frame_size) {
985 		PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
986 			RTE_ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
987 		return -EINVAL;
988 	}
989 
990 	if (!virtio_check_scatter_on_all_rx_queues(dev, frame_size)) {
991 		PMD_INIT_LOG(ERR, "MTU vs Rx scatter and Rx buffers check failed");
992 		return -EINVAL;
993 	}
994 
995 	hw->max_rx_pkt_len = frame_size;
996 
997 	return 0;
998 }
999 
1000 static int
1001 virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
1002 {
1003 	struct virtio_hw *hw = dev->data->dev_private;
1004 	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
1005 	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
1006 
1007 	virtqueue_enable_intr(vq);
1008 	virtio_mb(hw->weak_barriers);
1009 	return 0;
1010 }
1011 
1012 static int
1013 virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
1014 {
1015 	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
1016 	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
1017 
1018 	virtqueue_disable_intr(vq);
1019 	return 0;
1020 }
1021 
1022 static int
1023 virtio_dev_priv_dump(struct rte_eth_dev *dev, FILE *f)
1024 {
1025 	struct virtio_hw *hw = dev->data->dev_private;
1026 
1027 	fprintf(f, "guest_features: 0x%" PRIx64 "\n", hw->guest_features);
1028 	fprintf(f, "vtnet_hdr_size: %u\n", hw->vtnet_hdr_size);
1029 	fprintf(f, "use_vec: rx-%u tx-%u\n", hw->use_vec_rx, hw->use_vec_tx);
1030 	fprintf(f, "use_inorder: rx-%u tx-%u\n", hw->use_inorder_rx, hw->use_inorder_tx);
1031 	fprintf(f, "intr_lsc: %u\n", hw->intr_lsc);
1032 	fprintf(f, "max_mtu: %u\n", hw->max_mtu);
1033 	fprintf(f, "max_rx_pkt_len: %zu\n", hw->max_rx_pkt_len);
1034 	fprintf(f, "max_queue_pairs: %u\n", hw->max_queue_pairs);
1035 	fprintf(f, "req_guest_features: 0x%" PRIx64 "\n", hw->req_guest_features);
1036 
1037 	return 0;
1038 }
1039 
1040 /*
1041  * dev_ops for virtio, bare necessities for basic operation
1042  */
1043 static const struct eth_dev_ops virtio_eth_dev_ops = {
1044 	.dev_configure           = virtio_dev_configure,
1045 	.dev_start               = virtio_dev_start,
1046 	.dev_stop                = virtio_dev_stop,
1047 	.dev_close               = virtio_dev_close,
1048 	.promiscuous_enable      = virtio_dev_promiscuous_enable,
1049 	.promiscuous_disable     = virtio_dev_promiscuous_disable,
1050 	.allmulticast_enable     = virtio_dev_allmulticast_enable,
1051 	.allmulticast_disable    = virtio_dev_allmulticast_disable,
1052 	.mtu_set                 = virtio_mtu_set,
1053 	.dev_infos_get           = virtio_dev_info_get,
1054 	.stats_get               = virtio_dev_stats_get,
1055 	.xstats_get              = virtio_dev_xstats_get,
1056 	.xstats_get_names        = virtio_dev_xstats_get_names,
1057 	.stats_reset             = virtio_dev_stats_reset,
1058 	.xstats_reset            = virtio_dev_stats_reset,
1059 	.link_update             = virtio_dev_link_update,
1060 	.vlan_offload_set        = virtio_dev_vlan_offload_set,
1061 	.rx_queue_setup          = virtio_dev_rx_queue_setup,
1062 	.rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
1063 	.rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
1064 	.tx_queue_setup          = virtio_dev_tx_queue_setup,
1065 	.rss_hash_update         = virtio_dev_rss_hash_update,
1066 	.rss_hash_conf_get       = virtio_dev_rss_hash_conf_get,
1067 	.reta_update             = virtio_dev_rss_reta_update,
1068 	.reta_query              = virtio_dev_rss_reta_query,
1069 	/* collect stats per queue */
1070 	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
1071 	.vlan_filter_set         = virtio_vlan_filter_set,
1072 	.mac_addr_add            = virtio_mac_addr_add,
1073 	.mac_addr_remove         = virtio_mac_addr_remove,
1074 	.mac_addr_set            = virtio_mac_addr_set,
1075 	.get_monitor_addr        = virtio_get_monitor_addr,
1076 	.eth_dev_priv_dump       = virtio_dev_priv_dump,
1077 };
1078 
1079 /*
1080  * dev_ops for virtio-user in secondary processes, as we just have
1081  * some limited supports currently.
1082  */
1083 const struct eth_dev_ops virtio_user_secondary_eth_dev_ops = {
1084 	.dev_infos_get           = virtio_dev_info_get,
1085 	.stats_get               = virtio_dev_stats_get,
1086 	.xstats_get              = virtio_dev_xstats_get,
1087 	.xstats_get_names        = virtio_dev_xstats_get_names,
1088 	.stats_reset             = virtio_dev_stats_reset,
1089 	.xstats_reset            = virtio_dev_stats_reset,
1090 	/* collect stats per queue */
1091 	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
1092 };
1093 
1094 static void
1095 virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1096 {
1097 	unsigned i;
1098 
1099 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1100 		const struct virtnet_tx *txvq = dev->data->tx_queues[i];
1101 		if (txvq == NULL)
1102 			continue;
1103 
1104 		stats->opackets += txvq->stats.packets;
1105 		stats->obytes += txvq->stats.bytes;
1106 
1107 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
1108 			stats->q_opackets[i] = txvq->stats.packets;
1109 			stats->q_obytes[i] = txvq->stats.bytes;
1110 		}
1111 	}
1112 
1113 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1114 		const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1115 		if (rxvq == NULL)
1116 			continue;
1117 
1118 		stats->ipackets += rxvq->stats.packets;
1119 		stats->ibytes += rxvq->stats.bytes;
1120 		stats->ierrors += rxvq->stats.errors;
1121 
1122 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
1123 			stats->q_ipackets[i] = rxvq->stats.packets;
1124 			stats->q_ibytes[i] = rxvq->stats.bytes;
1125 		}
1126 	}
1127 
1128 	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
1129 }
1130 
1131 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
1132 				       struct rte_eth_xstat_name *xstats_names,
1133 				       __rte_unused unsigned limit)
1134 {
1135 	unsigned i;
1136 	unsigned count = 0;
1137 	unsigned t;
1138 
1139 	unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1140 		dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1141 
1142 	if (xstats_names != NULL) {
1143 		/* Note: limit checked in rte_eth_xstats_names() */
1144 
1145 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
1146 			struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1147 			if (rxvq == NULL)
1148 				continue;
1149 			for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1150 				snprintf(xstats_names[count].name,
1151 					sizeof(xstats_names[count].name),
1152 					"rx_q%u_%s", i,
1153 					rte_virtio_rxq_stat_strings[t].name);
1154 				count++;
1155 			}
1156 		}
1157 
1158 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
1159 			struct virtnet_tx *txvq = dev->data->tx_queues[i];
1160 			if (txvq == NULL)
1161 				continue;
1162 			for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1163 				snprintf(xstats_names[count].name,
1164 					sizeof(xstats_names[count].name),
1165 					"tx_q%u_%s", i,
1166 					rte_virtio_txq_stat_strings[t].name);
1167 				count++;
1168 			}
1169 		}
1170 		return count;
1171 	}
1172 	return nstats;
1173 }
1174 
1175 static int
1176 virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1177 		      unsigned n)
1178 {
1179 	unsigned i;
1180 	unsigned count = 0;
1181 
1182 	unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1183 		dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1184 
1185 	if (n < nstats)
1186 		return nstats;
1187 
1188 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1189 		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1190 
1191 		if (rxvq == NULL)
1192 			continue;
1193 
1194 		unsigned t;
1195 
1196 		for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1197 			xstats[count].value = *(uint64_t *)(((char *)rxvq) +
1198 				rte_virtio_rxq_stat_strings[t].offset);
1199 			xstats[count].id = count;
1200 			count++;
1201 		}
1202 	}
1203 
1204 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1205 		struct virtnet_tx *txvq = dev->data->tx_queues[i];
1206 
1207 		if (txvq == NULL)
1208 			continue;
1209 
1210 		unsigned t;
1211 
1212 		for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1213 			xstats[count].value = *(uint64_t *)(((char *)txvq) +
1214 				rte_virtio_txq_stat_strings[t].offset);
1215 			xstats[count].id = count;
1216 			count++;
1217 		}
1218 	}
1219 
1220 	return count;
1221 }
1222 
1223 static int
1224 virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1225 {
1226 	virtio_update_stats(dev, stats);
1227 
1228 	return 0;
1229 }
1230 
1231 static int
1232 virtio_dev_stats_reset(struct rte_eth_dev *dev)
1233 {
1234 	unsigned int i;
1235 
1236 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1237 		struct virtnet_tx *txvq = dev->data->tx_queues[i];
1238 		if (txvq == NULL)
1239 			continue;
1240 
1241 		txvq->stats.packets = 0;
1242 		txvq->stats.bytes = 0;
1243 		txvq->stats.multicast = 0;
1244 		txvq->stats.broadcast = 0;
1245 		memset(txvq->stats.size_bins, 0,
1246 		       sizeof(txvq->stats.size_bins[0]) * 8);
1247 	}
1248 
1249 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1250 		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1251 		if (rxvq == NULL)
1252 			continue;
1253 
1254 		rxvq->stats.packets = 0;
1255 		rxvq->stats.bytes = 0;
1256 		rxvq->stats.errors = 0;
1257 		rxvq->stats.multicast = 0;
1258 		rxvq->stats.broadcast = 0;
1259 		memset(rxvq->stats.size_bins, 0,
1260 		       sizeof(rxvq->stats.size_bins[0]) * 8);
1261 	}
1262 
1263 	return 0;
1264 }
1265 
1266 static void
1267 virtio_set_hwaddr(struct virtio_hw *hw)
1268 {
1269 	virtio_write_dev_config(hw,
1270 			offsetof(struct virtio_net_config, mac),
1271 			&hw->mac_addr, RTE_ETHER_ADDR_LEN);
1272 }
1273 
1274 static void
1275 virtio_get_hwaddr(struct virtio_hw *hw)
1276 {
1277 	if (virtio_with_feature(hw, VIRTIO_NET_F_MAC)) {
1278 		virtio_read_dev_config(hw,
1279 			offsetof(struct virtio_net_config, mac),
1280 			&hw->mac_addr, RTE_ETHER_ADDR_LEN);
1281 	} else {
1282 		rte_eth_random_addr(&hw->mac_addr[0]);
1283 		virtio_set_hwaddr(hw);
1284 	}
1285 }
1286 
1287 static int
1288 virtio_mac_table_set(struct virtio_hw *hw,
1289 		     const struct virtio_net_ctrl_mac *uc,
1290 		     const struct virtio_net_ctrl_mac *mc)
1291 {
1292 	struct virtio_pmd_ctrl ctrl;
1293 	int err, len[2];
1294 
1295 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1296 		PMD_DRV_LOG(INFO, "host does not support mac table");
1297 		return -1;
1298 	}
1299 
1300 	ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1301 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1302 
1303 	len[0] = uc->entries * RTE_ETHER_ADDR_LEN + sizeof(uc->entries);
1304 	memcpy(ctrl.data, uc, len[0]);
1305 
1306 	len[1] = mc->entries * RTE_ETHER_ADDR_LEN + sizeof(mc->entries);
1307 	memcpy(ctrl.data + len[0], mc, len[1]);
1308 
1309 	err = virtio_send_command(hw->cvq, &ctrl, len, 2);
1310 	if (err != 0)
1311 		PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
1312 	return err;
1313 }
1314 
1315 static int
1316 virtio_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
1317 		    uint32_t index, uint32_t vmdq __rte_unused)
1318 {
1319 	struct virtio_hw *hw = dev->data->dev_private;
1320 	const struct rte_ether_addr *addrs = dev->data->mac_addrs;
1321 	unsigned int i;
1322 	struct virtio_net_ctrl_mac *uc, *mc;
1323 
1324 	if (index >= VIRTIO_MAX_MAC_ADDRS) {
1325 		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1326 		return -EINVAL;
1327 	}
1328 
1329 	uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1330 		sizeof(uc->entries));
1331 	uc->entries = 0;
1332 	mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1333 		sizeof(mc->entries));
1334 	mc->entries = 0;
1335 
1336 	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1337 		const struct rte_ether_addr *addr
1338 			= (i == index) ? mac_addr : addrs + i;
1339 		struct virtio_net_ctrl_mac *tbl
1340 			= rte_is_multicast_ether_addr(addr) ? mc : uc;
1341 
1342 		memcpy(&tbl->macs[tbl->entries++], addr, RTE_ETHER_ADDR_LEN);
1343 	}
1344 
1345 	return virtio_mac_table_set(hw, uc, mc);
1346 }
1347 
1348 static void
1349 virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
1350 {
1351 	struct virtio_hw *hw = dev->data->dev_private;
1352 	struct rte_ether_addr *addrs = dev->data->mac_addrs;
1353 	struct virtio_net_ctrl_mac *uc, *mc;
1354 	unsigned int i;
1355 
1356 	if (index >= VIRTIO_MAX_MAC_ADDRS) {
1357 		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1358 		return;
1359 	}
1360 
1361 	uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1362 		sizeof(uc->entries));
1363 	uc->entries = 0;
1364 	mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1365 		sizeof(mc->entries));
1366 	mc->entries = 0;
1367 
1368 	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1369 		struct virtio_net_ctrl_mac *tbl;
1370 
1371 		if (i == index || rte_is_zero_ether_addr(addrs + i))
1372 			continue;
1373 
1374 		tbl = rte_is_multicast_ether_addr(addrs + i) ? mc : uc;
1375 		memcpy(&tbl->macs[tbl->entries++], addrs + i,
1376 			RTE_ETHER_ADDR_LEN);
1377 	}
1378 
1379 	virtio_mac_table_set(hw, uc, mc);
1380 }
1381 
1382 static int
1383 virtio_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
1384 {
1385 	struct virtio_hw *hw = dev->data->dev_private;
1386 
1387 	memcpy(hw->mac_addr, mac_addr, RTE_ETHER_ADDR_LEN);
1388 
1389 	/* Use atomic update if available */
1390 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1391 		struct virtio_pmd_ctrl ctrl;
1392 		int len = RTE_ETHER_ADDR_LEN;
1393 
1394 		ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1395 		ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
1396 
1397 		memcpy(ctrl.data, mac_addr, RTE_ETHER_ADDR_LEN);
1398 		return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1399 	}
1400 
1401 	if (!virtio_with_feature(hw, VIRTIO_NET_F_MAC))
1402 		return -ENOTSUP;
1403 
1404 	virtio_set_hwaddr(hw);
1405 	return 0;
1406 }
1407 
1408 #define CLB_VAL_IDX 0
1409 #define CLB_MSK_IDX 1
1410 #define CLB_MATCH_IDX 2
1411 static int
1412 virtio_monitor_callback(const uint64_t value,
1413 		const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
1414 {
1415 	const uint64_t m = opaque[CLB_MSK_IDX];
1416 	const uint64_t v = opaque[CLB_VAL_IDX];
1417 	const uint64_t c = opaque[CLB_MATCH_IDX];
1418 
1419 	if (c)
1420 		return (value & m) == v ? -1 : 0;
1421 	else
1422 		return (value & m) == v ? 0 : -1;
1423 }
1424 
1425 static int
1426 virtio_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
1427 {
1428 	struct virtnet_rx *rxvq = rx_queue;
1429 	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
1430 	struct virtio_hw *hw;
1431 
1432 	if (vq == NULL)
1433 		return -EINVAL;
1434 
1435 	hw = vq->hw;
1436 	if (virtio_with_packed_queue(hw)) {
1437 		struct vring_packed_desc *desc;
1438 		desc = vq->vq_packed.ring.desc;
1439 		pmc->addr = &desc[vq->vq_used_cons_idx].flags;
1440 		if (vq->vq_packed.used_wrap_counter)
1441 			pmc->opaque[CLB_VAL_IDX] =
1442 						VRING_PACKED_DESC_F_AVAIL_USED;
1443 		else
1444 			pmc->opaque[CLB_VAL_IDX] = 0;
1445 		pmc->opaque[CLB_MSK_IDX] = VRING_PACKED_DESC_F_AVAIL_USED;
1446 		pmc->opaque[CLB_MATCH_IDX] = 1;
1447 		pmc->size = sizeof(desc[vq->vq_used_cons_idx].flags);
1448 	} else {
1449 		pmc->addr = &vq->vq_split.ring.used->idx;
1450 		pmc->opaque[CLB_VAL_IDX] = vq->vq_used_cons_idx
1451 					& (vq->vq_nentries - 1);
1452 		pmc->opaque[CLB_MSK_IDX] = vq->vq_nentries - 1;
1453 		pmc->opaque[CLB_MATCH_IDX] = 0;
1454 		pmc->size = sizeof(vq->vq_split.ring.used->idx);
1455 	}
1456 	pmc->fn = virtio_monitor_callback;
1457 
1458 	return 0;
1459 }
1460 
1461 static int
1462 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1463 {
1464 	struct virtio_hw *hw = dev->data->dev_private;
1465 	struct virtio_pmd_ctrl ctrl;
1466 	int len;
1467 
1468 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
1469 		return -ENOTSUP;
1470 
1471 	ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
1472 	ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
1473 	memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
1474 	len = sizeof(vlan_id);
1475 
1476 	return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1477 }
1478 
1479 static int
1480 virtio_intr_unmask(struct rte_eth_dev *dev)
1481 {
1482 	struct virtio_hw *hw = dev->data->dev_private;
1483 
1484 	if (rte_intr_ack(dev->intr_handle) < 0)
1485 		return -1;
1486 
1487 	if (VIRTIO_OPS(hw)->intr_detect)
1488 		VIRTIO_OPS(hw)->intr_detect(hw);
1489 
1490 	return 0;
1491 }
1492 
1493 static int
1494 virtio_intr_enable(struct rte_eth_dev *dev)
1495 {
1496 	struct virtio_hw *hw = dev->data->dev_private;
1497 
1498 	if (rte_intr_enable(dev->intr_handle) < 0)
1499 		return -1;
1500 
1501 	if (VIRTIO_OPS(hw)->intr_detect)
1502 		VIRTIO_OPS(hw)->intr_detect(hw);
1503 
1504 	return 0;
1505 }
1506 
1507 static int
1508 virtio_intr_disable(struct rte_eth_dev *dev)
1509 {
1510 	struct virtio_hw *hw = dev->data->dev_private;
1511 
1512 	if (rte_intr_disable(dev->intr_handle) < 0)
1513 		return -1;
1514 
1515 	if (VIRTIO_OPS(hw)->intr_detect)
1516 		VIRTIO_OPS(hw)->intr_detect(hw);
1517 
1518 	return 0;
1519 }
1520 
1521 static int
1522 virtio_ethdev_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
1523 {
1524 	uint64_t host_features;
1525 
1526 	/* Prepare guest_features: feature that driver wants to support */
1527 	PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
1528 		req_features);
1529 
1530 	/* Read device(host) feature bits */
1531 	host_features = VIRTIO_OPS(hw)->get_features(hw);
1532 	PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
1533 		host_features);
1534 
1535 	/* If supported, ensure MTU value is valid before acknowledging it. */
1536 	if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
1537 		struct virtio_net_config config;
1538 
1539 		virtio_read_dev_config(hw,
1540 			offsetof(struct virtio_net_config, mtu),
1541 			&config.mtu, sizeof(config.mtu));
1542 
1543 		if (config.mtu < RTE_ETHER_MIN_MTU)
1544 			req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
1545 	}
1546 
1547 	/*
1548 	 * Negotiate features: Subset of device feature bits are written back
1549 	 * guest feature bits.
1550 	 */
1551 	hw->guest_features = req_features;
1552 	hw->guest_features = virtio_negotiate_features(hw, host_features);
1553 	PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
1554 		hw->guest_features);
1555 
1556 	if (VIRTIO_OPS(hw)->features_ok(hw) < 0)
1557 		return -1;
1558 
1559 	if (virtio_with_feature(hw, VIRTIO_F_VERSION_1)) {
1560 		virtio_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1561 
1562 		if (!(virtio_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
1563 			PMD_INIT_LOG(ERR, "Failed to set FEATURES_OK status!");
1564 			return -1;
1565 		}
1566 	}
1567 
1568 	hw->req_guest_features = req_features;
1569 
1570 	return 0;
1571 }
1572 
1573 int
1574 virtio_dev_pause(struct rte_eth_dev *dev)
1575 {
1576 	struct virtio_hw *hw = dev->data->dev_private;
1577 
1578 	rte_spinlock_lock(&hw->state_lock);
1579 
1580 	if (hw->started == 0) {
1581 		/* Device is just stopped. */
1582 		rte_spinlock_unlock(&hw->state_lock);
1583 		return -1;
1584 	}
1585 	hw->started = 0;
1586 	/*
1587 	 * Prevent the worker threads from touching queues to avoid contention,
1588 	 * 1 ms should be enough for the ongoing Tx function to finish.
1589 	 */
1590 	rte_delay_ms(1);
1591 	return 0;
1592 }
1593 
1594 /*
1595  * Recover hw state to let the worker threads continue.
1596  */
1597 void
1598 virtio_dev_resume(struct rte_eth_dev *dev)
1599 {
1600 	struct virtio_hw *hw = dev->data->dev_private;
1601 
1602 	hw->started = 1;
1603 	rte_spinlock_unlock(&hw->state_lock);
1604 }
1605 
1606 /*
1607  * Should be called only after device is paused.
1608  */
1609 int
1610 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
1611 		int nb_pkts)
1612 {
1613 	struct virtio_hw *hw = dev->data->dev_private;
1614 	struct virtnet_tx *txvq = dev->data->tx_queues[0];
1615 	int ret;
1616 
1617 	hw->inject_pkts = tx_pkts;
1618 	ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
1619 	hw->inject_pkts = NULL;
1620 
1621 	return ret;
1622 }
1623 
1624 static void
1625 virtio_notify_peers(struct rte_eth_dev *dev)
1626 {
1627 	struct virtio_hw *hw = dev->data->dev_private;
1628 	struct virtnet_rx *rxvq;
1629 	struct rte_mbuf *rarp_mbuf;
1630 
1631 	if (!dev->data->rx_queues)
1632 		return;
1633 
1634 	rxvq = dev->data->rx_queues[0];
1635 	if (!rxvq)
1636 		return;
1637 
1638 	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
1639 			(struct rte_ether_addr *)hw->mac_addr);
1640 	if (rarp_mbuf == NULL) {
1641 		PMD_DRV_LOG(ERR, "failed to make RARP packet.");
1642 		return;
1643 	}
1644 
1645 	/* If virtio port just stopped, no need to send RARP */
1646 	if (virtio_dev_pause(dev) < 0) {
1647 		rte_pktmbuf_free(rarp_mbuf);
1648 		return;
1649 	}
1650 
1651 	virtio_inject_pkts(dev, &rarp_mbuf, 1);
1652 	virtio_dev_resume(dev);
1653 }
1654 
1655 static void
1656 virtio_ack_link_announce(struct rte_eth_dev *dev)
1657 {
1658 	struct virtio_hw *hw = dev->data->dev_private;
1659 	struct virtio_pmd_ctrl ctrl;
1660 
1661 	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
1662 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
1663 
1664 	virtio_send_command(hw->cvq, &ctrl, NULL, 0);
1665 }
1666 
1667 /*
1668  * Process virtio config changed interrupt. Call the callback
1669  * if link state changed, generate gratuitous RARP packet if
1670  * the status indicates an ANNOUNCE.
1671  */
1672 void
1673 virtio_interrupt_handler(void *param)
1674 {
1675 	struct rte_eth_dev *dev = param;
1676 	struct virtio_hw *hw = dev->data->dev_private;
1677 	uint8_t isr;
1678 	uint16_t status;
1679 
1680 	/* Read interrupt status which clears interrupt */
1681 	isr = virtio_get_isr(hw);
1682 	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
1683 
1684 	if (virtio_intr_unmask(dev) < 0)
1685 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1686 
1687 	if (isr & VIRTIO_ISR_CONFIG) {
1688 		if (virtio_dev_link_update(dev, 0) == 0)
1689 			rte_eth_dev_callback_process(dev,
1690 						     RTE_ETH_EVENT_INTR_LSC,
1691 						     NULL);
1692 
1693 		if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1694 			virtio_read_dev_config(hw,
1695 				offsetof(struct virtio_net_config, status),
1696 				&status, sizeof(status));
1697 			if (status & VIRTIO_NET_S_ANNOUNCE) {
1698 				virtio_notify_peers(dev);
1699 				if (hw->cvq)
1700 					virtio_ack_link_announce(dev);
1701 			}
1702 		}
1703 	}
1704 }
1705 
1706 /* set rx and tx handlers according to what is supported */
1707 static void
1708 set_rxtx_funcs(struct rte_eth_dev *eth_dev)
1709 {
1710 	struct virtio_hw *hw = eth_dev->data->dev_private;
1711 
1712 	eth_dev->tx_pkt_prepare = virtio_xmit_pkts_prepare;
1713 	if (virtio_with_packed_queue(hw)) {
1714 		PMD_INIT_LOG(INFO,
1715 			"virtio: using packed ring %s Tx path on port %u",
1716 			hw->use_vec_tx ? "vectorized" : "standard",
1717 			eth_dev->data->port_id);
1718 		if (hw->use_vec_tx)
1719 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
1720 		else
1721 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
1722 	} else {
1723 		if (hw->use_inorder_tx) {
1724 			PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
1725 				eth_dev->data->port_id);
1726 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
1727 		} else {
1728 			PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
1729 				eth_dev->data->port_id);
1730 			eth_dev->tx_pkt_burst = virtio_xmit_pkts;
1731 		}
1732 	}
1733 
1734 	if (virtio_with_packed_queue(hw)) {
1735 		if (hw->use_vec_rx) {
1736 			PMD_INIT_LOG(INFO,
1737 				"virtio: using packed ring vectorized Rx path on port %u",
1738 				eth_dev->data->port_id);
1739 			eth_dev->rx_pkt_burst =
1740 				&virtio_recv_pkts_packed_vec;
1741 		} else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1742 			PMD_INIT_LOG(INFO,
1743 				"virtio: using packed ring mergeable buffer Rx path on port %u",
1744 				eth_dev->data->port_id);
1745 			eth_dev->rx_pkt_burst =
1746 				&virtio_recv_mergeable_pkts_packed;
1747 		} else {
1748 			PMD_INIT_LOG(INFO,
1749 				"virtio: using packed ring standard Rx path on port %u",
1750 				eth_dev->data->port_id);
1751 			eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
1752 		}
1753 	} else {
1754 		if (hw->use_vec_rx) {
1755 			PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on port %u",
1756 				eth_dev->data->port_id);
1757 			eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
1758 		} else if (hw->use_inorder_rx) {
1759 			PMD_INIT_LOG(INFO,
1760 				"virtio: using inorder Rx path on port %u",
1761 				eth_dev->data->port_id);
1762 			eth_dev->rx_pkt_burst =	&virtio_recv_pkts_inorder;
1763 		} else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1764 			PMD_INIT_LOG(INFO,
1765 				"virtio: using mergeable buffer Rx path on port %u",
1766 				eth_dev->data->port_id);
1767 			eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
1768 		} else {
1769 			PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
1770 				eth_dev->data->port_id);
1771 			eth_dev->rx_pkt_burst = &virtio_recv_pkts;
1772 		}
1773 	}
1774 
1775 }
1776 
1777 /* Only support 1:1 queue/interrupt mapping so far.
1778  * TODO: support n:1 queue/interrupt mapping when there are limited number of
1779  * interrupt vectors (<N+1).
1780  */
1781 static int
1782 virtio_queues_bind_intr(struct rte_eth_dev *dev)
1783 {
1784 	uint32_t i;
1785 	struct virtio_hw *hw = dev->data->dev_private;
1786 
1787 	PMD_INIT_LOG(INFO, "queue/interrupt binding");
1788 	for (i = 0; i < dev->data->nb_rx_queues; ++i) {
1789 		if (rte_intr_vec_list_index_set(dev->intr_handle, i,
1790 						       i + 1))
1791 			return -rte_errno;
1792 		if (VIRTIO_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
1793 						 VIRTIO_MSI_NO_VECTOR) {
1794 			PMD_DRV_LOG(ERR, "failed to set queue vector");
1795 			return -EBUSY;
1796 		}
1797 	}
1798 
1799 	return 0;
1800 }
1801 
1802 static void
1803 virtio_queues_unbind_intr(struct rte_eth_dev *dev)
1804 {
1805 	uint32_t i;
1806 	struct virtio_hw *hw = dev->data->dev_private;
1807 
1808 	PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
1809 	for (i = 0; i < dev->data->nb_rx_queues; ++i)
1810 		VIRTIO_OPS(hw)->set_queue_irq(hw,
1811 					     hw->vqs[i * VTNET_CQ],
1812 					     VIRTIO_MSI_NO_VECTOR);
1813 }
1814 
1815 static int
1816 virtio_configure_intr(struct rte_eth_dev *dev)
1817 {
1818 	struct virtio_hw *hw = dev->data->dev_private;
1819 
1820 	if (!rte_intr_cap_multiple(dev->intr_handle)) {
1821 		PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
1822 		return -ENOTSUP;
1823 	}
1824 
1825 	if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1826 		PMD_INIT_LOG(ERR, "Fail to create eventfd");
1827 		return -1;
1828 	}
1829 
1830 	if (rte_intr_vec_list_alloc(dev->intr_handle, "intr_vec",
1831 				    hw->max_queue_pairs)) {
1832 		PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1833 			     hw->max_queue_pairs);
1834 		return -ENOMEM;
1835 	}
1836 
1837 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1838 		/* Re-register callback to update max_intr */
1839 		rte_intr_callback_unregister(dev->intr_handle,
1840 					     virtio_interrupt_handler,
1841 					     dev);
1842 		rte_intr_callback_register(dev->intr_handle,
1843 					   virtio_interrupt_handler,
1844 					   dev);
1845 	}
1846 
1847 	/* DO NOT try to remove this! This function will enable msix, or QEMU
1848 	 * will encounter SIGSEGV when DRIVER_OK is sent.
1849 	 * And for legacy devices, this should be done before queue/vec binding
1850 	 * to change the config size from 20 to 24, or VIRTIO_MSI_QUEUE_VECTOR
1851 	 * (22) will be ignored.
1852 	 */
1853 	if (virtio_intr_enable(dev) < 0) {
1854 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1855 		return -1;
1856 	}
1857 
1858 	if (virtio_queues_bind_intr(dev) < 0) {
1859 		PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1860 		return -1;
1861 	}
1862 
1863 	return 0;
1864 }
1865 
1866 static void
1867 virtio_get_speed_duplex(struct rte_eth_dev *eth_dev,
1868 			struct rte_eth_link *link)
1869 {
1870 	struct virtio_hw *hw = eth_dev->data->dev_private;
1871 	struct virtio_net_config *config;
1872 	struct virtio_net_config local_config;
1873 
1874 	config = &local_config;
1875 	virtio_read_dev_config(hw,
1876 		offsetof(struct virtio_net_config, speed),
1877 		&config->speed, sizeof(config->speed));
1878 	virtio_read_dev_config(hw,
1879 		offsetof(struct virtio_net_config, duplex),
1880 		&config->duplex, sizeof(config->duplex));
1881 	hw->speed = config->speed;
1882 	hw->duplex = config->duplex;
1883 	if (link != NULL) {
1884 		link->link_duplex = hw->duplex;
1885 		link->link_speed  = hw->speed;
1886 	}
1887 	PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
1888 		     hw->speed, hw->duplex);
1889 }
1890 
1891 static uint64_t
1892 ethdev_to_virtio_rss_offloads(uint64_t ethdev_hash_types)
1893 {
1894 	uint64_t virtio_hash_types = 0;
1895 
1896 	if (ethdev_hash_types & (RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
1897 				RTE_ETH_RSS_NONFRAG_IPV4_OTHER))
1898 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IPV4;
1899 
1900 	if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1901 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCPV4;
1902 
1903 	if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
1904 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDPV4;
1905 
1906 	if (ethdev_hash_types & (RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
1907 				RTE_ETH_RSS_NONFRAG_IPV6_OTHER))
1908 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IPV6;
1909 
1910 	if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1911 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCPV6;
1912 
1913 	if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
1914 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDPV6;
1915 
1916 	if (ethdev_hash_types & RTE_ETH_RSS_IPV6_EX)
1917 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IP_EX;
1918 
1919 	if (ethdev_hash_types & RTE_ETH_RSS_IPV6_TCP_EX)
1920 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCP_EX;
1921 
1922 	if (ethdev_hash_types & RTE_ETH_RSS_IPV6_UDP_EX)
1923 		virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDP_EX;
1924 
1925 	return virtio_hash_types;
1926 }
1927 
1928 static uint64_t
1929 virtio_to_ethdev_rss_offloads(uint64_t virtio_hash_types)
1930 {
1931 	uint64_t rss_offloads = 0;
1932 
1933 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IPV4)
1934 		rss_offloads |= RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
1935 			RTE_ETH_RSS_NONFRAG_IPV4_OTHER;
1936 
1937 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCPV4)
1938 		rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV4_TCP;
1939 
1940 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDPV4)
1941 		rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV4_UDP;
1942 
1943 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IPV6)
1944 		rss_offloads |= RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
1945 			RTE_ETH_RSS_NONFRAG_IPV6_OTHER;
1946 
1947 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCPV6)
1948 		rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV6_TCP;
1949 
1950 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDPV6)
1951 		rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV6_UDP;
1952 
1953 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IP_EX)
1954 		rss_offloads |= RTE_ETH_RSS_IPV6_EX;
1955 
1956 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCP_EX)
1957 		rss_offloads |= RTE_ETH_RSS_IPV6_TCP_EX;
1958 
1959 	if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDP_EX)
1960 		rss_offloads |= RTE_ETH_RSS_IPV6_UDP_EX;
1961 
1962 	return rss_offloads;
1963 }
1964 
1965 static int
1966 virtio_dev_get_rss_config(struct virtio_hw *hw, uint32_t *rss_hash_types)
1967 {
1968 	struct virtio_net_config local_config;
1969 	struct virtio_net_config *config = &local_config;
1970 
1971 	virtio_read_dev_config(hw,
1972 			offsetof(struct virtio_net_config, rss_max_key_size),
1973 			&config->rss_max_key_size,
1974 			sizeof(config->rss_max_key_size));
1975 	if (config->rss_max_key_size < VIRTIO_NET_RSS_KEY_SIZE) {
1976 		PMD_INIT_LOG(ERR, "Invalid device RSS max key size (%u)",
1977 				config->rss_max_key_size);
1978 		return -EINVAL;
1979 	}
1980 
1981 	virtio_read_dev_config(hw,
1982 			offsetof(struct virtio_net_config,
1983 				rss_max_indirection_table_length),
1984 			&config->rss_max_indirection_table_length,
1985 			sizeof(config->rss_max_indirection_table_length));
1986 	if (config->rss_max_indirection_table_length < VIRTIO_NET_RSS_RETA_SIZE) {
1987 		PMD_INIT_LOG(ERR, "Invalid device RSS max reta size (%u)",
1988 				config->rss_max_indirection_table_length);
1989 		return -EINVAL;
1990 	}
1991 
1992 	virtio_read_dev_config(hw,
1993 			offsetof(struct virtio_net_config, supported_hash_types),
1994 			&config->supported_hash_types,
1995 			sizeof(config->supported_hash_types));
1996 	if ((config->supported_hash_types & VIRTIO_NET_HASH_TYPE_MASK) == 0) {
1997 		PMD_INIT_LOG(ERR, "Invalid device RSS hash types (0x%x)",
1998 				config->supported_hash_types);
1999 		return -EINVAL;
2000 	}
2001 
2002 	*rss_hash_types = config->supported_hash_types & VIRTIO_NET_HASH_TYPE_MASK;
2003 
2004 	PMD_INIT_LOG(DEBUG, "Device RSS config:");
2005 	PMD_INIT_LOG(DEBUG, "\t-Max key size: %u", config->rss_max_key_size);
2006 	PMD_INIT_LOG(DEBUG, "\t-Max reta size: %u", config->rss_max_indirection_table_length);
2007 	PMD_INIT_LOG(DEBUG, "\t-Supported hash types: 0x%x", *rss_hash_types);
2008 
2009 	return 0;
2010 }
2011 
2012 static int
2013 virtio_dev_rss_hash_update(struct rte_eth_dev *dev,
2014 		struct rte_eth_rss_conf *rss_conf)
2015 {
2016 	struct virtio_hw *hw = dev->data->dev_private;
2017 	char old_rss_key[VIRTIO_NET_RSS_KEY_SIZE];
2018 	uint32_t old_hash_types;
2019 	uint16_t nb_queues;
2020 	int ret;
2021 
2022 	if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2023 		return -ENOTSUP;
2024 
2025 	if (rss_conf->rss_hf & ~virtio_to_ethdev_rss_offloads(VIRTIO_NET_HASH_TYPE_MASK))
2026 		return -EINVAL;
2027 
2028 	old_hash_types = hw->rss_hash_types;
2029 	hw->rss_hash_types = ethdev_to_virtio_rss_offloads(rss_conf->rss_hf);
2030 
2031 	if (rss_conf->rss_key && rss_conf->rss_key_len) {
2032 		if (rss_conf->rss_key_len != VIRTIO_NET_RSS_KEY_SIZE) {
2033 			PMD_INIT_LOG(ERR, "Driver only supports %u RSS key length",
2034 					VIRTIO_NET_RSS_KEY_SIZE);
2035 			ret = -EINVAL;
2036 			goto restore_types;
2037 		}
2038 		memcpy(old_rss_key, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2039 		memcpy(hw->rss_key, rss_conf->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2040 	}
2041 
2042 	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2043 	ret = virtio_set_multiple_queues_rss(dev, nb_queues);
2044 	if (ret < 0) {
2045 		PMD_INIT_LOG(ERR, "Failed to apply new RSS config to the device");
2046 		goto restore_key;
2047 	}
2048 
2049 	return 0;
2050 restore_key:
2051 	if (rss_conf->rss_key && rss_conf->rss_key_len)
2052 		memcpy(hw->rss_key, old_rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2053 restore_types:
2054 	hw->rss_hash_types = old_hash_types;
2055 
2056 	return ret;
2057 }
2058 
2059 static int
2060 virtio_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2061 		struct rte_eth_rss_conf *rss_conf)
2062 {
2063 	struct virtio_hw *hw = dev->data->dev_private;
2064 
2065 	if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2066 		return -ENOTSUP;
2067 
2068 	if (rss_conf->rss_key && rss_conf->rss_key_len >= VIRTIO_NET_RSS_KEY_SIZE)
2069 		memcpy(rss_conf->rss_key, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2070 	rss_conf->rss_key_len = VIRTIO_NET_RSS_KEY_SIZE;
2071 	rss_conf->rss_hf = virtio_to_ethdev_rss_offloads(hw->rss_hash_types);
2072 
2073 	return 0;
2074 }
2075 
2076 static int virtio_dev_rss_reta_update(struct rte_eth_dev *dev,
2077 			 struct rte_eth_rss_reta_entry64 *reta_conf,
2078 			 uint16_t reta_size)
2079 {
2080 	struct virtio_hw *hw = dev->data->dev_private;
2081 	uint16_t nb_queues;
2082 	uint16_t old_reta[VIRTIO_NET_RSS_RETA_SIZE];
2083 	int idx, pos, i, ret;
2084 
2085 	if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2086 		return -ENOTSUP;
2087 
2088 	if (reta_size != VIRTIO_NET_RSS_RETA_SIZE)
2089 		return -EINVAL;
2090 
2091 	memcpy(old_reta, hw->rss_reta, sizeof(old_reta));
2092 
2093 	for (i = 0; i < reta_size; i++) {
2094 		idx = i / RTE_ETH_RETA_GROUP_SIZE;
2095 		pos = i % RTE_ETH_RETA_GROUP_SIZE;
2096 
2097 		if (((reta_conf[idx].mask >> pos) & 0x1) == 0)
2098 			continue;
2099 
2100 		hw->rss_reta[i] = reta_conf[idx].reta[pos];
2101 	}
2102 
2103 	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2104 	ret = virtio_set_multiple_queues_rss(dev, nb_queues);
2105 	if (ret < 0) {
2106 		PMD_INIT_LOG(ERR, "Failed to apply new RETA to the device");
2107 		memcpy(hw->rss_reta, old_reta, sizeof(old_reta));
2108 	}
2109 
2110 	hw->rss_rx_queues = dev->data->nb_rx_queues;
2111 
2112 	return ret;
2113 }
2114 
2115 static int virtio_dev_rss_reta_query(struct rte_eth_dev *dev,
2116 			 struct rte_eth_rss_reta_entry64 *reta_conf,
2117 			 uint16_t reta_size)
2118 {
2119 	struct virtio_hw *hw = dev->data->dev_private;
2120 	int idx, i;
2121 
2122 	if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2123 		return -ENOTSUP;
2124 
2125 	if (reta_size != VIRTIO_NET_RSS_RETA_SIZE)
2126 		return -EINVAL;
2127 
2128 	for (i = 0; i < reta_size; i++) {
2129 		idx = i / RTE_ETH_RETA_GROUP_SIZE;
2130 		reta_conf[idx].reta[i % RTE_ETH_RETA_GROUP_SIZE] = hw->rss_reta[i];
2131 	}
2132 
2133 	return 0;
2134 }
2135 
2136 /*
2137  * As default RSS hash key, it uses the default key of the
2138  * Intel IXGBE devices. It can be updated by the application
2139  * with any 40B key value.
2140  */
2141 static uint8_t rss_intel_key[VIRTIO_NET_RSS_KEY_SIZE] = {
2142 	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2143 	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2144 	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2145 	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2146 	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2147 };
2148 
2149 static int
2150 virtio_dev_rss_init(struct rte_eth_dev *eth_dev)
2151 {
2152 	struct virtio_hw *hw = eth_dev->data->dev_private;
2153 	uint16_t nb_rx_queues = eth_dev->data->nb_rx_queues;
2154 	struct rte_eth_rss_conf *rss_conf;
2155 	int ret, i;
2156 
2157 	if (!nb_rx_queues) {
2158 		PMD_INIT_LOG(ERR, "Cannot init RSS if no Rx queues");
2159 		return -EINVAL;
2160 	}
2161 
2162 	rss_conf = &eth_dev->data->dev_conf.rx_adv_conf.rss_conf;
2163 
2164 	ret = virtio_dev_get_rss_config(hw, &hw->rss_hash_types);
2165 	if (ret)
2166 		return ret;
2167 
2168 	if (rss_conf->rss_hf) {
2169 		/*  Ensure requested hash types are supported by the device */
2170 		if (rss_conf->rss_hf & ~virtio_to_ethdev_rss_offloads(hw->rss_hash_types))
2171 			return -EINVAL;
2172 
2173 		hw->rss_hash_types = ethdev_to_virtio_rss_offloads(rss_conf->rss_hf);
2174 	}
2175 
2176 	if (!hw->rss_key) {
2177 		/* Setup default RSS key if not already setup by the user */
2178 		hw->rss_key = rte_malloc_socket("rss_key",
2179 				VIRTIO_NET_RSS_KEY_SIZE, 0,
2180 				eth_dev->device->numa_node);
2181 		if (!hw->rss_key) {
2182 			PMD_INIT_LOG(ERR, "Failed to allocate RSS key");
2183 			return -1;
2184 		}
2185 	}
2186 
2187 	if (rss_conf->rss_key && rss_conf->rss_key_len) {
2188 		if (rss_conf->rss_key_len != VIRTIO_NET_RSS_KEY_SIZE) {
2189 			PMD_INIT_LOG(ERR, "Driver only supports %u RSS key length",
2190 					VIRTIO_NET_RSS_KEY_SIZE);
2191 			return -EINVAL;
2192 		}
2193 		memcpy(hw->rss_key, rss_conf->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2194 	} else {
2195 		memcpy(hw->rss_key, rss_intel_key, VIRTIO_NET_RSS_KEY_SIZE);
2196 	}
2197 
2198 	if (!hw->rss_reta) {
2199 		/* Setup default RSS reta if not already setup by the user */
2200 		hw->rss_reta = rte_zmalloc_socket("rss_reta",
2201 				VIRTIO_NET_RSS_RETA_SIZE * sizeof(uint16_t), 0,
2202 				eth_dev->device->numa_node);
2203 		if (!hw->rss_reta) {
2204 			PMD_INIT_LOG(ERR, "Failed to allocate RSS reta");
2205 			return -1;
2206 		}
2207 
2208 		hw->rss_rx_queues = 0;
2209 	}
2210 
2211 	/* Re-initialize the RSS reta if the number of RX queues has changed */
2212 	if (hw->rss_rx_queues != nb_rx_queues) {
2213 		for (i = 0; i < VIRTIO_NET_RSS_RETA_SIZE; i++)
2214 			hw->rss_reta[i] = i % nb_rx_queues;
2215 		hw->rss_rx_queues = nb_rx_queues;
2216 	}
2217 
2218 	return 0;
2219 }
2220 
2221 #define DUPLEX_UNKNOWN   0xff
2222 /* reset device and renegotiate features if needed */
2223 static int
2224 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
2225 {
2226 	struct virtio_hw *hw = eth_dev->data->dev_private;
2227 	struct virtio_net_config *config;
2228 	struct virtio_net_config local_config;
2229 	int ret;
2230 
2231 	/* Reset the device although not necessary at startup */
2232 	virtio_reset(hw);
2233 
2234 	if (hw->vqs) {
2235 		virtio_dev_free_mbufs(eth_dev);
2236 		virtio_free_queues(hw);
2237 	}
2238 
2239 	/* Tell the host we've noticed this device. */
2240 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
2241 
2242 	/* Tell the host we've known how to drive the device. */
2243 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
2244 	if (virtio_ethdev_negotiate_features(hw, req_features) < 0)
2245 		return -1;
2246 
2247 	hw->weak_barriers = !virtio_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);
2248 
2249 	/* If host does not support both status and MSI-X then disable LSC */
2250 	if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS) && hw->intr_lsc)
2251 		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
2252 	else
2253 		eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
2254 
2255 	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
2256 
2257 	/* Setting up rx_header size for the device */
2258 	if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
2259 	    virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2260 	    virtio_with_packed_queue(hw))
2261 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2262 	else
2263 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
2264 
2265 	/* Copy the permanent MAC address to: virtio_hw */
2266 	virtio_get_hwaddr(hw);
2267 	rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
2268 			&eth_dev->data->mac_addrs[0]);
2269 	PMD_INIT_LOG(DEBUG,
2270 		     "PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
2271 		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
2272 		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
2273 
2274 	hw->get_speed_via_feat = hw->speed == RTE_ETH_SPEED_NUM_UNKNOWN &&
2275 			     virtio_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX);
2276 	if (hw->get_speed_via_feat)
2277 		virtio_get_speed_duplex(eth_dev, NULL);
2278 	if (hw->duplex == DUPLEX_UNKNOWN)
2279 		hw->duplex = RTE_ETH_LINK_FULL_DUPLEX;
2280 	PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
2281 		hw->speed, hw->duplex);
2282 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
2283 		config = &local_config;
2284 
2285 		virtio_read_dev_config(hw,
2286 			offsetof(struct virtio_net_config, mac),
2287 			&config->mac, sizeof(config->mac));
2288 
2289 		if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2290 			virtio_read_dev_config(hw,
2291 				offsetof(struct virtio_net_config, status),
2292 				&config->status, sizeof(config->status));
2293 		} else {
2294 			PMD_INIT_LOG(DEBUG,
2295 				     "VIRTIO_NET_F_STATUS is not supported");
2296 			config->status = 0;
2297 		}
2298 
2299 		if (virtio_with_feature(hw, VIRTIO_NET_F_MQ) ||
2300 				virtio_with_feature(hw, VIRTIO_NET_F_RSS)) {
2301 			virtio_read_dev_config(hw,
2302 				offsetof(struct virtio_net_config, max_virtqueue_pairs),
2303 				&config->max_virtqueue_pairs,
2304 				sizeof(config->max_virtqueue_pairs));
2305 		} else {
2306 			PMD_INIT_LOG(DEBUG,
2307 				     "Neither VIRTIO_NET_F_MQ nor VIRTIO_NET_F_RSS are supported");
2308 			config->max_virtqueue_pairs = 1;
2309 		}
2310 
2311 		hw->max_queue_pairs = config->max_virtqueue_pairs;
2312 
2313 		if (virtio_with_feature(hw, VIRTIO_NET_F_MTU)) {
2314 			virtio_read_dev_config(hw,
2315 				offsetof(struct virtio_net_config, mtu),
2316 				&config->mtu,
2317 				sizeof(config->mtu));
2318 
2319 			/*
2320 			 * MTU value has already been checked at negotiation
2321 			 * time, but check again in case it has changed since
2322 			 * then, which should not happen.
2323 			 */
2324 			if (config->mtu < RTE_ETHER_MIN_MTU) {
2325 				PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
2326 						config->mtu);
2327 				return -1;
2328 			}
2329 
2330 			hw->max_mtu = config->mtu;
2331 			/* Set initial MTU to maximum one supported by vhost */
2332 			eth_dev->data->mtu = config->mtu;
2333 
2334 		} else {
2335 			hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
2336 				VLAN_TAG_LEN - hw->vtnet_hdr_size;
2337 		}
2338 
2339 		hw->rss_hash_types = 0;
2340 		if (virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2341 			if (virtio_dev_rss_init(eth_dev))
2342 				return -1;
2343 
2344 		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
2345 				config->max_virtqueue_pairs);
2346 		PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
2347 		PMD_INIT_LOG(DEBUG,
2348 				"PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
2349 				config->mac[0], config->mac[1],
2350 				config->mac[2], config->mac[3],
2351 				config->mac[4], config->mac[5]);
2352 	} else {
2353 		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
2354 		hw->max_queue_pairs = 1;
2355 		hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
2356 			VLAN_TAG_LEN - hw->vtnet_hdr_size;
2357 	}
2358 
2359 	ret = virtio_alloc_queues(eth_dev);
2360 	if (ret < 0)
2361 		return ret;
2362 
2363 	if (eth_dev->data->dev_conf.intr_conf.rxq) {
2364 		if (virtio_configure_intr(eth_dev) < 0) {
2365 			PMD_INIT_LOG(ERR, "failed to configure interrupt");
2366 			virtio_free_queues(hw);
2367 			return -1;
2368 		}
2369 	}
2370 
2371 	virtio_reinit_complete(hw);
2372 
2373 	return 0;
2374 }
2375 
2376 /*
2377  * This function is based on probe() function in virtio_pci.c
2378  * It returns 0 on success.
2379  */
2380 int
2381 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
2382 {
2383 	struct virtio_hw *hw = eth_dev->data->dev_private;
2384 	uint32_t speed = RTE_ETH_SPEED_NUM_UNKNOWN;
2385 	int vectorized = 0;
2386 	int ret;
2387 
2388 	if (sizeof(struct virtio_net_hdr_mrg_rxbuf) > RTE_PKTMBUF_HEADROOM) {
2389 		PMD_INIT_LOG(ERR,
2390 			"Not sufficient headroom required = %d, avail = %d",
2391 			(int)sizeof(struct virtio_net_hdr_mrg_rxbuf),
2392 			RTE_PKTMBUF_HEADROOM);
2393 
2394 		return -1;
2395 	}
2396 
2397 	eth_dev->dev_ops = &virtio_eth_dev_ops;
2398 
2399 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
2400 		set_rxtx_funcs(eth_dev);
2401 		return 0;
2402 	}
2403 
2404 	ret = virtio_dev_devargs_parse(eth_dev->device->devargs, &speed, &vectorized);
2405 	if (ret < 0)
2406 		return ret;
2407 	hw->speed = speed;
2408 	hw->duplex = DUPLEX_UNKNOWN;
2409 
2410 	/* Allocate memory for storing MAC addresses */
2411 	eth_dev->data->mac_addrs = rte_zmalloc("virtio",
2412 				VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN, 0);
2413 	if (eth_dev->data->mac_addrs == NULL) {
2414 		PMD_INIT_LOG(ERR,
2415 			"Failed to allocate %d bytes needed to store MAC addresses",
2416 			VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN);
2417 		return -ENOMEM;
2418 	}
2419 
2420 	rte_spinlock_init(&hw->state_lock);
2421 
2422 	/* reset device and negotiate default features */
2423 	ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
2424 	if (ret < 0)
2425 		goto err_virtio_init;
2426 
2427 	if (vectorized) {
2428 		if (!virtio_with_packed_queue(hw)) {
2429 			hw->use_vec_rx = 1;
2430 		} else {
2431 #if defined(CC_AVX512_SUPPORT) || defined(RTE_ARCH_ARM)
2432 			hw->use_vec_rx = 1;
2433 			hw->use_vec_tx = 1;
2434 #else
2435 			PMD_DRV_LOG(INFO,
2436 				"building environment do not support packed ring vectorized");
2437 #endif
2438 		}
2439 	}
2440 
2441 	hw->opened = 1;
2442 
2443 	return 0;
2444 
2445 err_virtio_init:
2446 	rte_free(eth_dev->data->mac_addrs);
2447 	eth_dev->data->mac_addrs = NULL;
2448 	return ret;
2449 }
2450 
2451 static uint32_t
2452 virtio_dev_speed_capa_get(uint32_t speed)
2453 {
2454 	switch (speed) {
2455 	case RTE_ETH_SPEED_NUM_10G:
2456 		return RTE_ETH_LINK_SPEED_10G;
2457 	case RTE_ETH_SPEED_NUM_20G:
2458 		return RTE_ETH_LINK_SPEED_20G;
2459 	case RTE_ETH_SPEED_NUM_25G:
2460 		return RTE_ETH_LINK_SPEED_25G;
2461 	case RTE_ETH_SPEED_NUM_40G:
2462 		return RTE_ETH_LINK_SPEED_40G;
2463 	case RTE_ETH_SPEED_NUM_50G:
2464 		return RTE_ETH_LINK_SPEED_50G;
2465 	case RTE_ETH_SPEED_NUM_56G:
2466 		return RTE_ETH_LINK_SPEED_56G;
2467 	case RTE_ETH_SPEED_NUM_100G:
2468 		return RTE_ETH_LINK_SPEED_100G;
2469 	case RTE_ETH_SPEED_NUM_200G:
2470 		return RTE_ETH_LINK_SPEED_200G;
2471 	default:
2472 		return 0;
2473 	}
2474 }
2475 
2476 static int vectorized_check_handler(__rte_unused const char *key,
2477 		const char *value, void *ret_val)
2478 {
2479 	if (strcmp(value, "1") == 0)
2480 		*(int *)ret_val = 1;
2481 	else
2482 		*(int *)ret_val = 0;
2483 
2484 	return 0;
2485 }
2486 
2487 #define VIRTIO_ARG_SPEED      "speed"
2488 #define VIRTIO_ARG_VECTORIZED "vectorized"
2489 
2490 static int
2491 link_speed_handler(const char *key __rte_unused,
2492 		const char *value, void *ret_val)
2493 {
2494 	uint32_t val;
2495 	if (!value || !ret_val)
2496 		return -EINVAL;
2497 	val = strtoul(value, NULL, 0);
2498 	/* validate input */
2499 	if (virtio_dev_speed_capa_get(val) == 0)
2500 		return -EINVAL;
2501 	*(uint32_t *)ret_val = val;
2502 
2503 	return 0;
2504 }
2505 
2506 
2507 static int
2508 virtio_dev_devargs_parse(struct rte_devargs *devargs, uint32_t *speed, int *vectorized)
2509 {
2510 	struct rte_kvargs *kvlist;
2511 	int ret = 0;
2512 
2513 	if (devargs == NULL)
2514 		return 0;
2515 
2516 	kvlist = rte_kvargs_parse(devargs->args, NULL);
2517 	if (kvlist == NULL) {
2518 		PMD_INIT_LOG(ERR, "error when parsing param");
2519 		return 0;
2520 	}
2521 
2522 	if (speed && rte_kvargs_count(kvlist, VIRTIO_ARG_SPEED) == 1) {
2523 		ret = rte_kvargs_process(kvlist,
2524 					VIRTIO_ARG_SPEED,
2525 					link_speed_handler, speed);
2526 		if (ret < 0) {
2527 			PMD_INIT_LOG(ERR, "Failed to parse %s",
2528 					VIRTIO_ARG_SPEED);
2529 			goto exit;
2530 		}
2531 	}
2532 
2533 	if (vectorized &&
2534 		rte_kvargs_count(kvlist, VIRTIO_ARG_VECTORIZED) == 1) {
2535 		ret = rte_kvargs_process(kvlist,
2536 				VIRTIO_ARG_VECTORIZED,
2537 				vectorized_check_handler, vectorized);
2538 		if (ret < 0) {
2539 			PMD_INIT_LOG(ERR, "Failed to parse %s",
2540 					VIRTIO_ARG_VECTORIZED);
2541 			goto exit;
2542 		}
2543 	}
2544 
2545 exit:
2546 	rte_kvargs_free(kvlist);
2547 	return ret;
2548 }
2549 
2550 static uint8_t
2551 rx_offload_enabled(struct virtio_hw *hw)
2552 {
2553 	return virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
2554 		virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2555 		virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
2556 }
2557 
2558 static uint8_t
2559 tx_offload_enabled(struct virtio_hw *hw)
2560 {
2561 	return virtio_with_feature(hw, VIRTIO_NET_F_CSUM) ||
2562 		virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
2563 		virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
2564 }
2565 
2566 /*
2567  * Configure virtio device
2568  * It returns 0 on success.
2569  */
2570 static int
2571 virtio_dev_configure(struct rte_eth_dev *dev)
2572 {
2573 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2574 	const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
2575 	struct virtio_hw *hw = dev->data->dev_private;
2576 	uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
2577 		hw->vtnet_hdr_size;
2578 	uint64_t rx_offloads = rxmode->offloads;
2579 	uint64_t tx_offloads = txmode->offloads;
2580 	uint64_t req_features;
2581 	int ret;
2582 
2583 	PMD_INIT_LOG(DEBUG, "configure");
2584 	req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
2585 
2586 	if (rxmode->mq_mode != RTE_ETH_MQ_RX_NONE && rxmode->mq_mode != RTE_ETH_MQ_RX_RSS) {
2587 		PMD_DRV_LOG(ERR,
2588 			"Unsupported Rx multi queue mode %d",
2589 			rxmode->mq_mode);
2590 		return -EINVAL;
2591 	}
2592 
2593 	if (txmode->mq_mode != RTE_ETH_MQ_TX_NONE) {
2594 		PMD_DRV_LOG(ERR,
2595 			"Unsupported Tx multi queue mode %d",
2596 			txmode->mq_mode);
2597 		return -EINVAL;
2598 	}
2599 
2600 	if (dev->data->dev_conf.intr_conf.rxq) {
2601 		ret = virtio_init_device(dev, hw->req_guest_features);
2602 		if (ret < 0)
2603 			return ret;
2604 	}
2605 
2606 	if (rxmode->mq_mode == RTE_ETH_MQ_RX_RSS)
2607 		req_features |= (1ULL << VIRTIO_NET_F_RSS);
2608 
2609 	if (rxmode->mtu > hw->max_mtu)
2610 		req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
2611 
2612 	hw->max_rx_pkt_len = ether_hdr_len + rxmode->mtu;
2613 
2614 	if (rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2615 			   RTE_ETH_RX_OFFLOAD_TCP_CKSUM))
2616 		req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
2617 
2618 	if (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)
2619 		req_features |=
2620 			(1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2621 			(1ULL << VIRTIO_NET_F_GUEST_TSO6);
2622 
2623 	if (tx_offloads & (RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
2624 			   RTE_ETH_TX_OFFLOAD_TCP_CKSUM))
2625 		req_features |= (1ULL << VIRTIO_NET_F_CSUM);
2626 
2627 	if (tx_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)
2628 		req_features |=
2629 			(1ULL << VIRTIO_NET_F_HOST_TSO4) |
2630 			(1ULL << VIRTIO_NET_F_HOST_TSO6);
2631 
2632 	/* if request features changed, reinit the device */
2633 	if (req_features != hw->req_guest_features) {
2634 		ret = virtio_init_device(dev, req_features);
2635 		if (ret < 0)
2636 			return ret;
2637 	}
2638 
2639 	/* if queues are not allocated, reinit the device */
2640 	if (hw->vqs == NULL) {
2641 		ret = virtio_init_device(dev, hw->req_guest_features);
2642 		if (ret < 0)
2643 			return ret;
2644 	}
2645 
2646 	if ((rxmode->mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) &&
2647 			!virtio_with_feature(hw, VIRTIO_NET_F_RSS)) {
2648 		PMD_DRV_LOG(ERR, "RSS support requested but not supported by the device");
2649 		return -ENOTSUP;
2650 	}
2651 
2652 	if ((rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2653 			    RTE_ETH_RX_OFFLOAD_TCP_CKSUM)) &&
2654 		!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
2655 		PMD_DRV_LOG(ERR,
2656 			"rx checksum not available on this host");
2657 		return -ENOTSUP;
2658 	}
2659 
2660 	if ((rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) &&
2661 		(!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2662 		 !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
2663 		PMD_DRV_LOG(ERR,
2664 			"Large Receive Offload not available on this host");
2665 		return -ENOTSUP;
2666 	}
2667 
2668 	/* start control queue */
2669 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
2670 		virtio_dev_cq_start(dev);
2671 
2672 	if (rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
2673 		hw->vlan_strip = 1;
2674 
2675 	hw->rx_ol_scatter = (rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER);
2676 
2677 	if ((rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER) &&
2678 			!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2679 		PMD_DRV_LOG(ERR,
2680 			    "vlan filtering not available on this host");
2681 		return -ENOTSUP;
2682 	}
2683 
2684 	hw->has_tx_offload = tx_offload_enabled(hw);
2685 	hw->has_rx_offload = rx_offload_enabled(hw);
2686 
2687 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2688 		/* Enable vector (0) for Link State Interrupt */
2689 		if (VIRTIO_OPS(hw)->set_config_irq(hw, 0) ==
2690 				VIRTIO_MSI_NO_VECTOR) {
2691 			PMD_DRV_LOG(ERR, "failed to set config vector");
2692 			return -EBUSY;
2693 		}
2694 
2695 	if (virtio_with_packed_queue(hw)) {
2696 #if defined(RTE_ARCH_X86_64) && defined(CC_AVX512_SUPPORT)
2697 		if ((hw->use_vec_rx || hw->use_vec_tx) &&
2698 		    (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
2699 		     !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2700 		     !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2701 		     rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512)) {
2702 			PMD_DRV_LOG(INFO,
2703 				"disabled packed ring vectorized path for requirements not met");
2704 			hw->use_vec_rx = 0;
2705 			hw->use_vec_tx = 0;
2706 		}
2707 #elif defined(RTE_ARCH_ARM)
2708 		if ((hw->use_vec_rx || hw->use_vec_tx) &&
2709 		    (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) ||
2710 		     !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2711 		     !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2712 		     rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)) {
2713 			PMD_DRV_LOG(INFO,
2714 				"disabled packed ring vectorized path for requirements not met");
2715 			hw->use_vec_rx = 0;
2716 			hw->use_vec_tx = 0;
2717 		}
2718 #else
2719 		hw->use_vec_rx = 0;
2720 		hw->use_vec_tx = 0;
2721 #endif
2722 
2723 		if (hw->use_vec_rx) {
2724 			if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2725 				PMD_DRV_LOG(INFO,
2726 					"disabled packed ring vectorized rx for mrg_rxbuf enabled");
2727 				hw->use_vec_rx = 0;
2728 			}
2729 
2730 			if (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) {
2731 				PMD_DRV_LOG(INFO,
2732 					"disabled packed ring vectorized rx for TCP_LRO enabled");
2733 				hw->use_vec_rx = 0;
2734 			}
2735 		}
2736 	} else {
2737 		if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER)) {
2738 			hw->use_inorder_tx = 1;
2739 			hw->use_inorder_rx = 1;
2740 			hw->use_vec_rx = 0;
2741 		}
2742 
2743 		if (hw->use_vec_rx) {
2744 #if defined RTE_ARCH_ARM
2745 			if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
2746 				PMD_DRV_LOG(INFO,
2747 					"disabled split ring vectorized path for requirement not met");
2748 				hw->use_vec_rx = 0;
2749 			}
2750 #endif
2751 			if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2752 				PMD_DRV_LOG(INFO,
2753 					"disabled split ring vectorized rx for mrg_rxbuf enabled");
2754 				hw->use_vec_rx = 0;
2755 			}
2756 
2757 			if (rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2758 					   RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
2759 					   RTE_ETH_RX_OFFLOAD_TCP_LRO |
2760 					   RTE_ETH_RX_OFFLOAD_VLAN_STRIP)) {
2761 				PMD_DRV_LOG(INFO,
2762 					"disabled split ring vectorized rx for offloading enabled");
2763 				hw->use_vec_rx = 0;
2764 			}
2765 
2766 			if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
2767 				PMD_DRV_LOG(INFO,
2768 					"disabled split ring vectorized rx, max SIMD bitwidth too low");
2769 				hw->use_vec_rx = 0;
2770 			}
2771 		}
2772 	}
2773 
2774 	return 0;
2775 }
2776 
2777 
2778 static int
2779 virtio_dev_start(struct rte_eth_dev *dev)
2780 {
2781 	uint16_t nb_queues, i;
2782 	struct virtqueue *vq;
2783 	struct virtio_hw *hw = dev->data->dev_private;
2784 	int ret;
2785 
2786 	/* Finish the initialization of the queues */
2787 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2788 		ret = virtio_dev_rx_queue_setup_finish(dev, i);
2789 		if (ret < 0)
2790 			return ret;
2791 	}
2792 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2793 		ret = virtio_dev_tx_queue_setup_finish(dev, i);
2794 		if (ret < 0)
2795 			return ret;
2796 	}
2797 
2798 	/* check if lsc interrupt feature is enabled */
2799 	if (dev->data->dev_conf.intr_conf.lsc) {
2800 		if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2801 			PMD_DRV_LOG(ERR, "link status not supported by host");
2802 			return -ENOTSUP;
2803 		}
2804 	}
2805 
2806 	/* Enable uio/vfio intr/eventfd mapping: although we already did that
2807 	 * in device configure, but it could be unmapped  when device is
2808 	 * stopped.
2809 	 */
2810 	if (dev->data->dev_conf.intr_conf.lsc ||
2811 	    dev->data->dev_conf.intr_conf.rxq) {
2812 		virtio_intr_disable(dev);
2813 
2814 		/* Setup interrupt callback  */
2815 		if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2816 			rte_intr_callback_register(dev->intr_handle,
2817 						   virtio_interrupt_handler,
2818 						   dev);
2819 
2820 		if (virtio_intr_enable(dev) < 0) {
2821 			PMD_DRV_LOG(ERR, "interrupt enable failed");
2822 			return -EIO;
2823 		}
2824 	}
2825 
2826 	/*Notify the backend
2827 	 *Otherwise the tap backend might already stop its queue due to fullness.
2828 	 *vhost backend will have no chance to be waked up
2829 	 */
2830 	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2831 	if (hw->max_queue_pairs > 1) {
2832 		if (virtio_set_multiple_queues(dev, nb_queues) != 0)
2833 			return -EINVAL;
2834 	}
2835 
2836 	PMD_INIT_LOG(DEBUG, "nb_queues=%u (port=%u)", nb_queues,
2837 		     dev->data->port_id);
2838 
2839 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2840 		vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2841 		/* Flush the old packets */
2842 		virtqueue_rxvq_flush(vq);
2843 		virtqueue_notify(vq);
2844 	}
2845 
2846 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2847 		vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2848 		virtqueue_notify(vq);
2849 	}
2850 
2851 	PMD_INIT_LOG(DEBUG, "Notified backend at initialization (port=%u)",
2852 		     dev->data->port_id);
2853 
2854 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2855 		vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2856 		VIRTQUEUE_DUMP(vq);
2857 	}
2858 
2859 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2860 		vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2861 		VIRTQUEUE_DUMP(vq);
2862 	}
2863 
2864 	set_rxtx_funcs(dev);
2865 	hw->started = 1;
2866 
2867 	/* Initialize Link state */
2868 	virtio_dev_link_update(dev, 0);
2869 
2870 	return 0;
2871 }
2872 
2873 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
2874 {
2875 	struct virtio_hw *hw = dev->data->dev_private;
2876 	uint16_t nr_vq = virtio_get_nr_vq(hw);
2877 	const char *type __rte_unused;
2878 	unsigned int i, mbuf_num = 0;
2879 	struct virtqueue *vq;
2880 	struct rte_mbuf *buf;
2881 	int queue_type;
2882 
2883 	if (hw->vqs == NULL)
2884 		return;
2885 
2886 	for (i = 0; i < nr_vq; i++) {
2887 		vq = hw->vqs[i];
2888 		if (!vq)
2889 			continue;
2890 
2891 		queue_type = virtio_get_queue_type(hw, i);
2892 		if (queue_type == VTNET_RQ)
2893 			type = "rxq";
2894 		else if (queue_type == VTNET_TQ)
2895 			type = "txq";
2896 		else
2897 			continue;
2898 
2899 		PMD_INIT_LOG(DEBUG,
2900 			"Before freeing %s[%d] used and unused buf",
2901 			type, i);
2902 		VIRTQUEUE_DUMP(vq);
2903 
2904 		while ((buf = virtqueue_detach_unused(vq)) != NULL) {
2905 			rte_pktmbuf_free(buf);
2906 			mbuf_num++;
2907 		}
2908 
2909 		PMD_INIT_LOG(DEBUG,
2910 			"After freeing %s[%d] used and unused buf",
2911 			type, i);
2912 		VIRTQUEUE_DUMP(vq);
2913 	}
2914 
2915 	PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
2916 }
2917 
2918 static void
2919 virtio_tx_completed_cleanup(struct rte_eth_dev *dev)
2920 {
2921 	struct virtio_hw *hw = dev->data->dev_private;
2922 	struct virtqueue *vq;
2923 	int qidx;
2924 	void (*xmit_cleanup)(struct virtqueue *vq, uint16_t nb_used);
2925 
2926 	if (virtio_with_packed_queue(hw)) {
2927 		if (hw->use_vec_tx)
2928 			xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2929 		else if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER))
2930 			xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2931 		else
2932 			xmit_cleanup = &virtio_xmit_cleanup_normal_packed;
2933 	} else {
2934 		if (hw->use_inorder_tx)
2935 			xmit_cleanup = &virtio_xmit_cleanup_inorder;
2936 		else
2937 			xmit_cleanup = &virtio_xmit_cleanup;
2938 	}
2939 
2940 	for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) {
2941 		vq = hw->vqs[2 * qidx + VTNET_SQ_TQ_QUEUE_IDX];
2942 		if (vq != NULL)
2943 			xmit_cleanup(vq, virtqueue_nused(vq));
2944 	}
2945 }
2946 
2947 /*
2948  * Stop device: disable interrupt and mark link down
2949  */
2950 int
2951 virtio_dev_stop(struct rte_eth_dev *dev)
2952 {
2953 	struct virtio_hw *hw = dev->data->dev_private;
2954 	struct rte_eth_link link;
2955 	struct rte_eth_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
2956 
2957 	PMD_INIT_LOG(DEBUG, "stop");
2958 	dev->data->dev_started = 0;
2959 
2960 	rte_spinlock_lock(&hw->state_lock);
2961 	if (!hw->started)
2962 		goto out_unlock;
2963 	hw->started = 0;
2964 
2965 	virtio_tx_completed_cleanup(dev);
2966 
2967 	if (intr_conf->lsc || intr_conf->rxq) {
2968 		virtio_intr_disable(dev);
2969 
2970 		/* Reset interrupt callback  */
2971 		if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
2972 			rte_intr_callback_unregister(dev->intr_handle,
2973 						     virtio_interrupt_handler,
2974 						     dev);
2975 		}
2976 	}
2977 
2978 	memset(&link, 0, sizeof(link));
2979 	rte_eth_linkstatus_set(dev, &link);
2980 out_unlock:
2981 	rte_spinlock_unlock(&hw->state_lock);
2982 
2983 	return 0;
2984 }
2985 
2986 static int
2987 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
2988 {
2989 	struct rte_eth_link link;
2990 	uint16_t status;
2991 	struct virtio_hw *hw = dev->data->dev_private;
2992 
2993 	memset(&link, 0, sizeof(link));
2994 	link.link_duplex = hw->duplex;
2995 	link.link_speed  = hw->speed;
2996 	link.link_autoneg = RTE_ETH_LINK_AUTONEG;
2997 
2998 	if (!hw->started) {
2999 		link.link_status = RTE_ETH_LINK_DOWN;
3000 		link.link_speed = RTE_ETH_SPEED_NUM_NONE;
3001 	} else if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
3002 		PMD_INIT_LOG(DEBUG, "Get link status from hw");
3003 		virtio_read_dev_config(hw,
3004 				offsetof(struct virtio_net_config, status),
3005 				&status, sizeof(status));
3006 		if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
3007 			link.link_status = RTE_ETH_LINK_DOWN;
3008 			link.link_speed = RTE_ETH_SPEED_NUM_NONE;
3009 			PMD_INIT_LOG(DEBUG, "Port %d is down",
3010 				     dev->data->port_id);
3011 		} else {
3012 			link.link_status = RTE_ETH_LINK_UP;
3013 			if (hw->get_speed_via_feat)
3014 				virtio_get_speed_duplex(dev, &link);
3015 			PMD_INIT_LOG(DEBUG, "Port %d is up",
3016 				     dev->data->port_id);
3017 		}
3018 	} else {
3019 		link.link_status = RTE_ETH_LINK_UP;
3020 		if (hw->get_speed_via_feat)
3021 			virtio_get_speed_duplex(dev, &link);
3022 	}
3023 
3024 	return rte_eth_linkstatus_set(dev, &link);
3025 }
3026 
3027 static int
3028 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
3029 {
3030 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
3031 	struct virtio_hw *hw = dev->data->dev_private;
3032 	uint64_t offloads = rxmode->offloads;
3033 
3034 	if (mask & RTE_ETH_VLAN_FILTER_MASK) {
3035 		if ((offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER) &&
3036 				!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
3037 
3038 			PMD_DRV_LOG(NOTICE,
3039 				"vlan filtering not available on this host");
3040 
3041 			return -ENOTSUP;
3042 		}
3043 	}
3044 
3045 	if (mask & RTE_ETH_VLAN_STRIP_MASK)
3046 		hw->vlan_strip = !!(offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
3047 
3048 	return 0;
3049 }
3050 
3051 static int
3052 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
3053 {
3054 	uint64_t tso_mask, host_features;
3055 	uint32_t rss_hash_types = 0;
3056 	struct virtio_hw *hw = dev->data->dev_private;
3057 	dev_info->speed_capa = virtio_dev_speed_capa_get(hw->speed);
3058 
3059 	dev_info->max_rx_queues =
3060 		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
3061 	dev_info->max_tx_queues =
3062 		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
3063 	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
3064 	dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
3065 	dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
3066 	dev_info->max_mtu = hw->max_mtu;
3067 
3068 	host_features = VIRTIO_OPS(hw)->get_features(hw);
3069 	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
3070 	if (host_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
3071 		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_SCATTER;
3072 	if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
3073 		dev_info->rx_offload_capa |=
3074 			RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
3075 			RTE_ETH_RX_OFFLOAD_UDP_CKSUM;
3076 	}
3077 	if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
3078 		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
3079 	tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
3080 		(1ULL << VIRTIO_NET_F_GUEST_TSO6);
3081 	if ((host_features & tso_mask) == tso_mask)
3082 		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
3083 
3084 	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
3085 				    RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
3086 	if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
3087 		dev_info->tx_offload_capa |=
3088 			RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
3089 			RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
3090 	}
3091 	tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
3092 		(1ULL << VIRTIO_NET_F_HOST_TSO6);
3093 	if ((host_features & tso_mask) == tso_mask)
3094 		dev_info->tx_offload_capa |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
3095 
3096 	if (host_features & (1ULL << VIRTIO_NET_F_RSS)) {
3097 		virtio_dev_get_rss_config(hw, &rss_hash_types);
3098 		dev_info->hash_key_size = VIRTIO_NET_RSS_KEY_SIZE;
3099 		dev_info->reta_size = VIRTIO_NET_RSS_RETA_SIZE;
3100 		dev_info->flow_type_rss_offloads =
3101 			virtio_to_ethdev_rss_offloads(rss_hash_types);
3102 	} else {
3103 		dev_info->hash_key_size = 0;
3104 		dev_info->reta_size = 0;
3105 		dev_info->flow_type_rss_offloads = 0;
3106 	}
3107 
3108 	if (host_features & (1ULL << VIRTIO_F_RING_PACKED)) {
3109 		/*
3110 		 * According to 2.7 Packed Virtqueues,
3111 		 * 2.7.10.1 Structure Size and Alignment:
3112 		 * The Queue Size value does not have to be a power of 2.
3113 		 */
3114 		dev_info->rx_desc_lim.nb_max = UINT16_MAX;
3115 		dev_info->tx_desc_lim.nb_max = UINT16_MAX;
3116 	} else {
3117 		/*
3118 		 * According to 2.6 Split Virtqueues:
3119 		 * Queue Size value is always a power of 2. The maximum Queue
3120 		 * Size value is 32768.
3121 		 */
3122 		dev_info->rx_desc_lim.nb_max = 32768;
3123 		dev_info->tx_desc_lim.nb_max = 32768;
3124 	}
3125 	/*
3126 	 * Actual minimum is not the same for virtqueues of different kinds,
3127 	 * but to avoid tangling the code with separate branches, rely on
3128 	 * default thresholds since desc number must be at least of their size.
3129 	 */
3130 	dev_info->rx_desc_lim.nb_min = RTE_MAX(DEFAULT_RX_FREE_THRESH,
3131 					       RTE_VIRTIO_VPMD_RX_REARM_THRESH);
3132 	dev_info->tx_desc_lim.nb_min = DEFAULT_TX_FREE_THRESH;
3133 	dev_info->rx_desc_lim.nb_align = 1;
3134 	dev_info->tx_desc_lim.nb_align = 1;
3135 
3136 	return 0;
3137 }
3138 
3139 /*
3140  * It enables testpmd to collect per queue stats.
3141  */
3142 static int
3143 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
3144 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
3145 __rte_unused uint8_t is_rx)
3146 {
3147 	return 0;
3148 }
3149 
3150 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_init, init, NOTICE);
3151 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_driver, driver, NOTICE);
3152