1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4 
5 #include <sys/queue.h>
6 #include <stdio.h>
7 #include <errno.h>
8 #include <stdint.h>
9 #include <string.h>
10 #include <unistd.h>
11 #include <stdarg.h>
12 #include <fcntl.h>
13 #include <inttypes.h>
14 #include <rte_byteorder.h>
15 #include <rte_common.h>
16 #include <rte_cycles.h>
17 
18 #include <rte_interrupts.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_pci.h>
22 #include <rte_bus_pci.h>
23 #include <rte_branch_prediction.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_eal.h>
27 #include <rte_alarm.h>
28 #include <rte_ether.h>
29 #include <ethdev_driver.h>
30 #include <ethdev_pci.h>
31 #include <rte_string_fns.h>
32 #include <rte_malloc.h>
33 #include <rte_dev.h>
34 
35 #include "base/vmxnet3_defs.h"
36 
37 #include "vmxnet3_ring.h"
38 #include "vmxnet3_logs.h"
39 #include "vmxnet3_ethdev.h"
40 
41 #define	VMXNET3_TX_MAX_SEG	UINT8_MAX
42 
43 #define VMXNET3_TX_OFFLOAD_CAP		\
44 	(RTE_ETH_TX_OFFLOAD_VLAN_INSERT |	\
45 	 RTE_ETH_TX_OFFLOAD_TCP_CKSUM |	\
46 	 RTE_ETH_TX_OFFLOAD_UDP_CKSUM |	\
47 	 RTE_ETH_TX_OFFLOAD_TCP_TSO |	\
48 	 RTE_ETH_TX_OFFLOAD_MULTI_SEGS)
49 
50 #define VMXNET3_RX_OFFLOAD_CAP		\
51 	(RTE_ETH_RX_OFFLOAD_VLAN_STRIP |	\
52 	 RTE_ETH_RX_OFFLOAD_VLAN_FILTER |   \
53 	 RTE_ETH_RX_OFFLOAD_SCATTER |	\
54 	 RTE_ETH_RX_OFFLOAD_UDP_CKSUM |	\
55 	 RTE_ETH_RX_OFFLOAD_TCP_CKSUM |	\
56 	 RTE_ETH_RX_OFFLOAD_TCP_LRO |	\
57 	 RTE_ETH_RX_OFFLOAD_RSS_HASH)
58 
59 int vmxnet3_segs_dynfield_offset = -1;
60 
61 static int eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev);
62 static int eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev);
63 static int vmxnet3_dev_configure(struct rte_eth_dev *dev);
64 static int vmxnet3_dev_start(struct rte_eth_dev *dev);
65 static int vmxnet3_dev_stop(struct rte_eth_dev *dev);
66 static int vmxnet3_dev_close(struct rte_eth_dev *dev);
67 static int vmxnet3_dev_reset(struct rte_eth_dev *dev);
68 static void vmxnet3_dev_set_rxmode(struct vmxnet3_hw *hw, uint32_t feature, int set);
69 static int vmxnet3_dev_promiscuous_enable(struct rte_eth_dev *dev);
70 static int vmxnet3_dev_promiscuous_disable(struct rte_eth_dev *dev);
71 static int vmxnet3_dev_allmulticast_enable(struct rte_eth_dev *dev);
72 static int vmxnet3_dev_allmulticast_disable(struct rte_eth_dev *dev);
73 static int __vmxnet3_dev_link_update(struct rte_eth_dev *dev,
74 				     int wait_to_complete);
75 static int vmxnet3_dev_link_update(struct rte_eth_dev *dev,
76 				   int wait_to_complete);
77 static void vmxnet3_hw_stats_save(struct vmxnet3_hw *hw);
78 static int vmxnet3_dev_stats_get(struct rte_eth_dev *dev,
79 				  struct rte_eth_stats *stats);
80 static int vmxnet3_dev_stats_reset(struct rte_eth_dev *dev);
81 static int vmxnet3_dev_xstats_get_names(struct rte_eth_dev *dev,
82 					struct rte_eth_xstat_name *xstats,
83 					unsigned int n);
84 static int vmxnet3_dev_xstats_get(struct rte_eth_dev *dev,
85 				  struct rte_eth_xstat *xstats, unsigned int n);
86 static int vmxnet3_dev_info_get(struct rte_eth_dev *dev,
87 				struct rte_eth_dev_info *dev_info);
88 static int vmxnet3_hw_ver_get(struct rte_eth_dev *dev,
89 			      char *fw_version, size_t fw_size);
90 static const uint32_t *
91 vmxnet3_dev_supported_ptypes_get(struct rte_eth_dev *dev);
92 static int vmxnet3_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
93 static int vmxnet3_dev_vlan_filter_set(struct rte_eth_dev *dev,
94 				       uint16_t vid, int on);
95 static int vmxnet3_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
96 static int vmxnet3_mac_addr_set(struct rte_eth_dev *dev,
97 				 struct rte_ether_addr *mac_addr);
98 static void vmxnet3_process_events(struct rte_eth_dev *dev);
99 static void vmxnet3_interrupt_handler(void *param);
100 static int
101 vmxnet3_rss_reta_update(struct rte_eth_dev *dev,
102 			struct rte_eth_rss_reta_entry64 *reta_conf,
103 			uint16_t reta_size);
104 static int
105 vmxnet3_rss_reta_query(struct rte_eth_dev *dev,
106 		       struct rte_eth_rss_reta_entry64 *reta_conf,
107 		       uint16_t reta_size);
108 
109 static int vmxnet3_dev_rx_queue_intr_enable(struct rte_eth_dev *dev,
110 						uint16_t queue_id);
111 static int vmxnet3_dev_rx_queue_intr_disable(struct rte_eth_dev *dev,
112 						uint16_t queue_id);
113 
114 /*
115  * The set of PCI devices this driver supports
116  */
117 #define VMWARE_PCI_VENDOR_ID 0x15AD
118 #define VMWARE_DEV_ID_VMXNET3 0x07B0
119 static const struct rte_pci_id pci_id_vmxnet3_map[] = {
120 	{ RTE_PCI_DEVICE(VMWARE_PCI_VENDOR_ID, VMWARE_DEV_ID_VMXNET3) },
121 	{ .vendor_id = 0, /* sentinel */ },
122 };
123 
124 static const struct eth_dev_ops vmxnet3_eth_dev_ops = {
125 	.dev_configure        = vmxnet3_dev_configure,
126 	.dev_start            = vmxnet3_dev_start,
127 	.dev_stop             = vmxnet3_dev_stop,
128 	.dev_close            = vmxnet3_dev_close,
129 	.dev_reset            = vmxnet3_dev_reset,
130 	.link_update          = vmxnet3_dev_link_update,
131 	.promiscuous_enable   = vmxnet3_dev_promiscuous_enable,
132 	.promiscuous_disable  = vmxnet3_dev_promiscuous_disable,
133 	.allmulticast_enable  = vmxnet3_dev_allmulticast_enable,
134 	.allmulticast_disable = vmxnet3_dev_allmulticast_disable,
135 	.mac_addr_set         = vmxnet3_mac_addr_set,
136 	.mtu_set              = vmxnet3_dev_mtu_set,
137 	.stats_get            = vmxnet3_dev_stats_get,
138 	.stats_reset          = vmxnet3_dev_stats_reset,
139 	.xstats_get           = vmxnet3_dev_xstats_get,
140 	.xstats_get_names     = vmxnet3_dev_xstats_get_names,
141 	.dev_infos_get        = vmxnet3_dev_info_get,
142 	.fw_version_get       = vmxnet3_hw_ver_get,
143 	.dev_supported_ptypes_get = vmxnet3_dev_supported_ptypes_get,
144 	.vlan_filter_set      = vmxnet3_dev_vlan_filter_set,
145 	.vlan_offload_set     = vmxnet3_dev_vlan_offload_set,
146 	.rx_queue_setup       = vmxnet3_dev_rx_queue_setup,
147 	.rx_queue_release     = vmxnet3_dev_rx_queue_release,
148 	.rx_queue_intr_enable = vmxnet3_dev_rx_queue_intr_enable,
149 	.rx_queue_intr_disable = vmxnet3_dev_rx_queue_intr_disable,
150 	.tx_queue_setup       = vmxnet3_dev_tx_queue_setup,
151 	.tx_queue_release     = vmxnet3_dev_tx_queue_release,
152 	.reta_update          = vmxnet3_rss_reta_update,
153 	.reta_query           = vmxnet3_rss_reta_query,
154 };
155 
156 struct vmxnet3_xstats_name_off {
157 	char name[RTE_ETH_XSTATS_NAME_SIZE];
158 	unsigned int offset;
159 };
160 
161 /* tx_qX_ is prepended to the name string here */
162 static const struct vmxnet3_xstats_name_off vmxnet3_txq_stat_strings[] = {
163 	{"drop_total",         offsetof(struct vmxnet3_txq_stats, drop_total)},
164 	{"drop_too_many_segs", offsetof(struct vmxnet3_txq_stats, drop_too_many_segs)},
165 	{"drop_tso",           offsetof(struct vmxnet3_txq_stats, drop_tso)},
166 	{"tx_ring_full",       offsetof(struct vmxnet3_txq_stats, tx_ring_full)},
167 };
168 
169 /* rx_qX_ is prepended to the name string here */
170 static const struct vmxnet3_xstats_name_off vmxnet3_rxq_stat_strings[] = {
171 	{"drop_total",           offsetof(struct vmxnet3_rxq_stats, drop_total)},
172 	{"drop_err",             offsetof(struct vmxnet3_rxq_stats, drop_err)},
173 	{"drop_fcs",             offsetof(struct vmxnet3_rxq_stats, drop_fcs)},
174 	{"rx_buf_alloc_failure", offsetof(struct vmxnet3_rxq_stats, rx_buf_alloc_failure)},
175 };
176 
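/*
 * Reserve (or look up) an IOVA-contiguous memzone named "eth_p<port>_<post_string>",
 * e.g. "eth_p0_shared". When reuse is set, an already existing zone with that name
 * is returned as-is; otherwise any existing zone is freed and a fresh one reserved.
 */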
177 static const struct rte_memzone *
178 gpa_zone_reserve(struct rte_eth_dev *dev, uint32_t size,
179 		 const char *post_string, int socket_id,
180 		 uint16_t align, bool reuse)
181 {
182 	char z_name[RTE_MEMZONE_NAMESIZE];
183 	const struct rte_memzone *mz;
184 
185 	snprintf(z_name, sizeof(z_name), "eth_p%d_%s",
186 			dev->data->port_id, post_string);
187 
188 	mz = rte_memzone_lookup(z_name);
189 	if (!reuse) {
190 		if (mz)
191 			rte_memzone_free(mz);
192 		return rte_memzone_reserve_aligned(z_name, size, socket_id,
193 				RTE_MEMZONE_IOVA_CONTIG, align);
194 	}
195 
196 	if (mz)
197 		return mz;
198 
199 	return rte_memzone_reserve_aligned(z_name, size, socket_id,
200 			RTE_MEMZONE_IOVA_CONTIG, align);
201 }
202 
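/*
 * Each interrupt vector has its own IMR register in BAR0, spaced 8 bytes apart;
 * writing 0 unmasks (enables) the vector, writing 1 masks (disables) it.
 */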
203 /*
204  * Enable the given interrupt
205  */
206 static void
207 vmxnet3_enable_intr(struct vmxnet3_hw *hw, unsigned int intr_idx)
208 {
209 	PMD_INIT_FUNC_TRACE();
210 	VMXNET3_WRITE_BAR0_REG(hw, VMXNET3_REG_IMR + intr_idx * 8, 0);
211 }
212 
213 /*
214  * Disable the given interrupt
215  */
216 static void
217 vmxnet3_disable_intr(struct vmxnet3_hw *hw, unsigned int intr_idx)
218 {
219 	PMD_INIT_FUNC_TRACE();
220 	VMXNET3_WRITE_BAR0_REG(hw, VMXNET3_REG_IMR + intr_idx * 8, 1);
221 }
222 
223 /*
224  * Simple helper to get intrCtrl and eventIntrIdx based on config and hw version
225  */
226 static void
227 vmxnet3_get_intr_ctrl_ev(struct vmxnet3_hw *hw,
228 			 uint8 **out_eventIntrIdx,
229 			 uint32 **out_intrCtrl)
230 {
231 
232 	if (VMXNET3_VERSION_GE_6(hw) && hw->queuesExtEnabled) {
233 		*out_eventIntrIdx = &hw->shared->devReadExt.intrConfExt.eventIntrIdx;
234 		*out_intrCtrl = &hw->shared->devReadExt.intrConfExt.intrCtrl;
235 	} else {
236 		*out_eventIntrIdx = &hw->shared->devRead.intrConf.eventIntrIdx;
237 		*out_intrCtrl = &hw->shared->devRead.intrConf.intrCtrl;
238 	}
239 }
240 
241 /*
242  * Disable all intrs used by the device
243  */
244 static void
245 vmxnet3_disable_all_intrs(struct vmxnet3_hw *hw)
246 {
247 	int i;
248 	uint8 *eventIntrIdx;
249 	uint32 *intrCtrl;
250 
251 	PMD_INIT_FUNC_TRACE();
252 	vmxnet3_get_intr_ctrl_ev(hw, &eventIntrIdx, &intrCtrl);
253 
254 	*intrCtrl |= rte_cpu_to_le_32(VMXNET3_IC_DISABLE_ALL);
255 
256 	for (i = 0; i < hw->intr.num_intrs; i++)
257 		vmxnet3_disable_intr(hw, i);
258 }
259 
260 /*
261  * Enable all intrs used by the device
262  */
263 static void
264 vmxnet3_enable_all_intrs(struct vmxnet3_hw *hw)
265 {
266 	uint8 *eventIntrIdx;
267 	uint32 *intrCtrl;
268 
269 	PMD_INIT_FUNC_TRACE();
270 	vmxnet3_get_intr_ctrl_ev(hw, &eventIntrIdx, &intrCtrl);
271 
272 	*intrCtrl &= rte_cpu_to_le_32(~VMXNET3_IC_DISABLE_ALL);
273 
274 	if (hw->intr.lsc_only) {
275 		vmxnet3_enable_intr(hw, *eventIntrIdx);
276 	} else {
277 		int i;
278 
279 		for (i = 0; i < hw->intr.num_intrs; i++)
280 			vmxnet3_enable_intr(hw, i);
281 	}
282 }
283 
284 /*
285  * Gets tx data ring descriptor size.
286  */
287 static uint16_t
288 eth_vmxnet3_txdata_get(struct vmxnet3_hw *hw)
289 {
290 	uint16 txdata_desc_size;
291 
292 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
293 			       VMXNET3_CMD_GET_TXDATA_DESC_SIZE);
294 	txdata_desc_size = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
295 
296 	return (txdata_desc_size < VMXNET3_TXDATA_DESC_MIN_SIZE ||
297 		txdata_desc_size > VMXNET3_TXDATA_DESC_MAX_SIZE ||
298 		txdata_desc_size & VMXNET3_TXDATA_DESC_SIZE_MASK) ?
299 		sizeof(struct Vmxnet3_TxDataDesc) : txdata_desc_size;
300 }
301 
302 /*
303  * It returns 0 on success.
304  */
305 static int
306 eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev)
307 {
308 	struct rte_pci_device *pci_dev;
309 	struct vmxnet3_hw *hw = eth_dev->data->dev_private;
310 	uint32_t mac_hi, mac_lo, ver;
311 	struct rte_eth_link link;
312 	static const struct rte_mbuf_dynfield vmxnet3_segs_dynfield_desc = {
313 		.name = VMXNET3_SEGS_DYNFIELD_NAME,
314 		.size = sizeof(vmxnet3_segs_dynfield_t),
315 		.align = __alignof__(vmxnet3_segs_dynfield_t),
316 	};
317 
318 	PMD_INIT_FUNC_TRACE();
319 
320 	eth_dev->dev_ops = &vmxnet3_eth_dev_ops;
321 	eth_dev->rx_pkt_burst = &vmxnet3_recv_pkts;
322 	eth_dev->tx_pkt_burst = &vmxnet3_xmit_pkts;
323 	eth_dev->tx_pkt_prepare = vmxnet3_prep_pkts;
324 	eth_dev->rx_queue_count = vmxnet3_dev_rx_queue_count;
325 	pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
326 
327 	/* extra mbuf field is required to guess MSS */
328 	vmxnet3_segs_dynfield_offset =
329 		rte_mbuf_dynfield_register(&vmxnet3_segs_dynfield_desc);
330 	if (vmxnet3_segs_dynfield_offset < 0) {
331 		PMD_INIT_LOG(ERR, "Cannot register mbuf field.");
332 		return -rte_errno;
333 	}
334 
335 	/*
336 	 * for secondary processes, we don't initialize any further as primary
337 	 * has already done this work.
338 	 */
339 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
340 		return 0;
341 
342 	rte_eth_copy_pci_info(eth_dev, pci_dev);
343 	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
344 
345 	/* Vendor and Device ID need to be set before init of shared code */
346 	hw->device_id = pci_dev->id.device_id;
347 	hw->vendor_id = pci_dev->id.vendor_id;
348 	hw->hw_addr0 = (void *)pci_dev->mem_resource[0].addr;
349 	hw->hw_addr1 = (void *)pci_dev->mem_resource[1].addr;
350 
351 	hw->num_rx_queues = 1;
352 	hw->num_tx_queues = 1;
353 	hw->bufs_per_pkt = 1;
354 
355 	/* Check h/w version compatibility with driver. */
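	/* VRRS is a bitmask of the revisions the device supports; pick the highest
	 * revision this driver knows (REV_6 down to REV_1) and select it by writing
	 * that single bit back to VRRS.
	 */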
356 	ver = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_VRRS);
357 
358 	if (ver & (1 << VMXNET3_REV_6)) {
359 		VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_VRRS,
360 				       1 << VMXNET3_REV_6);
361 		hw->version = VMXNET3_REV_6 + 1;
362 	} else if (ver & (1 << VMXNET3_REV_5)) {
363 		VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_VRRS,
364 				       1 << VMXNET3_REV_5);
365 		hw->version = VMXNET3_REV_5 + 1;
366 	} else if (ver & (1 << VMXNET3_REV_4)) {
367 		VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_VRRS,
368 				       1 << VMXNET3_REV_4);
369 		hw->version = VMXNET3_REV_4 + 1;
370 	} else if (ver & (1 << VMXNET3_REV_3)) {
371 		VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_VRRS,
372 				       1 << VMXNET3_REV_3);
373 		hw->version = VMXNET3_REV_3 + 1;
374 	} else if (ver & (1 << VMXNET3_REV_2)) {
375 		VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_VRRS,
376 				       1 << VMXNET3_REV_2);
377 		hw->version = VMXNET3_REV_2 + 1;
378 	} else if (ver & (1 << VMXNET3_REV_1)) {
379 		VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_VRRS,
380 				       1 << VMXNET3_REV_1);
381 		hw->version = VMXNET3_REV_1 + 1;
382 	} else {
383 		PMD_INIT_LOG(ERR, "Incompatible hardware version: %d", ver);
384 		return -EIO;
385 	}
386 
387 	PMD_INIT_LOG(INFO, "Using device v%d", hw->version);
388 
389 	/* Check UPT version compatibility with driver. */
390 	ver = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_UVRS);
391 	PMD_INIT_LOG(DEBUG, "UPT hardware version : %d", ver);
392 	if (ver & 0x1)
393 		VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_UVRS, 1);
394 	else {
395 		PMD_INIT_LOG(ERR, "Incompatible UPT version.");
396 		return -EIO;
397 	}
398 
399 	/* Getting MAC Address */
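	/* MACL holds the first four bytes of the permanent MAC address, MACH the remaining two */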
400 	mac_lo = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_MACL);
401 	mac_hi = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_MACH);
402 	memcpy(hw->perm_addr, &mac_lo, 4);
403 	memcpy(hw->perm_addr + 4, &mac_hi, 2);
404 
405 	/* Allocate memory for storing MAC addresses */
406 	eth_dev->data->mac_addrs = rte_zmalloc("vmxnet3", RTE_ETHER_ADDR_LEN *
407 					       VMXNET3_MAX_MAC_ADDRS, 0);
408 	if (eth_dev->data->mac_addrs == NULL) {
409 		PMD_INIT_LOG(ERR,
410 			     "Failed to allocate %d bytes needed to store MAC addresses",
411 			     RTE_ETHER_ADDR_LEN * VMXNET3_MAX_MAC_ADDRS);
412 		return -ENOMEM;
413 	}
414 	/* Copy the permanent MAC address */
415 	rte_ether_addr_copy((struct rte_ether_addr *)hw->perm_addr,
416 			&eth_dev->data->mac_addrs[0]);
417 
418 	PMD_INIT_LOG(DEBUG, "MAC Address : " RTE_ETHER_ADDR_PRT_FMT,
419 		     hw->perm_addr[0], hw->perm_addr[1], hw->perm_addr[2],
420 		     hw->perm_addr[3], hw->perm_addr[4], hw->perm_addr[5]);
421 
422 	/* Put device in Quiesce Mode */
423 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_QUIESCE_DEV);
424 
425 	/* allow untagged pkts */
426 	VMXNET3_SET_VFTABLE_ENTRY(hw->shadow_vfta, 0);
427 
428 	hw->txdata_desc_size = VMXNET3_VERSION_GE_3(hw) ?
429 		eth_vmxnet3_txdata_get(hw) : sizeof(struct Vmxnet3_TxDataDesc);
430 
431 	hw->rxdata_desc_size = VMXNET3_VERSION_GE_3(hw) ?
432 		VMXNET3_DEF_RXDATA_DESC_SIZE : 0;
433 	RTE_ASSERT((hw->rxdata_desc_size & ~VMXNET3_RXDATA_DESC_SIZE_MASK) ==
434 		   hw->rxdata_desc_size);
435 
436 	/* clear shadow stats */
437 	memset(hw->saved_tx_stats, 0, sizeof(hw->saved_tx_stats));
438 	memset(hw->saved_rx_stats, 0, sizeof(hw->saved_rx_stats));
439 
440 	/* clear snapshot stats */
441 	memset(hw->snapshot_tx_stats, 0, sizeof(hw->snapshot_tx_stats));
442 	memset(hw->snapshot_rx_stats, 0, sizeof(hw->snapshot_rx_stats));
443 
444 	/* set the initial link status */
445 	memset(&link, 0, sizeof(link));
446 	link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
447 	link.link_speed = RTE_ETH_SPEED_NUM_10G;
448 	link.link_autoneg = RTE_ETH_LINK_FIXED;
449 	rte_eth_linkstatus_set(eth_dev, &link);
450 
451 	return 0;
452 }
453 
454 static int
455 eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev)
456 {
457 	struct vmxnet3_hw *hw = eth_dev->data->dev_private;
458 
459 	PMD_INIT_FUNC_TRACE();
460 
461 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
462 		return 0;
463 
464 	if (hw->adapter_stopped == 0) {
465 		PMD_INIT_LOG(DEBUG, "Device has not been closed.");
466 		return -EBUSY;
467 	}
468 
469 	return 0;
470 }
471 
472 static int eth_vmxnet3_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
473 	struct rte_pci_device *pci_dev)
474 {
475 	return rte_eth_dev_pci_generic_probe(pci_dev,
476 		sizeof(struct vmxnet3_hw), eth_vmxnet3_dev_init);
477 }
478 
479 static int eth_vmxnet3_pci_remove(struct rte_pci_device *pci_dev)
480 {
481 	return rte_eth_dev_pci_generic_remove(pci_dev, eth_vmxnet3_dev_uninit);
482 }
483 
484 static struct rte_pci_driver rte_vmxnet3_pmd = {
485 	.id_table = pci_id_vmxnet3_map,
486 	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
487 	.probe = eth_vmxnet3_pci_probe,
488 	.remove = eth_vmxnet3_pci_remove,
489 };
490 
491 static void
492 vmxnet3_alloc_intr_resources(struct rte_eth_dev *dev)
493 {
494 	struct vmxnet3_hw *hw = dev->data->dev_private;
495 	uint32_t cfg;
496 	int nvec = 1; /* for link event */
497 
498 	/* intr settings */
499 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
500 			       VMXNET3_CMD_GET_CONF_INTR);
501 	cfg = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
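	/* The two low bits of cfg carry the interrupt type (VMXNET3_IT_*) and the
	 * next two bits the mask mode (VMXNET3_IMM_*).
	 */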
502 	hw->intr.type = cfg & 0x3;
503 	hw->intr.mask_mode = (cfg >> 2) & 0x3;
504 
505 	if (hw->intr.type == VMXNET3_IT_AUTO)
506 		hw->intr.type = VMXNET3_IT_MSIX;
507 
508 	if (hw->intr.type == VMXNET3_IT_MSIX) {
509 		/* only support shared tx/rx intr */
510 		if (hw->num_tx_queues != hw->num_rx_queues)
511 			goto msix_err;
512 
513 		nvec += hw->num_rx_queues;
514 		hw->intr.num_intrs = nvec;
515 		return;
516 	}
517 
518 msix_err:
519 	/* the tx/rx queue interrupt will be disabled */
520 	hw->intr.num_intrs = 2;
521 	hw->intr.lsc_only = TRUE;
522 	PMD_INIT_LOG(INFO, "Enabled MSI-X with %d vectors", hw->intr.num_intrs);
523 }
524 
525 static int
526 vmxnet3_dev_configure(struct rte_eth_dev *dev)
527 {
528 	const struct rte_memzone *mz;
529 	struct vmxnet3_hw *hw = dev->data->dev_private;
530 	size_t size;
531 
532 	PMD_INIT_FUNC_TRACE();
533 
534 	if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG)
535 		dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;
536 
537 	if (!VMXNET3_VERSION_GE_6(hw)) {
538 		if (!rte_is_power_of_2(dev->data->nb_rx_queues)) {
539 			PMD_INIT_LOG(ERR,
540 				     "ERROR: Number of rx queues not power of 2");
541 			return -EINVAL;
542 		}
543 	}
544 
545 	/* At this point, the number of queues requested has already
546 	 * been validated against the dev_info max queues by the ethdev layer
547 	 */
548 	if (dev->data->nb_rx_queues > VMXNET3_MAX_RX_QUEUES ||
549 	    dev->data->nb_tx_queues > VMXNET3_MAX_TX_QUEUES) {
550 		hw->queuesExtEnabled = 1;
551 	} else {
552 		hw->queuesExtEnabled = 0;
553 	}
554 
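	/*
	 * The queue descriptors live in one IOVA-contiguous zone: all Tx queue
	 * descriptors first, followed by all Rx queue descriptors (see the
	 * tqd_start/rqd_start assignments below).
	 */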
555 	size = dev->data->nb_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
556 		dev->data->nb_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc);
557 
558 	if (size > UINT16_MAX)
559 		return -EINVAL;
560 
561 	hw->num_rx_queues = (uint8_t)dev->data->nb_rx_queues;
562 	hw->num_tx_queues = (uint8_t)dev->data->nb_tx_queues;
563 
564 	/*
565 	 * Allocate a memzone for Vmxnet3_DriverShared - Vmxnet3_DSDevRead
566 	 * on current socket
567 	 */
568 	mz = gpa_zone_reserve(dev, sizeof(struct Vmxnet3_DriverShared),
569 			      "shared", rte_socket_id(), 8, 1);
570 
571 	if (mz == NULL) {
572 		PMD_INIT_LOG(ERR, "ERROR: Creating shared zone");
573 		return -ENOMEM;
574 	}
575 	memset(mz->addr, 0, mz->len);
576 
577 	hw->shared = mz->addr;
578 	hw->sharedPA = mz->iova;
579 
580 	/*
581 	 * Allocate a memzone for Vmxnet3_RxQueueDesc - Vmxnet3_TxQueueDesc
582 	 * on current socket.
583 	 *
584 	 * We cannot reuse this memzone from previous allocation as its size
585 	 * depends on the number of tx and rx queues, which could be different
586 	 * from one config to another.
587 	 */
588 	mz = gpa_zone_reserve(dev, size, "queuedesc", rte_socket_id(),
589 			      VMXNET3_QUEUE_DESC_ALIGN, 0);
590 	if (mz == NULL) {
591 		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
592 		return -ENOMEM;
593 	}
594 	memset(mz->addr, 0, mz->len);
595 
596 	hw->tqd_start = (Vmxnet3_TxQueueDesc *)mz->addr;
597 	hw->rqd_start = (Vmxnet3_RxQueueDesc *)(hw->tqd_start + hw->num_tx_queues);
598 
599 	hw->queueDescPA = mz->iova;
600 	hw->queue_desc_len = (uint16_t)size;
601 
602 	if (dev->data->dev_conf.rxmode.mq_mode == RTE_ETH_MQ_RX_RSS) {
603 		/* Allocate memory structure for UPT1_RSSConf and configure */
604 		mz = gpa_zone_reserve(dev, sizeof(struct VMXNET3_RSSConf),
605 				      "rss_conf", rte_socket_id(),
606 				      RTE_CACHE_LINE_SIZE, 1);
607 		if (mz == NULL) {
608 			PMD_INIT_LOG(ERR,
609 				     "ERROR: Creating rss_conf structure zone");
610 			return -ENOMEM;
611 		}
612 		memset(mz->addr, 0, mz->len);
613 
614 		hw->rss_conf = mz->addr;
615 		hw->rss_confPA = mz->iova;
616 	}
617 
618 	vmxnet3_alloc_intr_resources(dev);
619 
620 	return 0;
621 }
622 
623 static void
624 vmxnet3_write_mac(struct vmxnet3_hw *hw, const uint8_t *addr)
625 {
626 	uint32_t val;
627 
628 	PMD_INIT_LOG(DEBUG,
629 		     "Writing MAC Address : " RTE_ETHER_ADDR_PRT_FMT,
630 		     addr[0], addr[1], addr[2],
631 		     addr[3], addr[4], addr[5]);
632 
633 	memcpy(&val, addr, 4);
634 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_MACL, val);
635 
636 	memcpy(&val, addr + 4, 2);
637 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_MACH, val);
638 }
639 
640 /*
641  * Configure the hardware to generate MSI-X interrupts.
642  * If setting up MSI-X fails, the caller falls back to an LSC-only mode in
643  * which the Tx/Rx queue interrupts stay disabled.
644  *
645  * Returns 0 on success and a negative value otherwise.
646  */
647 static int
648 vmxnet3_configure_msix(struct rte_eth_dev *dev)
649 {
650 	struct vmxnet3_hw *hw = dev->data->dev_private;
651 	struct rte_intr_handle *intr_handle = dev->intr_handle;
652 	uint16_t intr_vector;
653 	int i;
654 
655 	hw->intr.event_intr_idx = 0;
656 
657 	/* only vfio-pci driver can support interrupt mode. */
658 	if (!rte_intr_cap_multiple(intr_handle) ||
659 	    dev->data->dev_conf.intr_conf.rxq == 0)
660 		return -1;
661 
662 	intr_vector = dev->data->nb_rx_queues;
663 	if (intr_vector > MAX_RX_QUEUES(hw)) {
664 		PMD_INIT_LOG(ERR, "At most %d intr queues supported",
665 			     MAX_RX_QUEUES(hw));
666 		return -ENOTSUP;
667 	}
668 
669 	if (rte_intr_efd_enable(intr_handle, intr_vector)) {
670 		PMD_INIT_LOG(ERR, "Failed to enable fastpath event fd");
671 		return -1;
672 	}
673 
674 	if (rte_intr_dp_is_en(intr_handle)) {
675 		if (rte_intr_vec_list_alloc(intr_handle, "intr_vec",
676 						   dev->data->nb_rx_queues)) {
677 			PMD_INIT_LOG(ERR, "Failed to allocate %d Rx queues intr_vec",
678 					dev->data->nb_rx_queues);
679 			rte_intr_efd_disable(intr_handle);
680 			return -ENOMEM;
681 		}
682 	}
683 
684 	if (!rte_intr_allow_others(intr_handle) &&
685 	    dev->data->dev_conf.intr_conf.lsc != 0) {
686 		PMD_INIT_LOG(ERR, "not enough intr vector to support both Rx interrupt and LSC");
687 		rte_intr_vec_list_free(intr_handle);
688 		rte_intr_efd_disable(intr_handle);
689 		return -1;
690 	}
691 
692 	/* if we cannot allocate one MSI-X vector per queue, don't enable
693 	 * interrupt mode.
694 	 */
695 	if (hw->intr.num_intrs !=
696 				(rte_intr_nb_efd_get(intr_handle) + 1)) {
697 		PMD_INIT_LOG(ERR, "Device configured with %d Rx intr vectors, expecting %d",
698 				hw->intr.num_intrs,
699 				rte_intr_nb_efd_get(intr_handle) + 1);
700 		rte_intr_vec_list_free(intr_handle);
701 		rte_intr_efd_disable(intr_handle);
702 		return -1;
703 	}
704 
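	/* Vector 0 is reserved for the event/link interrupt, so Rx queue i is
	 * mapped to MSI-X vector i + 1.
	 */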
705 	for (i = 0; i < dev->data->nb_rx_queues; i++)
706 		if (rte_intr_vec_list_index_set(intr_handle, i, i + 1))
707 			return -rte_errno;
708 
709 	for (i = 0; i < hw->intr.num_intrs; i++)
710 		hw->intr.mod_levels[i] = UPT1_IML_ADAPTIVE;
711 
712 	PMD_INIT_LOG(INFO, "intr type %u, mode %u, %u vectors allocated",
713 		    hw->intr.type, hw->intr.mask_mode, hw->intr.num_intrs);
714 
715 	return 0;
716 }
717 
718 static int
719 vmxnet3_dev_setup_memreg(struct rte_eth_dev *dev)
720 {
721 	struct vmxnet3_hw *hw = dev->data->dev_private;
722 	Vmxnet3_DriverShared *shared = hw->shared;
723 	Vmxnet3_CmdInfo *cmdInfo;
724 	struct rte_mempool *mp[VMXNET3_MAX_RX_QUEUES];
725 	uint8_t index[VMXNET3_MAX_RX_QUEUES + VMXNET3_MAX_TX_QUEUES];
726 	uint32_t num, i, j, size;
727 
728 	if (hw->memRegsPA == 0) {
729 		const struct rte_memzone *mz;
730 
731 		size = sizeof(Vmxnet3_MemRegs) +
732 			(VMXNET3_MAX_RX_QUEUES + VMXNET3_MAX_TX_QUEUES) *
733 			sizeof(Vmxnet3_MemoryRegion);
734 
735 		mz = gpa_zone_reserve(dev, size, "memRegs", rte_socket_id(), 8,
736 				      1);
737 		if (mz == NULL) {
738 			PMD_INIT_LOG(ERR, "ERROR: Creating memRegs zone");
739 			return -ENOMEM;
740 		}
741 		memset(mz->addr, 0, mz->len);
742 		hw->memRegs = mz->addr;
743 		hw->memRegsPA = mz->iova;
744 	}
745 
746 	num = hw->num_rx_queues;
747 
748 	for (i = 0; i < num; i++) {
749 		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
750 
751 		mp[i] = rxq->mp;
752 		index[i] = 1 << i;
753 	}
754 
755 	/*
756 	 * The same mempool could be used by multiple queues. In such a case,
757 	 * remove duplicate mempool entries. Only one entry is kept with
758 	 * bitmask indicating queues that are using this mempool.
759 	 */
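	/* For example, if queues 0 and 2 share a mempool, mp[2] is cleared and
	 * index[0] ends up as 0x5 (bits 0 and 2 set).
	 */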
760 	for (i = 1; i < num; i++) {
761 		for (j = 0; j < i; j++) {
762 			if (mp[i] == mp[j]) {
763 				mp[i] = NULL;
764 				index[j] |= 1 << i;
765 				break;
766 			}
767 		}
768 	}
769 
770 	j = 0;
771 	for (i = 0; i < num; i++) {
772 		if (mp[i] == NULL)
773 			continue;
774 
775 		Vmxnet3_MemoryRegion *mr = &hw->memRegs->memRegs[j];
776 
777 		mr->startPA =
778 			(uintptr_t)STAILQ_FIRST(&mp[i]->mem_list)->iova;
779 		mr->length = STAILQ_FIRST(&mp[i]->mem_list)->len <= INT32_MAX ?
780 			STAILQ_FIRST(&mp[i]->mem_list)->len : INT32_MAX;
781 		mr->txQueueBits = index[i];
782 		mr->rxQueueBits = index[i];
783 
784 		PMD_INIT_LOG(INFO,
785 			     "index: %u startPA: %" PRIu64 " length: %u, "
786 			     "rxBits: %x",
787 			     j, mr->startPA, mr->length, mr->rxQueueBits);
788 		j++;
789 	}
790 	hw->memRegs->numRegs = j;
791 	PMD_INIT_LOG(INFO, "numRegs: %u", j);
792 
793 	size = sizeof(Vmxnet3_MemRegs) +
794 		(j - 1) * sizeof(Vmxnet3_MemoryRegion);
795 
796 	cmdInfo = &shared->cu.cmdInfo;
797 	cmdInfo->varConf.confVer = 1;
798 	cmdInfo->varConf.confLen = size;
799 	cmdInfo->varConf.confPA = hw->memRegsPA;
800 
801 	return 0;
802 }
803 
804 static int
805 vmxnet3_setup_driver_shared(struct rte_eth_dev *dev)
806 {
807 	struct rte_eth_conf port_conf = dev->data->dev_conf;
808 	struct vmxnet3_hw *hw = dev->data->dev_private;
809 	struct rte_intr_handle *intr_handle = dev->intr_handle;
810 	uint32_t mtu = dev->data->mtu;
811 	Vmxnet3_DriverShared *shared = hw->shared;
812 	Vmxnet3_DSDevRead *devRead = &shared->devRead;
813 	struct Vmxnet3_DSDevReadExt *devReadExt = &shared->devReadExt;
814 	uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads;
815 	uint32_t i;
816 	int ret;
817 
818 	hw->mtu = mtu;
819 
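	/*
	 * The shared area filled in below is handed to the device through
	 * VMXNET3_REG_DSAL/DSAH in vmxnet3_dev_start() before
	 * VMXNET3_CMD_ACTIVATE_DEV is issued.
	 */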
820 	shared->magic = VMXNET3_REV1_MAGIC;
821 	devRead->misc.driverInfo.version = VMXNET3_DRIVER_VERSION_NUM;
822 
823 	/* Setting up Guest OS information */
824 	devRead->misc.driverInfo.gos.gosBits   = sizeof(void *) == 4 ?
825 		VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64;
826 	devRead->misc.driverInfo.gos.gosType   = VMXNET3_GOS_TYPE_LINUX;
827 	devRead->misc.driverInfo.vmxnet3RevSpt = 1;
828 	devRead->misc.driverInfo.uptVerSpt     = 1;
829 
830 	devRead->misc.mtu = rte_le_to_cpu_32(mtu);
831 	devRead->misc.queueDescPA  = hw->queueDescPA;
832 	devRead->misc.queueDescLen = hw->queue_desc_len;
833 	devRead->misc.numTxQueues  = hw->num_tx_queues;
834 	devRead->misc.numRxQueues  = hw->num_rx_queues;
835 
836 	for (i = 0; i < hw->num_tx_queues; i++) {
837 		Vmxnet3_TxQueueDesc *tqd = &hw->tqd_start[i];
838 		vmxnet3_tx_queue_t *txq  = dev->data->tx_queues[i];
839 
840 		txq->shared = &hw->tqd_start[i];
841 
842 		tqd->ctrl.txNumDeferred  = 0;
843 		tqd->ctrl.txThreshold    = 1;
844 		tqd->conf.txRingBasePA   = txq->cmd_ring.basePA;
845 		tqd->conf.compRingBasePA = txq->comp_ring.basePA;
846 		tqd->conf.dataRingBasePA = txq->data_ring.basePA;
847 
848 		tqd->conf.txRingSize   = txq->cmd_ring.size;
849 		tqd->conf.compRingSize = txq->comp_ring.size;
850 		tqd->conf.dataRingSize = txq->data_ring.size;
851 		tqd->conf.txDataRingDescSize = txq->txdata_desc_size;
852 
853 		if (hw->intr.lsc_only)
854 			tqd->conf.intrIdx = 1;
855 		else
856 			tqd->conf.intrIdx =
857 				rte_intr_vec_list_index_get(intr_handle,
858 								   i);
859 		tqd->status.stopped = TRUE;
860 		tqd->status.error   = 0;
861 		memset(&tqd->stats, 0, sizeof(tqd->stats));
862 	}
863 
864 	for (i = 0; i < hw->num_rx_queues; i++) {
865 		Vmxnet3_RxQueueDesc *rqd  = &hw->rqd_start[i];
866 		vmxnet3_rx_queue_t *rxq   = dev->data->rx_queues[i];
867 
868 		rxq->shared = &hw->rqd_start[i];
869 
870 		rqd->conf.rxRingBasePA[0] = rxq->cmd_ring[0].basePA;
871 		rqd->conf.rxRingBasePA[1] = rxq->cmd_ring[1].basePA;
872 		rqd->conf.compRingBasePA  = rxq->comp_ring.basePA;
873 
874 		rqd->conf.rxRingSize[0]   = rxq->cmd_ring[0].size;
875 		rqd->conf.rxRingSize[1]   = rxq->cmd_ring[1].size;
876 		rqd->conf.compRingSize    = rxq->comp_ring.size;
877 
878 		if (VMXNET3_VERSION_GE_3(hw)) {
879 			rqd->conf.rxDataRingBasePA = rxq->data_ring.basePA;
880 			rqd->conf.rxDataRingDescSize = rxq->data_desc_size;
881 		}
882 
883 		if (hw->intr.lsc_only)
884 			rqd->conf.intrIdx = 1;
885 		else
886 			rqd->conf.intrIdx =
887 				rte_intr_vec_list_index_get(intr_handle,
888 								   i);
889 		rqd->status.stopped = TRUE;
890 		rqd->status.error   = 0;
891 		memset(&rqd->stats, 0, sizeof(rqd->stats));
892 	}
893 
894 	/* intr settings */
895 	if (VMXNET3_VERSION_GE_6(hw) && hw->queuesExtEnabled) {
896 		devReadExt->intrConfExt.autoMask = hw->intr.mask_mode ==
897 						   VMXNET3_IMM_AUTO;
898 		devReadExt->intrConfExt.numIntrs = hw->intr.num_intrs;
899 		for (i = 0; i < hw->intr.num_intrs; i++)
900 			devReadExt->intrConfExt.modLevels[i] =
901 				hw->intr.mod_levels[i];
902 
903 		devReadExt->intrConfExt.eventIntrIdx = hw->intr.event_intr_idx;
904 		devReadExt->intrConfExt.intrCtrl |=
905 			rte_cpu_to_le_32(VMXNET3_IC_DISABLE_ALL);
906 	} else {
907 		devRead->intrConf.autoMask = hw->intr.mask_mode ==
908 					     VMXNET3_IMM_AUTO;
909 		devRead->intrConf.numIntrs = hw->intr.num_intrs;
910 		for (i = 0; i < hw->intr.num_intrs; i++)
911 			devRead->intrConf.modLevels[i] = hw->intr.mod_levels[i];
912 
913 		devRead->intrConf.eventIntrIdx = hw->intr.event_intr_idx;
914 		devRead->intrConf.intrCtrl |= rte_cpu_to_le_32(VMXNET3_IC_DISABLE_ALL);
915 	}
916 
917 	/* rxMode starts cleared; VMXNET3_RXM_xxx bits are set via vmxnet3_dev_set_rxmode() */
918 	devRead->rxFilterConf.rxMode = 0;
919 
920 	/* Setting up feature flags */
921 	if (rx_offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM)
922 		devRead->misc.uptFeatures |= VMXNET3_F_RXCSUM;
923 
924 	if (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) {
925 		devRead->misc.uptFeatures |= VMXNET3_F_LRO;
926 		devRead->misc.maxNumRxSG = 0;
927 	}
928 
929 	if (port_conf.rxmode.mq_mode == RTE_ETH_MQ_RX_RSS) {
930 		ret = vmxnet3_rss_configure(dev);
931 		if (ret != VMXNET3_SUCCESS)
932 			return ret;
933 
934 		devRead->misc.uptFeatures |= VMXNET3_F_RSS;
935 		devRead->rssConfDesc.confVer = 1;
936 		devRead->rssConfDesc.confLen = sizeof(struct VMXNET3_RSSConf);
937 		devRead->rssConfDesc.confPA  = hw->rss_confPA;
938 	}
939 
940 	ret = vmxnet3_dev_vlan_offload_set(dev,
941 			RTE_ETH_VLAN_STRIP_MASK | RTE_ETH_VLAN_FILTER_MASK);
942 	if (ret)
943 		return ret;
944 
945 	vmxnet3_write_mac(hw, dev->data->mac_addrs->addr_bytes);
946 
947 	return VMXNET3_SUCCESS;
948 }
949 
950 /*
951  * Configure device link speed and setup link.
952  * Must be called after eth_vmxnet3_dev_init. Otherwise it might fail.
953  * It returns 0 on success.
954  */
955 static int
956 vmxnet3_dev_start(struct rte_eth_dev *dev)
957 {
958 	int ret;
959 	struct vmxnet3_hw *hw = dev->data->dev_private;
960 
961 	PMD_INIT_FUNC_TRACE();
962 
963 	/* Save stats before they are reset by CMD_ACTIVATE_DEV */
964 	vmxnet3_hw_stats_save(hw);
965 
966 	/* configure MSI-X */
967 	ret = vmxnet3_configure_msix(dev);
968 	if (ret < 0) {
969 		/* revert to lsc only */
970 		hw->intr.num_intrs = 2;
971 		hw->intr.lsc_only = TRUE;
972 	}
973 
974 	ret = vmxnet3_setup_driver_shared(dev);
975 	if (ret != VMXNET3_SUCCESS)
976 		return ret;
977 
978 	/* Exchange shared data with device */
979 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_DSAL,
980 			       VMXNET3_GET_ADDR_LO(hw->sharedPA));
981 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_DSAH,
982 			       VMXNET3_GET_ADDR_HI(hw->sharedPA));
983 
984 	/* Activate device by register write */
985 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_ACTIVATE_DEV);
986 	ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
987 
988 	if (ret != 0) {
989 		PMD_INIT_LOG(ERR, "Device activation: UNSUCCESSFUL");
990 		return -EINVAL;
991 	}
992 
993 	/* Check memregs restrictions first */
994 	if (dev->data->nb_rx_queues <= VMXNET3_MAX_RX_QUEUES &&
995 	    dev->data->nb_tx_queues <= VMXNET3_MAX_TX_QUEUES) {
996 		ret = vmxnet3_dev_setup_memreg(dev);
997 		if (ret == 0) {
998 			VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
999 					VMXNET3_CMD_REGISTER_MEMREGS);
1000 			ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1001 			if (ret != 0)
1002 				PMD_INIT_LOG(DEBUG,
1003 					"Memory region registration command failed");
1004 			ret = 0;
1005 		} else {
1006 			PMD_INIT_LOG(DEBUG, "Failed to set up memory regions");
1007 		}
1008 	} else {
1009 		PMD_INIT_LOG(WARNING, "Memregs can't init (rx: %d, tx: %d)",
1010 			     dev->data->nb_rx_queues, dev->data->nb_tx_queues);
1011 	}
1012 
1013 	if (VMXNET3_VERSION_GE_4(hw) &&
1014 	    dev->data->dev_conf.rxmode.mq_mode == RTE_ETH_MQ_RX_RSS) {
1015 		/* Check for additional RSS  */
1016 		ret = vmxnet3_v4_rss_configure(dev);
1017 		if (ret != VMXNET3_SUCCESS) {
1018 			PMD_INIT_LOG(ERR, "Failed to configure v4 RSS");
1019 			return ret;
1020 		}
1021 	}
1022 
1023 	/*
1024 	 * Load RX queues with blank mbufs and update next2fill index for device
1025 	 * Update RxMode of the device
1026 	 */
1027 	ret = vmxnet3_dev_rxtx_init(dev);
1028 	if (ret != VMXNET3_SUCCESS) {
1029 		PMD_INIT_LOG(ERR, "Device queue init: UNSUCCESSFUL");
1030 		return ret;
1031 	}
1032 
1033 	hw->adapter_stopped = FALSE;
1034 
1035 	/* Set the proper Rx mode and issue the Rx mode update command */
1036 	vmxnet3_dev_set_rxmode(hw, VMXNET3_RXM_UCAST | VMXNET3_RXM_BCAST, 1);
1037 
1038 	/* Setup interrupt callback  */
1039 	rte_intr_callback_register(dev->intr_handle,
1040 				   vmxnet3_interrupt_handler, dev);
1041 
1042 	if (rte_intr_enable(dev->intr_handle) < 0) {
1043 		PMD_INIT_LOG(ERR, "interrupt enable failed");
1044 		return -EIO;
1045 	}
1046 
1047 	/* enable all intrs */
1048 	vmxnet3_enable_all_intrs(hw);
1049 
1050 	vmxnet3_process_events(dev);
1051 
1052 	/*
1053 	 * Update link state from device since this won't be
1054 	 * done upon starting with lsc in use. This is done
1055 	 * only after enabling interrupts to avoid any race
1056 	 * where the link state could change without an
1057 	 * interrupt being fired.
1058 	 */
1059 	__vmxnet3_dev_link_update(dev, 0);
1060 
1061 	return VMXNET3_SUCCESS;
1062 }
1063 
1064 /*
1065  * Stop device: disable rx and tx functions to allow for reconfiguring.
1066  */
1067 static int
1068 vmxnet3_dev_stop(struct rte_eth_dev *dev)
1069 {
1070 	struct rte_eth_link link;
1071 	struct vmxnet3_hw *hw = dev->data->dev_private;
1072 	struct rte_intr_handle *intr_handle = dev->intr_handle;
1073 	int ret;
1074 
1075 	PMD_INIT_FUNC_TRACE();
1076 
1077 	if (hw->adapter_stopped == 1) {
1078 		PMD_INIT_LOG(DEBUG, "Device already stopped.");
1079 		return 0;
1080 	}
1081 
1082 	do {
1083 		/* Unregistering holds a lock to make sure there is no running cb.
1084 		 * This has to happen first since vmxnet3_interrupt_handler
1085 		 * reenables interrupts by calling vmxnet3_enable_intr.
1086 		 */
1087 		ret = rte_intr_callback_unregister(intr_handle,
1088 						   vmxnet3_interrupt_handler,
1089 						   (void *)-1);
1090 	} while (ret == -EAGAIN);
1091 
1092 	if (ret < 0)
1093 		PMD_DRV_LOG(ERR, "Error attempting to unregister intr cb: %d",
1094 			    ret);
1095 
1096 	PMD_INIT_LOG(DEBUG, "Disabled %d intr callbacks", ret);
1097 
1098 	/* disable interrupts */
1099 	vmxnet3_disable_all_intrs(hw);
1100 
1101 	rte_intr_disable(intr_handle);
1102 
1103 	/* Clean datapath event and queue/vector mapping */
1104 	rte_intr_efd_disable(intr_handle);
1105 	rte_intr_vec_list_free(intr_handle);
1106 
1107 	/* quiesce the device first */
1108 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_QUIESCE_DEV);
1109 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_DSAL, 0);
1110 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_DSAH, 0);
1111 
1112 	/* reset the device */
1113 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
1114 	PMD_INIT_LOG(DEBUG, "Device reset.");
1115 
1116 	vmxnet3_dev_clear_queues(dev);
1117 
1118 	/* Clear recorded link status */
1119 	memset(&link, 0, sizeof(link));
1120 	link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
1121 	link.link_speed = RTE_ETH_SPEED_NUM_10G;
1122 	link.link_autoneg = RTE_ETH_LINK_FIXED;
1123 	rte_eth_linkstatus_set(dev, &link);
1124 
1125 	hw->adapter_stopped = 1;
1126 	dev->data->dev_started = 0;
1127 
1128 	return 0;
1129 }
1130 
1131 static void
1132 vmxnet3_free_queues(struct rte_eth_dev *dev)
1133 {
1134 	int i;
1135 
1136 	PMD_INIT_FUNC_TRACE();
1137 
1138 	for (i = 0; i < dev->data->nb_rx_queues; i++)
1139 		vmxnet3_dev_rx_queue_release(dev, i);
1140 	dev->data->nb_rx_queues = 0;
1141 
1142 	for (i = 0; i < dev->data->nb_tx_queues; i++)
1143 		vmxnet3_dev_tx_queue_release(dev, i);
1144 	dev->data->nb_tx_queues = 0;
1145 }
1146 
1147 /*
1148  * Reset and stop device.
1149  */
1150 static int
1151 vmxnet3_dev_close(struct rte_eth_dev *dev)
1152 {
1153 	int ret;
1154 	PMD_INIT_FUNC_TRACE();
1155 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1156 		return 0;
1157 
1158 	ret = vmxnet3_dev_stop(dev);
1159 	vmxnet3_free_queues(dev);
1160 
1161 	return ret;
1162 }
1163 
1164 static int
1165 vmxnet3_dev_reset(struct rte_eth_dev *dev)
1166 {
1167 	int ret;
1168 
1169 	ret = eth_vmxnet3_dev_uninit(dev);
1170 	if (ret)
1171 		return ret;
1172 	ret = eth_vmxnet3_dev_init(dev);
1173 	return ret;
1174 }
1175 
1176 static void
1177 vmxnet3_hw_tx_stats_get(struct vmxnet3_hw *hw, unsigned int q,
1178 			struct UPT1_TxStats *res)
1179 {
1180 #define VMXNET3_UPDATE_TX_STAT(h, i, f, r)		\
1181 		((r)->f = (h)->tqd_start[(i)].stats.f +	\
1182 			(h)->saved_tx_stats[(i)].f)
1183 
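	/* Queue counters are cleared by VMXNET3_CMD_ACTIVATE_DEV, so the values
	 * saved by vmxnet3_hw_stats_save() before activation are added back in to
	 * keep the totals monotonic across restarts.
	 */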
1184 	VMXNET3_UPDATE_TX_STAT(hw, q, ucastPktsTxOK, res);
1185 	VMXNET3_UPDATE_TX_STAT(hw, q, mcastPktsTxOK, res);
1186 	VMXNET3_UPDATE_TX_STAT(hw, q, bcastPktsTxOK, res);
1187 	VMXNET3_UPDATE_TX_STAT(hw, q, ucastBytesTxOK, res);
1188 	VMXNET3_UPDATE_TX_STAT(hw, q, mcastBytesTxOK, res);
1189 	VMXNET3_UPDATE_TX_STAT(hw, q, bcastBytesTxOK, res);
1190 	VMXNET3_UPDATE_TX_STAT(hw, q, pktsTxError, res);
1191 	VMXNET3_UPDATE_TX_STAT(hw, q, pktsTxDiscard, res);
1192 
1193 #undef VMXNET3_UPDATE_TX_STAT
1194 }
1195 
1196 static void
1197 vmxnet3_hw_rx_stats_get(struct vmxnet3_hw *hw, unsigned int q,
1198 			struct UPT1_RxStats *res)
1199 {
1200 #define VMXNET3_UPDATE_RX_STAT(h, i, f, r)		\
1201 		((r)->f = (h)->rqd_start[(i)].stats.f +	\
1202 			(h)->saved_rx_stats[(i)].f)
1203 
1204 	VMXNET3_UPDATE_RX_STAT(hw, q, ucastPktsRxOK, res);
1205 	VMXNET3_UPDATE_RX_STAT(hw, q, mcastPktsRxOK, res);
1206 	VMXNET3_UPDATE_RX_STAT(hw, q, bcastPktsRxOK, res);
1207 	VMXNET3_UPDATE_RX_STAT(hw, q, ucastBytesRxOK, res);
1208 	VMXNET3_UPDATE_RX_STAT(hw, q, mcastBytesRxOK, res);
1209 	VMXNET3_UPDATE_RX_STAT(hw, q, bcastBytesRxOK, res);
1210 	VMXNET3_UPDATE_RX_STAT(hw, q, pktsRxError, res);
1211 	VMXNET3_UPDATE_RX_STAT(hw, q, pktsRxOutOfBuf, res);
1212 
1213 #undef VMXNET3_UPDATE_RX_STAT
1214 }
1215 
1216 static void
1217 vmxnet3_tx_stats_get(struct vmxnet3_hw *hw, unsigned int q,
1218 					struct UPT1_TxStats *res)
1219 {
1220 		vmxnet3_hw_tx_stats_get(hw, q, res);
1221 
1222 #define VMXNET3_REDUCE_SNAPSHOT_TX_STAT(h, i, f, r)	\
1223 		((r)->f -= (h)->snapshot_tx_stats[(i)].f)
1224 
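	/* Subtract the snapshot taken in vmxnet3_dev_stats_reset() so the reported
	 * statistics start from zero at the last reset.
	 */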
1225 	VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, ucastPktsTxOK, res);
1226 	VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, mcastPktsTxOK, res);
1227 	VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, bcastPktsTxOK, res);
1228 	VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, ucastBytesTxOK, res);
1229 	VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, mcastBytesTxOK, res);
1230 	VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, bcastBytesTxOK, res);
1231 	VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, pktsTxError, res);
1232 	VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, pktsTxDiscard, res);
1233 
1234 #undef VMXNET3_REDUCE_SNAPSHOT_TX_STAT
1235 }
1236 
1237 static void
1238 vmxnet3_rx_stats_get(struct vmxnet3_hw *hw, unsigned int q,
1239 					struct UPT1_RxStats *res)
1240 {
1241 		vmxnet3_hw_rx_stats_get(hw, q, res);
1242 
1243 #define VMXNET3_REDUCE_SNAPSHOT_RX_STAT(h, i, f, r)	\
1244 		((r)->f -= (h)->snapshot_rx_stats[(i)].f)
1245 
1246 	VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, ucastPktsRxOK, res);
1247 	VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, mcastPktsRxOK, res);
1248 	VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, bcastPktsRxOK, res);
1249 	VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, ucastBytesRxOK, res);
1250 	VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, mcastBytesRxOK, res);
1251 	VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, bcastBytesRxOK, res);
1252 	VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, pktsRxError, res);
1253 	VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, pktsRxOutOfBuf, res);
1254 
1255 #undef VMXNET3_REDUCE_SNAPSHOT_RX_STAT
1256 }
1257 
1258 static void
1259 vmxnet3_hw_stats_save(struct vmxnet3_hw *hw)
1260 {
1261 	unsigned int i;
1262 
1263 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS);
1264 
1265 	for (i = 0; i < hw->num_tx_queues; i++)
1266 		vmxnet3_hw_tx_stats_get(hw, i, &hw->saved_tx_stats[i]);
1267 	for (i = 0; i < hw->num_rx_queues; i++)
1268 		vmxnet3_hw_rx_stats_get(hw, i, &hw->saved_rx_stats[i]);
1269 }
1270 
1271 static int
1272 vmxnet3_dev_xstats_get_names(struct rte_eth_dev *dev,
1273 			     struct rte_eth_xstat_name *xstats_names,
1274 			     unsigned int n)
1275 {
1276 	unsigned int i, t, count = 0;
1277 	unsigned int nstats =
1278 		dev->data->nb_tx_queues * RTE_DIM(vmxnet3_txq_stat_strings) +
1279 		dev->data->nb_rx_queues * RTE_DIM(vmxnet3_rxq_stat_strings);
1280 
1281 	if (!xstats_names || n < nstats)
1282 		return nstats;
1283 
1284 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1285 		if (!dev->data->rx_queues[i])
1286 			continue;
1287 
1288 		for (t = 0; t < RTE_DIM(vmxnet3_rxq_stat_strings); t++) {
1289 			snprintf(xstats_names[count].name,
1290 				 sizeof(xstats_names[count].name),
1291 				 "rx_q%u_%s", i,
1292 				 vmxnet3_rxq_stat_strings[t].name);
1293 			count++;
1294 		}
1295 	}
1296 
1297 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1298 		if (!dev->data->tx_queues[i])
1299 			continue;
1300 
1301 		for (t = 0; t < RTE_DIM(vmxnet3_txq_stat_strings); t++) {
1302 			snprintf(xstats_names[count].name,
1303 				 sizeof(xstats_names[count].name),
1304 				 "tx_q%u_%s", i,
1305 				 vmxnet3_txq_stat_strings[t].name);
1306 			count++;
1307 		}
1308 	}
1309 
1310 	return count;
1311 }
1312 
1313 static int
1314 vmxnet3_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1315 		       unsigned int n)
1316 {
1317 	unsigned int i, t, count = 0;
1318 	unsigned int nstats =
1319 		dev->data->nb_tx_queues * RTE_DIM(vmxnet3_txq_stat_strings) +
1320 		dev->data->nb_rx_queues * RTE_DIM(vmxnet3_rxq_stat_strings);
1321 
1322 	if (n < nstats)
1323 		return nstats;
1324 
1325 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1326 		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
1327 
1328 		if (rxq == NULL)
1329 			continue;
1330 
1331 		for (t = 0; t < RTE_DIM(vmxnet3_rxq_stat_strings); t++) {
1332 			xstats[count].value = *(uint64_t *)(((char *)&rxq->stats) +
1333 				vmxnet3_rxq_stat_strings[t].offset);
1334 			xstats[count].id = count;
1335 			count++;
1336 		}
1337 	}
1338 
1339 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1340 		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1341 
1342 		if (txq == NULL)
1343 			continue;
1344 
1345 		for (t = 0; t < RTE_DIM(vmxnet3_txq_stat_strings); t++) {
1346 			xstats[count].value = *(uint64_t *)(((char *)&txq->stats) +
1347 				vmxnet3_txq_stat_strings[t].offset);
1348 			xstats[count].id = count;
1349 			count++;
1350 		}
1351 	}
1352 
1353 	return count;
1354 }
1355 
1356 static int
1357 vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1358 {
1359 	unsigned int i;
1360 	struct vmxnet3_hw *hw = dev->data->dev_private;
1361 	struct UPT1_TxStats txStats;
1362 	struct UPT1_RxStats rxStats;
1363 
1364 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS);
1365 
1366 	for (i = 0; i < hw->num_tx_queues; i++) {
1367 		vmxnet3_tx_stats_get(hw, i, &txStats);
1368 
1369 		stats->q_opackets[i] = txStats.ucastPktsTxOK +
1370 			txStats.mcastPktsTxOK +
1371 			txStats.bcastPktsTxOK;
1372 
1373 		stats->q_obytes[i] = txStats.ucastBytesTxOK +
1374 			txStats.mcastBytesTxOK +
1375 			txStats.bcastBytesTxOK;
1376 
1377 		stats->opackets += stats->q_opackets[i];
1378 		stats->obytes += stats->q_obytes[i];
1379 		stats->oerrors += txStats.pktsTxError + txStats.pktsTxDiscard;
1380 	}
1381 
1382 	for (i = 0; i < hw->num_rx_queues; i++) {
1383 		vmxnet3_rx_stats_get(hw, i, &rxStats);
1384 
1385 		stats->q_ipackets[i] = rxStats.ucastPktsRxOK +
1386 			rxStats.mcastPktsRxOK +
1387 			rxStats.bcastPktsRxOK;
1388 
1389 		stats->q_ibytes[i] = rxStats.ucastBytesRxOK +
1390 			rxStats.mcastBytesRxOK +
1391 			rxStats.bcastBytesRxOK;
1392 
1393 		stats->ipackets += stats->q_ipackets[i];
1394 		stats->ibytes += stats->q_ibytes[i];
1395 
1396 		stats->q_errors[i] = rxStats.pktsRxError;
1397 		stats->ierrors += rxStats.pktsRxError;
1398 		stats->imissed += rxStats.pktsRxOutOfBuf;
1399 	}
1400 
1401 	return 0;
1402 }
1403 
1404 static int
1405 vmxnet3_dev_stats_reset(struct rte_eth_dev *dev)
1406 {
1407 	unsigned int i;
1408 	struct vmxnet3_hw *hw = dev->data->dev_private;
1409 	struct UPT1_TxStats txStats = {0};
1410 	struct UPT1_RxStats rxStats = {0};
1411 
1412 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS);
1413 
1414 	RTE_BUILD_BUG_ON(RTE_ETHDEV_QUEUE_STAT_CNTRS < VMXNET3_MAX_TX_QUEUES);
1415 
1416 	for (i = 0; i < hw->num_tx_queues; i++) {
1417 		vmxnet3_hw_tx_stats_get(hw, i, &txStats);
1418 		memcpy(&hw->snapshot_tx_stats[i], &txStats,
1419 			sizeof(hw->snapshot_tx_stats[0]));
1420 	}
1421 	for (i = 0; i < hw->num_rx_queues; i++) {
1422 		vmxnet3_hw_rx_stats_get(hw, i, &rxStats);
1423 		memcpy(&hw->snapshot_rx_stats[i], &rxStats,
1424 			sizeof(hw->snapshot_rx_stats[0]));
1425 	}
1426 
1427 	return 0;
1428 }
1429 
1430 static int
1431 vmxnet3_dev_info_get(struct rte_eth_dev *dev,
1432 		     struct rte_eth_dev_info *dev_info)
1433 {
1434 	struct vmxnet3_hw *hw = dev->data->dev_private;
1435 	int queues = 0;
1436 
1437 	if (VMXNET3_VERSION_GE_6(hw)) {
1438 		VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1439 				       VMXNET3_CMD_GET_MAX_QUEUES_CONF);
1440 		queues = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1441 
1442 		if (queues > 0) {
1443 			dev_info->max_rx_queues =
1444 			  RTE_MIN(VMXNET3_EXT_MAX_RX_QUEUES, ((queues >> 8) & 0xff));
1445 			dev_info->max_tx_queues =
1446 			  RTE_MIN(VMXNET3_EXT_MAX_TX_QUEUES, (queues & 0xff));
1447 		} else {
1448 			dev_info->max_rx_queues = VMXNET3_MAX_RX_QUEUES;
1449 			dev_info->max_tx_queues = VMXNET3_MAX_TX_QUEUES;
1450 		}
1451 	} else {
1452 		dev_info->max_rx_queues = VMXNET3_MAX_RX_QUEUES;
1453 		dev_info->max_tx_queues = VMXNET3_MAX_TX_QUEUES;
1454 	}
1455 
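	/* 1518 bytes is the largest standard Ethernet frame including the 4-byte CRC */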
1456 	dev_info->min_rx_bufsize = 1518 + RTE_PKTMBUF_HEADROOM;
1457 	dev_info->max_rx_pktlen = 16384; /* includes CRC, cf MAXFRS register */
1458 	dev_info->min_mtu = VMXNET3_MIN_MTU;
1459 	dev_info->max_mtu = VMXNET3_MAX_MTU;
1460 	dev_info->speed_capa = RTE_ETH_LINK_SPEED_10G;
1461 	dev_info->max_mac_addrs = VMXNET3_MAX_MAC_ADDRS;
1462 
1463 	dev_info->flow_type_rss_offloads = VMXNET3_RSS_OFFLOAD_ALL;
1464 
1465 	if (VMXNET3_VERSION_GE_4(hw)) {
1466 		dev_info->flow_type_rss_offloads |= VMXNET3_V4_RSS_MASK;
1467 	}
1468 
1469 	dev_info->rx_desc_lim = (struct rte_eth_desc_lim) {
1470 		.nb_max = VMXNET3_RX_RING_MAX_SIZE,
1471 		.nb_min = VMXNET3_DEF_RX_RING_SIZE,
1472 		.nb_align = 1,
1473 	};
1474 
1475 	dev_info->tx_desc_lim = (struct rte_eth_desc_lim) {
1476 		.nb_max = VMXNET3_TX_RING_MAX_SIZE,
1477 		.nb_min = VMXNET3_DEF_TX_RING_SIZE,
1478 		.nb_align = 1,
1479 		.nb_seg_max = VMXNET3_TX_MAX_SEG,
1480 		.nb_mtu_seg_max = VMXNET3_MAX_TXD_PER_PKT,
1481 	};
1482 
1483 	dev_info->rx_offload_capa = VMXNET3_RX_OFFLOAD_CAP;
1484 	dev_info->rx_queue_offload_capa = 0;
1485 	dev_info->tx_offload_capa = VMXNET3_TX_OFFLOAD_CAP;
1486 	dev_info->tx_queue_offload_capa = 0;
1487 	if (hw->rss_conf == NULL) {
1488 		/* RSS not configured */
1489 		dev_info->reta_size = 0;
1490 	} else {
1491 		dev_info->reta_size = hw->rss_conf->indTableSize;
1492 	}
1493 	return 0;
1494 }
1495 
1496 static int
1497 vmxnet3_hw_ver_get(struct rte_eth_dev *dev,
1498 		   char *fw_version, size_t fw_size)
1499 {
1500 	int ret;
1501 	struct vmxnet3_hw *hw = dev->data->dev_private;
1502 
1503 	ret = snprintf(fw_version, fw_size, "v%d", hw->version);
1504 
1505 	ret += 1; /* add the size of '\0' */
1506 	if (fw_size < (uint32_t)ret)
1507 		return ret;
1508 	else
1509 		return 0;
1510 }
1511 
1512 static const uint32_t *
1513 vmxnet3_dev_supported_ptypes_get(struct rte_eth_dev *dev)
1514 {
1515 	static const uint32_t ptypes[] = {
1516 		RTE_PTYPE_L3_IPV4_EXT,
1517 		RTE_PTYPE_L3_IPV4,
1518 		RTE_PTYPE_UNKNOWN
1519 	};
1520 
1521 	if (dev->rx_pkt_burst == vmxnet3_recv_pkts)
1522 		return ptypes;
1523 	return NULL;
1524 }
1525 
1526 static int
1527 vmxnet3_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
1528 {
1529 	struct vmxnet3_hw *hw = dev->data->dev_private;
1530 
1531 	rte_ether_addr_copy(mac_addr, (struct rte_ether_addr *)(hw->perm_addr));
1532 	vmxnet3_write_mac(hw, mac_addr->addr_bytes);
1533 	return 0;
1534 }
1535 
1536 static int
1537 vmxnet3_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
1538 {
1539 	struct vmxnet3_hw *hw = dev->data->dev_private;
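	/* room for the Ethernet header, CRC and a 4-byte VLAN tag on top of the MTU */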
1540 	uint32_t frame_size = mtu + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + 4;
1541 
1542 	if (mtu < VMXNET3_MIN_MTU)
1543 		return -EINVAL;
1544 
1545 	if (VMXNET3_VERSION_GE_6(hw)) {
1546 		if (mtu > VMXNET3_V6_MAX_MTU)
1547 			return -EINVAL;
1548 	} else {
1549 		if (mtu > VMXNET3_MAX_MTU) {
1550 			PMD_DRV_LOG(ERR, "MTU %d too large in device version v%d",
1551 				    mtu, hw->version);
1552 			return -EINVAL;
1553 		}
1554 	}
1555 
1556 	dev->data->mtu = mtu;
1557 	/* update max frame size */
1558 	dev->data->dev_conf.rxmode.mtu = frame_size;
1559 
1560 	if (dev->data->dev_started == 0)
1561 		return 0;
1562 
1563 	/* changing mtu for vmxnet3 pmd does not require a restart
1564 	 * as it does not need to repopulate the rx rings to support
1565 	 * different mtu size. We stop and restart the device here
1566 	 * just to pass the mtu info to the backend.
1567 	 */
1568 	vmxnet3_dev_stop(dev);
1569 	vmxnet3_dev_start(dev);
1570 
1571 	return 0;
1572 }
1573 
1574 /* return 0 means link status changed, -1 means not changed */
1575 static int
1576 __vmxnet3_dev_link_update(struct rte_eth_dev *dev,
1577 			  __rte_unused int wait_to_complete)
1578 {
1579 	struct vmxnet3_hw *hw = dev->data->dev_private;
1580 	struct rte_eth_link link;
1581 	uint32_t ret;
1582 
1583 	memset(&link, 0, sizeof(link));
1584 
1585 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
1586 	ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1587 
1588 	if (ret & 0x1)
1589 		link.link_status = RTE_ETH_LINK_UP;
1590 	link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
1591 	link.link_speed = RTE_ETH_SPEED_NUM_10G;
1592 	link.link_autoneg = RTE_ETH_LINK_FIXED;
1593 
1594 	return rte_eth_linkstatus_set(dev, &link);
1595 }
1596 
1597 static int
1598 vmxnet3_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete)
1599 {
1600 	/* Link status doesn't change for stopped dev */
1601 	if (dev->data->dev_started == 0)
1602 		return -1;
1603 
1604 	return __vmxnet3_dev_link_update(dev, wait_to_complete);
1605 }
1606 
1607 /* Updating rxmode through Vmxnet3_DriverShared structure in adapter */
1608 static void
1609 vmxnet3_dev_set_rxmode(struct vmxnet3_hw *hw, uint32_t feature, int set)
1610 {
1611 	struct Vmxnet3_RxFilterConf *rxConf = &hw->shared->devRead.rxFilterConf;
1612 
1613 	if (set)
1614 		rxConf->rxMode = rxConf->rxMode | feature;
1615 	else
1616 		rxConf->rxMode = rxConf->rxMode & (~feature);
1617 
1618 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_UPDATE_RX_MODE);
1619 }
1620 
1621 /* Promiscuous supported only if Vmxnet3_DriverShared is initialized in adapter */
1622 static int
1623 vmxnet3_dev_promiscuous_enable(struct rte_eth_dev *dev)
1624 {
1625 	struct vmxnet3_hw *hw = dev->data->dev_private;
1626 	uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable;
1627 
1628 	memset(vf_table, 0, VMXNET3_VFT_TABLE_SIZE);
1629 	vmxnet3_dev_set_rxmode(hw, VMXNET3_RXM_PROMISC, 1);
1630 
1631 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1632 			       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1633 
1634 	return 0;
1635 }
1636 
1637 /* Promiscuous supported only if Vmxnet3_DriverShared is initialized in adapter */
1638 static int
1639 vmxnet3_dev_promiscuous_disable(struct rte_eth_dev *dev)
1640 {
1641 	struct vmxnet3_hw *hw = dev->data->dev_private;
1642 	uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable;
1643 	uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads;
1644 
1645 	if (rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER)
1646 		memcpy(vf_table, hw->shadow_vfta, VMXNET3_VFT_TABLE_SIZE);
1647 	else
1648 		memset(vf_table, 0xff, VMXNET3_VFT_TABLE_SIZE);
1649 	vmxnet3_dev_set_rxmode(hw, VMXNET3_RXM_PROMISC, 0);
1650 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1651 			       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1652 
1653 	return 0;
1654 }
1655 
1656 /* Allmulticast supported only if Vmxnet3_DriverShared is initialized in adapter */
1657 static int
1658 vmxnet3_dev_allmulticast_enable(struct rte_eth_dev *dev)
1659 {
1660 	struct vmxnet3_hw *hw = dev->data->dev_private;
1661 
1662 	vmxnet3_dev_set_rxmode(hw, VMXNET3_RXM_ALL_MULTI, 1);
1663 
1664 	return 0;
1665 }
1666 
1667 /* Allmulticast supported only if Vmxnet3_DriverShared is initialized in adapter */
1668 static int
1669 vmxnet3_dev_allmulticast_disable(struct rte_eth_dev *dev)
1670 {
1671 	struct vmxnet3_hw *hw = dev->data->dev_private;
1672 
1673 	vmxnet3_dev_set_rxmode(hw, VMXNET3_RXM_ALL_MULTI, 0);
1674 
1675 	return 0;
1676 }
1677 
1678 /* Enable/disable filter on vlan */
1679 static int
1680 vmxnet3_dev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vid, int on)
1681 {
1682 	struct vmxnet3_hw *hw = dev->data->dev_private;
1683 	struct Vmxnet3_RxFilterConf *rxConf = &hw->shared->devRead.rxFilterConf;
1684 	uint32_t *vf_table = rxConf->vfTable;
1685 
1686 	/* save state for restore */
1687 	if (on)
1688 		VMXNET3_SET_VFTABLE_ENTRY(hw->shadow_vfta, vid);
1689 	else
1690 		VMXNET3_CLEAR_VFTABLE_ENTRY(hw->shadow_vfta, vid);
1691 
1692 	/* don't change active filter if in promiscuous mode */
1693 	if (rxConf->rxMode & VMXNET3_RXM_PROMISC)
1694 		return 0;
1695 
1696 	/* set in hardware */
1697 	if (on)
1698 		VMXNET3_SET_VFTABLE_ENTRY(vf_table, vid);
1699 	else
1700 		VMXNET3_CLEAR_VFTABLE_ENTRY(vf_table, vid);
1701 
1702 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1703 			       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1704 	return 0;
1705 }
1706 
1707 static int
1708 vmxnet3_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
1709 {
1710 	struct vmxnet3_hw *hw = dev->data->dev_private;
1711 	Vmxnet3_DSDevRead *devRead = &hw->shared->devRead;
1712 	uint32_t *vf_table = devRead->rxFilterConf.vfTable;
1713 	uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads;
1714 
1715 	if (mask & RTE_ETH_VLAN_STRIP_MASK) {
1716 		if (rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
1717 			devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
1718 		else
1719 			devRead->misc.uptFeatures &= ~UPT1_F_RXVLAN;
1720 
1721 		VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1722 				       VMXNET3_CMD_UPDATE_FEATURE);
1723 	}
1724 
1725 	if (mask & RTE_ETH_VLAN_FILTER_MASK) {
1726 		if (rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER)
1727 			memcpy(vf_table, hw->shadow_vfta, VMXNET3_VFT_TABLE_SIZE);
1728 		else
1729 			memset(vf_table, 0xff, VMXNET3_VFT_TABLE_SIZE);
1730 
1731 		VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1732 				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1733 	}
1734 
1735 	return 0;
1736 }
1737 
1738 static void
1739 vmxnet3_process_events(struct rte_eth_dev *dev)
1740 {
1741 	struct vmxnet3_hw *hw = dev->data->dev_private;
1742 	uint32_t events = hw->shared->ecr;
1743 
1744 	if (!events)
1745 		return;
1746 
1747 	/*
1748 	 * Writing 1 to an ECR bit clears it. Hence write the events back to
1749 	 * ECR so that the bits which were set are cleared.
1750 	 */
1751 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_ECR, events);
1752 
1753 	/* Check if link state has changed */
1754 	if (events & VMXNET3_ECR_LINK) {
1755 		PMD_DRV_LOG(DEBUG, "Process events: VMXNET3_ECR_LINK event");
1756 		if (vmxnet3_dev_link_update(dev, 0) == 0)
1757 			rte_eth_dev_callback_process(dev,
1758 						     RTE_ETH_EVENT_INTR_LSC,
1759 						     NULL);
1760 	}
1761 
1762 	/* Check if there is an error on xmit/recv queues */
1763 	if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
1764 		VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1765 				       VMXNET3_CMD_GET_QUEUE_STATUS);
1766 
1767 		if (hw->tqd_start->status.stopped)
1768 			PMD_DRV_LOG(ERR, "tq error 0x%x",
1769 				    hw->tqd_start->status.error);
1770 
1771 		if (hw->rqd_start->status.stopped)
1772 			PMD_DRV_LOG(ERR, "rq error 0x%x",
1773 				     hw->rqd_start->status.error);
1774 
1775 		/* Have to reset the device */
1777 	}
1778 
1779 	if (events & VMXNET3_ECR_DIC)
1780 		PMD_DRV_LOG(DEBUG, "Device implementation change event.");
1781 
1782 	if (events & VMXNET3_ECR_DEBUG)
1783 		PMD_DRV_LOG(DEBUG, "Debug event generated by device.");
1784 }
1785 
1786 static void
1787 vmxnet3_interrupt_handler(void *param)
1788 {
1789 	struct rte_eth_dev *dev = param;
1790 	struct vmxnet3_hw *hw = dev->data->dev_private;
1791 	uint32_t events;
1792 	uint8 *eventIntrIdx;
1793 	uint32 *intrCtrl;
1794 
1795 	PMD_INIT_FUNC_TRACE();
1796 
1797 	vmxnet3_get_intr_ctrl_ev(hw, &eventIntrIdx, &intrCtrl);
1798 	vmxnet3_disable_intr(hw, *eventIntrIdx);
1799 
1800 	events = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_ECR);
1801 	if (events == 0)
1802 		goto done;
1803 
1804 	RTE_LOG(DEBUG, PMD, "Reading events: 0x%X", events);
1805 	vmxnet3_process_events(dev);
1806 done:
1807 	vmxnet3_enable_intr(hw, *eventIntrIdx);
1808 }
1809 
1810 static int
1811 vmxnet3_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
1812 {
1813 	struct vmxnet3_hw *hw = dev->data->dev_private;
1814 
1815 	vmxnet3_enable_intr(hw,
1816 			    rte_intr_vec_list_index_get(dev->intr_handle,
1817 							       queue_id));
1818 
1819 	return 0;
1820 }
1821 
1822 static int
1823 vmxnet3_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
1824 {
1825 	struct vmxnet3_hw *hw = dev->data->dev_private;
1826 
1827 	vmxnet3_disable_intr(hw,
1828 		rte_intr_vec_list_index_get(dev->intr_handle, queue_id));
1829 
1830 	return 0;
1831 }
1832 
1833 RTE_PMD_REGISTER_PCI(net_vmxnet3, rte_vmxnet3_pmd);
1834 RTE_PMD_REGISTER_PCI_TABLE(net_vmxnet3, pci_id_vmxnet3_map);
1835 RTE_PMD_REGISTER_KMOD_DEP(net_vmxnet3, "* igb_uio | uio_pci_generic | vfio-pci");
1836 RTE_LOG_REGISTER_SUFFIX(vmxnet3_logtype_init, init, NOTICE);
1837 RTE_LOG_REGISTER_SUFFIX(vmxnet3_logtype_driver, driver, NOTICE);
1838 
1839 static int
1840 vmxnet3_rss_reta_update(struct rte_eth_dev *dev,
1841 			struct rte_eth_rss_reta_entry64 *reta_conf,
1842 			uint16_t reta_size)
1843 {
1844 	int i, idx, shift;
1845 	struct vmxnet3_hw *hw = dev->data->dev_private;
1846 	struct VMXNET3_RSSConf *dev_rss_conf = hw->rss_conf;
1847 
1848 	if (reta_size != dev_rss_conf->indTableSize) {
1849 		PMD_DRV_LOG(ERR,
1850 			"The size of hash lookup table configured (%d) doesn't match "
1851 			"the supported number (%d)",
1852 			reta_size, dev_rss_conf->indTableSize);
1853 		return -EINVAL;
1854 	}
1855 
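	/* each indirection table entry is stored as an 8-bit queue index */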
1856 	for (i = 0; i < reta_size; i++) {
1857 		idx = i / RTE_ETH_RETA_GROUP_SIZE;
1858 		shift = i % RTE_ETH_RETA_GROUP_SIZE;
1859 		if (reta_conf[idx].mask & RTE_BIT64(shift))
1860 			dev_rss_conf->indTable[i] = (uint8_t)reta_conf[idx].reta[shift];
1861 	}
1862 
1863 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1864 				VMXNET3_CMD_UPDATE_RSSIDT);
1865 
1866 	return 0;
1867 }
1868 
1869 static int
1870 vmxnet3_rss_reta_query(struct rte_eth_dev *dev,
1871 		       struct rte_eth_rss_reta_entry64 *reta_conf,
1872 		       uint16_t reta_size)
1873 {
1874 	int i, idx, shift;
1875 	struct vmxnet3_hw *hw = dev->data->dev_private;
1876 	struct VMXNET3_RSSConf *dev_rss_conf = hw->rss_conf;
1877 
1878 	if (reta_size != dev_rss_conf->indTableSize) {
1879 		PMD_DRV_LOG(ERR,
1880 			"Size of requested hash lookup table (%d) doesn't "
1881 			"match the configured size (%d)",
1882 			reta_size, dev_rss_conf->indTableSize);
1883 		return -EINVAL;
1884 	}
1885 
1886 	for (i = 0; i < reta_size; i++) {
1887 		idx = i / RTE_ETH_RETA_GROUP_SIZE;
1888 		shift = i % RTE_ETH_RETA_GROUP_SIZE;
1889 		if (reta_conf[idx].mask & RTE_BIT64(shift))
1890 			reta_conf[idx].reta[shift] = dev_rss_conf->indTable[i];
1891 	}
1892 
1893 	return 0;
1894 }
1895