xref: /dpdk/drivers/net/netvsc/hn_ethdev.c (revision 8809f78c7dd9f33a44a4f89c58fc91ded34296ed)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2016-2018 Microsoft Corporation
3  * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
4  * All rights reserved.
5  */
6 
7 #include <stdint.h>
8 #include <string.h>
9 #include <stdio.h>
10 #include <errno.h>
11 #include <unistd.h>
12 
13 #include <rte_ethdev.h>
14 #include <rte_memcpy.h>
15 #include <rte_string_fns.h>
16 #include <rte_memzone.h>
17 #include <rte_devargs.h>
18 #include <rte_malloc.h>
19 #include <rte_kvargs.h>
20 #include <rte_atomic.h>
21 #include <rte_branch_prediction.h>
22 #include <rte_ether.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_cycles.h>
25 #include <rte_errno.h>
26 #include <rte_memory.h>
27 #include <rte_eal.h>
28 #include <rte_dev.h>
29 #include <rte_bus_vmbus.h>
30 
31 #include "hn_logs.h"
32 #include "hn_var.h"
33 #include "hn_rndis.h"
34 #include "hn_nvs.h"
35 #include "ndis.h"
36 
/* Transmit offload bits accepted by hn_dev_configure(); any other
 * requested TX offload is rejected there.
 */
#define HN_TX_OFFLOAD_CAPS (DEV_TX_OFFLOAD_VLAN_INSERT | \
			    DEV_TX_OFFLOAD_MULTI_SEGS  | \
			    DEV_TX_OFFLOAD_TCP_TSO     | \
			    DEV_TX_OFFLOAD_IPV4_CKSUM  | \
			    DEV_TX_OFFLOAD_TCP_CKSUM   | \
			    DEV_TX_OFFLOAD_UDP_CKSUM)
43 
/* Receive offload bits accepted by hn_dev_configure(); any other
 * requested RX offload is rejected there.
 */
#define HN_RX_OFFLOAD_CAPS (DEV_RX_OFFLOAD_RSS_HASH   | \
			    DEV_RX_OFFLOAD_VLAN_STRIP | \
			    DEV_RX_OFFLOAD_CHECKSUM)
47 
48 struct hn_xstats_name_off {
49 	char name[RTE_ETH_XSTATS_NAME_SIZE];
50 	unsigned int offset;
51 };
52 
53 static const struct hn_xstats_name_off hn_stat_strings[] = {
54 	{ "good_packets",           offsetof(struct hn_stats, packets) },
55 	{ "good_bytes",             offsetof(struct hn_stats, bytes) },
56 	{ "errors",                 offsetof(struct hn_stats, errors) },
57 	{ "ring full",              offsetof(struct hn_stats, ring_full) },
58 	{ "channel full",           offsetof(struct hn_stats, channel_full) },
59 	{ "multicast_packets",      offsetof(struct hn_stats, multicast) },
60 	{ "broadcast_packets",      offsetof(struct hn_stats, broadcast) },
61 	{ "undersize_packets",      offsetof(struct hn_stats, size_bins[0]) },
62 	{ "size_64_packets",        offsetof(struct hn_stats, size_bins[1]) },
63 	{ "size_65_127_packets",    offsetof(struct hn_stats, size_bins[2]) },
64 	{ "size_128_255_packets",   offsetof(struct hn_stats, size_bins[3]) },
65 	{ "size_256_511_packets",   offsetof(struct hn_stats, size_bins[4]) },
66 	{ "size_512_1023_packets",  offsetof(struct hn_stats, size_bins[5]) },
67 	{ "size_1024_1518_packets", offsetof(struct hn_stats, size_bins[6]) },
68 	{ "size_1519_max_packets",  offsetof(struct hn_stats, size_bins[7]) },
69 };
70 
71 /* The default RSS key.
72  * This value is the same as MLX5 so that flows will be
73  * received on same path for both VF and synthetic NIC.
74  */
75 static const uint8_t rss_default_key[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
76 	0x2c, 0xc6, 0x81, 0xd1,	0x5b, 0xdb, 0xf4, 0xf7,
77 	0xfc, 0xa2, 0x83, 0x19,	0xdb, 0x1a, 0x3e, 0x94,
78 	0x6b, 0x9e, 0x38, 0xd9,	0x2c, 0x9c, 0x03, 0xd1,
79 	0xad, 0x99, 0x44, 0xa7,	0xd9, 0x56, 0x3d, 0x59,
80 	0x06, 0x3c, 0x25, 0xf3,	0xfc, 0x1f, 0xdc, 0x2a,
81 };
82 
83 static struct rte_eth_dev *
84 eth_dev_vmbus_allocate(struct rte_vmbus_device *dev, size_t private_data_size)
85 {
86 	struct rte_eth_dev *eth_dev;
87 	const char *name;
88 
89 	if (!dev)
90 		return NULL;
91 
92 	name = dev->device.name;
93 
94 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
95 		eth_dev = rte_eth_dev_allocate(name);
96 		if (!eth_dev) {
97 			PMD_DRV_LOG(NOTICE, "can not allocate rte ethdev");
98 			return NULL;
99 		}
100 
101 		if (private_data_size) {
102 			eth_dev->data->dev_private =
103 				rte_zmalloc_socket(name, private_data_size,
104 						     RTE_CACHE_LINE_SIZE, dev->device.numa_node);
105 			if (!eth_dev->data->dev_private) {
106 				PMD_DRV_LOG(NOTICE, "can not allocate driver data");
107 				rte_eth_dev_release_port(eth_dev);
108 				return NULL;
109 			}
110 		}
111 	} else {
112 		eth_dev = rte_eth_dev_attach_secondary(name);
113 		if (!eth_dev) {
114 			PMD_DRV_LOG(NOTICE, "can not attach secondary");
115 			return NULL;
116 		}
117 	}
118 
119 	eth_dev->device = &dev->device;
120 
121 	/* interrupt is simulated */
122 	dev->intr_handle.type = RTE_INTR_HANDLE_EXT;
123 	eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
124 	eth_dev->intr_handle = &dev->intr_handle;
125 
126 	return eth_dev;
127 }
128 
129 static void
130 eth_dev_vmbus_release(struct rte_eth_dev *eth_dev)
131 {
132 	/* free ether device */
133 	rte_eth_dev_release_port(eth_dev);
134 
135 	eth_dev->device = NULL;
136 	eth_dev->intr_handle = NULL;
137 }
138 
139 /* handle "latency=X" from devargs */
140 static int hn_set_latency(const char *key, const char *value, void *opaque)
141 {
142 	struct hn_data *hv = opaque;
143 	char *endp = NULL;
144 	unsigned long lat;
145 
146 	errno = 0;
147 	lat = strtoul(value, &endp, 0);
148 
149 	if (*value == '\0' || *endp != '\0') {
150 		PMD_DRV_LOG(ERR, "invalid parameter %s=%s", key, value);
151 		return -EINVAL;
152 	}
153 
154 	PMD_DRV_LOG(DEBUG, "set latency %lu usec", lat);
155 
156 	hv->latency = lat * 1000;	/* usec to nsec */
157 	return 0;
158 }
159 
160 /* Parse device arguments */
161 static int hn_parse_args(const struct rte_eth_dev *dev)
162 {
163 	struct hn_data *hv = dev->data->dev_private;
164 	struct rte_devargs *devargs = dev->device->devargs;
165 	static const char * const valid_keys[] = {
166 		"latency",
167 		NULL
168 	};
169 	struct rte_kvargs *kvlist;
170 	int ret;
171 
172 	if (!devargs)
173 		return 0;
174 
175 	PMD_INIT_LOG(DEBUG, "device args %s %s",
176 		     devargs->name, devargs->args);
177 
178 	kvlist = rte_kvargs_parse(devargs->args, valid_keys);
179 	if (!kvlist) {
180 		PMD_DRV_LOG(NOTICE, "invalid parameters");
181 		return -EINVAL;
182 	}
183 
184 	ret = rte_kvargs_process(kvlist, "latency", hn_set_latency, hv);
185 	if (ret)
186 		PMD_DRV_LOG(ERR, "Unable to process latency arg\n");
187 
188 	rte_kvargs_free(kvlist);
189 	return ret;
190 }
191 
192 /* Update link status.
193  * Note: the DPDK definition of "wait_to_complete"
194  *   means block this call until link is up.
195  *   which is not worth supporting.
196  */
197 int
198 hn_dev_link_update(struct rte_eth_dev *dev,
199 		   int wait_to_complete __rte_unused)
200 {
201 	struct hn_data *hv = dev->data->dev_private;
202 	struct rte_eth_link link, old;
203 	int error;
204 
205 	old = dev->data->dev_link;
206 
207 	error = hn_rndis_get_linkstatus(hv);
208 	if (error)
209 		return error;
210 
211 	hn_rndis_get_linkspeed(hv);
212 
213 	link = (struct rte_eth_link) {
214 		.link_duplex = ETH_LINK_FULL_DUPLEX,
215 		.link_autoneg = ETH_LINK_SPEED_FIXED,
216 		.link_speed = hv->link_speed / 10000,
217 	};
218 
219 	if (hv->link_status == NDIS_MEDIA_STATE_CONNECTED)
220 		link.link_status = ETH_LINK_UP;
221 	else
222 		link.link_status = ETH_LINK_DOWN;
223 
224 	if (old.link_status == link.link_status)
225 		return 0;
226 
227 	PMD_INIT_LOG(DEBUG, "Port %d is %s", dev->data->port_id,
228 		     (link.link_status == ETH_LINK_UP) ? "up" : "down");
229 
230 	return rte_eth_linkstatus_set(dev, &link);
231 }
232 
233 static int hn_dev_info_get(struct rte_eth_dev *dev,
234 			   struct rte_eth_dev_info *dev_info)
235 {
236 	struct hn_data *hv = dev->data->dev_private;
237 	int rc;
238 
239 	dev_info->speed_capa = ETH_LINK_SPEED_10G;
240 	dev_info->min_rx_bufsize = HN_MIN_RX_BUF_SIZE;
241 	dev_info->max_rx_pktlen  = HN_MAX_XFER_LEN;
242 	dev_info->max_mac_addrs  = 1;
243 
244 	dev_info->hash_key_size = NDIS_HASH_KEYSIZE_TOEPLITZ;
245 	dev_info->flow_type_rss_offloads = hv->rss_offloads;
246 	dev_info->reta_size = ETH_RSS_RETA_SIZE_128;
247 
248 	dev_info->max_rx_queues = hv->max_queues;
249 	dev_info->max_tx_queues = hv->max_queues;
250 
251 	dev_info->tx_desc_lim.nb_min = 1;
252 	dev_info->tx_desc_lim.nb_max = 4096;
253 
254 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
255 		return 0;
256 
257 	/* fills in rx and tx offload capability */
258 	rc = hn_rndis_get_offload(hv, dev_info);
259 	if (rc != 0)
260 		return rc;
261 
262 	/* merges the offload and queues of vf */
263 	return hn_vf_info_get(hv, dev_info);
264 }
265 
266 static int hn_rss_reta_update(struct rte_eth_dev *dev,
267 			      struct rte_eth_rss_reta_entry64 *reta_conf,
268 			      uint16_t reta_size)
269 {
270 	struct hn_data *hv = dev->data->dev_private;
271 	unsigned int i;
272 	int err;
273 
274 	PMD_INIT_FUNC_TRACE();
275 
276 	if (reta_size != NDIS_HASH_INDCNT) {
277 		PMD_DRV_LOG(ERR, "Hash lookup table size does not match NDIS");
278 		return -EINVAL;
279 	}
280 
281 	for (i = 0; i < NDIS_HASH_INDCNT; i++) {
282 		uint16_t idx = i / RTE_RETA_GROUP_SIZE;
283 		uint16_t shift = i % RTE_RETA_GROUP_SIZE;
284 		uint64_t mask = (uint64_t)1 << shift;
285 
286 		if (reta_conf[idx].mask & mask)
287 			hv->rss_ind[i] = reta_conf[idx].reta[shift];
288 	}
289 
290 	err = hn_rndis_conf_rss(hv, NDIS_RSS_FLAG_DISABLE);
291 	if (err) {
292 		PMD_DRV_LOG(NOTICE,
293 			"rss disable failed");
294 		return err;
295 	}
296 
297 	err = hn_rndis_conf_rss(hv, 0);
298 	if (err) {
299 		PMD_DRV_LOG(NOTICE,
300 			    "reta reconfig failed");
301 		return err;
302 	}
303 
304 	return hn_vf_reta_hash_update(dev, reta_conf, reta_size);
305 }
306 
307 static int hn_rss_reta_query(struct rte_eth_dev *dev,
308 			     struct rte_eth_rss_reta_entry64 *reta_conf,
309 			     uint16_t reta_size)
310 {
311 	struct hn_data *hv = dev->data->dev_private;
312 	unsigned int i;
313 
314 	PMD_INIT_FUNC_TRACE();
315 
316 	if (reta_size != NDIS_HASH_INDCNT) {
317 		PMD_DRV_LOG(ERR, "Hash lookup table size does not match NDIS");
318 		return -EINVAL;
319 	}
320 
321 	for (i = 0; i < NDIS_HASH_INDCNT; i++) {
322 		uint16_t idx = i / RTE_RETA_GROUP_SIZE;
323 		uint16_t shift = i % RTE_RETA_GROUP_SIZE;
324 		uint64_t mask = (uint64_t)1 << shift;
325 
326 		if (reta_conf[idx].mask & mask)
327 			reta_conf[idx].reta[shift] = hv->rss_ind[i];
328 	}
329 	return 0;
330 }
331 
332 static void hn_rss_hash_init(struct hn_data *hv,
333 			     const struct rte_eth_rss_conf *rss_conf)
334 {
335 	/* Convert from DPDK RSS hash flags to NDIS hash flags */
336 	hv->rss_hash = NDIS_HASH_FUNCTION_TOEPLITZ;
337 
338 	if (rss_conf->rss_hf & ETH_RSS_IPV4)
339 		hv->rss_hash |= NDIS_HASH_IPV4;
340 	if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
341 		hv->rss_hash |= NDIS_HASH_TCP_IPV4;
342 	if (rss_conf->rss_hf & ETH_RSS_IPV6)
343 		hv->rss_hash |=  NDIS_HASH_IPV6;
344 	if (rss_conf->rss_hf & ETH_RSS_IPV6_EX)
345 		hv->rss_hash |=  NDIS_HASH_IPV6_EX;
346 	if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
347 		hv->rss_hash |= NDIS_HASH_TCP_IPV6;
348 	if (rss_conf->rss_hf & ETH_RSS_IPV6_TCP_EX)
349 		hv->rss_hash |= NDIS_HASH_TCP_IPV6_EX;
350 
351 	memcpy(hv->rss_key, rss_conf->rss_key ? : rss_default_key,
352 	       NDIS_HASH_KEYSIZE_TOEPLITZ);
353 }
354 
355 static int hn_rss_hash_update(struct rte_eth_dev *dev,
356 			      struct rte_eth_rss_conf *rss_conf)
357 {
358 	struct hn_data *hv = dev->data->dev_private;
359 	int err;
360 
361 	PMD_INIT_FUNC_TRACE();
362 
363 	err = hn_rndis_conf_rss(hv, NDIS_RSS_FLAG_DISABLE);
364 	if (err) {
365 		PMD_DRV_LOG(NOTICE,
366 			    "rss disable failed");
367 		return err;
368 	}
369 
370 	hn_rss_hash_init(hv, rss_conf);
371 
372 	if (rss_conf->rss_hf != 0) {
373 		err = hn_rndis_conf_rss(hv, 0);
374 		if (err) {
375 			PMD_DRV_LOG(NOTICE,
376 				    "rss reconfig failed (RSS disabled)");
377 			return err;
378 		}
379 	}
380 
381 	return hn_vf_rss_hash_update(dev, rss_conf);
382 }
383 
384 static int hn_rss_hash_conf_get(struct rte_eth_dev *dev,
385 				struct rte_eth_rss_conf *rss_conf)
386 {
387 	struct hn_data *hv = dev->data->dev_private;
388 
389 	PMD_INIT_FUNC_TRACE();
390 
391 	if (hv->ndis_ver < NDIS_VERSION_6_20) {
392 		PMD_DRV_LOG(DEBUG, "RSS not supported on this host");
393 		return -EOPNOTSUPP;
394 	}
395 
396 	rss_conf->rss_key_len = NDIS_HASH_KEYSIZE_TOEPLITZ;
397 	if (rss_conf->rss_key)
398 		memcpy(rss_conf->rss_key, hv->rss_key,
399 		       NDIS_HASH_KEYSIZE_TOEPLITZ);
400 
401 	rss_conf->rss_hf = 0;
402 	if (hv->rss_hash & NDIS_HASH_IPV4)
403 		rss_conf->rss_hf |= ETH_RSS_IPV4;
404 
405 	if (hv->rss_hash & NDIS_HASH_TCP_IPV4)
406 		rss_conf->rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
407 
408 	if (hv->rss_hash & NDIS_HASH_IPV6)
409 		rss_conf->rss_hf |= ETH_RSS_IPV6;
410 
411 	if (hv->rss_hash & NDIS_HASH_IPV6_EX)
412 		rss_conf->rss_hf |= ETH_RSS_IPV6_EX;
413 
414 	if (hv->rss_hash & NDIS_HASH_TCP_IPV6)
415 		rss_conf->rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
416 
417 	if (hv->rss_hash & NDIS_HASH_TCP_IPV6_EX)
418 		rss_conf->rss_hf |= ETH_RSS_IPV6_TCP_EX;
419 
420 	return 0;
421 }
422 
423 static int
424 hn_dev_promiscuous_enable(struct rte_eth_dev *dev)
425 {
426 	struct hn_data *hv = dev->data->dev_private;
427 
428 	hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_PROMISCUOUS);
429 	return hn_vf_promiscuous_enable(dev);
430 }
431 
432 static int
433 hn_dev_promiscuous_disable(struct rte_eth_dev *dev)
434 {
435 	struct hn_data *hv = dev->data->dev_private;
436 	uint32_t filter;
437 
438 	filter = NDIS_PACKET_TYPE_DIRECTED | NDIS_PACKET_TYPE_BROADCAST;
439 	if (dev->data->all_multicast)
440 		filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
441 	hn_rndis_set_rxfilter(hv, filter);
442 	return hn_vf_promiscuous_disable(dev);
443 }
444 
445 static int
446 hn_dev_allmulticast_enable(struct rte_eth_dev *dev)
447 {
448 	struct hn_data *hv = dev->data->dev_private;
449 
450 	hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_DIRECTED |
451 			      NDIS_PACKET_TYPE_ALL_MULTICAST |
452 			NDIS_PACKET_TYPE_BROADCAST);
453 	return hn_vf_allmulticast_enable(dev);
454 }
455 
456 static int
457 hn_dev_allmulticast_disable(struct rte_eth_dev *dev)
458 {
459 	struct hn_data *hv = dev->data->dev_private;
460 
461 	hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_DIRECTED |
462 			     NDIS_PACKET_TYPE_BROADCAST);
463 	return hn_vf_allmulticast_disable(dev);
464 }
465 
/* Set the multicast address list.
 * The synthetic path does no multicast filtering, so the request is
 * handed to the VF unchanged and its result is returned.
 */
static int
hn_dev_mc_addr_list(struct rte_eth_dev *dev,
		     struct rte_ether_addr *mc_addr_set,
		     uint32_t nb_mc_addr)
{
	int rc = hn_vf_mc_addr_list(dev, mc_addr_set, nb_mc_addr);

	return rc;
}
474 
475 /* Setup shared rx/tx queue data */
476 static int hn_subchan_configure(struct hn_data *hv,
477 				uint32_t subchan)
478 {
479 	struct vmbus_channel *primary = hn_primary_chan(hv);
480 	int err;
481 	unsigned int retry = 0;
482 
483 	PMD_DRV_LOG(DEBUG,
484 		    "open %u subchannels", subchan);
485 
486 	/* Send create sub channels command */
487 	err = hn_nvs_alloc_subchans(hv, &subchan);
488 	if (err)
489 		return  err;
490 
491 	while (subchan > 0) {
492 		struct vmbus_channel *new_sc;
493 		uint16_t chn_index;
494 
495 		err = rte_vmbus_subchan_open(primary, &new_sc);
496 		if (err == -ENOENT && ++retry < 1000) {
497 			/* This can happen if not ready yet */
498 			rte_delay_ms(10);
499 			continue;
500 		}
501 
502 		if (err) {
503 			PMD_DRV_LOG(ERR,
504 				    "open subchannel failed: %d", err);
505 			return err;
506 		}
507 
508 		rte_vmbus_set_latency(hv->vmbus, new_sc, hv->latency);
509 
510 		retry = 0;
511 		chn_index = rte_vmbus_sub_channel_index(new_sc);
512 		if (chn_index == 0 || chn_index > hv->max_queues) {
513 			PMD_DRV_LOG(ERR,
514 				    "Invalid subchannel offermsg channel %u",
515 				    chn_index);
516 			return -EIO;
517 		}
518 
519 		PMD_DRV_LOG(DEBUG, "new sub channel %u", chn_index);
520 		hv->channels[chn_index] = new_sc;
521 		--subchan;
522 	}
523 
524 	return err;
525 }
526 
527 static int hn_dev_configure(struct rte_eth_dev *dev)
528 {
529 	struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
530 	struct rte_eth_rss_conf *rss_conf = &dev_conf->rx_adv_conf.rss_conf;
531 	const struct rte_eth_rxmode *rxmode = &dev_conf->rxmode;
532 	const struct rte_eth_txmode *txmode = &dev_conf->txmode;
533 	struct hn_data *hv = dev->data->dev_private;
534 	uint64_t unsupported;
535 	int i, err, subchan;
536 
537 	PMD_INIT_FUNC_TRACE();
538 
539 	if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)
540 		dev_conf->rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH;
541 
542 	unsupported = txmode->offloads & ~HN_TX_OFFLOAD_CAPS;
543 	if (unsupported) {
544 		PMD_DRV_LOG(NOTICE,
545 			    "unsupported TX offload: %#" PRIx64,
546 			    unsupported);
547 		return -EINVAL;
548 	}
549 
550 	unsupported = rxmode->offloads & ~HN_RX_OFFLOAD_CAPS;
551 	if (unsupported) {
552 		PMD_DRV_LOG(NOTICE,
553 			    "unsupported RX offload: %#" PRIx64,
554 			    rxmode->offloads);
555 		return -EINVAL;
556 	}
557 
558 	hv->vlan_strip = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
559 
560 	err = hn_rndis_conf_offload(hv, txmode->offloads,
561 				    rxmode->offloads);
562 	if (err) {
563 		PMD_DRV_LOG(NOTICE,
564 			    "offload configure failed");
565 		return err;
566 	}
567 
568 	hv->num_queues = RTE_MAX(dev->data->nb_rx_queues,
569 				 dev->data->nb_tx_queues);
570 
571 	for (i = 0; i < NDIS_HASH_INDCNT; i++)
572 		hv->rss_ind[i] = i % dev->data->nb_rx_queues;
573 
574 	hn_rss_hash_init(hv, rss_conf);
575 
576 	subchan = hv->num_queues - 1;
577 	if (subchan > 0) {
578 		err = hn_subchan_configure(hv, subchan);
579 		if (err) {
580 			PMD_DRV_LOG(NOTICE,
581 				    "subchannel configuration failed");
582 			return err;
583 		}
584 
585 		err = hn_rndis_conf_rss(hv, NDIS_RSS_FLAG_DISABLE);
586 		if (err) {
587 			PMD_DRV_LOG(NOTICE,
588 				"rss disable failed");
589 			return err;
590 		}
591 
592 		if (rss_conf->rss_hf != 0) {
593 			err = hn_rndis_conf_rss(hv, 0);
594 			if (err) {
595 				PMD_DRV_LOG(NOTICE,
596 					    "initial RSS config failed");
597 				return err;
598 			}
599 		}
600 	}
601 
602 	return hn_vf_configure(dev, dev_conf);
603 }
604 
605 static int hn_dev_stats_get(struct rte_eth_dev *dev,
606 			    struct rte_eth_stats *stats)
607 {
608 	unsigned int i;
609 
610 	hn_vf_stats_get(dev, stats);
611 
612 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
613 		const struct hn_tx_queue *txq = dev->data->tx_queues[i];
614 
615 		if (!txq)
616 			continue;
617 
618 		stats->opackets += txq->stats.packets;
619 		stats->obytes += txq->stats.bytes;
620 		stats->oerrors += txq->stats.errors;
621 
622 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
623 			stats->q_opackets[i] = txq->stats.packets;
624 			stats->q_obytes[i] = txq->stats.bytes;
625 		}
626 	}
627 
628 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
629 		const struct hn_rx_queue *rxq = dev->data->rx_queues[i];
630 
631 		if (!rxq)
632 			continue;
633 
634 		stats->ipackets += rxq->stats.packets;
635 		stats->ibytes += rxq->stats.bytes;
636 		stats->ierrors += rxq->stats.errors;
637 		stats->imissed += rxq->stats.ring_full;
638 
639 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
640 			stats->q_ipackets[i] = rxq->stats.packets;
641 			stats->q_ibytes[i] = rxq->stats.bytes;
642 		}
643 	}
644 
645 	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
646 	return 0;
647 }
648 
649 static int
650 hn_dev_stats_reset(struct rte_eth_dev *dev)
651 {
652 	unsigned int i;
653 
654 	PMD_INIT_FUNC_TRACE();
655 
656 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
657 		struct hn_tx_queue *txq = dev->data->tx_queues[i];
658 
659 		if (!txq)
660 			continue;
661 		memset(&txq->stats, 0, sizeof(struct hn_stats));
662 	}
663 
664 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
665 		struct hn_rx_queue *rxq = dev->data->rx_queues[i];
666 
667 		if (!rxq)
668 			continue;
669 
670 		memset(&rxq->stats, 0, sizeof(struct hn_stats));
671 	}
672 
673 	return 0;
674 }
675 
/* Reset extended statistics: clear the synthetic per-queue counters,
 * then reset the VF's xstats.
 *
 * Returns the hn_dev_stats_reset() error if that step fails, otherwise
 * the hn_vf_xstats_reset() result.
 */
static int
hn_dev_xstats_reset(struct rte_eth_dev *dev)
{
	int ret;

	ret = hn_dev_stats_reset(dev);
	if (ret != 0)
		return ret;	/* propagate the failure instead of hiding it */

	return hn_vf_xstats_reset(dev);
}
687 
688 static int
689 hn_dev_xstats_count(struct rte_eth_dev *dev)
690 {
691 	int ret, count;
692 
693 	count = dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings);
694 	count += dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings);
695 
696 	ret = hn_vf_xstats_get_names(dev, NULL, 0);
697 	if (ret < 0)
698 		return ret;
699 
700 	return count + ret;
701 }
702 
703 static int
704 hn_dev_xstats_get_names(struct rte_eth_dev *dev,
705 			struct rte_eth_xstat_name *xstats_names,
706 			unsigned int limit)
707 {
708 	unsigned int i, t, count = 0;
709 	int ret;
710 
711 	if (!xstats_names)
712 		return hn_dev_xstats_count(dev);
713 
714 	/* Note: limit checked in rte_eth_xstats_names() */
715 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
716 		const struct hn_tx_queue *txq = dev->data->tx_queues[i];
717 
718 		if (!txq)
719 			continue;
720 
721 		if (count >= limit)
722 			break;
723 
724 		for (t = 0; t < RTE_DIM(hn_stat_strings); t++)
725 			snprintf(xstats_names[count++].name,
726 				 RTE_ETH_XSTATS_NAME_SIZE,
727 				 "tx_q%u_%s", i, hn_stat_strings[t].name);
728 	}
729 
730 	for (i = 0; i < dev->data->nb_rx_queues; i++)  {
731 		const struct hn_rx_queue *rxq = dev->data->rx_queues[i];
732 
733 		if (!rxq)
734 			continue;
735 
736 		if (count >= limit)
737 			break;
738 
739 		for (t = 0; t < RTE_DIM(hn_stat_strings); t++)
740 			snprintf(xstats_names[count++].name,
741 				 RTE_ETH_XSTATS_NAME_SIZE,
742 				 "rx_q%u_%s", i,
743 				 hn_stat_strings[t].name);
744 	}
745 
746 	ret = hn_vf_xstats_get_names(dev, xstats_names + count,
747 				     limit - count);
748 	if (ret < 0)
749 		return ret;
750 
751 	return count + ret;
752 }
753 
754 static int
755 hn_dev_xstats_get(struct rte_eth_dev *dev,
756 		  struct rte_eth_xstat *xstats,
757 		  unsigned int n)
758 {
759 	unsigned int i, t, count = 0;
760 	const unsigned int nstats = hn_dev_xstats_count(dev);
761 	const char *stats;
762 	int ret;
763 
764 	PMD_INIT_FUNC_TRACE();
765 
766 	if (n < nstats)
767 		return nstats;
768 
769 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
770 		const struct hn_tx_queue *txq = dev->data->tx_queues[i];
771 
772 		if (!txq)
773 			continue;
774 
775 		stats = (const char *)&txq->stats;
776 		for (t = 0; t < RTE_DIM(hn_stat_strings); t++, count++) {
777 			xstats[count].id = count;
778 			xstats[count].value = *(const uint64_t *)
779 				(stats + hn_stat_strings[t].offset);
780 		}
781 	}
782 
783 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
784 		const struct hn_rx_queue *rxq = dev->data->rx_queues[i];
785 
786 		if (!rxq)
787 			continue;
788 
789 		stats = (const char *)&rxq->stats;
790 		for (t = 0; t < RTE_DIM(hn_stat_strings); t++, count++) {
791 			xstats[count].id = count;
792 			xstats[count].value = *(const uint64_t *)
793 				(stats + hn_stat_strings[t].offset);
794 		}
795 	}
796 
797 	ret = hn_vf_xstats_get(dev, xstats, count, n);
798 	if (ret < 0)
799 		return ret;
800 
801 	return count + ret;
802 }
803 
804 static int
805 hn_dev_start(struct rte_eth_dev *dev)
806 {
807 	struct hn_data *hv = dev->data->dev_private;
808 	int error;
809 
810 	PMD_INIT_FUNC_TRACE();
811 
812 	error = hn_rndis_set_rxfilter(hv,
813 				      NDIS_PACKET_TYPE_BROADCAST |
814 				      NDIS_PACKET_TYPE_ALL_MULTICAST |
815 				      NDIS_PACKET_TYPE_DIRECTED);
816 	if (error)
817 		return error;
818 
819 	error = hn_vf_start(dev);
820 	if (error)
821 		hn_rndis_set_rxfilter(hv, 0);
822 
823 	/* Initialize Link state */
824 	if (error == 0)
825 		hn_dev_link_update(dev, 0);
826 
827 	return error;
828 }
829 
830 static int
831 hn_dev_stop(struct rte_eth_dev *dev)
832 {
833 	struct hn_data *hv = dev->data->dev_private;
834 
835 	PMD_INIT_FUNC_TRACE();
836 	dev->data->dev_started = 0;
837 
838 	hn_rndis_set_rxfilter(hv, 0);
839 	return hn_vf_stop(dev);
840 }
841 
842 static int
843 hn_dev_close(struct rte_eth_dev *dev)
844 {
845 	int ret;
846 
847 	PMD_INIT_FUNC_TRACE();
848 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
849 		return 0;
850 
851 	ret = hn_vf_close(dev);
852 	hn_dev_free_queues(dev);
853 
854 	return ret;
855 }
856 
857 static const struct eth_dev_ops hn_eth_dev_ops = {
858 	.dev_configure		= hn_dev_configure,
859 	.dev_start		= hn_dev_start,
860 	.dev_stop		= hn_dev_stop,
861 	.dev_close		= hn_dev_close,
862 	.dev_infos_get		= hn_dev_info_get,
863 	.txq_info_get		= hn_dev_tx_queue_info,
864 	.rxq_info_get		= hn_dev_rx_queue_info,
865 	.dev_supported_ptypes_get = hn_vf_supported_ptypes,
866 	.promiscuous_enable     = hn_dev_promiscuous_enable,
867 	.promiscuous_disable    = hn_dev_promiscuous_disable,
868 	.allmulticast_enable    = hn_dev_allmulticast_enable,
869 	.allmulticast_disable   = hn_dev_allmulticast_disable,
870 	.set_mc_addr_list	= hn_dev_mc_addr_list,
871 	.reta_update		= hn_rss_reta_update,
872 	.reta_query             = hn_rss_reta_query,
873 	.rss_hash_update	= hn_rss_hash_update,
874 	.rss_hash_conf_get      = hn_rss_hash_conf_get,
875 	.tx_queue_setup		= hn_dev_tx_queue_setup,
876 	.tx_queue_release	= hn_dev_tx_queue_release,
877 	.tx_done_cleanup        = hn_dev_tx_done_cleanup,
878 	.rx_queue_setup		= hn_dev_rx_queue_setup,
879 	.rx_queue_release	= hn_dev_rx_queue_release,
880 	.link_update		= hn_dev_link_update,
881 	.stats_get		= hn_dev_stats_get,
882 	.stats_reset            = hn_dev_stats_reset,
883 	.xstats_get		= hn_dev_xstats_get,
884 	.xstats_get_names	= hn_dev_xstats_get_names,
885 	.xstats_reset		= hn_dev_xstats_reset,
886 };
887 
888 /*
889  * Setup connection between PMD and kernel.
890  */
891 static int
892 hn_attach(struct hn_data *hv, unsigned int mtu)
893 {
894 	int error;
895 
896 	/* Attach NVS */
897 	error = hn_nvs_attach(hv, mtu);
898 	if (error)
899 		goto failed_nvs;
900 
901 	/* Attach RNDIS */
902 	error = hn_rndis_attach(hv);
903 	if (error)
904 		goto failed_rndis;
905 
906 	/*
907 	 * NOTE:
908 	 * Under certain conditions on certain versions of Hyper-V,
909 	 * the RNDIS rxfilter is _not_ zero on the hypervisor side
910 	 * after the successful RNDIS initialization.
911 	 */
912 	hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_NONE);
913 	return 0;
914 failed_rndis:
915 	hn_nvs_detach(hv);
916 failed_nvs:
917 	return error;
918 }
919 
/* Tear down the host connection made by hn_attach(): detach NVS, then
 * detach RNDIS.
 */
static void
hn_detach(struct hn_data *hv)
{
	hn_nvs_detach(hv);

	hn_rndis_detach(hv);
}
926 
927 static int
928 eth_hn_dev_init(struct rte_eth_dev *eth_dev)
929 {
930 	struct hn_data *hv = eth_dev->data->dev_private;
931 	struct rte_device *device = eth_dev->device;
932 	struct rte_vmbus_device *vmbus;
933 	unsigned int rxr_cnt;
934 	int err, max_chan;
935 
936 	PMD_INIT_FUNC_TRACE();
937 
938 	vmbus = container_of(device, struct rte_vmbus_device, device);
939 	eth_dev->dev_ops = &hn_eth_dev_ops;
940 	eth_dev->rx_queue_count = hn_dev_rx_queue_count;
941 	eth_dev->rx_descriptor_status = hn_dev_rx_queue_status;
942 	eth_dev->tx_descriptor_status = hn_dev_tx_descriptor_status;
943 	eth_dev->tx_pkt_burst = &hn_xmit_pkts;
944 	eth_dev->rx_pkt_burst = &hn_recv_pkts;
945 
946 	/*
947 	 * for secondary processes, we don't initialize any further as primary
948 	 * has already done this work.
949 	 */
950 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
951 		return 0;
952 
953 	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
954 
955 	/* Since Hyper-V only supports one MAC address */
956 	eth_dev->data->mac_addrs = rte_calloc("hv_mac", HN_MAX_MAC_ADDRS,
957 					      sizeof(struct rte_ether_addr), 0);
958 	if (eth_dev->data->mac_addrs == NULL) {
959 		PMD_INIT_LOG(ERR,
960 			     "Failed to allocate memory store MAC addresses");
961 		return -ENOMEM;
962 	}
963 
964 	hv->vmbus = vmbus;
965 	hv->rxbuf_res = &vmbus->resource[HV_RECV_BUF_MAP];
966 	hv->chim_res  = &vmbus->resource[HV_SEND_BUF_MAP];
967 	hv->port_id = eth_dev->data->port_id;
968 	hv->latency = HN_CHAN_LATENCY_NS;
969 	hv->max_queues = 1;
970 	rte_rwlock_init(&hv->vf_lock);
971 	hv->vf_port = HN_INVALID_PORT;
972 
973 	err = hn_parse_args(eth_dev);
974 	if (err)
975 		return err;
976 
977 	strlcpy(hv->owner.name, eth_dev->device->name,
978 		RTE_ETH_MAX_OWNER_NAME_LEN);
979 	err = rte_eth_dev_owner_new(&hv->owner.id);
980 	if (err) {
981 		PMD_INIT_LOG(ERR, "Can not get owner id");
982 		return err;
983 	}
984 
985 	/* Initialize primary channel input for control operations */
986 	err = rte_vmbus_chan_open(vmbus, &hv->channels[0]);
987 	if (err)
988 		return err;
989 
990 	rte_vmbus_set_latency(hv->vmbus, hv->channels[0], hv->latency);
991 
992 	hv->primary = hn_rx_queue_alloc(hv, 0,
993 					eth_dev->device->numa_node);
994 
995 	if (!hv->primary)
996 		return -ENOMEM;
997 
998 	err = hn_attach(hv, RTE_ETHER_MTU);
999 	if  (err)
1000 		goto failed;
1001 
1002 	err = hn_chim_init(eth_dev);
1003 	if (err)
1004 		goto failed;
1005 
1006 	err = hn_rndis_get_eaddr(hv, eth_dev->data->mac_addrs->addr_bytes);
1007 	if (err)
1008 		goto failed;
1009 
1010 	/* Multi queue requires later versions of windows server */
1011 	if (hv->nvs_ver < NVS_VERSION_5)
1012 		return 0;
1013 
1014 	max_chan = rte_vmbus_max_channels(vmbus);
1015 	PMD_INIT_LOG(DEBUG, "VMBus max channels %d", max_chan);
1016 	if (max_chan <= 0)
1017 		goto failed;
1018 
1019 	if (hn_rndis_query_rsscaps(hv, &rxr_cnt) != 0)
1020 		rxr_cnt = 1;
1021 
1022 	hv->max_queues = RTE_MIN(rxr_cnt, (unsigned int)max_chan);
1023 
1024 	/* If VF was reported but not added, do it now */
1025 	if (hv->vf_present && !hn_vf_attached(hv)) {
1026 		PMD_INIT_LOG(DEBUG, "Adding VF device");
1027 
1028 		err = hn_vf_add(eth_dev, hv);
1029 		if (err)
1030 			hv->vf_present = 0;
1031 	}
1032 
1033 	return 0;
1034 
1035 failed:
1036 	PMD_INIT_LOG(NOTICE, "device init failed");
1037 
1038 	hn_chim_uninit(eth_dev);
1039 	hn_detach(hv);
1040 	return err;
1041 }
1042 
1043 static int
1044 eth_hn_dev_uninit(struct rte_eth_dev *eth_dev)
1045 {
1046 	struct hn_data *hv = eth_dev->data->dev_private;
1047 	int ret, ret_stop;
1048 
1049 	PMD_INIT_FUNC_TRACE();
1050 
1051 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1052 		return 0;
1053 
1054 	ret_stop = hn_dev_stop(eth_dev);
1055 	hn_dev_close(eth_dev);
1056 
1057 	hn_detach(hv);
1058 	hn_chim_uninit(eth_dev);
1059 	rte_vmbus_chan_close(hv->primary->chan);
1060 	rte_free(hv->primary);
1061 	ret = rte_eth_dev_owner_delete(hv->owner.id);
1062 	if (ret != 0)
1063 		return ret;
1064 
1065 	return ret_stop;
1066 }
1067 
1068 static int eth_hn_probe(struct rte_vmbus_driver *drv __rte_unused,
1069 			struct rte_vmbus_device *dev)
1070 {
1071 	struct rte_eth_dev *eth_dev;
1072 	int ret;
1073 
1074 	PMD_INIT_FUNC_TRACE();
1075 
1076 	eth_dev = eth_dev_vmbus_allocate(dev, sizeof(struct hn_data));
1077 	if (!eth_dev)
1078 		return -ENOMEM;
1079 
1080 	ret = eth_hn_dev_init(eth_dev);
1081 	if (ret)
1082 		eth_dev_vmbus_release(eth_dev);
1083 	else
1084 		rte_eth_dev_probing_finish(eth_dev);
1085 
1086 	return ret;
1087 }
1088 
1089 static int eth_hn_remove(struct rte_vmbus_device *dev)
1090 {
1091 	struct rte_eth_dev *eth_dev;
1092 	int ret;
1093 
1094 	PMD_INIT_FUNC_TRACE();
1095 
1096 	eth_dev = rte_eth_dev_allocated(dev->device.name);
1097 	if (!eth_dev)
1098 		return 0; /* port already released */
1099 
1100 	ret = eth_hn_dev_uninit(eth_dev);
1101 	if (ret)
1102 		return ret;
1103 
1104 	eth_dev_vmbus_release(eth_dev);
1105 	return 0;
1106 }
1107 
1108 /* Network device GUID */
1109 static const rte_uuid_t hn_net_ids[] = {
1110 	/*  f8615163-df3e-46c5-913f-f2d2f965ed0e */
1111 	RTE_UUID_INIT(0xf8615163, 0xdf3e, 0x46c5, 0x913f, 0xf2d2f965ed0eULL),
1112 	{ 0 }
1113 };
1114 
1115 static struct rte_vmbus_driver rte_netvsc_pmd = {
1116 	.id_table = hn_net_ids,
1117 	.probe = eth_hn_probe,
1118 	.remove = eth_hn_remove,
1119 };
1120 
1121 RTE_PMD_REGISTER_VMBUS(net_netvsc, rte_netvsc_pmd);
1122 RTE_PMD_REGISTER_KMOD_DEP(net_netvsc, "* uio_hv_generic");
1123 RTE_LOG_REGISTER(hn_logtype_init, pmd.net.netvsc.init, NOTICE);
1124 RTE_LOG_REGISTER(hn_logtype_driver, pmd.net.netvsc.driver, NOTICE);
1125