1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2016-2018 Microsoft Corporation 3 * Copyright(c) 2013-2016 Brocade Communications Systems, Inc. 4 * All rights reserved. 5 */ 6 7 #include <stdint.h> 8 #include <string.h> 9 #include <stdio.h> 10 #include <errno.h> 11 #include <unistd.h> 12 #include <dirent.h> 13 #include <net/if.h> 14 #include <net/if_arp.h> 15 #include <netinet/in.h> 16 #include <sys/ioctl.h> 17 18 #include <rte_ethdev.h> 19 #include <rte_memcpy.h> 20 #include <rte_string_fns.h> 21 #include <rte_memzone.h> 22 #include <rte_devargs.h> 23 #include <rte_malloc.h> 24 #include <rte_kvargs.h> 25 #include <rte_atomic.h> 26 #include <rte_branch_prediction.h> 27 #include <rte_ether.h> 28 #include <ethdev_driver.h> 29 #include <rte_cycles.h> 30 #include <rte_errno.h> 31 #include <rte_memory.h> 32 #include <rte_eal.h> 33 #include <dev_driver.h> 34 #include <bus_driver.h> 35 #include <bus_vmbus_driver.h> 36 #include <rte_alarm.h> 37 38 #include "hn_logs.h" 39 #include "hn_var.h" 40 #include "hn_rndis.h" 41 #include "hn_nvs.h" 42 #include "ndis.h" 43 44 #define HN_TX_OFFLOAD_CAPS (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | \ 45 RTE_ETH_TX_OFFLOAD_TCP_CKSUM | \ 46 RTE_ETH_TX_OFFLOAD_UDP_CKSUM | \ 47 RTE_ETH_TX_OFFLOAD_TCP_TSO | \ 48 RTE_ETH_TX_OFFLOAD_MULTI_SEGS | \ 49 RTE_ETH_TX_OFFLOAD_VLAN_INSERT) 50 51 #define HN_RX_OFFLOAD_CAPS (RTE_ETH_RX_OFFLOAD_CHECKSUM | \ 52 RTE_ETH_RX_OFFLOAD_VLAN_STRIP | \ 53 RTE_ETH_RX_OFFLOAD_RSS_HASH) 54 55 #define NETVSC_ARG_LATENCY "latency" 56 #define NETVSC_ARG_RXBREAK "rx_copybreak" 57 #define NETVSC_ARG_TXBREAK "tx_copybreak" 58 #define NETVSC_ARG_RX_EXTMBUF_ENABLE "rx_extmbuf_enable" 59 60 /* The max number of retry when hot adding a VF device */ 61 #define NETVSC_MAX_HOTADD_RETRY 10 62 63 struct hn_xstats_name_off { 64 char name[RTE_ETH_XSTATS_NAME_SIZE]; 65 unsigned int offset; 66 }; 67 68 static const struct hn_xstats_name_off hn_stat_strings[] = { 69 { "good_packets", offsetof(struct hn_stats, packets) }, 70 { "good_bytes", offsetof(struct hn_stats, bytes) }, 71 { "errors", offsetof(struct hn_stats, errors) }, 72 { "ring full", offsetof(struct hn_stats, ring_full) }, 73 { "channel full", offsetof(struct hn_stats, channel_full) }, 74 { "multicast_packets", offsetof(struct hn_stats, multicast) }, 75 { "broadcast_packets", offsetof(struct hn_stats, broadcast) }, 76 { "undersize_packets", offsetof(struct hn_stats, size_bins[0]) }, 77 { "size_64_packets", offsetof(struct hn_stats, size_bins[1]) }, 78 { "size_65_127_packets", offsetof(struct hn_stats, size_bins[2]) }, 79 { "size_128_255_packets", offsetof(struct hn_stats, size_bins[3]) }, 80 { "size_256_511_packets", offsetof(struct hn_stats, size_bins[4]) }, 81 { "size_512_1023_packets", offsetof(struct hn_stats, size_bins[5]) }, 82 { "size_1024_1518_packets", offsetof(struct hn_stats, size_bins[6]) }, 83 { "size_1519_max_packets", offsetof(struct hn_stats, size_bins[7]) }, 84 }; 85 86 /* The default RSS key. 87 * This value is the same as MLX5 so that flows will be 88 * received on same path for both VF and synthetic NIC. 89 */ 90 static const uint8_t rss_default_key[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 91 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7, 92 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94, 93 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1, 94 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59, 95 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a, 96 }; 97 98 static struct rte_eth_dev * 99 eth_dev_vmbus_allocate(struct rte_vmbus_device *dev, size_t private_data_size) 100 { 101 struct rte_eth_dev *eth_dev; 102 const char *name; 103 104 if (!dev) 105 return NULL; 106 107 name = dev->device.name; 108 109 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 110 eth_dev = rte_eth_dev_allocate(name); 111 if (!eth_dev) { 112 PMD_DRV_LOG(NOTICE, "can not allocate rte ethdev"); 113 return NULL; 114 } 115 116 if (private_data_size) { 117 eth_dev->data->dev_private = 118 rte_zmalloc_socket(name, private_data_size, 119 RTE_CACHE_LINE_SIZE, dev->device.numa_node); 120 if (!eth_dev->data->dev_private) { 121 PMD_DRV_LOG(NOTICE, "can not allocate driver data"); 122 rte_eth_dev_release_port(eth_dev); 123 return NULL; 124 } 125 } 126 } else { 127 eth_dev = rte_eth_dev_attach_secondary(name); 128 if (!eth_dev) { 129 PMD_DRV_LOG(NOTICE, "can not attach secondary"); 130 return NULL; 131 } 132 } 133 134 eth_dev->device = &dev->device; 135 136 /* interrupt is simulated */ 137 rte_intr_type_set(dev->intr_handle, RTE_INTR_HANDLE_EXT); 138 eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; 139 eth_dev->intr_handle = dev->intr_handle; 140 141 return eth_dev; 142 } 143 144 static void 145 eth_dev_vmbus_release(struct rte_eth_dev *eth_dev) 146 { 147 /* free ether device */ 148 rte_eth_dev_release_port(eth_dev); 149 150 eth_dev->device = NULL; 151 eth_dev->intr_handle = NULL; 152 } 153 154 static int hn_set_parameter(const char *key, const char *value, void *opaque) 155 { 156 struct hn_data *hv = opaque; 157 char *endp = NULL; 158 unsigned long v; 159 160 v = strtoul(value, &endp, 0); 161 if (*value == '\0' || *endp != '\0') { 162 PMD_DRV_LOG(ERR, "invalid parameter %s=%s", key, value); 163 return -EINVAL; 164 } 165 166 if (!strcmp(key, NETVSC_ARG_LATENCY)) { 167 /* usec to nsec */ 168 hv->latency = v * 1000; 169 PMD_DRV_LOG(DEBUG, "set latency %u usec", hv->latency); 170 } else if (!strcmp(key, NETVSC_ARG_RXBREAK)) { 171 hv->rx_copybreak = v; 172 PMD_DRV_LOG(DEBUG, "rx copy break set to %u", 173 hv->rx_copybreak); 174 } else if (!strcmp(key, NETVSC_ARG_TXBREAK)) { 175 hv->tx_copybreak = v; 176 PMD_DRV_LOG(DEBUG, "tx copy break set to %u", 177 hv->tx_copybreak); 178 } else if (!strcmp(key, NETVSC_ARG_RX_EXTMBUF_ENABLE)) { 179 hv->rx_extmbuf_enable = v; 180 PMD_DRV_LOG(DEBUG, "rx extmbuf enable set to %u", 181 hv->rx_extmbuf_enable); 182 } 183 184 return 0; 185 } 186 187 /* Parse device arguments */ 188 static int hn_parse_args(const struct rte_eth_dev *dev) 189 { 190 struct hn_data *hv = dev->data->dev_private; 191 struct rte_devargs *devargs = dev->device->devargs; 192 static const char * const valid_keys[] = { 193 NETVSC_ARG_LATENCY, 194 NETVSC_ARG_RXBREAK, 195 NETVSC_ARG_TXBREAK, 196 NETVSC_ARG_RX_EXTMBUF_ENABLE, 197 NULL 198 }; 199 struct rte_kvargs *kvlist; 200 int ret; 201 202 if (!devargs) 203 return 0; 204 205 PMD_INIT_LOG(DEBUG, "device args %s %s", 206 devargs->name, devargs->args); 207 208 kvlist = rte_kvargs_parse(devargs->args, valid_keys); 209 if (!kvlist) { 210 PMD_DRV_LOG(ERR, "invalid parameters"); 211 return -EINVAL; 212 } 213 214 ret = rte_kvargs_process(kvlist, NULL, hn_set_parameter, hv); 215 rte_kvargs_free(kvlist); 216 217 return ret; 218 } 219 220 /* Update link status. 221 * Note: the DPDK definition of "wait_to_complete" 222 * means block this call until link is up. 223 * which is not worth supporting. 224 */ 225 int 226 hn_dev_link_update(struct rte_eth_dev *dev, 227 int wait_to_complete __rte_unused) 228 { 229 struct hn_data *hv = dev->data->dev_private; 230 struct rte_eth_link link, old; 231 int error; 232 233 old = dev->data->dev_link; 234 235 error = hn_rndis_get_linkstatus(hv); 236 if (error) 237 return error; 238 239 hn_rndis_get_linkspeed(hv); 240 241 link = (struct rte_eth_link) { 242 .link_duplex = RTE_ETH_LINK_FULL_DUPLEX, 243 .link_autoneg = RTE_ETH_LINK_SPEED_FIXED, 244 .link_speed = hv->link_speed / 10000, 245 }; 246 247 if (hv->link_status == NDIS_MEDIA_STATE_CONNECTED) 248 link.link_status = RTE_ETH_LINK_UP; 249 else 250 link.link_status = RTE_ETH_LINK_DOWN; 251 252 if (old.link_status == link.link_status) 253 return 0; 254 255 PMD_INIT_LOG(DEBUG, "Port %d is %s", dev->data->port_id, 256 (link.link_status == RTE_ETH_LINK_UP) ? "up" : "down"); 257 258 return rte_eth_linkstatus_set(dev, &link); 259 } 260 261 static int hn_dev_info_get(struct rte_eth_dev *dev, 262 struct rte_eth_dev_info *dev_info) 263 { 264 struct hn_data *hv = dev->data->dev_private; 265 int rc; 266 267 dev_info->speed_capa = RTE_ETH_LINK_SPEED_10G; 268 dev_info->min_rx_bufsize = HN_MIN_RX_BUF_SIZE; 269 dev_info->max_rx_pktlen = HN_MAX_XFER_LEN; 270 dev_info->max_mac_addrs = 1; 271 272 dev_info->hash_key_size = NDIS_HASH_KEYSIZE_TOEPLITZ; 273 dev_info->flow_type_rss_offloads = hv->rss_offloads; 274 dev_info->reta_size = RTE_ETH_RSS_RETA_SIZE_128; 275 276 dev_info->max_rx_queues = hv->max_queues; 277 dev_info->max_tx_queues = hv->max_queues; 278 279 dev_info->tx_desc_lim.nb_min = 1; 280 dev_info->tx_desc_lim.nb_max = 4096; 281 282 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 283 return 0; 284 285 /* fills in rx and tx offload capability */ 286 rc = hn_rndis_get_offload(hv, dev_info); 287 if (rc != 0) 288 return rc; 289 290 /* merges the offload and queues of vf */ 291 return hn_vf_info_get(hv, dev_info); 292 } 293 294 static int hn_rss_reta_update(struct rte_eth_dev *dev, 295 struct rte_eth_rss_reta_entry64 *reta_conf, 296 uint16_t reta_size) 297 { 298 struct hn_data *hv = dev->data->dev_private; 299 unsigned int i; 300 int err; 301 302 PMD_INIT_FUNC_TRACE(); 303 304 if (reta_size != NDIS_HASH_INDCNT) { 305 PMD_DRV_LOG(ERR, "Hash lookup table size does not match NDIS"); 306 return -EINVAL; 307 } 308 309 for (i = 0; i < NDIS_HASH_INDCNT; i++) { 310 uint16_t idx = i / RTE_ETH_RETA_GROUP_SIZE; 311 uint16_t shift = i % RTE_ETH_RETA_GROUP_SIZE; 312 uint64_t mask = (uint64_t)1 << shift; 313 314 if (reta_conf[idx].mask & mask) 315 hv->rss_ind[i] = reta_conf[idx].reta[shift]; 316 317 /* 318 * Ensure we don't allow config that directs traffic to an Rx 319 * queue that we aren't going to poll 320 */ 321 if (hv->rss_ind[i] >= dev->data->nb_rx_queues) { 322 PMD_DRV_LOG(ERR, "RSS distributing traffic to invalid Rx queue"); 323 return -EINVAL; 324 } 325 } 326 327 err = hn_rndis_conf_rss(hv, NDIS_RSS_FLAG_DISABLE); 328 if (err) { 329 PMD_DRV_LOG(NOTICE, 330 "rss disable failed"); 331 return err; 332 } 333 334 err = hn_rndis_conf_rss(hv, 0); 335 if (err) { 336 PMD_DRV_LOG(NOTICE, 337 "reta reconfig failed"); 338 return err; 339 } 340 341 return hn_vf_reta_hash_update(dev, reta_conf, reta_size); 342 } 343 344 static int hn_rss_reta_query(struct rte_eth_dev *dev, 345 struct rte_eth_rss_reta_entry64 *reta_conf, 346 uint16_t reta_size) 347 { 348 struct hn_data *hv = dev->data->dev_private; 349 unsigned int i; 350 351 PMD_INIT_FUNC_TRACE(); 352 353 if (reta_size != NDIS_HASH_INDCNT) { 354 PMD_DRV_LOG(ERR, "Hash lookup table size does not match NDIS"); 355 return -EINVAL; 356 } 357 358 for (i = 0; i < NDIS_HASH_INDCNT; i++) { 359 uint16_t idx = i / RTE_ETH_RETA_GROUP_SIZE; 360 uint16_t shift = i % RTE_ETH_RETA_GROUP_SIZE; 361 uint64_t mask = (uint64_t)1 << shift; 362 363 if (reta_conf[idx].mask & mask) 364 reta_conf[idx].reta[shift] = hv->rss_ind[i]; 365 } 366 return 0; 367 } 368 369 static void hn_rss_hash_init(struct hn_data *hv, 370 const struct rte_eth_rss_conf *rss_conf) 371 { 372 /* Convert from DPDK RSS hash flags to NDIS hash flags */ 373 hv->rss_hash = NDIS_HASH_FUNCTION_TOEPLITZ; 374 375 if (rss_conf->rss_hf & RTE_ETH_RSS_IPV4) 376 hv->rss_hash |= NDIS_HASH_IPV4; 377 if (rss_conf->rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP) 378 hv->rss_hash |= NDIS_HASH_TCP_IPV4; 379 if (rss_conf->rss_hf & RTE_ETH_RSS_IPV6) 380 hv->rss_hash |= NDIS_HASH_IPV6; 381 if (rss_conf->rss_hf & RTE_ETH_RSS_IPV6_EX) 382 hv->rss_hash |= NDIS_HASH_IPV6_EX; 383 if (rss_conf->rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP) 384 hv->rss_hash |= NDIS_HASH_TCP_IPV6; 385 if (rss_conf->rss_hf & RTE_ETH_RSS_IPV6_TCP_EX) 386 hv->rss_hash |= NDIS_HASH_TCP_IPV6_EX; 387 388 memcpy(hv->rss_key, rss_conf->rss_key ? : rss_default_key, 389 NDIS_HASH_KEYSIZE_TOEPLITZ); 390 } 391 392 static int hn_rss_hash_update(struct rte_eth_dev *dev, 393 struct rte_eth_rss_conf *rss_conf) 394 { 395 struct hn_data *hv = dev->data->dev_private; 396 int err; 397 398 PMD_INIT_FUNC_TRACE(); 399 400 err = hn_rndis_conf_rss(hv, NDIS_RSS_FLAG_DISABLE); 401 if (err) { 402 PMD_DRV_LOG(NOTICE, 403 "rss disable failed"); 404 return err; 405 } 406 407 hn_rss_hash_init(hv, rss_conf); 408 409 if (rss_conf->rss_hf != 0) { 410 err = hn_rndis_conf_rss(hv, 0); 411 if (err) { 412 PMD_DRV_LOG(NOTICE, 413 "rss reconfig failed (RSS disabled)"); 414 return err; 415 } 416 } 417 418 return hn_vf_rss_hash_update(dev, rss_conf); 419 } 420 421 static int hn_rss_hash_conf_get(struct rte_eth_dev *dev, 422 struct rte_eth_rss_conf *rss_conf) 423 { 424 struct hn_data *hv = dev->data->dev_private; 425 426 PMD_INIT_FUNC_TRACE(); 427 428 if (hv->ndis_ver < NDIS_VERSION_6_20) { 429 PMD_DRV_LOG(DEBUG, "RSS not supported on this host"); 430 return -EOPNOTSUPP; 431 } 432 433 rss_conf->rss_key_len = NDIS_HASH_KEYSIZE_TOEPLITZ; 434 if (rss_conf->rss_key) 435 memcpy(rss_conf->rss_key, hv->rss_key, 436 NDIS_HASH_KEYSIZE_TOEPLITZ); 437 438 rss_conf->rss_hf = 0; 439 if (hv->rss_hash & NDIS_HASH_IPV4) 440 rss_conf->rss_hf |= RTE_ETH_RSS_IPV4; 441 442 if (hv->rss_hash & NDIS_HASH_TCP_IPV4) 443 rss_conf->rss_hf |= RTE_ETH_RSS_NONFRAG_IPV4_TCP; 444 445 if (hv->rss_hash & NDIS_HASH_IPV6) 446 rss_conf->rss_hf |= RTE_ETH_RSS_IPV6; 447 448 if (hv->rss_hash & NDIS_HASH_IPV6_EX) 449 rss_conf->rss_hf |= RTE_ETH_RSS_IPV6_EX; 450 451 if (hv->rss_hash & NDIS_HASH_TCP_IPV6) 452 rss_conf->rss_hf |= RTE_ETH_RSS_NONFRAG_IPV6_TCP; 453 454 if (hv->rss_hash & NDIS_HASH_TCP_IPV6_EX) 455 rss_conf->rss_hf |= RTE_ETH_RSS_IPV6_TCP_EX; 456 457 return 0; 458 } 459 460 static int 461 hn_dev_promiscuous_enable(struct rte_eth_dev *dev) 462 { 463 struct hn_data *hv = dev->data->dev_private; 464 465 hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_PROMISCUOUS); 466 return hn_vf_promiscuous_enable(dev); 467 } 468 469 static int 470 hn_dev_promiscuous_disable(struct rte_eth_dev *dev) 471 { 472 struct hn_data *hv = dev->data->dev_private; 473 uint32_t filter; 474 475 filter = NDIS_PACKET_TYPE_DIRECTED | NDIS_PACKET_TYPE_BROADCAST; 476 if (dev->data->all_multicast) 477 filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; 478 hn_rndis_set_rxfilter(hv, filter); 479 return hn_vf_promiscuous_disable(dev); 480 } 481 482 static int 483 hn_dev_allmulticast_enable(struct rte_eth_dev *dev) 484 { 485 struct hn_data *hv = dev->data->dev_private; 486 487 hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_DIRECTED | 488 NDIS_PACKET_TYPE_ALL_MULTICAST | 489 NDIS_PACKET_TYPE_BROADCAST); 490 return hn_vf_allmulticast_enable(dev); 491 } 492 493 static int 494 hn_dev_allmulticast_disable(struct rte_eth_dev *dev) 495 { 496 struct hn_data *hv = dev->data->dev_private; 497 498 hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_DIRECTED | 499 NDIS_PACKET_TYPE_BROADCAST); 500 return hn_vf_allmulticast_disable(dev); 501 } 502 503 static int 504 hn_dev_mc_addr_list(struct rte_eth_dev *dev, 505 struct rte_ether_addr *mc_addr_set, 506 uint32_t nb_mc_addr) 507 { 508 /* No filtering on the synthetic path, but can do it on VF */ 509 return hn_vf_mc_addr_list(dev, mc_addr_set, nb_mc_addr); 510 } 511 512 /* Setup shared rx/tx queue data */ 513 static int hn_subchan_configure(struct hn_data *hv, 514 uint32_t subchan) 515 { 516 struct vmbus_channel *primary = hn_primary_chan(hv); 517 int err; 518 unsigned int retry = 0; 519 520 PMD_DRV_LOG(DEBUG, 521 "open %u subchannels", subchan); 522 523 /* Send create sub channels command */ 524 err = hn_nvs_alloc_subchans(hv, &subchan); 525 if (err) 526 return err; 527 528 while (subchan > 0) { 529 struct vmbus_channel *new_sc; 530 uint16_t chn_index; 531 532 err = rte_vmbus_subchan_open(primary, &new_sc); 533 if (err == -ENOENT && ++retry < 1000) { 534 /* This can happen if not ready yet */ 535 rte_delay_ms(10); 536 continue; 537 } 538 539 if (err) { 540 PMD_DRV_LOG(ERR, 541 "open subchannel failed: %d", err); 542 return err; 543 } 544 545 rte_vmbus_set_latency(hv->vmbus, new_sc, hv->latency); 546 547 retry = 0; 548 chn_index = rte_vmbus_sub_channel_index(new_sc); 549 if (chn_index == 0 || chn_index > hv->max_queues) { 550 PMD_DRV_LOG(ERR, 551 "Invalid subchannel offermsg channel %u", 552 chn_index); 553 return -EIO; 554 } 555 556 PMD_DRV_LOG(DEBUG, "new sub channel %u", chn_index); 557 hv->channels[chn_index] = new_sc; 558 --subchan; 559 } 560 561 return err; 562 } 563 564 static void netvsc_hotplug_retry(void *args) 565 { 566 int ret; 567 struct hv_hotadd_context *hot_ctx = args; 568 struct hn_data *hv = hot_ctx->hv; 569 struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id]; 570 struct rte_devargs *d = &hot_ctx->da; 571 char buf[256]; 572 573 DIR *di; 574 struct dirent *dir; 575 struct ifreq req; 576 struct rte_ether_addr eth_addr; 577 int s; 578 579 PMD_DRV_LOG(DEBUG, "%s: retry count %d", 580 __func__, hot_ctx->eal_hot_plug_retry); 581 582 if (hot_ctx->eal_hot_plug_retry++ > NETVSC_MAX_HOTADD_RETRY) { 583 PMD_DRV_LOG(NOTICE, "Failed to parse PCI device retry=%d", 584 hot_ctx->eal_hot_plug_retry); 585 goto free_hotadd_ctx; 586 } 587 588 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/net", d->name); 589 di = opendir(buf); 590 if (!di) { 591 PMD_DRV_LOG(DEBUG, "%s: can't open directory %s, " 592 "retrying in 1 second", __func__, buf); 593 goto retry; 594 } 595 596 while ((dir = readdir(di))) { 597 /* Skip . and .. directories */ 598 if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, "..")) 599 continue; 600 601 /* trying to get mac address if this is a network device*/ 602 s = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 603 if (s == -1) { 604 PMD_DRV_LOG(ERR, "Failed to create socket errno %d", 605 errno); 606 break; 607 } 608 strlcpy(req.ifr_name, dir->d_name, sizeof(req.ifr_name)); 609 ret = ioctl(s, SIOCGIFHWADDR, &req); 610 close(s); 611 if (ret == -1) { 612 PMD_DRV_LOG(ERR, 613 "Failed to send SIOCGIFHWADDR for device %s", 614 dir->d_name); 615 break; 616 } 617 if (req.ifr_hwaddr.sa_family != ARPHRD_ETHER) { 618 closedir(di); 619 goto free_hotadd_ctx; 620 } 621 memcpy(eth_addr.addr_bytes, req.ifr_hwaddr.sa_data, 622 RTE_DIM(eth_addr.addr_bytes)); 623 624 if (rte_is_same_ether_addr(ð_addr, dev->data->mac_addrs)) { 625 PMD_DRV_LOG(NOTICE, 626 "Found matching MAC address, adding device %s network name %s", 627 d->name, dir->d_name); 628 629 /* If this device has been hot removed from this 630 * parent device, restore its args. 631 */ 632 ret = rte_eal_hotplug_add(d->bus->name, d->name, 633 hv->vf_devargs ? 634 hv->vf_devargs : ""); 635 if (ret) { 636 PMD_DRV_LOG(ERR, 637 "Failed to add PCI device %s", 638 d->name); 639 break; 640 } 641 } 642 /* When the code reaches here, we either have already added 643 * the device, or its MAC address did not match. 644 */ 645 closedir(di); 646 goto free_hotadd_ctx; 647 } 648 closedir(di); 649 retry: 650 /* The device is still being initialized, retry after 1 second */ 651 rte_eal_alarm_set(1000000, netvsc_hotplug_retry, hot_ctx); 652 return; 653 654 free_hotadd_ctx: 655 rte_spinlock_lock(&hv->hotadd_lock); 656 LIST_REMOVE(hot_ctx, list); 657 rte_spinlock_unlock(&hv->hotadd_lock); 658 659 rte_free(hot_ctx); 660 } 661 662 static void 663 netvsc_hotadd_callback(const char *device_name, enum rte_dev_event_type type, 664 void *arg) 665 { 666 struct hn_data *hv = arg; 667 struct hv_hotadd_context *hot_ctx; 668 struct rte_devargs *d; 669 int ret; 670 671 PMD_DRV_LOG(INFO, "Device notification type=%d device_name=%s", 672 type, device_name); 673 674 switch (type) { 675 case RTE_DEV_EVENT_ADD: 676 /* if we already has a VF, don't check on hot add */ 677 if (hv->vf_ctx.vf_state > vf_removed) 678 break; 679 680 hot_ctx = rte_zmalloc("NETVSC-HOTADD", sizeof(*hot_ctx), 681 rte_mem_page_size()); 682 683 if (!hot_ctx) { 684 PMD_DRV_LOG(ERR, "Failed to allocate hotadd context"); 685 return; 686 } 687 688 hot_ctx->hv = hv; 689 d = &hot_ctx->da; 690 691 ret = rte_devargs_parse(d, device_name); 692 if (ret) { 693 PMD_DRV_LOG(ERR, 694 "devargs parsing failed ret=%d", ret); 695 goto free_ctx; 696 } 697 698 if (!strcmp(d->bus->name, "pci")) { 699 /* Start the process of figuring out if this 700 * PCI device is a VF device 701 */ 702 rte_spinlock_lock(&hv->hotadd_lock); 703 LIST_INSERT_HEAD(&hv->hotadd_list, hot_ctx, list); 704 rte_spinlock_unlock(&hv->hotadd_lock); 705 rte_eal_alarm_set(1000000, netvsc_hotplug_retry, hot_ctx); 706 return; 707 } 708 709 /* We will switch to VF on RDNIS configure message 710 * sent from VSP 711 */ 712 free_ctx: 713 rte_free(hot_ctx); 714 break; 715 716 default: 717 break; 718 } 719 } 720 721 static int hn_dev_configure(struct rte_eth_dev *dev) 722 { 723 struct rte_eth_conf *dev_conf = &dev->data->dev_conf; 724 struct rte_eth_rss_conf *rss_conf = &dev_conf->rx_adv_conf.rss_conf; 725 const struct rte_eth_rxmode *rxmode = &dev_conf->rxmode; 726 const struct rte_eth_txmode *txmode = &dev_conf->txmode; 727 struct hn_data *hv = dev->data->dev_private; 728 uint64_t unsupported; 729 int i, err, subchan; 730 731 PMD_INIT_FUNC_TRACE(); 732 733 if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) 734 dev_conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 735 736 unsupported = txmode->offloads & ~HN_TX_OFFLOAD_CAPS; 737 if (unsupported) { 738 PMD_DRV_LOG(NOTICE, 739 "unsupported TX offload: %#" PRIx64, 740 unsupported); 741 return -EINVAL; 742 } 743 744 unsupported = rxmode->offloads & ~HN_RX_OFFLOAD_CAPS; 745 if (unsupported) { 746 PMD_DRV_LOG(NOTICE, 747 "unsupported RX offload: %#" PRIx64, 748 rxmode->offloads); 749 return -EINVAL; 750 } 751 752 hv->vlan_strip = !!(rxmode->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP); 753 754 err = hn_rndis_conf_offload(hv, txmode->offloads, 755 rxmode->offloads); 756 if (err) { 757 PMD_DRV_LOG(NOTICE, 758 "offload configure failed"); 759 return err; 760 } 761 762 hv->num_queues = RTE_MAX(dev->data->nb_rx_queues, 763 dev->data->nb_tx_queues); 764 765 for (i = 0; i < NDIS_HASH_INDCNT; i++) 766 hv->rss_ind[i] = i % dev->data->nb_rx_queues; 767 768 hn_rss_hash_init(hv, rss_conf); 769 770 subchan = hv->num_queues - 1; 771 if (subchan > 0) { 772 err = hn_subchan_configure(hv, subchan); 773 if (err) { 774 PMD_DRV_LOG(NOTICE, 775 "subchannel configuration failed"); 776 return err; 777 } 778 779 err = hn_rndis_conf_rss(hv, NDIS_RSS_FLAG_DISABLE); 780 if (err) { 781 PMD_DRV_LOG(NOTICE, 782 "rss disable failed"); 783 return err; 784 } 785 786 if (rss_conf->rss_hf != 0) { 787 err = hn_rndis_conf_rss(hv, 0); 788 if (err) { 789 PMD_DRV_LOG(NOTICE, 790 "initial RSS config failed"); 791 return err; 792 } 793 } 794 } 795 796 return hn_vf_configure_locked(dev, dev_conf); 797 } 798 799 static int hn_dev_stats_get(struct rte_eth_dev *dev, 800 struct rte_eth_stats *stats) 801 { 802 unsigned int i; 803 804 hn_vf_stats_get(dev, stats); 805 806 for (i = 0; i < dev->data->nb_tx_queues; i++) { 807 const struct hn_tx_queue *txq = dev->data->tx_queues[i]; 808 809 if (!txq) 810 continue; 811 812 stats->opackets += txq->stats.packets; 813 stats->obytes += txq->stats.bytes; 814 stats->oerrors += txq->stats.errors; 815 816 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { 817 stats->q_opackets[i] = txq->stats.packets; 818 stats->q_obytes[i] = txq->stats.bytes; 819 } 820 } 821 822 for (i = 0; i < dev->data->nb_rx_queues; i++) { 823 const struct hn_rx_queue *rxq = dev->data->rx_queues[i]; 824 825 if (!rxq) 826 continue; 827 828 stats->ipackets += rxq->stats.packets; 829 stats->ibytes += rxq->stats.bytes; 830 stats->ierrors += rxq->stats.errors; 831 stats->imissed += rxq->stats.ring_full; 832 833 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { 834 stats->q_ipackets[i] = rxq->stats.packets; 835 stats->q_ibytes[i] = rxq->stats.bytes; 836 } 837 } 838 839 stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; 840 return 0; 841 } 842 843 static int 844 hn_dev_stats_reset(struct rte_eth_dev *dev) 845 { 846 unsigned int i; 847 848 PMD_INIT_FUNC_TRACE(); 849 850 for (i = 0; i < dev->data->nb_tx_queues; i++) { 851 struct hn_tx_queue *txq = dev->data->tx_queues[i]; 852 853 if (!txq) 854 continue; 855 memset(&txq->stats, 0, sizeof(struct hn_stats)); 856 } 857 858 for (i = 0; i < dev->data->nb_rx_queues; i++) { 859 struct hn_rx_queue *rxq = dev->data->rx_queues[i]; 860 861 if (!rxq) 862 continue; 863 864 memset(&rxq->stats, 0, sizeof(struct hn_stats)); 865 } 866 867 return 0; 868 } 869 870 static int 871 hn_dev_xstats_reset(struct rte_eth_dev *dev) 872 { 873 int ret; 874 875 ret = hn_dev_stats_reset(dev); 876 if (ret != 0) 877 return 0; 878 879 return hn_vf_xstats_reset(dev); 880 } 881 882 static int 883 hn_dev_xstats_count(struct rte_eth_dev *dev) 884 { 885 int ret, count; 886 887 count = dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings); 888 count += dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings); 889 890 ret = hn_vf_xstats_get_names(dev, NULL, 0); 891 if (ret < 0) 892 return ret; 893 894 return count + ret; 895 } 896 897 static int 898 hn_dev_xstats_get_names(struct rte_eth_dev *dev, 899 struct rte_eth_xstat_name *xstats_names, 900 unsigned int limit) 901 { 902 unsigned int i, t, count = 0; 903 int ret; 904 905 if (!xstats_names) 906 return hn_dev_xstats_count(dev); 907 908 /* Note: limit checked in rte_eth_xstats_names() */ 909 for (i = 0; i < dev->data->nb_tx_queues; i++) { 910 const struct hn_tx_queue *txq = dev->data->tx_queues[i]; 911 912 if (!txq) 913 continue; 914 915 if (count >= limit) 916 break; 917 918 for (t = 0; t < RTE_DIM(hn_stat_strings); t++) 919 snprintf(xstats_names[count++].name, 920 RTE_ETH_XSTATS_NAME_SIZE, 921 "tx_q%u_%s", i, hn_stat_strings[t].name); 922 } 923 924 for (i = 0; i < dev->data->nb_rx_queues; i++) { 925 const struct hn_rx_queue *rxq = dev->data->rx_queues[i]; 926 927 if (!rxq) 928 continue; 929 930 if (count >= limit) 931 break; 932 933 for (t = 0; t < RTE_DIM(hn_stat_strings); t++) 934 snprintf(xstats_names[count++].name, 935 RTE_ETH_XSTATS_NAME_SIZE, 936 "rx_q%u_%s", i, 937 hn_stat_strings[t].name); 938 } 939 940 ret = hn_vf_xstats_get_names(dev, xstats_names + count, 941 limit - count); 942 if (ret < 0) 943 return ret; 944 945 return count + ret; 946 } 947 948 static int 949 hn_dev_xstats_get(struct rte_eth_dev *dev, 950 struct rte_eth_xstat *xstats, 951 unsigned int n) 952 { 953 unsigned int i, t, count = 0; 954 const unsigned int nstats = hn_dev_xstats_count(dev); 955 const char *stats; 956 int ret; 957 958 PMD_INIT_FUNC_TRACE(); 959 960 if (n < nstats) 961 return nstats; 962 963 for (i = 0; i < dev->data->nb_tx_queues; i++) { 964 const struct hn_tx_queue *txq = dev->data->tx_queues[i]; 965 966 if (!txq) 967 continue; 968 969 stats = (const char *)&txq->stats; 970 for (t = 0; t < RTE_DIM(hn_stat_strings); t++, count++) { 971 xstats[count].id = count; 972 xstats[count].value = *(const uint64_t *) 973 (stats + hn_stat_strings[t].offset); 974 } 975 } 976 977 for (i = 0; i < dev->data->nb_rx_queues; i++) { 978 const struct hn_rx_queue *rxq = dev->data->rx_queues[i]; 979 980 if (!rxq) 981 continue; 982 983 stats = (const char *)&rxq->stats; 984 for (t = 0; t < RTE_DIM(hn_stat_strings); t++, count++) { 985 xstats[count].id = count; 986 xstats[count].value = *(const uint64_t *) 987 (stats + hn_stat_strings[t].offset); 988 } 989 } 990 991 ret = hn_vf_xstats_get(dev, xstats, count, n); 992 if (ret < 0) 993 return ret; 994 995 return count + ret; 996 } 997 998 static int 999 hn_dev_start(struct rte_eth_dev *dev) 1000 { 1001 struct hn_data *hv = dev->data->dev_private; 1002 int i, error; 1003 1004 PMD_INIT_FUNC_TRACE(); 1005 1006 /* Register to monitor hot plug events */ 1007 error = rte_dev_event_callback_register(NULL, netvsc_hotadd_callback, 1008 hv); 1009 if (error) { 1010 PMD_DRV_LOG(ERR, "failed to register device event callback"); 1011 return error; 1012 } 1013 1014 error = hn_rndis_set_rxfilter(hv, 1015 NDIS_PACKET_TYPE_BROADCAST | 1016 NDIS_PACKET_TYPE_ALL_MULTICAST | 1017 NDIS_PACKET_TYPE_DIRECTED); 1018 if (error) 1019 return error; 1020 1021 error = hn_vf_start(dev); 1022 if (error) 1023 hn_rndis_set_rxfilter(hv, 0); 1024 1025 /* Initialize Link state */ 1026 if (error == 0) 1027 hn_dev_link_update(dev, 0); 1028 1029 for (i = 0; i < hv->num_queues; i++) { 1030 dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; 1031 dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; 1032 } 1033 1034 return error; 1035 } 1036 1037 static int 1038 hn_dev_stop(struct rte_eth_dev *dev) 1039 { 1040 struct hn_data *hv = dev->data->dev_private; 1041 int i, ret; 1042 1043 PMD_INIT_FUNC_TRACE(); 1044 dev->data->dev_started = 0; 1045 1046 rte_dev_event_callback_unregister(NULL, netvsc_hotadd_callback, hv); 1047 hn_rndis_set_rxfilter(hv, 0); 1048 ret = hn_vf_stop(dev); 1049 1050 for (i = 0; i < hv->num_queues; i++) { 1051 dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; 1052 dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; 1053 } 1054 1055 return ret; 1056 } 1057 1058 static int 1059 hn_dev_close(struct rte_eth_dev *dev) 1060 { 1061 int ret; 1062 struct hn_data *hv = dev->data->dev_private; 1063 struct hv_hotadd_context *hot_ctx; 1064 1065 PMD_INIT_FUNC_TRACE(); 1066 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1067 return 0; 1068 1069 rte_spinlock_lock(&hv->hotadd_lock); 1070 while (!LIST_EMPTY(&hv->hotadd_list)) { 1071 hot_ctx = LIST_FIRST(&hv->hotadd_list); 1072 rte_eal_alarm_cancel(netvsc_hotplug_retry, hot_ctx); 1073 LIST_REMOVE(hot_ctx, list); 1074 rte_free(hot_ctx); 1075 } 1076 rte_spinlock_unlock(&hv->hotadd_lock); 1077 1078 ret = hn_vf_close(dev); 1079 hn_dev_free_queues(dev); 1080 1081 return ret; 1082 } 1083 1084 /* 1085 * Setup connection between PMD and kernel. 1086 */ 1087 static int 1088 hn_attach(struct hn_data *hv, unsigned int mtu) 1089 { 1090 int error; 1091 1092 /* Attach NVS */ 1093 error = hn_nvs_attach(hv, mtu); 1094 if (error) 1095 goto failed_nvs; 1096 1097 /* Attach RNDIS */ 1098 error = hn_rndis_attach(hv); 1099 if (error) 1100 goto failed_rndis; 1101 1102 /* 1103 * NOTE: 1104 * Under certain conditions on certain versions of Hyper-V, 1105 * the RNDIS rxfilter is _not_ zero on the hypervisor side 1106 * after the successful RNDIS initialization. 1107 */ 1108 hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_NONE); 1109 return 0; 1110 failed_rndis: 1111 hn_nvs_detach(hv); 1112 failed_nvs: 1113 return error; 1114 } 1115 1116 static void 1117 hn_detach(struct hn_data *hv) 1118 { 1119 hn_nvs_detach(hv); 1120 hn_rndis_detach(hv); 1121 } 1122 1123 /* 1124 * Connects EXISTING rx/tx queues to NEW vmbus channel(s), and 1125 * re-initializes NDIS and RNDIS, including re-sending initial 1126 * NDIS/RNDIS configuration. To be used after the underlying vmbus 1127 * has been un- and re-mapped, e.g. as must happen when the device 1128 * MTU is changed. 1129 */ 1130 static int 1131 hn_reinit(struct rte_eth_dev *dev, uint16_t mtu) 1132 { 1133 struct hn_data *hv = dev->data->dev_private; 1134 struct hn_rx_queue **rxqs = (struct hn_rx_queue **)dev->data->rx_queues; 1135 struct hn_tx_queue **txqs = (struct hn_tx_queue **)dev->data->tx_queues; 1136 int i, ret = 0; 1137 1138 /* Point primary queues at new primary channel */ 1139 if (rxqs[0]) { 1140 rxqs[0]->chan = hv->channels[0]; 1141 txqs[0]->chan = hv->channels[0]; 1142 } 1143 1144 ret = hn_attach(hv, mtu); 1145 if (ret) 1146 return ret; 1147 1148 /* Create vmbus subchannels, additional RNDIS configuration */ 1149 ret = hn_dev_configure(dev); 1150 if (ret) 1151 return ret; 1152 1153 /* Point any additional queues at new subchannels */ 1154 if (rxqs[0]) { 1155 for (i = 1; i < dev->data->nb_rx_queues; i++) 1156 rxqs[i]->chan = hv->channels[i]; 1157 for (i = 1; i < dev->data->nb_tx_queues; i++) 1158 txqs[i]->chan = hv->channels[i]; 1159 } 1160 1161 return ret; 1162 } 1163 1164 static int 1165 hn_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 1166 { 1167 struct hn_data *hv = dev->data->dev_private; 1168 unsigned int orig_mtu = dev->data->mtu; 1169 uint32_t rndis_mtu; 1170 int ret = 0; 1171 int i; 1172 1173 if (dev->data->dev_started) { 1174 PMD_DRV_LOG(ERR, "Device must be stopped before changing MTU"); 1175 return -EBUSY; 1176 } 1177 1178 /* Change MTU of underlying VF dev first, if it exists */ 1179 ret = hn_vf_mtu_set(dev, mtu); 1180 if (ret) 1181 return ret; 1182 1183 /* Release channel resources */ 1184 hn_detach(hv); 1185 1186 /* Close any secondary vmbus channels */ 1187 for (i = 1; i < hv->num_queues; i++) 1188 rte_vmbus_chan_close(hv->channels[i]); 1189 1190 /* Close primary vmbus channel */ 1191 rte_free(hv->channels[0]); 1192 1193 /* Unmap and re-map vmbus device */ 1194 rte_vmbus_unmap_device(hv->vmbus); 1195 ret = rte_vmbus_map_device(hv->vmbus); 1196 if (ret) { 1197 /* This is a catastrophic error - the device is unusable */ 1198 PMD_DRV_LOG(ERR, "Could not re-map vmbus device!"); 1199 return ret; 1200 } 1201 1202 /* Update pointers to re-mapped UIO resources */ 1203 hv->rxbuf_res = hv->vmbus->resource[HV_RECV_BUF_MAP]; 1204 hv->chim_res = hv->vmbus->resource[HV_SEND_BUF_MAP]; 1205 1206 /* Re-open the primary vmbus channel */ 1207 ret = rte_vmbus_chan_open(hv->vmbus, &hv->channels[0]); 1208 if (ret) { 1209 /* This is a catastrophic error - the device is unusable */ 1210 PMD_DRV_LOG(ERR, "Could not re-open vmbus channel!"); 1211 return ret; 1212 } 1213 1214 rte_vmbus_set_latency(hv->vmbus, hv->channels[0], hv->latency); 1215 1216 ret = hn_reinit(dev, mtu); 1217 if (!ret) 1218 goto out; 1219 1220 /* In case of error, attempt to restore original MTU */ 1221 ret = hn_reinit(dev, orig_mtu); 1222 if (ret) 1223 PMD_DRV_LOG(ERR, "Restoring original MTU failed for netvsc"); 1224 1225 ret = hn_vf_mtu_set(dev, orig_mtu); 1226 if (ret) 1227 PMD_DRV_LOG(ERR, "Restoring original MTU failed for VF"); 1228 1229 out: 1230 if (hn_rndis_get_mtu(hv, &rndis_mtu)) { 1231 PMD_DRV_LOG(ERR, "Could not get MTU via RNDIS"); 1232 } else { 1233 dev->data->mtu = (uint16_t)rndis_mtu; 1234 PMD_DRV_LOG(DEBUG, "RNDIS MTU is %u", dev->data->mtu); 1235 } 1236 1237 return ret; 1238 } 1239 1240 static const struct eth_dev_ops hn_eth_dev_ops = { 1241 .dev_configure = hn_dev_configure, 1242 .dev_start = hn_dev_start, 1243 .dev_stop = hn_dev_stop, 1244 .dev_close = hn_dev_close, 1245 .dev_infos_get = hn_dev_info_get, 1246 .txq_info_get = hn_dev_tx_queue_info, 1247 .rxq_info_get = hn_dev_rx_queue_info, 1248 .dev_supported_ptypes_get = hn_vf_supported_ptypes, 1249 .promiscuous_enable = hn_dev_promiscuous_enable, 1250 .promiscuous_disable = hn_dev_promiscuous_disable, 1251 .allmulticast_enable = hn_dev_allmulticast_enable, 1252 .allmulticast_disable = hn_dev_allmulticast_disable, 1253 .set_mc_addr_list = hn_dev_mc_addr_list, 1254 .mtu_set = hn_dev_mtu_set, 1255 .reta_update = hn_rss_reta_update, 1256 .reta_query = hn_rss_reta_query, 1257 .rss_hash_update = hn_rss_hash_update, 1258 .rss_hash_conf_get = hn_rss_hash_conf_get, 1259 .tx_queue_setup = hn_dev_tx_queue_setup, 1260 .tx_queue_release = hn_dev_tx_queue_release, 1261 .tx_done_cleanup = hn_dev_tx_done_cleanup, 1262 .rx_queue_setup = hn_dev_rx_queue_setup, 1263 .rx_queue_release = hn_dev_rx_queue_release, 1264 .link_update = hn_dev_link_update, 1265 .stats_get = hn_dev_stats_get, 1266 .stats_reset = hn_dev_stats_reset, 1267 .xstats_get = hn_dev_xstats_get, 1268 .xstats_get_names = hn_dev_xstats_get_names, 1269 .xstats_reset = hn_dev_xstats_reset, 1270 }; 1271 1272 static int 1273 eth_hn_dev_init(struct rte_eth_dev *eth_dev) 1274 { 1275 struct hn_data *hv = eth_dev->data->dev_private; 1276 struct rte_device *device = eth_dev->device; 1277 struct rte_vmbus_device *vmbus; 1278 uint32_t mtu; 1279 unsigned int rxr_cnt; 1280 int err, max_chan; 1281 1282 PMD_INIT_FUNC_TRACE(); 1283 1284 rte_spinlock_init(&hv->hotadd_lock); 1285 LIST_INIT(&hv->hotadd_list); 1286 1287 vmbus = container_of(device, struct rte_vmbus_device, device); 1288 eth_dev->dev_ops = &hn_eth_dev_ops; 1289 eth_dev->rx_queue_count = hn_dev_rx_queue_count; 1290 eth_dev->rx_descriptor_status = hn_dev_rx_queue_status; 1291 eth_dev->tx_descriptor_status = hn_dev_tx_descriptor_status; 1292 eth_dev->tx_pkt_burst = &hn_xmit_pkts; 1293 eth_dev->rx_pkt_burst = &hn_recv_pkts; 1294 1295 /* 1296 * for secondary processes, we don't initialize any further as primary 1297 * has already done this work. 1298 */ 1299 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1300 return 0; 1301 1302 eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 1303 1304 /* Since Hyper-V only supports one MAC address */ 1305 eth_dev->data->mac_addrs = rte_calloc("hv_mac", HN_MAX_MAC_ADDRS, 1306 sizeof(struct rte_ether_addr), 0); 1307 if (eth_dev->data->mac_addrs == NULL) { 1308 PMD_INIT_LOG(ERR, 1309 "Failed to allocate memory store MAC addresses"); 1310 return -ENOMEM; 1311 } 1312 1313 hv->vmbus = vmbus; 1314 hv->rxbuf_res = vmbus->resource[HV_RECV_BUF_MAP]; 1315 hv->chim_res = vmbus->resource[HV_SEND_BUF_MAP]; 1316 hv->port_id = eth_dev->data->port_id; 1317 hv->latency = HN_CHAN_LATENCY_NS; 1318 hv->rx_copybreak = HN_RXCOPY_THRESHOLD; 1319 hv->tx_copybreak = HN_TXCOPY_THRESHOLD; 1320 hv->rx_extmbuf_enable = HN_RX_EXTMBUF_ENABLE; 1321 hv->max_queues = 1; 1322 1323 rte_rwlock_init(&hv->vf_lock); 1324 hv->vf_ctx.vf_vsc_switched = false; 1325 hv->vf_ctx.vf_vsp_reported = false; 1326 hv->vf_ctx.vf_attached = false; 1327 hv->vf_ctx.vf_state = vf_unknown; 1328 1329 err = hn_parse_args(eth_dev); 1330 if (err) 1331 return err; 1332 1333 strlcpy(hv->owner.name, eth_dev->device->name, 1334 RTE_ETH_MAX_OWNER_NAME_LEN); 1335 err = rte_eth_dev_owner_new(&hv->owner.id); 1336 if (err) { 1337 PMD_INIT_LOG(ERR, "Can not get owner id"); 1338 return err; 1339 } 1340 1341 /* Initialize primary channel input for control operations */ 1342 err = rte_vmbus_chan_open(vmbus, &hv->channels[0]); 1343 if (err) 1344 return err; 1345 1346 rte_vmbus_set_latency(hv->vmbus, hv->channels[0], hv->latency); 1347 1348 hv->primary = hn_rx_queue_alloc(hv, 0, 1349 eth_dev->device->numa_node); 1350 1351 if (!hv->primary) 1352 return -ENOMEM; 1353 1354 err = hn_attach(hv, RTE_ETHER_MTU); 1355 if (err) 1356 goto failed; 1357 1358 err = hn_chim_init(eth_dev); 1359 if (err) 1360 goto failed; 1361 1362 err = hn_rndis_get_mtu(hv, &mtu); 1363 if (err) 1364 goto failed; 1365 eth_dev->data->mtu = (uint16_t)mtu; 1366 PMD_INIT_LOG(DEBUG, "RNDIS MTU is %u", eth_dev->data->mtu); 1367 1368 err = hn_rndis_get_eaddr(hv, eth_dev->data->mac_addrs->addr_bytes); 1369 if (err) 1370 goto failed; 1371 1372 /* Multi queue requires later versions of windows server */ 1373 if (hv->nvs_ver < NVS_VERSION_5) 1374 return 0; 1375 1376 max_chan = rte_vmbus_max_channels(vmbus); 1377 PMD_INIT_LOG(DEBUG, "VMBus max channels %d", max_chan); 1378 if (max_chan <= 0) 1379 goto failed; 1380 1381 if (hn_rndis_query_rsscaps(hv, &rxr_cnt) != 0) 1382 rxr_cnt = 1; 1383 1384 hv->max_queues = RTE_MIN(rxr_cnt, (unsigned int)max_chan); 1385 1386 /* If VF was reported but not added, do it now */ 1387 if (hv->vf_ctx.vf_vsp_reported && !hv->vf_ctx.vf_vsc_switched) { 1388 PMD_INIT_LOG(DEBUG, "Adding VF device"); 1389 1390 err = hn_vf_add(eth_dev, hv); 1391 } 1392 1393 return 0; 1394 1395 failed: 1396 PMD_INIT_LOG(NOTICE, "device init failed"); 1397 1398 hn_chim_uninit(eth_dev); 1399 hn_detach(hv); 1400 return err; 1401 } 1402 1403 static int 1404 eth_hn_dev_uninit(struct rte_eth_dev *eth_dev) 1405 { 1406 struct hn_data *hv = eth_dev->data->dev_private; 1407 int ret, ret_stop; 1408 1409 PMD_INIT_FUNC_TRACE(); 1410 1411 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1412 return 0; 1413 1414 ret_stop = hn_dev_stop(eth_dev); 1415 hn_dev_close(eth_dev); 1416 1417 free(hv->vf_devargs); 1418 hv->vf_devargs = NULL; 1419 1420 hn_detach(hv); 1421 hn_chim_uninit(eth_dev); 1422 rte_vmbus_chan_close(hv->channels[0]); 1423 rte_free(hv->primary); 1424 ret = rte_eth_dev_owner_delete(hv->owner.id); 1425 if (ret != 0) 1426 return ret; 1427 1428 return ret_stop; 1429 } 1430 1431 static int eth_hn_probe(struct rte_vmbus_driver *drv __rte_unused, 1432 struct rte_vmbus_device *dev) 1433 { 1434 struct rte_eth_dev *eth_dev; 1435 int ret; 1436 1437 PMD_INIT_FUNC_TRACE(); 1438 1439 ret = rte_dev_event_monitor_start(); 1440 if (ret) { 1441 PMD_DRV_LOG(ERR, "Failed to start device event monitoring"); 1442 return ret; 1443 } 1444 1445 eth_dev = eth_dev_vmbus_allocate(dev, sizeof(struct hn_data)); 1446 if (!eth_dev) 1447 return -ENOMEM; 1448 1449 ret = eth_hn_dev_init(eth_dev); 1450 if (ret) { 1451 eth_dev_vmbus_release(eth_dev); 1452 rte_dev_event_monitor_stop(); 1453 } else { 1454 rte_eth_dev_probing_finish(eth_dev); 1455 } 1456 1457 return ret; 1458 } 1459 1460 static int eth_hn_remove(struct rte_vmbus_device *dev) 1461 { 1462 struct rte_eth_dev *eth_dev; 1463 int ret; 1464 1465 PMD_INIT_FUNC_TRACE(); 1466 1467 eth_dev = rte_eth_dev_allocated(dev->device.name); 1468 if (!eth_dev) 1469 return 0; /* port already released */ 1470 1471 ret = eth_hn_dev_uninit(eth_dev); 1472 if (ret) 1473 return ret; 1474 1475 eth_dev_vmbus_release(eth_dev); 1476 rte_dev_event_monitor_stop(); 1477 return 0; 1478 } 1479 1480 /* Network device GUID */ 1481 static const rte_uuid_t hn_net_ids[] = { 1482 /* f8615163-df3e-46c5-913f-f2d2f965ed0e */ 1483 RTE_UUID_INIT(0xf8615163, 0xdf3e, 0x46c5, 0x913f, 0xf2d2f965ed0eULL), 1484 { 0 } 1485 }; 1486 1487 static struct rte_vmbus_driver rte_netvsc_pmd = { 1488 .id_table = hn_net_ids, 1489 .probe = eth_hn_probe, 1490 .remove = eth_hn_remove, 1491 }; 1492 1493 RTE_PMD_REGISTER_VMBUS(net_netvsc, rte_netvsc_pmd); 1494 RTE_PMD_REGISTER_KMOD_DEP(net_netvsc, "* uio_hv_generic"); 1495 RTE_LOG_REGISTER_SUFFIX(hn_logtype_init, init, NOTICE); 1496 RTE_LOG_REGISTER_SUFFIX(hn_logtype_driver, driver, NOTICE); 1497 RTE_PMD_REGISTER_PARAM_STRING(net_netvsc, 1498 NETVSC_ARG_LATENCY "=<uint32> " 1499 NETVSC_ARG_RXBREAK "=<uint32> " 1500 NETVSC_ARG_TXBREAK "=<uint32> " 1501 NETVSC_ARG_RX_EXTMBUF_ENABLE "=<0|1>"); 1502