1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2016-2018 Microsoft Corporation 3 * Copyright(c) 2013-2016 Brocade Communications Systems, Inc. 4 * All rights reserved. 5 */ 6 7 #include <stdint.h> 8 #include <string.h> 9 #include <stdio.h> 10 #include <errno.h> 11 #include <unistd.h> 12 #include <dirent.h> 13 #include <net/if.h> 14 #include <net/if_arp.h> 15 #include <sys/ioctl.h> 16 17 #include <rte_ethdev.h> 18 #include <rte_memcpy.h> 19 #include <rte_string_fns.h> 20 #include <rte_memzone.h> 21 #include <rte_devargs.h> 22 #include <rte_malloc.h> 23 #include <rte_kvargs.h> 24 #include <rte_atomic.h> 25 #include <rte_branch_prediction.h> 26 #include <rte_ether.h> 27 #include <ethdev_driver.h> 28 #include <rte_cycles.h> 29 #include <rte_errno.h> 30 #include <rte_memory.h> 31 #include <rte_eal.h> 32 #include <rte_dev.h> 33 #include <rte_bus_vmbus.h> 34 #include <rte_alarm.h> 35 36 #include "hn_logs.h" 37 #include "hn_var.h" 38 #include "hn_rndis.h" 39 #include "hn_nvs.h" 40 #include "ndis.h" 41 42 #define HN_TX_OFFLOAD_CAPS (DEV_TX_OFFLOAD_IPV4_CKSUM | \ 43 DEV_TX_OFFLOAD_TCP_CKSUM | \ 44 DEV_TX_OFFLOAD_UDP_CKSUM | \ 45 DEV_TX_OFFLOAD_TCP_TSO | \ 46 DEV_TX_OFFLOAD_MULTI_SEGS | \ 47 DEV_TX_OFFLOAD_VLAN_INSERT) 48 49 #define HN_RX_OFFLOAD_CAPS (DEV_RX_OFFLOAD_CHECKSUM | \ 50 DEV_RX_OFFLOAD_VLAN_STRIP | \ 51 DEV_RX_OFFLOAD_RSS_HASH) 52 53 #define NETVSC_ARG_LATENCY "latency" 54 #define NETVSC_ARG_RXBREAK "rx_copybreak" 55 #define NETVSC_ARG_TXBREAK "tx_copybreak" 56 #define NETVSC_ARG_RX_EXTMBUF_ENABLE "rx_extmbuf_enable" 57 58 /* The max number of retry when hot adding a VF device */ 59 #define NETVSC_MAX_HOTADD_RETRY 10 60 61 struct hn_xstats_name_off { 62 char name[RTE_ETH_XSTATS_NAME_SIZE]; 63 unsigned int offset; 64 }; 65 66 static const struct hn_xstats_name_off hn_stat_strings[] = { 67 { "good_packets", offsetof(struct hn_stats, packets) }, 68 { "good_bytes", offsetof(struct hn_stats, bytes) }, 69 { "errors", offsetof(struct hn_stats, errors) }, 70 { "ring full", offsetof(struct hn_stats, ring_full) }, 71 { "channel full", offsetof(struct hn_stats, channel_full) }, 72 { "multicast_packets", offsetof(struct hn_stats, multicast) }, 73 { "broadcast_packets", offsetof(struct hn_stats, broadcast) }, 74 { "undersize_packets", offsetof(struct hn_stats, size_bins[0]) }, 75 { "size_64_packets", offsetof(struct hn_stats, size_bins[1]) }, 76 { "size_65_127_packets", offsetof(struct hn_stats, size_bins[2]) }, 77 { "size_128_255_packets", offsetof(struct hn_stats, size_bins[3]) }, 78 { "size_256_511_packets", offsetof(struct hn_stats, size_bins[4]) }, 79 { "size_512_1023_packets", offsetof(struct hn_stats, size_bins[5]) }, 80 { "size_1024_1518_packets", offsetof(struct hn_stats, size_bins[6]) }, 81 { "size_1519_max_packets", offsetof(struct hn_stats, size_bins[7]) }, 82 }; 83 84 /* The default RSS key. 85 * This value is the same as MLX5 so that flows will be 86 * received on same path for both VF and synthetic NIC. 87 */ 88 static const uint8_t rss_default_key[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 89 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7, 90 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94, 91 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1, 92 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59, 93 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a, 94 }; 95 96 static struct rte_eth_dev * 97 eth_dev_vmbus_allocate(struct rte_vmbus_device *dev, size_t private_data_size) 98 { 99 struct rte_eth_dev *eth_dev; 100 const char *name; 101 102 if (!dev) 103 return NULL; 104 105 name = dev->device.name; 106 107 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 108 eth_dev = rte_eth_dev_allocate(name); 109 if (!eth_dev) { 110 PMD_DRV_LOG(NOTICE, "can not allocate rte ethdev"); 111 return NULL; 112 } 113 114 if (private_data_size) { 115 eth_dev->data->dev_private = 116 rte_zmalloc_socket(name, private_data_size, 117 RTE_CACHE_LINE_SIZE, dev->device.numa_node); 118 if (!eth_dev->data->dev_private) { 119 PMD_DRV_LOG(NOTICE, "can not allocate driver data"); 120 rte_eth_dev_release_port(eth_dev); 121 return NULL; 122 } 123 } 124 } else { 125 eth_dev = rte_eth_dev_attach_secondary(name); 126 if (!eth_dev) { 127 PMD_DRV_LOG(NOTICE, "can not attach secondary"); 128 return NULL; 129 } 130 } 131 132 eth_dev->device = &dev->device; 133 134 /* interrupt is simulated */ 135 dev->intr_handle.type = RTE_INTR_HANDLE_EXT; 136 eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; 137 eth_dev->intr_handle = &dev->intr_handle; 138 139 return eth_dev; 140 } 141 142 static void 143 eth_dev_vmbus_release(struct rte_eth_dev *eth_dev) 144 { 145 /* free ether device */ 146 rte_eth_dev_release_port(eth_dev); 147 148 eth_dev->device = NULL; 149 eth_dev->intr_handle = NULL; 150 } 151 152 static int hn_set_parameter(const char *key, const char *value, void *opaque) 153 { 154 struct hn_data *hv = opaque; 155 char *endp = NULL; 156 unsigned long v; 157 158 v = strtoul(value, &endp, 0); 159 if (*value == '\0' || *endp != '\0') { 160 PMD_DRV_LOG(ERR, "invalid parameter %s=%s", key, value); 161 return -EINVAL; 162 } 163 164 if (!strcmp(key, NETVSC_ARG_LATENCY)) { 165 /* usec to nsec */ 166 hv->latency = v * 1000; 167 PMD_DRV_LOG(DEBUG, "set latency %u usec", hv->latency); 168 } else if (!strcmp(key, NETVSC_ARG_RXBREAK)) { 169 hv->rx_copybreak = v; 170 PMD_DRV_LOG(DEBUG, "rx copy break set to %u", 171 hv->rx_copybreak); 172 } else if (!strcmp(key, NETVSC_ARG_TXBREAK)) { 173 hv->tx_copybreak = v; 174 PMD_DRV_LOG(DEBUG, "tx copy break set to %u", 175 hv->tx_copybreak); 176 } else if (!strcmp(key, NETVSC_ARG_RX_EXTMBUF_ENABLE)) { 177 hv->rx_extmbuf_enable = v; 178 PMD_DRV_LOG(DEBUG, "rx extmbuf enable set to %u", 179 hv->rx_extmbuf_enable); 180 } 181 182 return 0; 183 } 184 185 /* Parse device arguments */ 186 static int hn_parse_args(const struct rte_eth_dev *dev) 187 { 188 struct hn_data *hv = dev->data->dev_private; 189 struct rte_devargs *devargs = dev->device->devargs; 190 static const char * const valid_keys[] = { 191 NETVSC_ARG_LATENCY, 192 NETVSC_ARG_RXBREAK, 193 NETVSC_ARG_TXBREAK, 194 NETVSC_ARG_RX_EXTMBUF_ENABLE, 195 NULL 196 }; 197 struct rte_kvargs *kvlist; 198 int ret; 199 200 if (!devargs) 201 return 0; 202 203 PMD_INIT_LOG(DEBUG, "device args %s %s", 204 devargs->name, devargs->args); 205 206 kvlist = rte_kvargs_parse(devargs->args, valid_keys); 207 if (!kvlist) { 208 PMD_DRV_LOG(ERR, "invalid parameters"); 209 return -EINVAL; 210 } 211 212 ret = rte_kvargs_process(kvlist, NULL, hn_set_parameter, hv); 213 rte_kvargs_free(kvlist); 214 215 return ret; 216 } 217 218 /* Update link status. 219 * Note: the DPDK definition of "wait_to_complete" 220 * means block this call until link is up. 221 * which is not worth supporting. 222 */ 223 int 224 hn_dev_link_update(struct rte_eth_dev *dev, 225 int wait_to_complete __rte_unused) 226 { 227 struct hn_data *hv = dev->data->dev_private; 228 struct rte_eth_link link, old; 229 int error; 230 231 old = dev->data->dev_link; 232 233 error = hn_rndis_get_linkstatus(hv); 234 if (error) 235 return error; 236 237 hn_rndis_get_linkspeed(hv); 238 239 link = (struct rte_eth_link) { 240 .link_duplex = ETH_LINK_FULL_DUPLEX, 241 .link_autoneg = ETH_LINK_SPEED_FIXED, 242 .link_speed = hv->link_speed / 10000, 243 }; 244 245 if (hv->link_status == NDIS_MEDIA_STATE_CONNECTED) 246 link.link_status = ETH_LINK_UP; 247 else 248 link.link_status = ETH_LINK_DOWN; 249 250 if (old.link_status == link.link_status) 251 return 0; 252 253 PMD_INIT_LOG(DEBUG, "Port %d is %s", dev->data->port_id, 254 (link.link_status == ETH_LINK_UP) ? "up" : "down"); 255 256 return rte_eth_linkstatus_set(dev, &link); 257 } 258 259 static int hn_dev_info_get(struct rte_eth_dev *dev, 260 struct rte_eth_dev_info *dev_info) 261 { 262 struct hn_data *hv = dev->data->dev_private; 263 int rc; 264 265 dev_info->speed_capa = ETH_LINK_SPEED_10G; 266 dev_info->min_rx_bufsize = HN_MIN_RX_BUF_SIZE; 267 dev_info->max_rx_pktlen = HN_MAX_XFER_LEN; 268 dev_info->max_mac_addrs = 1; 269 270 dev_info->hash_key_size = NDIS_HASH_KEYSIZE_TOEPLITZ; 271 dev_info->flow_type_rss_offloads = hv->rss_offloads; 272 dev_info->reta_size = ETH_RSS_RETA_SIZE_128; 273 274 dev_info->max_rx_queues = hv->max_queues; 275 dev_info->max_tx_queues = hv->max_queues; 276 277 dev_info->tx_desc_lim.nb_min = 1; 278 dev_info->tx_desc_lim.nb_max = 4096; 279 280 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 281 return 0; 282 283 /* fills in rx and tx offload capability */ 284 rc = hn_rndis_get_offload(hv, dev_info); 285 if (rc != 0) 286 return rc; 287 288 /* merges the offload and queues of vf */ 289 return hn_vf_info_get(hv, dev_info); 290 } 291 292 static int hn_rss_reta_update(struct rte_eth_dev *dev, 293 struct rte_eth_rss_reta_entry64 *reta_conf, 294 uint16_t reta_size) 295 { 296 struct hn_data *hv = dev->data->dev_private; 297 unsigned int i; 298 int err; 299 300 PMD_INIT_FUNC_TRACE(); 301 302 if (reta_size != NDIS_HASH_INDCNT) { 303 PMD_DRV_LOG(ERR, "Hash lookup table size does not match NDIS"); 304 return -EINVAL; 305 } 306 307 for (i = 0; i < NDIS_HASH_INDCNT; i++) { 308 uint16_t idx = i / RTE_RETA_GROUP_SIZE; 309 uint16_t shift = i % RTE_RETA_GROUP_SIZE; 310 uint64_t mask = (uint64_t)1 << shift; 311 312 if (reta_conf[idx].mask & mask) 313 hv->rss_ind[i] = reta_conf[idx].reta[shift]; 314 } 315 316 err = hn_rndis_conf_rss(hv, NDIS_RSS_FLAG_DISABLE); 317 if (err) { 318 PMD_DRV_LOG(NOTICE, 319 "rss disable failed"); 320 return err; 321 } 322 323 err = hn_rndis_conf_rss(hv, 0); 324 if (err) { 325 PMD_DRV_LOG(NOTICE, 326 "reta reconfig failed"); 327 return err; 328 } 329 330 return hn_vf_reta_hash_update(dev, reta_conf, reta_size); 331 } 332 333 static int hn_rss_reta_query(struct rte_eth_dev *dev, 334 struct rte_eth_rss_reta_entry64 *reta_conf, 335 uint16_t reta_size) 336 { 337 struct hn_data *hv = dev->data->dev_private; 338 unsigned int i; 339 340 PMD_INIT_FUNC_TRACE(); 341 342 if (reta_size != NDIS_HASH_INDCNT) { 343 PMD_DRV_LOG(ERR, "Hash lookup table size does not match NDIS"); 344 return -EINVAL; 345 } 346 347 for (i = 0; i < NDIS_HASH_INDCNT; i++) { 348 uint16_t idx = i / RTE_RETA_GROUP_SIZE; 349 uint16_t shift = i % RTE_RETA_GROUP_SIZE; 350 uint64_t mask = (uint64_t)1 << shift; 351 352 if (reta_conf[idx].mask & mask) 353 reta_conf[idx].reta[shift] = hv->rss_ind[i]; 354 } 355 return 0; 356 } 357 358 static void hn_rss_hash_init(struct hn_data *hv, 359 const struct rte_eth_rss_conf *rss_conf) 360 { 361 /* Convert from DPDK RSS hash flags to NDIS hash flags */ 362 hv->rss_hash = NDIS_HASH_FUNCTION_TOEPLITZ; 363 364 if (rss_conf->rss_hf & ETH_RSS_IPV4) 365 hv->rss_hash |= NDIS_HASH_IPV4; 366 if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) 367 hv->rss_hash |= NDIS_HASH_TCP_IPV4; 368 if (rss_conf->rss_hf & ETH_RSS_IPV6) 369 hv->rss_hash |= NDIS_HASH_IPV6; 370 if (rss_conf->rss_hf & ETH_RSS_IPV6_EX) 371 hv->rss_hash |= NDIS_HASH_IPV6_EX; 372 if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV6_TCP) 373 hv->rss_hash |= NDIS_HASH_TCP_IPV6; 374 if (rss_conf->rss_hf & ETH_RSS_IPV6_TCP_EX) 375 hv->rss_hash |= NDIS_HASH_TCP_IPV6_EX; 376 377 memcpy(hv->rss_key, rss_conf->rss_key ? : rss_default_key, 378 NDIS_HASH_KEYSIZE_TOEPLITZ); 379 } 380 381 static int hn_rss_hash_update(struct rte_eth_dev *dev, 382 struct rte_eth_rss_conf *rss_conf) 383 { 384 struct hn_data *hv = dev->data->dev_private; 385 int err; 386 387 PMD_INIT_FUNC_TRACE(); 388 389 err = hn_rndis_conf_rss(hv, NDIS_RSS_FLAG_DISABLE); 390 if (err) { 391 PMD_DRV_LOG(NOTICE, 392 "rss disable failed"); 393 return err; 394 } 395 396 hn_rss_hash_init(hv, rss_conf); 397 398 if (rss_conf->rss_hf != 0) { 399 err = hn_rndis_conf_rss(hv, 0); 400 if (err) { 401 PMD_DRV_LOG(NOTICE, 402 "rss reconfig failed (RSS disabled)"); 403 return err; 404 } 405 } 406 407 return hn_vf_rss_hash_update(dev, rss_conf); 408 } 409 410 static int hn_rss_hash_conf_get(struct rte_eth_dev *dev, 411 struct rte_eth_rss_conf *rss_conf) 412 { 413 struct hn_data *hv = dev->data->dev_private; 414 415 PMD_INIT_FUNC_TRACE(); 416 417 if (hv->ndis_ver < NDIS_VERSION_6_20) { 418 PMD_DRV_LOG(DEBUG, "RSS not supported on this host"); 419 return -EOPNOTSUPP; 420 } 421 422 rss_conf->rss_key_len = NDIS_HASH_KEYSIZE_TOEPLITZ; 423 if (rss_conf->rss_key) 424 memcpy(rss_conf->rss_key, hv->rss_key, 425 NDIS_HASH_KEYSIZE_TOEPLITZ); 426 427 rss_conf->rss_hf = 0; 428 if (hv->rss_hash & NDIS_HASH_IPV4) 429 rss_conf->rss_hf |= ETH_RSS_IPV4; 430 431 if (hv->rss_hash & NDIS_HASH_TCP_IPV4) 432 rss_conf->rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP; 433 434 if (hv->rss_hash & NDIS_HASH_IPV6) 435 rss_conf->rss_hf |= ETH_RSS_IPV6; 436 437 if (hv->rss_hash & NDIS_HASH_IPV6_EX) 438 rss_conf->rss_hf |= ETH_RSS_IPV6_EX; 439 440 if (hv->rss_hash & NDIS_HASH_TCP_IPV6) 441 rss_conf->rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP; 442 443 if (hv->rss_hash & NDIS_HASH_TCP_IPV6_EX) 444 rss_conf->rss_hf |= ETH_RSS_IPV6_TCP_EX; 445 446 return 0; 447 } 448 449 static int 450 hn_dev_promiscuous_enable(struct rte_eth_dev *dev) 451 { 452 struct hn_data *hv = dev->data->dev_private; 453 454 hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_PROMISCUOUS); 455 return hn_vf_promiscuous_enable(dev); 456 } 457 458 static int 459 hn_dev_promiscuous_disable(struct rte_eth_dev *dev) 460 { 461 struct hn_data *hv = dev->data->dev_private; 462 uint32_t filter; 463 464 filter = NDIS_PACKET_TYPE_DIRECTED | NDIS_PACKET_TYPE_BROADCAST; 465 if (dev->data->all_multicast) 466 filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; 467 hn_rndis_set_rxfilter(hv, filter); 468 return hn_vf_promiscuous_disable(dev); 469 } 470 471 static int 472 hn_dev_allmulticast_enable(struct rte_eth_dev *dev) 473 { 474 struct hn_data *hv = dev->data->dev_private; 475 476 hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_DIRECTED | 477 NDIS_PACKET_TYPE_ALL_MULTICAST | 478 NDIS_PACKET_TYPE_BROADCAST); 479 return hn_vf_allmulticast_enable(dev); 480 } 481 482 static int 483 hn_dev_allmulticast_disable(struct rte_eth_dev *dev) 484 { 485 struct hn_data *hv = dev->data->dev_private; 486 487 hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_DIRECTED | 488 NDIS_PACKET_TYPE_BROADCAST); 489 return hn_vf_allmulticast_disable(dev); 490 } 491 492 static int 493 hn_dev_mc_addr_list(struct rte_eth_dev *dev, 494 struct rte_ether_addr *mc_addr_set, 495 uint32_t nb_mc_addr) 496 { 497 /* No filtering on the synthetic path, but can do it on VF */ 498 return hn_vf_mc_addr_list(dev, mc_addr_set, nb_mc_addr); 499 } 500 501 /* Setup shared rx/tx queue data */ 502 static int hn_subchan_configure(struct hn_data *hv, 503 uint32_t subchan) 504 { 505 struct vmbus_channel *primary = hn_primary_chan(hv); 506 int err; 507 unsigned int retry = 0; 508 509 PMD_DRV_LOG(DEBUG, 510 "open %u subchannels", subchan); 511 512 /* Send create sub channels command */ 513 err = hn_nvs_alloc_subchans(hv, &subchan); 514 if (err) 515 return err; 516 517 while (subchan > 0) { 518 struct vmbus_channel *new_sc; 519 uint16_t chn_index; 520 521 err = rte_vmbus_subchan_open(primary, &new_sc); 522 if (err == -ENOENT && ++retry < 1000) { 523 /* This can happen if not ready yet */ 524 rte_delay_ms(10); 525 continue; 526 } 527 528 if (err) { 529 PMD_DRV_LOG(ERR, 530 "open subchannel failed: %d", err); 531 return err; 532 } 533 534 rte_vmbus_set_latency(hv->vmbus, new_sc, hv->latency); 535 536 retry = 0; 537 chn_index = rte_vmbus_sub_channel_index(new_sc); 538 if (chn_index == 0 || chn_index > hv->max_queues) { 539 PMD_DRV_LOG(ERR, 540 "Invalid subchannel offermsg channel %u", 541 chn_index); 542 return -EIO; 543 } 544 545 PMD_DRV_LOG(DEBUG, "new sub channel %u", chn_index); 546 hv->channels[chn_index] = new_sc; 547 --subchan; 548 } 549 550 return err; 551 } 552 553 static void netvsc_hotplug_retry(void *args) 554 { 555 int ret; 556 struct hn_data *hv = args; 557 struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id]; 558 struct rte_devargs *d = &hv->devargs; 559 char buf[256]; 560 561 DIR *di; 562 struct dirent *dir; 563 struct ifreq req; 564 struct rte_ether_addr eth_addr; 565 int s; 566 567 PMD_DRV_LOG(DEBUG, "%s: retry count %d\n", 568 __func__, hv->eal_hot_plug_retry); 569 570 if (hv->eal_hot_plug_retry++ > NETVSC_MAX_HOTADD_RETRY) 571 return; 572 573 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/net", d->name); 574 di = opendir(buf); 575 if (!di) { 576 PMD_DRV_LOG(DEBUG, "%s: can't open directory %s, " 577 "retrying in 1 second\n", __func__, buf); 578 goto retry; 579 } 580 581 while ((dir = readdir(di))) { 582 /* Skip . and .. directories */ 583 if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, "..")) 584 continue; 585 586 /* trying to get mac address if this is a network device*/ 587 s = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 588 if (s == -1) { 589 PMD_DRV_LOG(ERR, "Failed to create socket errno %d\n", 590 errno); 591 break; 592 } 593 strlcpy(req.ifr_name, dir->d_name, sizeof(req.ifr_name)); 594 ret = ioctl(s, SIOCGIFHWADDR, &req); 595 close(s); 596 if (ret == -1) { 597 PMD_DRV_LOG(ERR, "Failed to send SIOCGIFHWADDR for " 598 "device %s\n", dir->d_name); 599 break; 600 } 601 if (req.ifr_hwaddr.sa_family != ARPHRD_ETHER) { 602 closedir(di); 603 return; 604 } 605 memcpy(eth_addr.addr_bytes, req.ifr_hwaddr.sa_data, 606 RTE_DIM(eth_addr.addr_bytes)); 607 608 if (rte_is_same_ether_addr(ð_addr, dev->data->mac_addrs)) { 609 PMD_DRV_LOG(NOTICE, "Found matching MAC address, " 610 "adding device %s network name %s\n", 611 d->name, dir->d_name); 612 ret = rte_eal_hotplug_add(d->bus->name, d->name, 613 d->args); 614 if (ret) { 615 PMD_DRV_LOG(ERR, 616 "Failed to add PCI device %s\n", 617 d->name); 618 break; 619 } 620 } 621 /* When the code reaches here, we either have already added 622 * the device, or its MAC address did not match. 623 */ 624 closedir(di); 625 return; 626 } 627 closedir(di); 628 retry: 629 /* The device is still being initialized, retry after 1 second */ 630 rte_eal_alarm_set(1000000, netvsc_hotplug_retry, hv); 631 } 632 633 static void 634 netvsc_hotadd_callback(const char *device_name, enum rte_dev_event_type type, 635 void *arg) 636 { 637 struct hn_data *hv = arg; 638 struct rte_devargs *d = &hv->devargs; 639 int ret; 640 641 PMD_DRV_LOG(INFO, "Device notification type=%d device_name=%s\n", 642 type, device_name); 643 644 switch (type) { 645 case RTE_DEV_EVENT_ADD: 646 /* if we already has a VF, don't check on hot add */ 647 if (hv->vf_ctx.vf_state > vf_removed) 648 break; 649 650 ret = rte_devargs_parse(d, device_name); 651 if (ret) { 652 PMD_DRV_LOG(ERR, 653 "devargs parsing failed ret=%d\n", ret); 654 return; 655 } 656 657 if (!strcmp(d->bus->name, "pci")) { 658 /* Start the process of figuring out if this 659 * PCI device is a VF device 660 */ 661 hv->eal_hot_plug_retry = 0; 662 rte_eal_alarm_set(1000000, netvsc_hotplug_retry, hv); 663 } 664 665 /* We will switch to VF on RDNIS configure message 666 * sent from VSP 667 */ 668 669 break; 670 default: 671 break; 672 } 673 } 674 675 static int hn_dev_configure(struct rte_eth_dev *dev) 676 { 677 struct rte_eth_conf *dev_conf = &dev->data->dev_conf; 678 struct rte_eth_rss_conf *rss_conf = &dev_conf->rx_adv_conf.rss_conf; 679 const struct rte_eth_rxmode *rxmode = &dev_conf->rxmode; 680 const struct rte_eth_txmode *txmode = &dev_conf->txmode; 681 struct hn_data *hv = dev->data->dev_private; 682 uint64_t unsupported; 683 int i, err, subchan; 684 685 PMD_INIT_FUNC_TRACE(); 686 687 if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) 688 dev_conf->rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; 689 690 unsupported = txmode->offloads & ~HN_TX_OFFLOAD_CAPS; 691 if (unsupported) { 692 PMD_DRV_LOG(NOTICE, 693 "unsupported TX offload: %#" PRIx64, 694 unsupported); 695 return -EINVAL; 696 } 697 698 unsupported = rxmode->offloads & ~HN_RX_OFFLOAD_CAPS; 699 if (unsupported) { 700 PMD_DRV_LOG(NOTICE, 701 "unsupported RX offload: %#" PRIx64, 702 rxmode->offloads); 703 return -EINVAL; 704 } 705 706 hv->vlan_strip = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP); 707 708 err = hn_rndis_conf_offload(hv, txmode->offloads, 709 rxmode->offloads); 710 if (err) { 711 PMD_DRV_LOG(NOTICE, 712 "offload configure failed"); 713 return err; 714 } 715 716 hv->num_queues = RTE_MAX(dev->data->nb_rx_queues, 717 dev->data->nb_tx_queues); 718 719 for (i = 0; i < NDIS_HASH_INDCNT; i++) 720 hv->rss_ind[i] = i % dev->data->nb_rx_queues; 721 722 hn_rss_hash_init(hv, rss_conf); 723 724 subchan = hv->num_queues - 1; 725 if (subchan > 0) { 726 err = hn_subchan_configure(hv, subchan); 727 if (err) { 728 PMD_DRV_LOG(NOTICE, 729 "subchannel configuration failed"); 730 return err; 731 } 732 733 err = hn_rndis_conf_rss(hv, NDIS_RSS_FLAG_DISABLE); 734 if (err) { 735 PMD_DRV_LOG(NOTICE, 736 "rss disable failed"); 737 return err; 738 } 739 740 if (rss_conf->rss_hf != 0) { 741 err = hn_rndis_conf_rss(hv, 0); 742 if (err) { 743 PMD_DRV_LOG(NOTICE, 744 "initial RSS config failed"); 745 return err; 746 } 747 } 748 } 749 750 return hn_vf_configure_locked(dev, dev_conf); 751 } 752 753 static int hn_dev_stats_get(struct rte_eth_dev *dev, 754 struct rte_eth_stats *stats) 755 { 756 unsigned int i; 757 758 hn_vf_stats_get(dev, stats); 759 760 for (i = 0; i < dev->data->nb_tx_queues; i++) { 761 const struct hn_tx_queue *txq = dev->data->tx_queues[i]; 762 763 if (!txq) 764 continue; 765 766 stats->opackets += txq->stats.packets; 767 stats->obytes += txq->stats.bytes; 768 stats->oerrors += txq->stats.errors; 769 770 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { 771 stats->q_opackets[i] = txq->stats.packets; 772 stats->q_obytes[i] = txq->stats.bytes; 773 } 774 } 775 776 for (i = 0; i < dev->data->nb_rx_queues; i++) { 777 const struct hn_rx_queue *rxq = dev->data->rx_queues[i]; 778 779 if (!rxq) 780 continue; 781 782 stats->ipackets += rxq->stats.packets; 783 stats->ibytes += rxq->stats.bytes; 784 stats->ierrors += rxq->stats.errors; 785 stats->imissed += rxq->stats.ring_full; 786 787 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { 788 stats->q_ipackets[i] = rxq->stats.packets; 789 stats->q_ibytes[i] = rxq->stats.bytes; 790 } 791 } 792 793 stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; 794 return 0; 795 } 796 797 static int 798 hn_dev_stats_reset(struct rte_eth_dev *dev) 799 { 800 unsigned int i; 801 802 PMD_INIT_FUNC_TRACE(); 803 804 for (i = 0; i < dev->data->nb_tx_queues; i++) { 805 struct hn_tx_queue *txq = dev->data->tx_queues[i]; 806 807 if (!txq) 808 continue; 809 memset(&txq->stats, 0, sizeof(struct hn_stats)); 810 } 811 812 for (i = 0; i < dev->data->nb_rx_queues; i++) { 813 struct hn_rx_queue *rxq = dev->data->rx_queues[i]; 814 815 if (!rxq) 816 continue; 817 818 memset(&rxq->stats, 0, sizeof(struct hn_stats)); 819 } 820 821 return 0; 822 } 823 824 static int 825 hn_dev_xstats_reset(struct rte_eth_dev *dev) 826 { 827 int ret; 828 829 ret = hn_dev_stats_reset(dev); 830 if (ret != 0) 831 return 0; 832 833 return hn_vf_xstats_reset(dev); 834 } 835 836 static int 837 hn_dev_xstats_count(struct rte_eth_dev *dev) 838 { 839 int ret, count; 840 841 count = dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings); 842 count += dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings); 843 844 ret = hn_vf_xstats_get_names(dev, NULL, 0); 845 if (ret < 0) 846 return ret; 847 848 return count + ret; 849 } 850 851 static int 852 hn_dev_xstats_get_names(struct rte_eth_dev *dev, 853 struct rte_eth_xstat_name *xstats_names, 854 unsigned int limit) 855 { 856 unsigned int i, t, count = 0; 857 int ret; 858 859 if (!xstats_names) 860 return hn_dev_xstats_count(dev); 861 862 /* Note: limit checked in rte_eth_xstats_names() */ 863 for (i = 0; i < dev->data->nb_tx_queues; i++) { 864 const struct hn_tx_queue *txq = dev->data->tx_queues[i]; 865 866 if (!txq) 867 continue; 868 869 if (count >= limit) 870 break; 871 872 for (t = 0; t < RTE_DIM(hn_stat_strings); t++) 873 snprintf(xstats_names[count++].name, 874 RTE_ETH_XSTATS_NAME_SIZE, 875 "tx_q%u_%s", i, hn_stat_strings[t].name); 876 } 877 878 for (i = 0; i < dev->data->nb_rx_queues; i++) { 879 const struct hn_rx_queue *rxq = dev->data->rx_queues[i]; 880 881 if (!rxq) 882 continue; 883 884 if (count >= limit) 885 break; 886 887 for (t = 0; t < RTE_DIM(hn_stat_strings); t++) 888 snprintf(xstats_names[count++].name, 889 RTE_ETH_XSTATS_NAME_SIZE, 890 "rx_q%u_%s", i, 891 hn_stat_strings[t].name); 892 } 893 894 ret = hn_vf_xstats_get_names(dev, xstats_names + count, 895 limit - count); 896 if (ret < 0) 897 return ret; 898 899 return count + ret; 900 } 901 902 static int 903 hn_dev_xstats_get(struct rte_eth_dev *dev, 904 struct rte_eth_xstat *xstats, 905 unsigned int n) 906 { 907 unsigned int i, t, count = 0; 908 const unsigned int nstats = hn_dev_xstats_count(dev); 909 const char *stats; 910 int ret; 911 912 PMD_INIT_FUNC_TRACE(); 913 914 if (n < nstats) 915 return nstats; 916 917 for (i = 0; i < dev->data->nb_tx_queues; i++) { 918 const struct hn_tx_queue *txq = dev->data->tx_queues[i]; 919 920 if (!txq) 921 continue; 922 923 stats = (const char *)&txq->stats; 924 for (t = 0; t < RTE_DIM(hn_stat_strings); t++, count++) { 925 xstats[count].id = count; 926 xstats[count].value = *(const uint64_t *) 927 (stats + hn_stat_strings[t].offset); 928 } 929 } 930 931 for (i = 0; i < dev->data->nb_rx_queues; i++) { 932 const struct hn_rx_queue *rxq = dev->data->rx_queues[i]; 933 934 if (!rxq) 935 continue; 936 937 stats = (const char *)&rxq->stats; 938 for (t = 0; t < RTE_DIM(hn_stat_strings); t++, count++) { 939 xstats[count].id = count; 940 xstats[count].value = *(const uint64_t *) 941 (stats + hn_stat_strings[t].offset); 942 } 943 } 944 945 ret = hn_vf_xstats_get(dev, xstats, count, n); 946 if (ret < 0) 947 return ret; 948 949 return count + ret; 950 } 951 952 static int 953 hn_dev_start(struct rte_eth_dev *dev) 954 { 955 struct hn_data *hv = dev->data->dev_private; 956 int error; 957 958 PMD_INIT_FUNC_TRACE(); 959 960 /* Register to monitor hot plug events */ 961 error = rte_dev_event_callback_register(NULL, netvsc_hotadd_callback, 962 hv); 963 if (error) { 964 PMD_DRV_LOG(ERR, "failed to register device event callback\n"); 965 return error; 966 } 967 968 error = hn_rndis_set_rxfilter(hv, 969 NDIS_PACKET_TYPE_BROADCAST | 970 NDIS_PACKET_TYPE_ALL_MULTICAST | 971 NDIS_PACKET_TYPE_DIRECTED); 972 if (error) 973 return error; 974 975 error = hn_vf_start(dev); 976 if (error) 977 hn_rndis_set_rxfilter(hv, 0); 978 979 /* Initialize Link state */ 980 if (error == 0) 981 hn_dev_link_update(dev, 0); 982 983 return error; 984 } 985 986 static int 987 hn_dev_stop(struct rte_eth_dev *dev) 988 { 989 struct hn_data *hv = dev->data->dev_private; 990 991 PMD_INIT_FUNC_TRACE(); 992 dev->data->dev_started = 0; 993 994 rte_dev_event_callback_unregister(NULL, netvsc_hotadd_callback, hv); 995 hn_rndis_set_rxfilter(hv, 0); 996 return hn_vf_stop(dev); 997 } 998 999 static int 1000 hn_dev_close(struct rte_eth_dev *dev) 1001 { 1002 int ret; 1003 struct hn_data *hv = dev->data->dev_private; 1004 1005 PMD_INIT_FUNC_TRACE(); 1006 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1007 return 0; 1008 1009 rte_eal_alarm_cancel(netvsc_hotplug_retry, &hv->devargs); 1010 1011 ret = hn_vf_close(dev); 1012 hn_dev_free_queues(dev); 1013 1014 return ret; 1015 } 1016 1017 static const struct eth_dev_ops hn_eth_dev_ops = { 1018 .dev_configure = hn_dev_configure, 1019 .dev_start = hn_dev_start, 1020 .dev_stop = hn_dev_stop, 1021 .dev_close = hn_dev_close, 1022 .dev_infos_get = hn_dev_info_get, 1023 .txq_info_get = hn_dev_tx_queue_info, 1024 .rxq_info_get = hn_dev_rx_queue_info, 1025 .dev_supported_ptypes_get = hn_vf_supported_ptypes, 1026 .promiscuous_enable = hn_dev_promiscuous_enable, 1027 .promiscuous_disable = hn_dev_promiscuous_disable, 1028 .allmulticast_enable = hn_dev_allmulticast_enable, 1029 .allmulticast_disable = hn_dev_allmulticast_disable, 1030 .set_mc_addr_list = hn_dev_mc_addr_list, 1031 .reta_update = hn_rss_reta_update, 1032 .reta_query = hn_rss_reta_query, 1033 .rss_hash_update = hn_rss_hash_update, 1034 .rss_hash_conf_get = hn_rss_hash_conf_get, 1035 .tx_queue_setup = hn_dev_tx_queue_setup, 1036 .tx_queue_release = hn_dev_tx_queue_release, 1037 .tx_done_cleanup = hn_dev_tx_done_cleanup, 1038 .rx_queue_setup = hn_dev_rx_queue_setup, 1039 .rx_queue_release = hn_dev_rx_queue_release, 1040 .link_update = hn_dev_link_update, 1041 .stats_get = hn_dev_stats_get, 1042 .stats_reset = hn_dev_stats_reset, 1043 .xstats_get = hn_dev_xstats_get, 1044 .xstats_get_names = hn_dev_xstats_get_names, 1045 .xstats_reset = hn_dev_xstats_reset, 1046 }; 1047 1048 /* 1049 * Setup connection between PMD and kernel. 1050 */ 1051 static int 1052 hn_attach(struct hn_data *hv, unsigned int mtu) 1053 { 1054 int error; 1055 1056 /* Attach NVS */ 1057 error = hn_nvs_attach(hv, mtu); 1058 if (error) 1059 goto failed_nvs; 1060 1061 /* Attach RNDIS */ 1062 error = hn_rndis_attach(hv); 1063 if (error) 1064 goto failed_rndis; 1065 1066 /* 1067 * NOTE: 1068 * Under certain conditions on certain versions of Hyper-V, 1069 * the RNDIS rxfilter is _not_ zero on the hypervisor side 1070 * after the successful RNDIS initialization. 1071 */ 1072 hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_NONE); 1073 return 0; 1074 failed_rndis: 1075 hn_nvs_detach(hv); 1076 failed_nvs: 1077 return error; 1078 } 1079 1080 static void 1081 hn_detach(struct hn_data *hv) 1082 { 1083 hn_nvs_detach(hv); 1084 hn_rndis_detach(hv); 1085 } 1086 1087 static int 1088 eth_hn_dev_init(struct rte_eth_dev *eth_dev) 1089 { 1090 struct hn_data *hv = eth_dev->data->dev_private; 1091 struct rte_device *device = eth_dev->device; 1092 struct rte_vmbus_device *vmbus; 1093 unsigned int rxr_cnt; 1094 int err, max_chan; 1095 1096 PMD_INIT_FUNC_TRACE(); 1097 1098 vmbus = container_of(device, struct rte_vmbus_device, device); 1099 eth_dev->dev_ops = &hn_eth_dev_ops; 1100 eth_dev->rx_queue_count = hn_dev_rx_queue_count; 1101 eth_dev->rx_descriptor_status = hn_dev_rx_queue_status; 1102 eth_dev->tx_descriptor_status = hn_dev_tx_descriptor_status; 1103 eth_dev->tx_pkt_burst = &hn_xmit_pkts; 1104 eth_dev->rx_pkt_burst = &hn_recv_pkts; 1105 1106 /* 1107 * for secondary processes, we don't initialize any further as primary 1108 * has already done this work. 1109 */ 1110 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1111 return 0; 1112 1113 eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 1114 1115 /* Since Hyper-V only supports one MAC address */ 1116 eth_dev->data->mac_addrs = rte_calloc("hv_mac", HN_MAX_MAC_ADDRS, 1117 sizeof(struct rte_ether_addr), 0); 1118 if (eth_dev->data->mac_addrs == NULL) { 1119 PMD_INIT_LOG(ERR, 1120 "Failed to allocate memory store MAC addresses"); 1121 return -ENOMEM; 1122 } 1123 1124 hv->vmbus = vmbus; 1125 hv->rxbuf_res = &vmbus->resource[HV_RECV_BUF_MAP]; 1126 hv->chim_res = &vmbus->resource[HV_SEND_BUF_MAP]; 1127 hv->port_id = eth_dev->data->port_id; 1128 hv->latency = HN_CHAN_LATENCY_NS; 1129 hv->rx_copybreak = HN_RXCOPY_THRESHOLD; 1130 hv->tx_copybreak = HN_TXCOPY_THRESHOLD; 1131 hv->rx_extmbuf_enable = HN_RX_EXTMBUF_ENABLE; 1132 hv->max_queues = 1; 1133 1134 rte_rwlock_init(&hv->vf_lock); 1135 hv->vf_ctx.vf_vsc_switched = false; 1136 hv->vf_ctx.vf_vsp_reported = false; 1137 hv->vf_ctx.vf_attached = false; 1138 hv->vf_ctx.vf_state = vf_unknown; 1139 1140 err = hn_parse_args(eth_dev); 1141 if (err) 1142 return err; 1143 1144 strlcpy(hv->owner.name, eth_dev->device->name, 1145 RTE_ETH_MAX_OWNER_NAME_LEN); 1146 err = rte_eth_dev_owner_new(&hv->owner.id); 1147 if (err) { 1148 PMD_INIT_LOG(ERR, "Can not get owner id"); 1149 return err; 1150 } 1151 1152 /* Initialize primary channel input for control operations */ 1153 err = rte_vmbus_chan_open(vmbus, &hv->channels[0]); 1154 if (err) 1155 return err; 1156 1157 rte_vmbus_set_latency(hv->vmbus, hv->channels[0], hv->latency); 1158 1159 hv->primary = hn_rx_queue_alloc(hv, 0, 1160 eth_dev->device->numa_node); 1161 1162 if (!hv->primary) 1163 return -ENOMEM; 1164 1165 err = hn_attach(hv, RTE_ETHER_MTU); 1166 if (err) 1167 goto failed; 1168 1169 err = hn_chim_init(eth_dev); 1170 if (err) 1171 goto failed; 1172 1173 err = hn_rndis_get_eaddr(hv, eth_dev->data->mac_addrs->addr_bytes); 1174 if (err) 1175 goto failed; 1176 1177 /* Multi queue requires later versions of windows server */ 1178 if (hv->nvs_ver < NVS_VERSION_5) 1179 return 0; 1180 1181 max_chan = rte_vmbus_max_channels(vmbus); 1182 PMD_INIT_LOG(DEBUG, "VMBus max channels %d", max_chan); 1183 if (max_chan <= 0) 1184 goto failed; 1185 1186 if (hn_rndis_query_rsscaps(hv, &rxr_cnt) != 0) 1187 rxr_cnt = 1; 1188 1189 hv->max_queues = RTE_MIN(rxr_cnt, (unsigned int)max_chan); 1190 1191 /* If VF was reported but not added, do it now */ 1192 if (hv->vf_ctx.vf_vsp_reported && !hv->vf_ctx.vf_vsc_switched) { 1193 PMD_INIT_LOG(DEBUG, "Adding VF device"); 1194 1195 err = hn_vf_add(eth_dev, hv); 1196 } 1197 1198 return 0; 1199 1200 failed: 1201 PMD_INIT_LOG(NOTICE, "device init failed"); 1202 1203 hn_chim_uninit(eth_dev); 1204 hn_detach(hv); 1205 return err; 1206 } 1207 1208 static int 1209 eth_hn_dev_uninit(struct rte_eth_dev *eth_dev) 1210 { 1211 struct hn_data *hv = eth_dev->data->dev_private; 1212 int ret, ret_stop; 1213 1214 PMD_INIT_FUNC_TRACE(); 1215 1216 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1217 return 0; 1218 1219 ret_stop = hn_dev_stop(eth_dev); 1220 hn_dev_close(eth_dev); 1221 1222 hn_detach(hv); 1223 hn_chim_uninit(eth_dev); 1224 rte_vmbus_chan_close(hv->primary->chan); 1225 rte_free(hv->primary); 1226 ret = rte_eth_dev_owner_delete(hv->owner.id); 1227 if (ret != 0) 1228 return ret; 1229 1230 return ret_stop; 1231 } 1232 1233 static int eth_hn_probe(struct rte_vmbus_driver *drv __rte_unused, 1234 struct rte_vmbus_device *dev) 1235 { 1236 struct rte_eth_dev *eth_dev; 1237 int ret; 1238 1239 PMD_INIT_FUNC_TRACE(); 1240 1241 ret = rte_dev_event_monitor_start(); 1242 if (ret) { 1243 PMD_DRV_LOG(ERR, "Failed to start device event monitoring\n"); 1244 return ret; 1245 } 1246 1247 eth_dev = eth_dev_vmbus_allocate(dev, sizeof(struct hn_data)); 1248 if (!eth_dev) 1249 return -ENOMEM; 1250 1251 ret = eth_hn_dev_init(eth_dev); 1252 if (ret) { 1253 eth_dev_vmbus_release(eth_dev); 1254 rte_dev_event_monitor_stop(); 1255 } else { 1256 rte_eth_dev_probing_finish(eth_dev); 1257 } 1258 1259 return ret; 1260 } 1261 1262 static int eth_hn_remove(struct rte_vmbus_device *dev) 1263 { 1264 struct rte_eth_dev *eth_dev; 1265 int ret; 1266 1267 PMD_INIT_FUNC_TRACE(); 1268 1269 eth_dev = rte_eth_dev_allocated(dev->device.name); 1270 if (!eth_dev) 1271 return 0; /* port already released */ 1272 1273 ret = eth_hn_dev_uninit(eth_dev); 1274 if (ret) 1275 return ret; 1276 1277 eth_dev_vmbus_release(eth_dev); 1278 rte_dev_event_monitor_stop(); 1279 return 0; 1280 } 1281 1282 /* Network device GUID */ 1283 static const rte_uuid_t hn_net_ids[] = { 1284 /* f8615163-df3e-46c5-913f-f2d2f965ed0e */ 1285 RTE_UUID_INIT(0xf8615163, 0xdf3e, 0x46c5, 0x913f, 0xf2d2f965ed0eULL), 1286 { 0 } 1287 }; 1288 1289 static struct rte_vmbus_driver rte_netvsc_pmd = { 1290 .id_table = hn_net_ids, 1291 .probe = eth_hn_probe, 1292 .remove = eth_hn_remove, 1293 }; 1294 1295 RTE_PMD_REGISTER_VMBUS(net_netvsc, rte_netvsc_pmd); 1296 RTE_PMD_REGISTER_KMOD_DEP(net_netvsc, "* uio_hv_generic"); 1297 RTE_LOG_REGISTER(hn_logtype_init, pmd.net.netvsc.init, NOTICE); 1298 RTE_LOG_REGISTER(hn_logtype_driver, pmd.net.netvsc.driver, NOTICE); 1299 RTE_PMD_REGISTER_PARAM_STRING(net_netvsc, 1300 NETVSC_ARG_LATENCY "=<uint32> " 1301 NETVSC_ARG_RXBREAK "=<uint32> " 1302 NETVSC_ARG_TXBREAK "=<uint32> " 1303 NETVSC_ARG_RX_EXTMBUF_ENABLE "=<0|1>"); 1304