1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2016 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <string.h> 7 #include <stdio.h> 8 #include <errno.h> 9 #include <unistd.h> 10 11 #include <rte_ethdev_driver.h> 12 #include <rte_ethdev_pci.h> 13 #include <rte_memcpy.h> 14 #include <rte_string_fns.h> 15 #include <rte_memzone.h> 16 #include <rte_malloc.h> 17 #include <rte_branch_prediction.h> 18 #include <rte_pci.h> 19 #include <rte_bus_pci.h> 20 #include <rte_ether.h> 21 #include <rte_ip.h> 22 #include <rte_arp.h> 23 #include <rte_common.h> 24 #include <rte_errno.h> 25 #include <rte_cpuflags.h> 26 27 #include <rte_memory.h> 28 #include <rte_eal.h> 29 #include <rte_dev.h> 30 #include <rte_cycles.h> 31 #include <rte_kvargs.h> 32 33 #include "virtio_ethdev.h" 34 #include "virtio_pci.h" 35 #include "virtio_logs.h" 36 #include "virtqueue.h" 37 #include "virtio_rxtx.h" 38 39 static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev); 40 static int virtio_dev_configure(struct rte_eth_dev *dev); 41 static int virtio_dev_start(struct rte_eth_dev *dev); 42 static void virtio_dev_stop(struct rte_eth_dev *dev); 43 static void virtio_dev_promiscuous_enable(struct rte_eth_dev *dev); 44 static void virtio_dev_promiscuous_disable(struct rte_eth_dev *dev); 45 static void virtio_dev_allmulticast_enable(struct rte_eth_dev *dev); 46 static void virtio_dev_allmulticast_disable(struct rte_eth_dev *dev); 47 static void virtio_dev_info_get(struct rte_eth_dev *dev, 48 struct rte_eth_dev_info *dev_info); 49 static int virtio_dev_link_update(struct rte_eth_dev *dev, 50 int wait_to_complete); 51 static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask); 52 53 static void virtio_set_hwaddr(struct virtio_hw *hw); 54 static void virtio_get_hwaddr(struct virtio_hw *hw); 55 56 static int virtio_dev_stats_get(struct rte_eth_dev *dev, 57 struct rte_eth_stats *stats); 58 static int virtio_dev_xstats_get(struct rte_eth_dev *dev, 59 struct rte_eth_xstat *xstats, unsigned n); 60 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev, 61 struct rte_eth_xstat_name *xstats_names, 62 unsigned limit); 63 static void virtio_dev_stats_reset(struct rte_eth_dev *dev); 64 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev); 65 static int virtio_vlan_filter_set(struct rte_eth_dev *dev, 66 uint16_t vlan_id, int on); 67 static int virtio_mac_addr_add(struct rte_eth_dev *dev, 68 struct ether_addr *mac_addr, 69 uint32_t index, uint32_t vmdq); 70 static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index); 71 static int virtio_mac_addr_set(struct rte_eth_dev *dev, 72 struct ether_addr *mac_addr); 73 74 static int virtio_intr_enable(struct rte_eth_dev *dev); 75 static int virtio_intr_disable(struct rte_eth_dev *dev); 76 77 static int virtio_dev_queue_stats_mapping_set( 78 struct rte_eth_dev *eth_dev, 79 uint16_t queue_id, 80 uint8_t stat_idx, 81 uint8_t is_rx); 82 83 int virtio_logtype_init; 84 int virtio_logtype_driver; 85 86 static void virtio_notify_peers(struct rte_eth_dev *dev); 87 static void virtio_ack_link_announce(struct rte_eth_dev *dev); 88 89 /* 90 * The set of PCI devices this driver supports 91 */ 92 static const struct rte_pci_id pci_id_virtio_map[] = { 93 { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_LEGACY_DEVICEID_NET) }, 94 { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_MODERN_DEVICEID_NET) }, 95 { .vendor_id = 0, /* sentinel */ }, 96 }; 97 98 struct rte_virtio_xstats_name_off { 99 char name[RTE_ETH_XSTATS_NAME_SIZE]; 100 unsigned offset; 101 }; 
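/*
 * Illustrative note (not in the original source): the per-queue prefix
 * described below yields xstats names such as "rx_q0_good_packets" or
 * "tx_q1_size_64_packets"; see virtio_dev_xstats_get_names() for how the
 * prefix and the strings in these tables are combined.
 */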
102 103 /* [rt]x_qX_ is prepended to the name string here */ 104 static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = { 105 {"good_packets", offsetof(struct virtnet_rx, stats.packets)}, 106 {"good_bytes", offsetof(struct virtnet_rx, stats.bytes)}, 107 {"errors", offsetof(struct virtnet_rx, stats.errors)}, 108 {"multicast_packets", offsetof(struct virtnet_rx, stats.multicast)}, 109 {"broadcast_packets", offsetof(struct virtnet_rx, stats.broadcast)}, 110 {"undersize_packets", offsetof(struct virtnet_rx, stats.size_bins[0])}, 111 {"size_64_packets", offsetof(struct virtnet_rx, stats.size_bins[1])}, 112 {"size_65_127_packets", offsetof(struct virtnet_rx, stats.size_bins[2])}, 113 {"size_128_255_packets", offsetof(struct virtnet_rx, stats.size_bins[3])}, 114 {"size_256_511_packets", offsetof(struct virtnet_rx, stats.size_bins[4])}, 115 {"size_512_1023_packets", offsetof(struct virtnet_rx, stats.size_bins[5])}, 116 {"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])}, 117 {"size_1519_max_packets", offsetof(struct virtnet_rx, stats.size_bins[7])}, 118 }; 119 120 /* [rt]x_qX_ is prepended to the name string here */ 121 static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = { 122 {"good_packets", offsetof(struct virtnet_tx, stats.packets)}, 123 {"good_bytes", offsetof(struct virtnet_tx, stats.bytes)}, 124 {"errors", offsetof(struct virtnet_tx, stats.errors)}, 125 {"multicast_packets", offsetof(struct virtnet_tx, stats.multicast)}, 126 {"broadcast_packets", offsetof(struct virtnet_tx, stats.broadcast)}, 127 {"undersize_packets", offsetof(struct virtnet_tx, stats.size_bins[0])}, 128 {"size_64_packets", offsetof(struct virtnet_tx, stats.size_bins[1])}, 129 {"size_65_127_packets", offsetof(struct virtnet_tx, stats.size_bins[2])}, 130 {"size_128_255_packets", offsetof(struct virtnet_tx, stats.size_bins[3])}, 131 {"size_256_511_packets", offsetof(struct virtnet_tx, stats.size_bins[4])}, 132 {"size_512_1023_packets", offsetof(struct virtnet_tx, stats.size_bins[5])}, 133 {"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])}, 134 {"size_1519_max_packets", offsetof(struct virtnet_tx, stats.size_bins[7])}, 135 }; 136 137 #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \ 138 sizeof(rte_virtio_rxq_stat_strings[0])) 139 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \ 140 sizeof(rte_virtio_txq_stat_strings[0])) 141 142 struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS]; 143 144 static int 145 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, 146 int *dlen, int pkt_num) 147 { 148 uint32_t head, i; 149 int k, sum = 0; 150 virtio_net_ctrl_ack status = ~0; 151 struct virtio_pmd_ctrl *result; 152 struct virtqueue *vq; 153 154 ctrl->status = status; 155 156 if (!cvq || !cvq->vq) { 157 PMD_INIT_LOG(ERR, "Control queue is not supported."); 158 return -1; 159 } 160 161 rte_spinlock_lock(&cvq->lock); 162 vq = cvq->vq; 163 head = vq->vq_desc_head_idx; 164 165 PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, " 166 "vq->hw->cvq = %p vq = %p", 167 vq->vq_desc_head_idx, status, vq->hw->cvq, vq); 168 169 if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) { 170 rte_spinlock_unlock(&cvq->lock); 171 return -1; 172 } 173 174 memcpy(cvq->virtio_net_hdr_mz->addr, ctrl, 175 sizeof(struct virtio_pmd_ctrl)); 176 177 /* 178 * Format is enforced in qemu code: 179 * One TX packet for header; 180 * At least one TX packet per argument; 181 * One RX packet for 
ACK. 182 */ 183 vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT; 184 vq->vq_ring.desc[head].addr = cvq->virtio_net_hdr_mem; 185 vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr); 186 vq->vq_free_cnt--; 187 i = vq->vq_ring.desc[head].next; 188 189 for (k = 0; k < pkt_num; k++) { 190 vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT; 191 vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem 192 + sizeof(struct virtio_net_ctrl_hdr) 193 + sizeof(ctrl->status) + sizeof(uint8_t)*sum; 194 vq->vq_ring.desc[i].len = dlen[k]; 195 sum += dlen[k]; 196 vq->vq_free_cnt--; 197 i = vq->vq_ring.desc[i].next; 198 } 199 200 vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE; 201 vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem 202 + sizeof(struct virtio_net_ctrl_hdr); 203 vq->vq_ring.desc[i].len = sizeof(ctrl->status); 204 vq->vq_free_cnt--; 205 206 vq->vq_desc_head_idx = vq->vq_ring.desc[i].next; 207 208 vq_update_avail_ring(vq, head); 209 vq_update_avail_idx(vq); 210 211 PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index); 212 213 virtqueue_notify(vq); 214 215 rte_rmb(); 216 while (VIRTQUEUE_NUSED(vq) == 0) { 217 rte_rmb(); 218 usleep(100); 219 } 220 221 while (VIRTQUEUE_NUSED(vq)) { 222 uint32_t idx, desc_idx, used_idx; 223 struct vring_used_elem *uep; 224 225 used_idx = (uint32_t)(vq->vq_used_cons_idx 226 & (vq->vq_nentries - 1)); 227 uep = &vq->vq_ring.used->ring[used_idx]; 228 idx = (uint32_t) uep->id; 229 desc_idx = idx; 230 231 while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) { 232 desc_idx = vq->vq_ring.desc[desc_idx].next; 233 vq->vq_free_cnt++; 234 } 235 236 vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx; 237 vq->vq_desc_head_idx = idx; 238 239 vq->vq_used_cons_idx++; 240 vq->vq_free_cnt++; 241 } 242 243 PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d", 244 vq->vq_free_cnt, vq->vq_desc_head_idx); 245 246 result = cvq->virtio_net_hdr_mz->addr; 247 248 rte_spinlock_unlock(&cvq->lock); 249 return result->status; 250 } 251 252 static int 253 virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues) 254 { 255 struct virtio_hw *hw = dev->data->dev_private; 256 struct virtio_pmd_ctrl ctrl; 257 int dlen[1]; 258 int ret; 259 260 ctrl.hdr.class = VIRTIO_NET_CTRL_MQ; 261 ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; 262 memcpy(ctrl.data, &nb_queues, sizeof(uint16_t)); 263 264 dlen[0] = sizeof(uint16_t); 265 266 ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); 267 if (ret) { 268 PMD_INIT_LOG(ERR, "Multiqueue configured but send command " 269 "failed, this is too late now..."); 270 return -EINVAL; 271 } 272 273 return 0; 274 } 275 276 static void 277 virtio_dev_queue_release(void *queue __rte_unused) 278 { 279 /* do nothing */ 280 } 281 282 static uint16_t 283 virtio_get_nr_vq(struct virtio_hw *hw) 284 { 285 uint16_t nr_vq = hw->max_queue_pairs * 2; 286 287 if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) 288 nr_vq += 1; 289 290 return nr_vq; 291 } 292 293 static void 294 virtio_init_vring(struct virtqueue *vq) 295 { 296 int size = vq->vq_nentries; 297 struct vring *vr = &vq->vq_ring; 298 uint8_t *ring_mem = vq->vq_ring_virt_mem; 299 300 PMD_INIT_FUNC_TRACE(); 301 302 /* 303 * Reinitialise since virtio port might have been stopped and restarted 304 */ 305 memset(ring_mem, 0, vq->vq_ring_size); 306 vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN); 307 vq->vq_used_cons_idx = 0; 308 vq->vq_desc_head_idx = 0; 309 vq->vq_avail_idx = 0; 310 vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1); 311 vq->vq_free_cnt = vq->vq_nentries; 
	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);

	vring_desc_init(vr->desc, size);

	/*
	 * Disable device(host) interrupting guest
	 */
	virtqueue_disable_intr(vq);
}

static int
virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
{
	char vq_name[VIRTQUEUE_MAX_NAME_SZ];
	char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
	const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
	unsigned int vq_size, size;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtnet_rx *rxvq = NULL;
	struct virtnet_tx *txvq = NULL;
	struct virtnet_ctl *cvq = NULL;
	struct virtqueue *vq;
	size_t sz_hdr_mz = 0;
	void *sw_ring = NULL;
	int queue_type = virtio_get_queue_type(hw, vtpci_queue_idx);
	int ret;

	PMD_INIT_LOG(DEBUG, "setting up queue: %u", vtpci_queue_idx);

	/*
	 * Read the virtqueue size from the Queue Size field.
	 * Always a power of 2; if 0, the virtqueue does not exist.
	 */
	vq_size = VTPCI_OPS(hw)->get_queue_num(hw, vtpci_queue_idx);
	PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
	if (vq_size == 0) {
		PMD_INIT_LOG(ERR, "virtqueue does not exist");
		return -EINVAL;
	}

	if (!rte_is_power_of_2(vq_size)) {
		PMD_INIT_LOG(ERR, "virtqueue size is not a power of 2");
		return -EINVAL;
	}

	snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
		 dev->data->port_id, vtpci_queue_idx);

	size = RTE_ALIGN_CEIL(sizeof(*vq) +
				vq_size * sizeof(struct vq_desc_extra),
				RTE_CACHE_LINE_SIZE);
	if (queue_type == VTNET_TQ) {
		/*
		 * For each xmit packet, allocate a virtio_net_hdr
		 * and indirect ring elements
		 */
		sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
	} else if (queue_type == VTNET_CQ) {
		/* Allocate a page for control vq command, data and status */
		sz_hdr_mz = PAGE_SIZE;
	}

	vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
				SOCKET_ID_ANY);
	if (vq == NULL) {
		PMD_INIT_LOG(ERR, "cannot allocate vq");
		return -ENOMEM;
	}
	hw->vqs[vtpci_queue_idx] = vq;

	vq->hw = hw;
	vq->vq_queue_index = vtpci_queue_idx;
	vq->vq_nentries = vq_size;

	/*
	 * Reserve a memzone for vring elements
	 */
	size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
	vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
	PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
		     size, vq->vq_ring_size);

	mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
			SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG,
			VIRTIO_PCI_VRING_ALIGN);
	if (mz == NULL) {
		if (rte_errno == EEXIST)
			mz = rte_memzone_lookup(vq_name);
		if (mz == NULL) {
			ret = -ENOMEM;
			goto fail_q_alloc;
		}
	}

	memset(mz->addr, 0, mz->len);

	vq->vq_ring_mem = mz->iova;
	vq->vq_ring_virt_mem = mz->addr;
	PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64,
		     (uint64_t)mz->iova);
	PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64,
		     (uint64_t)(uintptr_t)mz->addr);

	virtio_init_vring(vq);

	if (sz_hdr_mz) {
		snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
			 dev->data->port_id, vtpci_queue_idx);
		hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
				SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG,
				RTE_CACHE_LINE_SIZE);
		if (hdr_mz == NULL) {
			if (rte_errno == EEXIST)
				hdr_mz = rte_memzone_lookup(vq_hdr_name);
			if (hdr_mz == NULL) {
				ret = -ENOMEM;
				goto fail_q_alloc;
429 } 430 } 431 } 432 433 if (queue_type == VTNET_RQ) { 434 size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * 435 sizeof(vq->sw_ring[0]); 436 437 sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, 438 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); 439 if (!sw_ring) { 440 PMD_INIT_LOG(ERR, "can not allocate RX soft ring"); 441 ret = -ENOMEM; 442 goto fail_q_alloc; 443 } 444 445 vq->sw_ring = sw_ring; 446 rxvq = &vq->rxq; 447 rxvq->vq = vq; 448 rxvq->port_id = dev->data->port_id; 449 rxvq->mz = mz; 450 } else if (queue_type == VTNET_TQ) { 451 txvq = &vq->txq; 452 txvq->vq = vq; 453 txvq->port_id = dev->data->port_id; 454 txvq->mz = mz; 455 txvq->virtio_net_hdr_mz = hdr_mz; 456 txvq->virtio_net_hdr_mem = hdr_mz->iova; 457 } else if (queue_type == VTNET_CQ) { 458 cvq = &vq->cq; 459 cvq->vq = vq; 460 cvq->mz = mz; 461 cvq->virtio_net_hdr_mz = hdr_mz; 462 cvq->virtio_net_hdr_mem = hdr_mz->iova; 463 memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); 464 465 hw->cvq = cvq; 466 } 467 468 /* For virtio_user case (that is when hw->dev is NULL), we use 469 * virtual address. And we need properly set _offset_, please see 470 * VIRTIO_MBUF_DATA_DMA_ADDR in virtqueue.h for more information. 471 */ 472 if (!hw->virtio_user_dev) 473 vq->offset = offsetof(struct rte_mbuf, buf_iova); 474 else { 475 vq->vq_ring_mem = (uintptr_t)mz->addr; 476 vq->offset = offsetof(struct rte_mbuf, buf_addr); 477 if (queue_type == VTNET_TQ) 478 txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr; 479 else if (queue_type == VTNET_CQ) 480 cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr; 481 } 482 483 if (queue_type == VTNET_TQ) { 484 struct virtio_tx_region *txr; 485 unsigned int i; 486 487 txr = hdr_mz->addr; 488 memset(txr, 0, vq_size * sizeof(*txr)); 489 for (i = 0; i < vq_size; i++) { 490 struct vring_desc *start_dp = txr[i].tx_indir; 491 492 vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir)); 493 494 /* first indirect descriptor is always the tx header */ 495 start_dp->addr = txvq->virtio_net_hdr_mem 496 + i * sizeof(*txr) 497 + offsetof(struct virtio_tx_region, tx_hdr); 498 499 start_dp->len = hw->vtnet_hdr_size; 500 start_dp->flags = VRING_DESC_F_NEXT; 501 } 502 } 503 504 if (VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) { 505 PMD_INIT_LOG(ERR, "setup_queue failed"); 506 return -EINVAL; 507 } 508 509 return 0; 510 511 fail_q_alloc: 512 rte_free(sw_ring); 513 rte_memzone_free(hdr_mz); 514 rte_memzone_free(mz); 515 rte_free(vq); 516 517 return ret; 518 } 519 520 static void 521 virtio_free_queues(struct virtio_hw *hw) 522 { 523 uint16_t nr_vq = virtio_get_nr_vq(hw); 524 struct virtqueue *vq; 525 int queue_type; 526 uint16_t i; 527 528 if (hw->vqs == NULL) 529 return; 530 531 for (i = 0; i < nr_vq; i++) { 532 vq = hw->vqs[i]; 533 if (!vq) 534 continue; 535 536 queue_type = virtio_get_queue_type(hw, i); 537 if (queue_type == VTNET_RQ) { 538 rte_free(vq->sw_ring); 539 rte_memzone_free(vq->rxq.mz); 540 } else if (queue_type == VTNET_TQ) { 541 rte_memzone_free(vq->txq.mz); 542 rte_memzone_free(vq->txq.virtio_net_hdr_mz); 543 } else { 544 rte_memzone_free(vq->cq.mz); 545 rte_memzone_free(vq->cq.virtio_net_hdr_mz); 546 } 547 548 rte_free(vq); 549 hw->vqs[i] = NULL; 550 } 551 552 rte_free(hw->vqs); 553 hw->vqs = NULL; 554 } 555 556 static int 557 virtio_alloc_queues(struct rte_eth_dev *dev) 558 { 559 struct virtio_hw *hw = dev->data->dev_private; 560 uint16_t nr_vq = virtio_get_nr_vq(hw); 561 uint16_t i; 562 int ret; 563 564 hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0); 565 if (!hw->vqs) { 566 PMD_INIT_LOG(ERR, "failed 
to allocate vqs"); 567 return -ENOMEM; 568 } 569 570 for (i = 0; i < nr_vq; i++) { 571 ret = virtio_init_queue(dev, i); 572 if (ret < 0) { 573 virtio_free_queues(hw); 574 return ret; 575 } 576 } 577 578 return 0; 579 } 580 581 static void virtio_queues_unbind_intr(struct rte_eth_dev *dev); 582 583 static void 584 virtio_dev_close(struct rte_eth_dev *dev) 585 { 586 struct virtio_hw *hw = dev->data->dev_private; 587 struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf; 588 589 PMD_INIT_LOG(DEBUG, "virtio_dev_close"); 590 591 /* reset the NIC */ 592 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) 593 VTPCI_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR); 594 if (intr_conf->rxq) 595 virtio_queues_unbind_intr(dev); 596 597 if (intr_conf->lsc || intr_conf->rxq) { 598 virtio_intr_disable(dev); 599 rte_intr_efd_disable(dev->intr_handle); 600 rte_free(dev->intr_handle->intr_vec); 601 dev->intr_handle->intr_vec = NULL; 602 } 603 604 vtpci_reset(hw); 605 virtio_dev_free_mbufs(dev); 606 virtio_free_queues(hw); 607 } 608 609 static void 610 virtio_dev_promiscuous_enable(struct rte_eth_dev *dev) 611 { 612 struct virtio_hw *hw = dev->data->dev_private; 613 struct virtio_pmd_ctrl ctrl; 614 int dlen[1]; 615 int ret; 616 617 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { 618 PMD_INIT_LOG(INFO, "host does not support rx control"); 619 return; 620 } 621 622 ctrl.hdr.class = VIRTIO_NET_CTRL_RX; 623 ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC; 624 ctrl.data[0] = 1; 625 dlen[0] = 1; 626 627 ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); 628 if (ret) 629 PMD_INIT_LOG(ERR, "Failed to enable promisc"); 630 } 631 632 static void 633 virtio_dev_promiscuous_disable(struct rte_eth_dev *dev) 634 { 635 struct virtio_hw *hw = dev->data->dev_private; 636 struct virtio_pmd_ctrl ctrl; 637 int dlen[1]; 638 int ret; 639 640 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { 641 PMD_INIT_LOG(INFO, "host does not support rx control"); 642 return; 643 } 644 645 ctrl.hdr.class = VIRTIO_NET_CTRL_RX; 646 ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC; 647 ctrl.data[0] = 0; 648 dlen[0] = 1; 649 650 ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); 651 if (ret) 652 PMD_INIT_LOG(ERR, "Failed to disable promisc"); 653 } 654 655 static void 656 virtio_dev_allmulticast_enable(struct rte_eth_dev *dev) 657 { 658 struct virtio_hw *hw = dev->data->dev_private; 659 struct virtio_pmd_ctrl ctrl; 660 int dlen[1]; 661 int ret; 662 663 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { 664 PMD_INIT_LOG(INFO, "host does not support rx control"); 665 return; 666 } 667 668 ctrl.hdr.class = VIRTIO_NET_CTRL_RX; 669 ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI; 670 ctrl.data[0] = 1; 671 dlen[0] = 1; 672 673 ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); 674 if (ret) 675 PMD_INIT_LOG(ERR, "Failed to enable allmulticast"); 676 } 677 678 static void 679 virtio_dev_allmulticast_disable(struct rte_eth_dev *dev) 680 { 681 struct virtio_hw *hw = dev->data->dev_private; 682 struct virtio_pmd_ctrl ctrl; 683 int dlen[1]; 684 int ret; 685 686 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { 687 PMD_INIT_LOG(INFO, "host does not support rx control"); 688 return; 689 } 690 691 ctrl.hdr.class = VIRTIO_NET_CTRL_RX; 692 ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI; 693 ctrl.data[0] = 0; 694 dlen[0] = 1; 695 696 ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); 697 if (ret) 698 PMD_INIT_LOG(ERR, "Failed to disable allmulticast"); 699 } 700 701 #define VLAN_TAG_LEN 4 /* 802.3ac tag (not DMA'd) */ 702 static int 703 virtio_mtu_set(struct 
rte_eth_dev *dev, uint16_t mtu) 704 { 705 struct virtio_hw *hw = dev->data->dev_private; 706 uint32_t ether_hdr_len = ETHER_HDR_LEN + VLAN_TAG_LEN + 707 hw->vtnet_hdr_size; 708 uint32_t frame_size = mtu + ether_hdr_len; 709 uint32_t max_frame_size = hw->max_mtu + ether_hdr_len; 710 711 max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN); 712 713 if (mtu < ETHER_MIN_MTU || frame_size > max_frame_size) { 714 PMD_INIT_LOG(ERR, "MTU should be between %d and %d", 715 ETHER_MIN_MTU, max_frame_size - ether_hdr_len); 716 return -EINVAL; 717 } 718 return 0; 719 } 720 721 static int 722 virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) 723 { 724 struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id]; 725 struct virtqueue *vq = rxvq->vq; 726 727 virtqueue_enable_intr(vq); 728 return 0; 729 } 730 731 static int 732 virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) 733 { 734 struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id]; 735 struct virtqueue *vq = rxvq->vq; 736 737 virtqueue_disable_intr(vq); 738 return 0; 739 } 740 741 /* 742 * dev_ops for virtio, bare necessities for basic operation 743 */ 744 static const struct eth_dev_ops virtio_eth_dev_ops = { 745 .dev_configure = virtio_dev_configure, 746 .dev_start = virtio_dev_start, 747 .dev_stop = virtio_dev_stop, 748 .dev_close = virtio_dev_close, 749 .promiscuous_enable = virtio_dev_promiscuous_enable, 750 .promiscuous_disable = virtio_dev_promiscuous_disable, 751 .allmulticast_enable = virtio_dev_allmulticast_enable, 752 .allmulticast_disable = virtio_dev_allmulticast_disable, 753 .mtu_set = virtio_mtu_set, 754 .dev_infos_get = virtio_dev_info_get, 755 .stats_get = virtio_dev_stats_get, 756 .xstats_get = virtio_dev_xstats_get, 757 .xstats_get_names = virtio_dev_xstats_get_names, 758 .stats_reset = virtio_dev_stats_reset, 759 .xstats_reset = virtio_dev_stats_reset, 760 .link_update = virtio_dev_link_update, 761 .vlan_offload_set = virtio_dev_vlan_offload_set, 762 .rx_queue_setup = virtio_dev_rx_queue_setup, 763 .rx_queue_intr_enable = virtio_dev_rx_queue_intr_enable, 764 .rx_queue_intr_disable = virtio_dev_rx_queue_intr_disable, 765 .rx_queue_release = virtio_dev_queue_release, 766 .rx_descriptor_done = virtio_dev_rx_queue_done, 767 .tx_queue_setup = virtio_dev_tx_queue_setup, 768 .tx_queue_release = virtio_dev_queue_release, 769 /* collect stats per queue */ 770 .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set, 771 .vlan_filter_set = virtio_vlan_filter_set, 772 .mac_addr_add = virtio_mac_addr_add, 773 .mac_addr_remove = virtio_mac_addr_remove, 774 .mac_addr_set = virtio_mac_addr_set, 775 }; 776 777 static void 778 virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats) 779 { 780 unsigned i; 781 782 for (i = 0; i < dev->data->nb_tx_queues; i++) { 783 const struct virtnet_tx *txvq = dev->data->tx_queues[i]; 784 if (txvq == NULL) 785 continue; 786 787 stats->opackets += txvq->stats.packets; 788 stats->obytes += txvq->stats.bytes; 789 stats->oerrors += txvq->stats.errors; 790 791 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { 792 stats->q_opackets[i] = txvq->stats.packets; 793 stats->q_obytes[i] = txvq->stats.bytes; 794 } 795 } 796 797 for (i = 0; i < dev->data->nb_rx_queues; i++) { 798 const struct virtnet_rx *rxvq = dev->data->rx_queues[i]; 799 if (rxvq == NULL) 800 continue; 801 802 stats->ipackets += rxvq->stats.packets; 803 stats->ibytes += rxvq->stats.bytes; 804 stats->ierrors += rxvq->stats.errors; 805 806 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { 807 
stats->q_ipackets[i] = rxvq->stats.packets; 808 stats->q_ibytes[i] = rxvq->stats.bytes; 809 } 810 } 811 812 stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; 813 } 814 815 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev, 816 struct rte_eth_xstat_name *xstats_names, 817 __rte_unused unsigned limit) 818 { 819 unsigned i; 820 unsigned count = 0; 821 unsigned t; 822 823 unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS + 824 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS; 825 826 if (xstats_names != NULL) { 827 /* Note: limit checked in rte_eth_xstats_names() */ 828 829 for (i = 0; i < dev->data->nb_rx_queues; i++) { 830 struct virtnet_rx *rxvq = dev->data->rx_queues[i]; 831 if (rxvq == NULL) 832 continue; 833 for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) { 834 snprintf(xstats_names[count].name, 835 sizeof(xstats_names[count].name), 836 "rx_q%u_%s", i, 837 rte_virtio_rxq_stat_strings[t].name); 838 count++; 839 } 840 } 841 842 for (i = 0; i < dev->data->nb_tx_queues; i++) { 843 struct virtnet_tx *txvq = dev->data->tx_queues[i]; 844 if (txvq == NULL) 845 continue; 846 for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) { 847 snprintf(xstats_names[count].name, 848 sizeof(xstats_names[count].name), 849 "tx_q%u_%s", i, 850 rte_virtio_txq_stat_strings[t].name); 851 count++; 852 } 853 } 854 return count; 855 } 856 return nstats; 857 } 858 859 static int 860 virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, 861 unsigned n) 862 { 863 unsigned i; 864 unsigned count = 0; 865 866 unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS + 867 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS; 868 869 if (n < nstats) 870 return nstats; 871 872 for (i = 0; i < dev->data->nb_rx_queues; i++) { 873 struct virtnet_rx *rxvq = dev->data->rx_queues[i]; 874 875 if (rxvq == NULL) 876 continue; 877 878 unsigned t; 879 880 for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) { 881 xstats[count].value = *(uint64_t *)(((char *)rxvq) + 882 rte_virtio_rxq_stat_strings[t].offset); 883 xstats[count].id = count; 884 count++; 885 } 886 } 887 888 for (i = 0; i < dev->data->nb_tx_queues; i++) { 889 struct virtnet_tx *txvq = dev->data->tx_queues[i]; 890 891 if (txvq == NULL) 892 continue; 893 894 unsigned t; 895 896 for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) { 897 xstats[count].value = *(uint64_t *)(((char *)txvq) + 898 rte_virtio_txq_stat_strings[t].offset); 899 xstats[count].id = count; 900 count++; 901 } 902 } 903 904 return count; 905 } 906 907 static int 908 virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) 909 { 910 virtio_update_stats(dev, stats); 911 912 return 0; 913 } 914 915 static void 916 virtio_dev_stats_reset(struct rte_eth_dev *dev) 917 { 918 unsigned int i; 919 920 for (i = 0; i < dev->data->nb_tx_queues; i++) { 921 struct virtnet_tx *txvq = dev->data->tx_queues[i]; 922 if (txvq == NULL) 923 continue; 924 925 txvq->stats.packets = 0; 926 txvq->stats.bytes = 0; 927 txvq->stats.errors = 0; 928 txvq->stats.multicast = 0; 929 txvq->stats.broadcast = 0; 930 memset(txvq->stats.size_bins, 0, 931 sizeof(txvq->stats.size_bins[0]) * 8); 932 } 933 934 for (i = 0; i < dev->data->nb_rx_queues; i++) { 935 struct virtnet_rx *rxvq = dev->data->rx_queues[i]; 936 if (rxvq == NULL) 937 continue; 938 939 rxvq->stats.packets = 0; 940 rxvq->stats.bytes = 0; 941 rxvq->stats.errors = 0; 942 rxvq->stats.multicast = 0; 943 rxvq->stats.broadcast = 0; 944 memset(rxvq->stats.size_bins, 0, 945 sizeof(rxvq->stats.size_bins[0]) * 8); 946 } 947 } 948 949 static void 950 
virtio_set_hwaddr(struct virtio_hw *hw) 951 { 952 vtpci_write_dev_config(hw, 953 offsetof(struct virtio_net_config, mac), 954 &hw->mac_addr, ETHER_ADDR_LEN); 955 } 956 957 static void 958 virtio_get_hwaddr(struct virtio_hw *hw) 959 { 960 if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) { 961 vtpci_read_dev_config(hw, 962 offsetof(struct virtio_net_config, mac), 963 &hw->mac_addr, ETHER_ADDR_LEN); 964 } else { 965 eth_random_addr(&hw->mac_addr[0]); 966 virtio_set_hwaddr(hw); 967 } 968 } 969 970 static int 971 virtio_mac_table_set(struct virtio_hw *hw, 972 const struct virtio_net_ctrl_mac *uc, 973 const struct virtio_net_ctrl_mac *mc) 974 { 975 struct virtio_pmd_ctrl ctrl; 976 int err, len[2]; 977 978 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 979 PMD_DRV_LOG(INFO, "host does not support mac table"); 980 return -1; 981 } 982 983 ctrl.hdr.class = VIRTIO_NET_CTRL_MAC; 984 ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; 985 986 len[0] = uc->entries * ETHER_ADDR_LEN + sizeof(uc->entries); 987 memcpy(ctrl.data, uc, len[0]); 988 989 len[1] = mc->entries * ETHER_ADDR_LEN + sizeof(mc->entries); 990 memcpy(ctrl.data + len[0], mc, len[1]); 991 992 err = virtio_send_command(hw->cvq, &ctrl, len, 2); 993 if (err != 0) 994 PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err); 995 return err; 996 } 997 998 static int 999 virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr, 1000 uint32_t index, uint32_t vmdq __rte_unused) 1001 { 1002 struct virtio_hw *hw = dev->data->dev_private; 1003 const struct ether_addr *addrs = dev->data->mac_addrs; 1004 unsigned int i; 1005 struct virtio_net_ctrl_mac *uc, *mc; 1006 1007 if (index >= VIRTIO_MAX_MAC_ADDRS) { 1008 PMD_DRV_LOG(ERR, "mac address index %u out of range", index); 1009 return -EINVAL; 1010 } 1011 1012 uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries)); 1013 uc->entries = 0; 1014 mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries)); 1015 mc->entries = 0; 1016 1017 for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) { 1018 const struct ether_addr *addr 1019 = (i == index) ? mac_addr : addrs + i; 1020 struct virtio_net_ctrl_mac *tbl 1021 = is_multicast_ether_addr(addr) ? mc : uc; 1022 1023 memcpy(&tbl->macs[tbl->entries++], addr, ETHER_ADDR_LEN); 1024 } 1025 1026 return virtio_mac_table_set(hw, uc, mc); 1027 } 1028 1029 static void 1030 virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index) 1031 { 1032 struct virtio_hw *hw = dev->data->dev_private; 1033 struct ether_addr *addrs = dev->data->mac_addrs; 1034 struct virtio_net_ctrl_mac *uc, *mc; 1035 unsigned int i; 1036 1037 if (index >= VIRTIO_MAX_MAC_ADDRS) { 1038 PMD_DRV_LOG(ERR, "mac address index %u out of range", index); 1039 return; 1040 } 1041 1042 uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries)); 1043 uc->entries = 0; 1044 mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries)); 1045 mc->entries = 0; 1046 1047 for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) { 1048 struct virtio_net_ctrl_mac *tbl; 1049 1050 if (i == index || is_zero_ether_addr(addrs + i)) 1051 continue; 1052 1053 tbl = is_multicast_ether_addr(addrs + i) ? 
mc : uc; 1054 memcpy(&tbl->macs[tbl->entries++], addrs + i, ETHER_ADDR_LEN); 1055 } 1056 1057 virtio_mac_table_set(hw, uc, mc); 1058 } 1059 1060 static int 1061 virtio_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr) 1062 { 1063 struct virtio_hw *hw = dev->data->dev_private; 1064 1065 memcpy(hw->mac_addr, mac_addr, ETHER_ADDR_LEN); 1066 1067 /* Use atomic update if available */ 1068 if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 1069 struct virtio_pmd_ctrl ctrl; 1070 int len = ETHER_ADDR_LEN; 1071 1072 ctrl.hdr.class = VIRTIO_NET_CTRL_MAC; 1073 ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; 1074 1075 memcpy(ctrl.data, mac_addr, ETHER_ADDR_LEN); 1076 return virtio_send_command(hw->cvq, &ctrl, &len, 1); 1077 } 1078 1079 if (!vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) 1080 return -ENOTSUP; 1081 1082 virtio_set_hwaddr(hw); 1083 return 0; 1084 } 1085 1086 static int 1087 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) 1088 { 1089 struct virtio_hw *hw = dev->data->dev_private; 1090 struct virtio_pmd_ctrl ctrl; 1091 int len; 1092 1093 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) 1094 return -ENOTSUP; 1095 1096 ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN; 1097 ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; 1098 memcpy(ctrl.data, &vlan_id, sizeof(vlan_id)); 1099 len = sizeof(vlan_id); 1100 1101 return virtio_send_command(hw->cvq, &ctrl, &len, 1); 1102 } 1103 1104 static int 1105 virtio_intr_enable(struct rte_eth_dev *dev) 1106 { 1107 struct virtio_hw *hw = dev->data->dev_private; 1108 1109 if (rte_intr_enable(dev->intr_handle) < 0) 1110 return -1; 1111 1112 if (!hw->virtio_user_dev) 1113 hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev)); 1114 1115 return 0; 1116 } 1117 1118 static int 1119 virtio_intr_disable(struct rte_eth_dev *dev) 1120 { 1121 struct virtio_hw *hw = dev->data->dev_private; 1122 1123 if (rte_intr_disable(dev->intr_handle) < 0) 1124 return -1; 1125 1126 if (!hw->virtio_user_dev) 1127 hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev)); 1128 1129 return 0; 1130 } 1131 1132 static int 1133 virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features) 1134 { 1135 uint64_t host_features; 1136 1137 /* Prepare guest_features: feature that driver wants to support */ 1138 PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64, 1139 req_features); 1140 1141 /* Read device(host) feature bits */ 1142 host_features = VTPCI_OPS(hw)->get_features(hw); 1143 PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64, 1144 host_features); 1145 1146 /* If supported, ensure MTU value is valid before acknowledging it. */ 1147 if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) { 1148 struct virtio_net_config config; 1149 1150 vtpci_read_dev_config(hw, 1151 offsetof(struct virtio_net_config, mtu), 1152 &config.mtu, sizeof(config.mtu)); 1153 1154 if (config.mtu < ETHER_MIN_MTU) 1155 req_features &= ~(1ULL << VIRTIO_NET_F_MTU); 1156 } 1157 1158 /* 1159 * Negotiate features: Subset of device feature bits are written back 1160 * guest feature bits. 
	 */
	hw->guest_features = req_features;
	hw->guest_features = vtpci_negotiate_features(hw, host_features);
	PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
		hw->guest_features);

	if (hw->modern) {
		if (!vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) {
			PMD_INIT_LOG(ERR,
				"VIRTIO_F_VERSION_1 feature is not enabled.");
			return -1;
		}
		vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
		if (!(vtpci_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
			PMD_INIT_LOG(ERR,
				"failed to set FEATURES_OK status!");
			return -1;
		}
	}

	hw->req_guest_features = req_features;

	return 0;
}

int
virtio_dev_pause(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	rte_spinlock_lock(&hw->state_lock);

	if (hw->started == 0) {
		/* Device has just been stopped. */
		rte_spinlock_unlock(&hw->state_lock);
		return -1;
	}
	hw->started = 0;
	/*
	 * Prevent the worker threads from touching queues to avoid contention;
	 * 1 ms should be enough for the ongoing Tx function to finish.
	 */
	rte_delay_ms(1);
	return 0;
}

/*
 * Recover hw state to let the worker threads continue.
 */
void
virtio_dev_resume(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	hw->started = 1;
	rte_spinlock_unlock(&hw->state_lock);
}

/*
 * Should be called only after device is paused.
 */
int
virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
		int nb_pkts)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtnet_tx *txvq = dev->data->tx_queues[0];
	int ret;

	hw->inject_pkts = tx_pkts;
	ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
	hw->inject_pkts = NULL;

	return ret;
}

static void
virtio_notify_peers(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtnet_rx *rxvq;
	struct rte_mbuf *rarp_mbuf;

	if (!dev->data->rx_queues)
		return;

	rxvq = dev->data->rx_queues[0];
	if (!rxvq)
		return;

	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
			(struct ether_addr *)hw->mac_addr);
	if (rarp_mbuf == NULL) {
		PMD_DRV_LOG(ERR, "failed to make RARP packet.");
		return;
	}

	/* If virtio port just stopped, no need to send RARP */
	if (virtio_dev_pause(dev) < 0) {
		rte_pktmbuf_free(rarp_mbuf);
		return;
	}

	virtio_inject_pkts(dev, &rarp_mbuf, 1);
	virtio_dev_resume(dev);
}

static void
virtio_ack_link_announce(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtio_pmd_ctrl ctrl;

	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;

	virtio_send_command(hw->cvq, &ctrl, NULL, 0);
}
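/*
 * Illustrative use of the pause/resume helpers above (a sketch of the
 * pattern followed by virtio_notify_peers(), not an additional code path):
 *
 *	if (virtio_dev_pause(dev) == 0) {
 *		virtio_inject_pkts(dev, &pkt, 1);
 *		virtio_dev_resume(dev);
 *	}
 */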
/*
 * Process virtio config changed interrupt. Call the callback
 * if the link state changed; generate a gratuitous RARP packet if
 * the status indicates an ANNOUNCE.
 */
void
virtio_interrupt_handler(void *param)
{
	struct rte_eth_dev *dev = param;
	struct virtio_hw *hw = dev->data->dev_private;
	uint8_t isr;

	/* Read interrupt status which clears interrupt */
	isr = vtpci_isr(hw);
	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);

	if (virtio_intr_enable(dev) < 0)
		PMD_DRV_LOG(ERR, "interrupt enable failed");

	if (isr & VIRTIO_PCI_ISR_CONFIG) {
		if (virtio_dev_link_update(dev, 0) == 0)
			_rte_eth_dev_callback_process(dev,
						      RTE_ETH_EVENT_INTR_LSC,
						      NULL);
	}

	if (isr & VIRTIO_NET_S_ANNOUNCE) {
		virtio_notify_peers(dev);
		if (hw->cvq)
			virtio_ack_link_announce(dev);
	}
}

/* set rx and tx handlers according to what is supported */
static void
set_rxtx_funcs(struct rte_eth_dev *eth_dev)
{
	struct virtio_hw *hw = eth_dev->data->dev_private;

	if (hw->use_simple_rx) {
		PMD_INIT_LOG(INFO, "virtio: using simple Rx path on port %u",
			eth_dev->data->port_id);
		eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
	} else if (hw->use_inorder_rx) {
		PMD_INIT_LOG(INFO,
			"virtio: using inorder mergeable buffer Rx path on port %u",
			eth_dev->data->port_id);
		eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts_inorder;
	} else if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
		PMD_INIT_LOG(INFO,
			"virtio: using mergeable buffer Rx path on port %u",
			eth_dev->data->port_id);
		eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
	} else {
		PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
			eth_dev->data->port_id);
		eth_dev->rx_pkt_burst = &virtio_recv_pkts;
	}

	if (hw->use_inorder_tx) {
		PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
			eth_dev->data->port_id);
		eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
	} else {
		PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
			eth_dev->data->port_id);
		eth_dev->tx_pkt_burst = virtio_xmit_pkts;
	}
}
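/*
 * Note: hw->use_simple_rx, hw->use_inorder_rx and hw->use_inorder_tx are
 * decided in virtio_dev_configure() from the negotiated features and the
 * requested Rx offloads; set_rxtx_funcs() only translates those flags into
 * burst function pointers.
 */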
/* Only support 1:1 queue/interrupt mapping so far.
 * TODO: support n:1 queue/interrupt mapping when there is a limited number of
 * interrupt vectors (<N+1).
 */
static int
virtio_queues_bind_intr(struct rte_eth_dev *dev)
{
	uint32_t i;
	struct virtio_hw *hw = dev->data->dev_private;

	PMD_INIT_LOG(INFO, "queue/interrupt binding");
	for (i = 0; i < dev->data->nb_rx_queues; ++i) {
		dev->intr_handle->intr_vec[i] = i + 1;
		if (VTPCI_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
						 VIRTIO_MSI_NO_VECTOR) {
			PMD_DRV_LOG(ERR, "failed to set queue vector");
			return -EBUSY;
		}
	}

	return 0;
}

static void
virtio_queues_unbind_intr(struct rte_eth_dev *dev)
{
	uint32_t i;
	struct virtio_hw *hw = dev->data->dev_private;

	PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
	for (i = 0; i < dev->data->nb_rx_queues; ++i)
		VTPCI_OPS(hw)->set_queue_irq(hw,
					     hw->vqs[i * VTNET_CQ],
					     VIRTIO_MSI_NO_VECTOR);
}

static int
virtio_configure_intr(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	if (!rte_intr_cap_multiple(dev->intr_handle)) {
		PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
		return -ENOTSUP;
	}

	if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
		PMD_INIT_LOG(ERR, "Failed to create eventfd");
		return -1;
	}

	if (!dev->intr_handle->intr_vec) {
		dev->intr_handle->intr_vec =
			rte_zmalloc("intr_vec",
				    hw->max_queue_pairs * sizeof(int), 0);
		if (!dev->intr_handle->intr_vec) {
			PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
				     hw->max_queue_pairs);
			return -ENOMEM;
		}
	}

	/* Re-register callback to update max_intr */
	rte_intr_callback_unregister(dev->intr_handle,
				     virtio_interrupt_handler,
				     dev);
	rte_intr_callback_register(dev->intr_handle,
				   virtio_interrupt_handler,
				   dev);

	/* DO NOT try to remove this! This function will enable msix, or QEMU
	 * will encounter SIGSEGV when DRIVER_OK is sent.
	 * And for legacy devices, this should be done before queue/vec binding
	 * to change the config size from 20 to 24, or VIRTIO_MSI_QUEUE_VECTOR
	 * (22) will be ignored.
	 */
	if (virtio_intr_enable(dev) < 0) {
		PMD_DRV_LOG(ERR, "interrupt enable failed");
		return -1;
	}

	if (virtio_queues_bind_intr(dev) < 0) {
		PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
		return -1;
	}

	return 0;
}
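/*
 * MSI-X vector layout assumed by the helpers above: vector 0 carries the
 * config-change/LSC interrupt (bound in virtio_dev_configure()), and Rx
 * queue i is bound to vector i + 1 by virtio_queues_bind_intr().
 */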
/* reset device and renegotiate features if needed */
static int
virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
{
	struct virtio_hw *hw = eth_dev->data->dev_private;
	struct virtio_net_config *config;
	struct virtio_net_config local_config;
	struct rte_pci_device *pci_dev = NULL;
	int ret;

	/* Reset the device although not necessary at startup */
	vtpci_reset(hw);

	if (hw->vqs) {
		virtio_dev_free_mbufs(eth_dev);
		virtio_free_queues(hw);
	}

	/* Tell the host we've noticed this device. */
	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);

	/* Tell the host we know how to drive the device. */
	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
	if (virtio_negotiate_features(hw, req_features) < 0)
		return -1;

	if (!hw->virtio_user_dev) {
		pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
		rte_eth_copy_pci_info(eth_dev, pci_dev);
	}

	/* If host does not support both status and MSI-X then disable LSC */
	if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS) &&
	    hw->use_msix != VIRTIO_MSIX_NONE)
		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
	else
		eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;

	/* Setting up rx_header size for the device */
	if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
	    vtpci_with_feature(hw, VIRTIO_F_VERSION_1))
		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	else
		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);

	/* Copy the permanent MAC address to virtio_hw */
	virtio_get_hwaddr(hw);
	ether_addr_copy((struct ether_addr *)hw->mac_addr,
			&eth_dev->data->mac_addrs[0]);
	PMD_INIT_LOG(DEBUG,
		     "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);

	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
		config = &local_config;

		vtpci_read_dev_config(hw,
			offsetof(struct virtio_net_config, mac),
			&config->mac, sizeof(config->mac));

		if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
			vtpci_read_dev_config(hw,
				offsetof(struct virtio_net_config, status),
				&config->status, sizeof(config->status));
		} else {
			PMD_INIT_LOG(DEBUG,
				     "VIRTIO_NET_F_STATUS is not supported");
			config->status = 0;
		}

		if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
			vtpci_read_dev_config(hw,
				offsetof(struct virtio_net_config, max_virtqueue_pairs),
				&config->max_virtqueue_pairs,
				sizeof(config->max_virtqueue_pairs));
		} else {
			PMD_INIT_LOG(DEBUG,
				     "VIRTIO_NET_F_MQ is not supported");
			config->max_virtqueue_pairs = 1;
		}

		hw->max_queue_pairs = config->max_virtqueue_pairs;

		if (vtpci_with_feature(hw, VIRTIO_NET_F_MTU)) {
			vtpci_read_dev_config(hw,
				offsetof(struct virtio_net_config, mtu),
				&config->mtu,
				sizeof(config->mtu));

			/*
			 * MTU value has already been checked at negotiation
			 * time, but check again in case it has changed since
			 * then, which should not happen.
1533 */ 1534 if (config->mtu < ETHER_MIN_MTU) { 1535 PMD_INIT_LOG(ERR, "invalid max MTU value (%u)", 1536 config->mtu); 1537 return -1; 1538 } 1539 1540 hw->max_mtu = config->mtu; 1541 /* Set initial MTU to maximum one supported by vhost */ 1542 eth_dev->data->mtu = config->mtu; 1543 1544 } else { 1545 hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN - 1546 VLAN_TAG_LEN - hw->vtnet_hdr_size; 1547 } 1548 1549 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d", 1550 config->max_virtqueue_pairs); 1551 PMD_INIT_LOG(DEBUG, "config->status=%d", config->status); 1552 PMD_INIT_LOG(DEBUG, 1553 "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X", 1554 config->mac[0], config->mac[1], 1555 config->mac[2], config->mac[3], 1556 config->mac[4], config->mac[5]); 1557 } else { 1558 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1"); 1559 hw->max_queue_pairs = 1; 1560 hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN - 1561 VLAN_TAG_LEN - hw->vtnet_hdr_size; 1562 } 1563 1564 ret = virtio_alloc_queues(eth_dev); 1565 if (ret < 0) 1566 return ret; 1567 1568 if (eth_dev->data->dev_conf.intr_conf.rxq) { 1569 if (virtio_configure_intr(eth_dev) < 0) { 1570 PMD_INIT_LOG(ERR, "failed to configure interrupt"); 1571 return -1; 1572 } 1573 } 1574 1575 vtpci_reinit_complete(hw); 1576 1577 if (pci_dev) 1578 PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", 1579 eth_dev->data->port_id, pci_dev->id.vendor_id, 1580 pci_dev->id.device_id); 1581 1582 return 0; 1583 } 1584 1585 /* 1586 * Remap the PCI device again (IO port map for legacy device and 1587 * memory map for modern device), so that the secondary process 1588 * could have the PCI initiated correctly. 1589 */ 1590 static int 1591 virtio_remap_pci(struct rte_pci_device *pci_dev, struct virtio_hw *hw) 1592 { 1593 if (hw->modern) { 1594 /* 1595 * We don't have to re-parse the PCI config space, since 1596 * rte_pci_map_device() makes sure the mapped address 1597 * in secondary process would equal to the one mapped in 1598 * the primary process: error will be returned if that 1599 * requirement is not met. 1600 * 1601 * That said, we could simply reuse all cap pointers 1602 * (such as dev_cfg, common_cfg, etc.) parsed from the 1603 * primary process, which is stored in shared memory. 1604 */ 1605 if (rte_pci_map_device(pci_dev)) { 1606 PMD_INIT_LOG(DEBUG, "failed to map pci device!"); 1607 return -1; 1608 } 1609 } else { 1610 if (rte_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0) 1611 return -1; 1612 } 1613 1614 return 0; 1615 } 1616 1617 static void 1618 virtio_set_vtpci_ops(struct virtio_hw *hw) 1619 { 1620 #ifdef RTE_VIRTIO_USER 1621 if (hw->virtio_user_dev) 1622 VTPCI_OPS(hw) = &virtio_user_ops; 1623 else 1624 #endif 1625 if (hw->modern) 1626 VTPCI_OPS(hw) = &modern_ops; 1627 else 1628 VTPCI_OPS(hw) = &legacy_ops; 1629 } 1630 1631 /* 1632 * This function is based on probe() function in virtio_pci.c 1633 * It returns 0 on success. 
1634 */ 1635 int 1636 eth_virtio_dev_init(struct rte_eth_dev *eth_dev) 1637 { 1638 struct virtio_hw *hw = eth_dev->data->dev_private; 1639 int ret; 1640 1641 RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf)); 1642 1643 eth_dev->dev_ops = &virtio_eth_dev_ops; 1644 1645 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 1646 if (!hw->virtio_user_dev) { 1647 ret = virtio_remap_pci(RTE_ETH_DEV_TO_PCI(eth_dev), hw); 1648 if (ret) 1649 return ret; 1650 } 1651 1652 virtio_set_vtpci_ops(hw); 1653 set_rxtx_funcs(eth_dev); 1654 1655 return 0; 1656 } 1657 1658 /* Allocate memory for storing MAC addresses */ 1659 eth_dev->data->mac_addrs = rte_zmalloc("virtio", VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0); 1660 if (eth_dev->data->mac_addrs == NULL) { 1661 PMD_INIT_LOG(ERR, 1662 "Failed to allocate %d bytes needed to store MAC addresses", 1663 VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN); 1664 return -ENOMEM; 1665 } 1666 1667 hw->port_id = eth_dev->data->port_id; 1668 /* For virtio_user case the hw->virtio_user_dev is populated by 1669 * virtio_user_eth_dev_alloc() before eth_virtio_dev_init() is called. 1670 */ 1671 if (!hw->virtio_user_dev) { 1672 ret = vtpci_init(RTE_ETH_DEV_TO_PCI(eth_dev), hw); 1673 if (ret) 1674 goto out; 1675 } 1676 1677 /* reset device and negotiate default features */ 1678 ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES); 1679 if (ret < 0) 1680 goto out; 1681 1682 /* Setup interrupt callback */ 1683 if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) 1684 rte_intr_callback_register(eth_dev->intr_handle, 1685 virtio_interrupt_handler, eth_dev); 1686 1687 return 0; 1688 1689 out: 1690 rte_free(eth_dev->data->mac_addrs); 1691 return ret; 1692 } 1693 1694 static int 1695 eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) 1696 { 1697 PMD_INIT_FUNC_TRACE(); 1698 1699 if (rte_eal_process_type() == RTE_PROC_SECONDARY) 1700 return -EPERM; 1701 1702 virtio_dev_stop(eth_dev); 1703 virtio_dev_close(eth_dev); 1704 1705 eth_dev->dev_ops = NULL; 1706 eth_dev->tx_pkt_burst = NULL; 1707 eth_dev->rx_pkt_burst = NULL; 1708 1709 rte_free(eth_dev->data->mac_addrs); 1710 eth_dev->data->mac_addrs = NULL; 1711 1712 /* reset interrupt callback */ 1713 if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) 1714 rte_intr_callback_unregister(eth_dev->intr_handle, 1715 virtio_interrupt_handler, 1716 eth_dev); 1717 if (eth_dev->device) 1718 rte_pci_unmap_device(RTE_ETH_DEV_TO_PCI(eth_dev)); 1719 1720 PMD_INIT_LOG(DEBUG, "dev_uninit completed"); 1721 1722 return 0; 1723 } 1724 1725 static int vdpa_check_handler(__rte_unused const char *key, 1726 const char *value, __rte_unused void *opaque) 1727 { 1728 if (strcmp(value, "1")) 1729 return -1; 1730 1731 return 0; 1732 } 1733 1734 static int 1735 vdpa_mode_selected(struct rte_devargs *devargs) 1736 { 1737 struct rte_kvargs *kvlist; 1738 const char *key = "vdpa"; 1739 int ret = 0; 1740 1741 if (devargs == NULL) 1742 return 0; 1743 1744 kvlist = rte_kvargs_parse(devargs->args, NULL); 1745 if (kvlist == NULL) 1746 return 0; 1747 1748 if (!rte_kvargs_count(kvlist, key)) 1749 goto exit; 1750 1751 /* vdpa mode selected when there's a key-value pair: vdpa=1 */ 1752 if (rte_kvargs_process(kvlist, key, 1753 vdpa_check_handler, NULL) < 0) { 1754 goto exit; 1755 } 1756 ret = 1; 1757 1758 exit: 1759 rte_kvargs_free(kvlist); 1760 return ret; 1761 } 1762 1763 static int eth_virtio_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, 1764 struct rte_pci_device *pci_dev) 1765 { 1766 /* virtio pmd skips probe if device needs to work 
in vdpa mode */ 1767 if (vdpa_mode_selected(pci_dev->device.devargs)) 1768 return 1; 1769 1770 return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct virtio_hw), 1771 eth_virtio_dev_init); 1772 } 1773 1774 static int eth_virtio_pci_remove(struct rte_pci_device *pci_dev) 1775 { 1776 return rte_eth_dev_pci_generic_remove(pci_dev, eth_virtio_dev_uninit); 1777 } 1778 1779 static struct rte_pci_driver rte_virtio_pmd = { 1780 .driver = { 1781 .name = "net_virtio", 1782 }, 1783 .id_table = pci_id_virtio_map, 1784 .drv_flags = 0, 1785 .probe = eth_virtio_pci_probe, 1786 .remove = eth_virtio_pci_remove, 1787 }; 1788 1789 RTE_INIT(rte_virtio_pmd_init) 1790 { 1791 if (rte_eal_iopl_init() != 0) { 1792 PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD"); 1793 return; 1794 } 1795 1796 rte_pci_register(&rte_virtio_pmd); 1797 } 1798 1799 static bool 1800 rx_offload_enabled(struct virtio_hw *hw) 1801 { 1802 return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) || 1803 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) || 1804 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6); 1805 } 1806 1807 static bool 1808 tx_offload_enabled(struct virtio_hw *hw) 1809 { 1810 return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) || 1811 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) || 1812 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6); 1813 } 1814 1815 /* 1816 * Configure virtio device 1817 * It returns 0 on success. 1818 */ 1819 static int 1820 virtio_dev_configure(struct rte_eth_dev *dev) 1821 { 1822 const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; 1823 const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode; 1824 struct virtio_hw *hw = dev->data->dev_private; 1825 uint64_t rx_offloads = rxmode->offloads; 1826 uint64_t tx_offloads = txmode->offloads; 1827 uint64_t req_features; 1828 int ret; 1829 1830 PMD_INIT_LOG(DEBUG, "configure"); 1831 req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES; 1832 1833 if (dev->data->dev_conf.intr_conf.rxq) { 1834 ret = virtio_init_device(dev, hw->req_guest_features); 1835 if (ret < 0) 1836 return ret; 1837 } 1838 1839 if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM | 1840 DEV_RX_OFFLOAD_TCP_CKSUM)) 1841 req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM); 1842 1843 if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) 1844 req_features |= 1845 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | 1846 (1ULL << VIRTIO_NET_F_GUEST_TSO6); 1847 1848 if (tx_offloads & (DEV_TX_OFFLOAD_UDP_CKSUM | 1849 DEV_TX_OFFLOAD_TCP_CKSUM)) 1850 req_features |= (1ULL << VIRTIO_NET_F_CSUM); 1851 1852 if (tx_offloads & DEV_TX_OFFLOAD_TCP_TSO) 1853 req_features |= 1854 (1ULL << VIRTIO_NET_F_HOST_TSO4) | 1855 (1ULL << VIRTIO_NET_F_HOST_TSO6); 1856 1857 /* if request features changed, reinit the device */ 1858 if (req_features != hw->req_guest_features) { 1859 ret = virtio_init_device(dev, req_features); 1860 if (ret < 0) 1861 return ret; 1862 } 1863 1864 if ((rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM | 1865 DEV_RX_OFFLOAD_TCP_CKSUM)) && 1866 !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) { 1867 PMD_DRV_LOG(ERR, 1868 "rx checksum not available on this host"); 1869 return -ENOTSUP; 1870 } 1871 1872 if ((rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) && 1873 (!vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) || 1874 !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) { 1875 PMD_DRV_LOG(ERR, 1876 "Large Receive Offload not available on this host"); 1877 return -ENOTSUP; 1878 } 1879 1880 /* start control queue */ 1881 if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) 1882 virtio_dev_cq_start(dev); 1883 1884 if (rx_offloads & 
DEV_RX_OFFLOAD_VLAN_STRIP)
		hw->vlan_strip = 1;

	if ((rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER)
			&& !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
		PMD_DRV_LOG(ERR,
			    "vlan filtering not available on this host");
		return -ENOTSUP;
	}

	hw->has_tx_offload = tx_offload_enabled(hw);
	hw->has_rx_offload = rx_offload_enabled(hw);

	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		/* Enable vector (0) for Link State Interrupt */
		if (VTPCI_OPS(hw)->set_config_irq(hw, 0) ==
				VIRTIO_MSI_NO_VECTOR) {
			PMD_DRV_LOG(ERR, "failed to set config vector");
			return -EBUSY;
		}

	rte_spinlock_init(&hw->state_lock);

	hw->use_simple_rx = 1;

	if (vtpci_with_feature(hw, VIRTIO_F_IN_ORDER)) {
		hw->use_inorder_tx = 1;
		if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
			hw->use_inorder_rx = 1;
			hw->use_simple_rx = 0;
		} else {
			hw->use_inorder_rx = 0;
		}
	}

#if defined RTE_ARCH_ARM64 || defined RTE_ARCH_ARM
	if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
		hw->use_simple_rx = 0;
	}
#endif
	if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
		hw->use_simple_rx = 0;
	}

	if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
			   DEV_RX_OFFLOAD_TCP_CKSUM |
			   DEV_RX_OFFLOAD_TCP_LRO |
			   DEV_RX_OFFLOAD_VLAN_STRIP))
		hw->use_simple_rx = 0;

	return 0;
}
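/*
 * Note on the use_simple_rx decision above: the simple (vectorized) Rx path
 * is kept only when the host does not offer VIRTIO_NET_F_MRG_RXBUF, no Rx
 * checksum/LRO/VLAN-strip offload was requested and, on ARM, NEON is
 * available; otherwise one of the checks above clears the flag.
 */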
	/* Notify the backend.
	 * Otherwise the tap backend might already stop its queue due to
	 * fullness. The vhost backend will have no chance to be woken up.
	 */
	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
	if (hw->max_queue_pairs > 1) {
		if (virtio_set_multiple_queues(dev, nb_queues) != 0)
			return -EINVAL;
	}

	PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxvq = dev->data->rx_queues[i];
		/* Flush the old packets */
		virtqueue_rxvq_flush(rxvq->vq);
		virtqueue_notify(rxvq->vq);
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txvq = dev->data->tx_queues[i];
		virtqueue_notify(txvq->vq);
	}

	PMD_INIT_LOG(DEBUG, "Notified backend at initialization");

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxvq = dev->data->rx_queues[i];
		VIRTQUEUE_DUMP(rxvq->vq);
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txvq = dev->data->tx_queues[i];
		VIRTQUEUE_DUMP(txvq->vq);
	}

	set_rxtx_funcs(dev);
	hw->started = 1;

	/* Initialize Link state */
	virtio_dev_link_update(dev, 0);

	return 0;
}

/* Detach and free any mbufs still held by the Rx/Tx virtqueues. */
static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;
	uint16_t nr_vq = virtio_get_nr_vq(hw);
	const char *type __rte_unused;
	unsigned int i, mbuf_num = 0;
	struct virtqueue *vq;
	struct rte_mbuf *buf;
	int queue_type;

	if (hw->vqs == NULL)
		return;

	for (i = 0; i < nr_vq; i++) {
		vq = hw->vqs[i];
		if (!vq)
			continue;

		queue_type = virtio_get_queue_type(hw, i);
		if (queue_type == VTNET_RQ)
			type = "rxq";
		else if (queue_type == VTNET_TQ)
			type = "txq";
		else
			continue;

		PMD_INIT_LOG(DEBUG,
			"Before freeing %s[%d] used and unused buf",
			type, i);
		VIRTQUEUE_DUMP(vq);

		while ((buf = virtqueue_detach_unused(vq)) != NULL) {
			rte_pktmbuf_free(buf);
			mbuf_num++;
		}

		PMD_INIT_LOG(DEBUG,
			"After freeing %s[%d] used and unused buf",
			type, i);
		VIRTQUEUE_DUMP(vq);
	}

	PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
}

/*
 * Stop device: disable interrupt and mark link down
 */
static void
virtio_dev_stop(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct rte_eth_link link;
	struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;

	PMD_INIT_LOG(DEBUG, "stop");

	rte_spinlock_lock(&hw->state_lock);
	if (intr_conf->lsc || intr_conf->rxq)
		virtio_intr_disable(dev);

	hw->started = 0;
	memset(&link, 0, sizeof(link));
	rte_eth_linkstatus_set(dev, &link);
	rte_spinlock_unlock(&hw->state_lock);
}

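/*
 * Report link status. If VIRTIO_NET_F_STATUS was negotiated, the status is
 * read from the device config space; otherwise the link is reported as up
 * whenever the device has been started.
 */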
static int
virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
{
	struct rte_eth_link link;
	uint16_t status;
	struct virtio_hw *hw = dev->data->dev_private;

	memset(&link, 0, sizeof(link));
	link.link_duplex = ETH_LINK_FULL_DUPLEX;
	link.link_speed = ETH_SPEED_NUM_10G;
	link.link_autoneg = ETH_LINK_FIXED;

	if (hw->started == 0) {
		link.link_status = ETH_LINK_DOWN;
	} else if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
		PMD_INIT_LOG(DEBUG, "Get link status from hw");
		vtpci_read_dev_config(hw,
				offsetof(struct virtio_net_config, status),
				&status, sizeof(status));
		if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
			link.link_status = ETH_LINK_DOWN;
			PMD_INIT_LOG(DEBUG, "Port %d is down",
				     dev->data->port_id);
		} else {
			link.link_status = ETH_LINK_UP;
			PMD_INIT_LOG(DEBUG, "Port %d is up",
				     dev->data->port_id);
		}
	} else {
		link.link_status = ETH_LINK_UP;
	}

	return rte_eth_linkstatus_set(dev, &link);
}

static int
virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
{
	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
	struct virtio_hw *hw = dev->data->dev_private;
	uint64_t offloads = rxmode->offloads;

	if (mask & ETH_VLAN_FILTER_MASK) {
		if ((offloads & DEV_RX_OFFLOAD_VLAN_FILTER) &&
				!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
			PMD_DRV_LOG(NOTICE,
				"vlan filtering not available on this host");
			return -ENOTSUP;
		}
	}

	if (mask & ETH_VLAN_STRIP_MASK)
		hw->vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);

	return 0;
}

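/*
 * Report device information. Rx/Tx offload capabilities are derived from the
 * feature bits offered by the host (VTPCI_OPS(hw)->get_features()), not from
 * the currently negotiated feature set.
 */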
static void
virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	uint64_t tso_mask, host_features;
	struct virtio_hw *hw = dev->data->dev_private;

	dev_info->speed_capa = ETH_LINK_SPEED_10G; /* fake value */

	dev_info->max_rx_queues =
		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
	dev_info->max_tx_queues =
		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
	dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
	dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;

	host_features = VTPCI_OPS(hw)->get_features(hw);
	dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP |
				    DEV_RX_OFFLOAD_CRC_STRIP;
	if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
		dev_info->rx_offload_capa |=
			DEV_RX_OFFLOAD_TCP_CKSUM |
			DEV_RX_OFFLOAD_UDP_CKSUM;
	}
	if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
		dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_VLAN_FILTER;
	tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
		(1ULL << VIRTIO_NET_F_GUEST_TSO6);
	if ((host_features & tso_mask) == tso_mask)
		dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;

	dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
				    DEV_TX_OFFLOAD_VLAN_INSERT;
	if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
		dev_info->tx_offload_capa |=
			DEV_TX_OFFLOAD_UDP_CKSUM |
			DEV_TX_OFFLOAD_TCP_CKSUM;
	}
	tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
		(1ULL << VIRTIO_NET_F_HOST_TSO6);
	if ((host_features & tso_mask) == tso_mask)
		dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
}

/*
 * It enables testpmd to collect per-queue stats.
 */
static int
virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
		__rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
		__rte_unused uint8_t is_rx)
{
	return 0;
}

RTE_PMD_EXPORT_NAME(net_virtio, __COUNTER__);
RTE_PMD_REGISTER_PCI_TABLE(net_virtio, pci_id_virtio_map);
RTE_PMD_REGISTER_KMOD_DEP(net_virtio, "* igb_uio | uio_pci_generic | vfio-pci");

RTE_INIT(virtio_init_log)
{
	virtio_logtype_init = rte_log_register("pmd.net.virtio.init");
	if (virtio_logtype_init >= 0)
		rte_log_set_level(virtio_logtype_init, RTE_LOG_NOTICE);
	virtio_logtype_driver = rte_log_register("pmd.net.virtio.driver");
	if (virtio_logtype_driver >= 0)
		rte_log_set_level(virtio_logtype_driver, RTE_LOG_NOTICE);
}