/*	$OpenBSD: vionet.c,v 1.22 2024/11/21 13:39:34 claudio Exp $	*/

/*
 * Copyright (c) 2023 Dave Voutila <dv@openbsd.org>
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/types.h>

#include <dev/pci/virtio_pcireg.h>
#include <dev/pv/virtioreg.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>

#include <errno.h>
#include <event.h>
#include <fcntl.h>
#include <pthread.h>
#include <pthread_np.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "atomicio.h"
#include "virtio.h"
#include "vmd.h"

#define VIRTIO_NET_F_MAC	(1 << 5)
#define RXQ	0
#define TXQ	1

extern char *__progname;
extern struct vmd_vm *current_vm;

struct packet {
	uint8_t *buf;
	size_t len;
};

static void *rx_run_loop(void *);
static void *tx_run_loop(void *);
static int vionet_rx(struct vionet_dev *, int);
static ssize_t vionet_rx_copy(struct vionet_dev *, int, const struct iovec *,
    int, size_t);
static ssize_t vionet_rx_zerocopy(struct vionet_dev *, int,
    const struct iovec *, int);
static void vionet_rx_event(int, short, void *);
static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *,
    int8_t *);
static void handle_io_write(struct viodev_msg *, struct virtio_dev *);
static int vionet_tx(struct virtio_dev *);
static void vionet_notifyq(struct virtio_dev *);
static void dev_dispatch_vm(int, short, void *);
static void handle_sync_io(int, short, void *);
static void read_pipe_main(int, short, void *);
static void read_pipe_rx(int, short, void *);
static void read_pipe_tx(int, short, void *);
static void vionet_assert_pic_irq(struct virtio_dev *);
static void vionet_deassert_pic_irq(struct virtio_dev *);

/* Device Globals */
struct event ev_tap;
struct event ev_inject;
struct event_base *ev_base_main;
struct event_base *ev_base_rx;
struct event_base *ev_base_tx;
pthread_t rx_thread;
pthread_t tx_thread;
struct vm_dev_pipe pipe_main;
struct vm_dev_pipe pipe_rx;
struct vm_dev_pipe pipe_tx;
int pipe_inject[2];
#define READ	0
#define WRITE	1
struct iovec iov_rx[VIONET_QUEUE_SIZE];
struct iovec iov_tx[VIONET_QUEUE_SIZE];
pthread_rwlock_t lock = NULL;	/* Guards device config state. */
int resetting = 0;	/* Transient reset state used to coordinate reset. */
int rx_enabled = 0;	/* 1: we expect to read the tap, 0: wait for notify. */

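/*
 * vionet_main
 *
 * Entry point for the vionet device process. The main thread owns the imsg
 * channels to the vm process and is the only thread that asserts or
 * deasserts the irq. The rx thread watches the tap(4) fd and the packet
 * injection pipe, and the tx thread drains the transmit virtqueue when the
 * guest notifies. The threads coordinate over the vm_dev_pipe channels above
 * and guard shared device config state with the global rwlock.
 */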
__dead void
vionet_main(int fd, int fd_vmm)
{
	struct virtio_dev dev;
	struct vionet_dev *vionet = NULL;
	struct viodev_msg msg;
	struct vmd_vm vm;
	struct vm_create_params *vcp;
	ssize_t sz;
	int ret;

	/*
	 * stdio - needed for read/write to disk fds and channels to the vm.
	 * vmm + proc - needed to create shared vm mappings.
	 */
	if (pledge("stdio vmm proc", NULL) == -1)
		fatal("pledge");

	/* Initialize iovec arrays. */
	memset(iov_rx, 0, sizeof(iov_rx));
	memset(iov_tx, 0, sizeof(iov_tx));

	/* Receive our vionet_dev, mostly preconfigured. */
	sz = atomicio(read, fd, &dev, sizeof(dev));
	if (sz != sizeof(dev)) {
		ret = errno;
		log_warn("failed to receive vionet");
		goto fail;
	}
	if (dev.dev_type != VMD_DEVTYPE_NET) {
		ret = EINVAL;
		log_warn("received invalid device type");
		goto fail;
	}
	dev.sync_fd = fd;
	vionet = &dev.vionet;

	log_debug("%s: got vionet dev. tap fd = %d, syncfd = %d, asyncfd = %d"
	    ", vmm fd = %d", __func__, vionet->data_fd, dev.sync_fd,
	    dev.async_fd, fd_vmm);

	/* Receive our vm information from the vm process. */
	memset(&vm, 0, sizeof(vm));
	sz = atomicio(read, dev.sync_fd, &vm, sizeof(vm));
	if (sz != sizeof(vm)) {
		ret = EIO;
		log_warnx("failed to receive vm details");
		goto fail;
	}
	vcp = &vm.vm_params.vmc_params;
	current_vm = &vm;
	setproctitle("%s/vionet%d", vcp->vcp_name, vionet->idx);
	log_procinit("vm/%s/vionet%d", vcp->vcp_name, vionet->idx);

	/* Now that we have our vm information, we can remap memory. */
	ret = remap_guest_mem(&vm, fd_vmm);
	if (ret) {
		fatal("%s: failed to remap", __func__);
		goto fail;
	}

	/*
	 * We no longer need /dev/vmm access.
	 */
	close_fd(fd_vmm);
	if (pledge("stdio", NULL) == -1)
		fatal("pledge2");

	/* If we're restoring hardware, re-initialize virtqueue hva's. */
	if (vm.vm_state & VM_STATE_RECEIVED) {
		struct virtio_vq_info *vq_info;
		void *hva = NULL;

		vq_info = &dev.vionet.vq[TXQ];
		if (vq_info->q_gpa != 0) {
			log_debug("%s: restoring TX virtqueue for gpa 0x%llx",
			    __func__, vq_info->q_gpa);
			hva = hvaddr_mem(vq_info->q_gpa,
			    vring_size(VIONET_QUEUE_SIZE));
			if (hva == NULL)
				fatalx("%s: hva == NULL", __func__);
			vq_info->q_hva = hva;
		}

		vq_info = &dev.vionet.vq[RXQ];
		if (vq_info->q_gpa != 0) {
			log_debug("%s: restoring RX virtqueue for gpa 0x%llx",
			    __func__, vq_info->q_gpa);
			hva = hvaddr_mem(vq_info->q_gpa,
			    vring_size(VIONET_QUEUE_SIZE));
			if (hva == NULL)
				fatalx("%s: hva == NULL", __func__);
			vq_info->q_hva = hva;
		}
	}

	/* Initialize our packet injection pipe. */
	if (pipe2(pipe_inject, O_NONBLOCK) == -1) {
		log_warn("%s: injection pipe", __func__);
		goto fail;
	}

	/* Initialize inter-thread communication channels. */
	vm_pipe_init2(&pipe_main, read_pipe_main, &dev);
	vm_pipe_init2(&pipe_rx, read_pipe_rx, &dev);
	vm_pipe_init2(&pipe_tx, read_pipe_tx, &dev);

	/* Initialize RX and TX threads. */
	ret = pthread_create(&rx_thread, NULL, rx_run_loop, &dev);
	if (ret) {
		errno = ret;
		log_warn("%s: failed to initialize rx thread", __func__);
		goto fail;
	}
	pthread_set_name_np(rx_thread, "rx");
	ret = pthread_create(&tx_thread, NULL, tx_run_loop, &dev);
	if (ret) {
		errno = ret;
		log_warn("%s: failed to initialize tx thread", __func__);
		goto fail;
	}
	pthread_set_name_np(tx_thread, "tx");

	/* Initialize our rwlock for guarding shared device state. */
	ret = pthread_rwlock_init(&lock, NULL);
	if (ret) {
		errno = ret;
		log_warn("%s: failed to initialize rwlock", __func__);
		goto fail;
	}

	/* Initialize libevent so we can start wiring event handlers. */
	ev_base_main = event_base_new();

	/* Add our handler for receiving messages from the RX/TX threads. */
	event_base_set(ev_base_main, &pipe_main.read_ev);
	event_add(&pipe_main.read_ev, NULL);

	/* Wire up an async imsg channel. */
	log_debug("%s: wiring in async vm event handler (fd=%d)", __func__,
	    dev.async_fd);
	if (vm_device_pipe(&dev, dev_dispatch_vm, ev_base_main)) {
		ret = EIO;
		log_warnx("vm_device_pipe");
		goto fail;
	}

	/* Configure our sync channel event handler. */
	log_debug("%s: wiring in sync channel handler (fd=%d)", __func__,
	    dev.sync_fd);
	if (imsgbuf_init(&dev.sync_iev.ibuf, dev.sync_fd) == -1) {
		log_warnx("imsgbuf_init");
		goto fail;
	}
	imsgbuf_allow_fdpass(&dev.sync_iev.ibuf);
	dev.sync_iev.handler = handle_sync_io;
	dev.sync_iev.data = &dev;
	dev.sync_iev.events = EV_READ;
	imsg_event_add2(&dev.sync_iev, ev_base_main);

	/* Send a ready message over the sync channel. */
	log_debug("%s: telling vm %s device is ready", __func__, vcp->vcp_name);
	memset(&msg, 0, sizeof(msg));
	msg.type = VIODEV_MSG_READY;
	imsg_compose_event2(&dev.sync_iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
	    sizeof(msg), ev_base_main);

	/* Send a ready message over the async channel. */
	log_debug("%s: sending async ready message", __func__);
	ret = imsg_compose_event2(&dev.async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
	    &msg, sizeof(msg), ev_base_main);
	if (ret == -1) {
		log_warnx("%s: failed to send async ready message!", __func__);
		goto fail;
	}

	/* Engage the event loop! */
	ret = event_base_dispatch(ev_base_main);
	event_base_free(ev_base_main);

	/* Try stopping the rx & tx threads cleanly by messaging them. */
	vm_pipe_send(&pipe_rx, VIRTIO_THREAD_STOP);
	vm_pipe_send(&pipe_tx, VIRTIO_THREAD_STOP);

	/* Wait for threads to stop. */
	pthread_join(rx_thread, NULL);
	pthread_join(tx_thread, NULL);
	pthread_rwlock_destroy(&lock);

	/* Cleanup */
	if (ret == 0) {
		close_fd(dev.sync_fd);
		close_fd(dev.async_fd);
		close_fd(vionet->data_fd);
		close_fd(pipe_main.read);
		close_fd(pipe_main.write);
		close_fd(pipe_rx.write);
		close_fd(pipe_tx.write);
		close_fd(pipe_inject[READ]);
		close_fd(pipe_inject[WRITE]);
		_exit(ret);
		/* NOTREACHED */
	}
fail:
	/* Try firing off a message to the vm saying we're dying. */
	memset(&msg, 0, sizeof(msg));
	msg.type = VIODEV_MSG_ERROR;
	msg.data = ret;
	imsg_compose(&dev.sync_iev.ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
	    sizeof(msg));
	imsgbuf_flush(&dev.sync_iev.ibuf);

	close_fd(dev.sync_fd);
	close_fd(dev.async_fd);
	close_fd(pipe_inject[READ]);
	close_fd(pipe_inject[WRITE]);
	if (vionet != NULL)
		close_fd(vionet->data_fd);
	if (lock != NULL)
		pthread_rwlock_destroy(&lock);
	_exit(ret);
}

/*
 * Update the gpa and hva of the virtqueue.
 */
static void
vionet_update_qa(struct vionet_dev *dev)
{
	struct virtio_vq_info *vq_info;
	void *hva = NULL;

	/* Invalid queue? */
	if (dev->cfg.queue_select > 1)
		return;

	vq_info = &dev->vq[dev->cfg.queue_select];
	vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;
	dev->cfg.queue_pfn = vq_info->q_gpa >> 12;

	if (vq_info->q_gpa == 0)
		vq_info->q_hva = NULL;

	hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIONET_QUEUE_SIZE));
	if (hva == NULL)
		fatalx("%s: hva == NULL", __func__);

	vq_info->q_hva = hva;
}

/*
 * Update the queue size.
 */
static void
vionet_update_qs(struct vionet_dev *dev)
{
	struct virtio_vq_info *vq_info;

	/* Invalid queue? */
	if (dev->cfg.queue_select > 1) {
		log_warnx("%s: !!! invalid queue selector %d", __func__,
		    dev->cfg.queue_select);
		dev->cfg.queue_size = 0;
		return;
	}

	vq_info = &dev->vq[dev->cfg.queue_select];

	/* Update queue pfn/size based on queue select */
	dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
	dev->cfg.queue_size = vq_info->qs;
}

/*
 * vionet_rx
 *
 * Pull packet from the provided fd and fill the receive-side virtqueue. We
 * selectively use zero-copy approaches when possible.
 *
 * Returns 1 if guest notification is needed. Otherwise, returns -1 on failure
 * or 0 if no notification is needed.
 */
static int
vionet_rx(struct vionet_dev *dev, int fd)
{
	uint16_t idx, hdr_idx;
	char *vr = NULL;
	size_t chain_len = 0, iov_cnt;
	struct vring_desc *desc, *table;
	struct vring_avail *avail;
	struct vring_used *used;
	struct virtio_vq_info *vq_info;
	struct iovec *iov;
	int notify = 0;
	ssize_t sz;
	uint8_t status = 0;

	status = dev->cfg.device_status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK;
	if (status != VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) {
		log_warnx("%s: driver not ready", __func__);
		return (0);
	}

	vq_info = &dev->vq[RXQ];
	idx = vq_info->last_avail;
	vr = vq_info->q_hva;
	if (vr == NULL)
		fatalx("%s: vr == NULL", __func__);

	/* Compute offsets in ring of descriptors, avail ring, and used ring */
	table = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
	used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
	used->flags |= VRING_USED_F_NO_NOTIFY;

	while (idx != avail->idx) {
		hdr_idx = avail->ring[idx & VIONET_QUEUE_MASK];
		desc = &table[hdr_idx & VIONET_QUEUE_MASK];
		if (!DESC_WRITABLE(desc)) {
			log_warnx("%s: invalid descriptor state", __func__);
			goto reset;
		}

		iov = &iov_rx[0];
		iov_cnt = 1;

		/*
		 * First descriptor should be at least as large as the
		 * virtio_net_hdr. It's not technically required, but in
		 * legacy devices it should be safe to assume.
		 */
		iov->iov_len = desc->len;
		if (iov->iov_len < sizeof(struct virtio_net_hdr)) {
			log_warnx("%s: invalid descriptor length", __func__);
			goto reset;
		}

		/*
		 * Insert the virtio_net_hdr and adjust len/base. We do the
		 * pointer math here before it's a void*.
		 */
		iov->iov_base = hvaddr_mem(desc->addr, iov->iov_len);
		if (iov->iov_base == NULL)
			goto reset;
		memset(iov->iov_base, 0, sizeof(struct virtio_net_hdr));

		/* Tweak the iovec to account for the virtio_net_hdr. */
		iov->iov_len -= sizeof(struct virtio_net_hdr);
		iov->iov_base = hvaddr_mem(desc->addr +
		    sizeof(struct virtio_net_hdr), iov->iov_len);
		if (iov->iov_base == NULL)
			goto reset;
		chain_len = iov->iov_len;

		/*
		 * Walk the remaining chain and collect remaining addresses
		 * and lengths.
		 */
		while (desc->flags & VRING_DESC_F_NEXT) {
			desc = &table[desc->next & VIONET_QUEUE_MASK];
			if (!DESC_WRITABLE(desc)) {
				log_warnx("%s: invalid descriptor state",
				    __func__);
				goto reset;
			}

			/* Collect our IO information. Translate gpa's. */
			iov = &iov_rx[iov_cnt];
			iov->iov_len = desc->len;
			iov->iov_base = hvaddr_mem(desc->addr, iov->iov_len);
			if (iov->iov_base == NULL)
				goto reset;
			chain_len += iov->iov_len;

			/* Guard against infinitely looping chains. */
			if (++iov_cnt >= nitems(iov_rx)) {
				log_warnx("%s: infinite chain detected",
				    __func__);
				goto reset;
			}
		}

		/* Make sure the driver gave us the bare minimum buffers. */
		if (chain_len < VIONET_MIN_TXLEN) {
			log_warnx("%s: insufficient buffers provided",
			    __func__);
			goto reset;
		}

		/*
		 * If we're enforcing hardware address or handling an injected
		 * packet, we need to use a copy-based approach.
		 */
		if (dev->lockedmac || fd != dev->data_fd)
			sz = vionet_rx_copy(dev, fd, iov_rx, iov_cnt,
			    chain_len);
		else
			sz = vionet_rx_zerocopy(dev, fd, iov_rx, iov_cnt);
		if (sz == -1)
			goto reset;
		if (sz == 0)	/* No packets, so bail out for now. */
			break;

		/*
		 * Account for the prefixed header since it wasn't included
		 * in the copy or zerocopy operations.
		 */
		sz += sizeof(struct virtio_net_hdr);

		/* Mark our buffers as used. */
		used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_idx;
		used->ring[used->idx & VIONET_QUEUE_MASK].len = sz;
		__sync_synchronize();
		used->idx++;
		idx++;
	}

	if (idx != vq_info->last_avail &&
	    !(avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
		notify = 1;
	}

	vq_info->last_avail = idx;
	return (notify);
reset:
	return (-1);
}

/*
 * vionet_rx_copy
 *
 * Read a packet off the provided file descriptor, validating packet
 * characteristics, and copy into the provided buffers in the iovec array.
 *
 * It's assumed that the provided iovec array contains validated host virtual
 * address translations and not guest physical addresses.
 *
 * Returns number of bytes copied on success, 0 if packet is dropped, and
 * -1 on an error.
 */
ssize_t
vionet_rx_copy(struct vionet_dev *dev, int fd, const struct iovec *iov,
    int iov_cnt, size_t chain_len)
{
	static uint8_t buf[VIONET_HARD_MTU];
	struct packet *pkt = NULL;
	struct ether_header *eh = NULL;
	uint8_t *payload = buf;
	size_t i, chunk, nbytes, copied = 0;
	ssize_t sz;

	/* If reading from the tap(4), try to right-size the read. */
	if (fd == dev->data_fd)
		nbytes = MIN(chain_len, VIONET_HARD_MTU);
	else if (fd == pipe_inject[READ])
		nbytes = sizeof(struct packet);
	else {
		log_warnx("%s: invalid fd: %d", __func__, fd);
		return (-1);
	}

	/*
	 * Try to pull a packet. The fd should be non-blocking and we don't
	 * care if we under-read (i.e. sz != nbytes) as we may not have a
	 * packet large enough to fill the buffer.
	 */
	sz = read(fd, buf, nbytes);
	if (sz == -1) {
		if (errno != EAGAIN) {
			log_warn("%s: error reading packet", __func__);
			return (-1);
		}
		return (0);
	} else if (fd == dev->data_fd && sz < VIONET_MIN_TXLEN) {
		/* If reading the tap(4), we should get valid ethernet. */
		log_warnx("%s: invalid packet size", __func__);
		return (0);
	} else if (fd == pipe_inject[READ] && sz != sizeof(struct packet)) {
		log_warnx("%s: invalid injected packet object (sz=%ld)",
		    __func__, sz);
		return (0);
	}

	/* Decompose an injected packet, if that's what we're working with. */
	if (fd == pipe_inject[READ]) {
		pkt = (struct packet *)buf;
		if (pkt->buf == NULL) {
			log_warnx("%s: invalid injected packet, no buffer",
			    __func__);
			return (0);
		}
		if (sz < VIONET_MIN_TXLEN || sz > VIONET_MAX_TXLEN) {
			log_warnx("%s: invalid injected packet size", __func__);
			goto drop;
		}
		payload = pkt->buf;
		sz = (ssize_t)pkt->len;
	}

	/* Validate the ethernet header, if required. */
	if (dev->lockedmac) {
		eh = (struct ether_header *)(payload);
		if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
		    memcmp(eh->ether_dhost, dev->mac,
		    sizeof(eh->ether_dhost)) != 0)
			goto drop;
	}

	/* Truncate one last time to the chain length, if shorter. */
	sz = MIN(chain_len, (size_t)sz);

	/*
	 * Copy the packet into the provided buffers. We can use memcpy(3)
	 * here as the gpa was validated and translated to an hva previously.
	 */
	for (i = 0; (int)i < iov_cnt && (size_t)sz > copied; i++) {
		chunk = MIN(iov[i].iov_len, (size_t)(sz - copied));
		memcpy(iov[i].iov_base, payload + copied, chunk);
		copied += chunk;
	}

drop:
	/* Free any injected packet buffer. */
	if (pkt != NULL)
		free(pkt->buf);

	return (copied);
}

/*
 * vionet_rx_zerocopy
 *
 * Perform a vectorized read from the given fd into the guest physical memory
 * pointed to by iovecs.
 *
 * Returns number of bytes read on success, -1 on error, or 0 if EAGAIN was
 * returned by readv.
 *
 */
static ssize_t
vionet_rx_zerocopy(struct vionet_dev *dev, int fd, const struct iovec *iov,
    int iov_cnt)
{
	ssize_t sz;

	if (dev->lockedmac) {
		log_warnx("%s: zerocopy not available for locked lladdr",
		    __func__);
		return (-1);
	}

	sz = readv(fd, iov, iov_cnt);
	if (sz == -1 && errno == EAGAIN)
		return (0);
	return (sz);
}


/*
 * vionet_rx_event
 *
 * Called when new data can be received on the tap fd of a vionet device.
 */
static void
vionet_rx_event(int fd, short event, void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev *)arg;
	struct vionet_dev *vionet = &dev->vionet;
	int ret = 0;

	if (!(event & EV_READ))
		fatalx("%s: invalid event type", __func__);

	pthread_rwlock_rdlock(&lock);
	ret = vionet_rx(vionet, fd);
	pthread_rwlock_unlock(&lock);

	if (ret == 0) {
		/* Nothing to do. */
		return;
	}

	pthread_rwlock_wrlock(&lock);
	if (ret == 1) {
		/* Notify the driver. */
		vionet->cfg.isr_status |= 1;
	} else {
		/* Need a reset. Something went wrong. */
		log_warnx("%s: requesting device reset", __func__);
		vionet->cfg.device_status |= DEVICE_NEEDS_RESET;
		vionet->cfg.isr_status |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
	}
	pthread_rwlock_unlock(&lock);

	vm_pipe_send(&pipe_main, VIRTIO_RAISE_IRQ);
}

static void
vionet_notifyq(struct virtio_dev *dev)
{
	struct vionet_dev *vionet = &dev->vionet;

	switch (vionet->cfg.queue_notify) {
	case RXQ:
		rx_enabled = 1;
		vm_pipe_send(&pipe_rx, VIRTIO_NOTIFY);
		break;
	case TXQ:
		vm_pipe_send(&pipe_tx, VIRTIO_NOTIFY);
		break;
	default:
		/*
		 * Catch the unimplemented queue ID 2 (control queue) as
		 * well as any bogus queue IDs.
		 */
		log_debug("%s: notify for unimplemented queue ID %d",
		    __func__, vionet->cfg.queue_notify);
		break;
	}
}

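/*
 * vionet_tx
 *
 * Drain the transmit virtqueue, writing packet chains to the tap(4) and
 * placing the consumed descriptors on the used ring. On "local" interfaces,
 * dhcp requests are answered by injecting the reply into the rx path rather
 * than being written out.
 *
 * Returns 1 if guest notification is needed, 0 if there is nothing to do,
 * or -1 on failure requiring a device reset.
 */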
static int
vionet_tx(struct virtio_dev *dev)
{
	uint16_t idx, hdr_idx;
	size_t chain_len, iov_cnt;
	ssize_t dhcpsz = 0, sz;
	int notify = 0;
	char *vr = NULL, *dhcppkt = NULL;
	struct vionet_dev *vionet = &dev->vionet;
	struct vring_desc *desc, *table;
	struct vring_avail *avail;
	struct vring_used *used;
	struct virtio_vq_info *vq_info;
	struct ether_header *eh;
	struct iovec *iov;
	struct packet pkt;
	uint8_t status = 0;

	status = vionet->cfg.device_status
	    & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK;
	if (status != VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) {
		log_warnx("%s: driver not ready", __func__);
		return (0);
	}

	vq_info = &vionet->vq[TXQ];
	idx = vq_info->last_avail;
	vr = vq_info->q_hva;
	if (vr == NULL)
		fatalx("%s: vr == NULL", __func__);

	/* Compute offsets in ring of descriptors, avail ring, and used ring */
	table = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
	used = (struct vring_used *)(vr + vq_info->vq_usedoffset);

	while (idx != avail->idx) {
		hdr_idx = avail->ring[idx & VIONET_QUEUE_MASK];
		desc = &table[hdr_idx & VIONET_QUEUE_MASK];
		if (DESC_WRITABLE(desc)) {
			log_warnx("%s: invalid descriptor state", __func__);
			goto reset;
		}

		iov = &iov_tx[0];
		iov_cnt = 0;
		chain_len = 0;

		/*
		 * As a legacy device, we most likely will receive a lead
		 * descriptor sized to the virtio_net_hdr. However, the framing
		 * is not guaranteed, so check for packet data.
		 */
		iov->iov_len = desc->len;
		if (iov->iov_len < sizeof(struct virtio_net_hdr)) {
			log_warnx("%s: invalid descriptor length", __func__);
			goto reset;
		} else if (iov->iov_len > sizeof(struct virtio_net_hdr)) {
			/* Chop off the virtio header, leaving packet data. */
			iov->iov_len -= sizeof(struct virtio_net_hdr);
			chain_len += iov->iov_len;
			iov->iov_base = hvaddr_mem(desc->addr +
			    sizeof(struct virtio_net_hdr), iov->iov_len);
			if (iov->iov_base == NULL)
				goto reset;
			iov_cnt++;
		}

		/*
		 * Walk the chain and collect remaining addresses and lengths.
		 */
		while (desc->flags & VRING_DESC_F_NEXT) {
			desc = &table[desc->next & VIONET_QUEUE_MASK];
			if (DESC_WRITABLE(desc)) {
				log_warnx("%s: invalid descriptor state",
				    __func__);
				goto reset;
			}

			/* Collect our IO information, translating gpa's. */
			iov = &iov_tx[iov_cnt];
			iov->iov_len = desc->len;
			iov->iov_base = hvaddr_mem(desc->addr, iov->iov_len);
			if (iov->iov_base == NULL)
				goto reset;
			chain_len += iov->iov_len;

			/* Guard against infinitely looping chains. */
			if (++iov_cnt >= nitems(iov_tx)) {
				log_warnx("%s: infinite chain detected",
				    __func__);
				goto reset;
			}
		}

		/* Check if we've got a minimum viable amount of data. */
		if (chain_len < VIONET_MIN_TXLEN)
			goto drop;

		/*
		 * Packet inspection for ethernet header (if using a "local"
		 * interface) for possibility of a DHCP packet or (if using
		 * locked lladdr) for validating ethernet header.
		 *
		 * To help preserve zero-copy semantics, we require the first
		 * descriptor with packet data contains a large enough buffer
		 * for this inspection.
		 */
		iov = &iov_tx[0];
		if (vionet->lockedmac) {
			if (iov->iov_len < ETHER_HDR_LEN) {
				log_warnx("%s: insufficient header data",
				    __func__);
				goto drop;
			}
			eh = (struct ether_header *)iov->iov_base;
			if (memcmp(eh->ether_shost, vionet->mac,
			    sizeof(eh->ether_shost)) != 0) {
				log_warnx("%s: bad source address %s",
				    __func__, ether_ntoa((struct ether_addr *)
				    eh->ether_shost));
				goto drop;
			}
		}
		if (vionet->local) {
			dhcpsz = dhcp_request(dev, iov->iov_base, iov->iov_len,
			    &dhcppkt);
			if (dhcpsz > 0) {
				log_debug("%s: detected dhcp request of %zu bytes",
				    __func__, dhcpsz);
				goto drop;
			}
		}

		/* Write our packet to the tap(4). */
		sz = writev(vionet->data_fd, iov_tx, iov_cnt);
		if (sz == -1 && errno != ENOBUFS) {
			log_warn("%s", __func__);
			goto reset;
		}
		chain_len += sizeof(struct virtio_net_hdr);
drop:
		used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_idx;
		used->ring[used->idx & VIONET_QUEUE_MASK].len = chain_len;
		__sync_synchronize();
		used->idx++;
		idx++;

		/* Facilitate DHCP reply injection, if needed. */
		if (dhcpsz > 0) {
			pkt.buf = dhcppkt;
			pkt.len = dhcpsz;
			sz = write(pipe_inject[WRITE], &pkt, sizeof(pkt));
			if (sz == -1 && errno != EAGAIN) {
				log_warn("%s: packet injection", __func__);
				free(pkt.buf);
			} else if (sz == -1 && errno == EAGAIN) {
				log_debug("%s: dropping dhcp reply", __func__);
				free(pkt.buf);
			} else if (sz != sizeof(pkt)) {
				log_warnx("%s: failed packet injection",
				    __func__);
				free(pkt.buf);
			}
			log_debug("%s: injected dhcp reply with %ld bytes",
			    __func__, sz);
		}
	}

	if (idx != vq_info->last_avail &&
	    !(avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
		notify = 1;


	vq_info->last_avail = idx;
	return (notify);
reset:
	return (-1);
}

static void
dev_dispatch_vm(int fd, short event, void *arg)
{
	struct virtio_dev *dev = arg;
	struct vionet_dev *vionet = &dev->vionet;
	struct imsgev *iev = &dev->async_iev;
	struct imsgbuf *ibuf = &iev->ibuf;
	struct imsg imsg;
	ssize_t n = 0;
	int verbose;

	if (dev == NULL)
		fatalx("%s: missing vionet pointer", __func__);

	if (event & EV_READ) {
		if ((n = imsgbuf_read(ibuf)) == -1)
			fatal("%s: imsgbuf_read", __func__);
		if (n == 0) {
			/* this pipe is dead, so remove the event handler */
			log_debug("%s: pipe dead (EV_READ)", __func__);
			event_del(&iev->ev);
			event_base_loopexit(ev_base_main, NULL);
			return;
		}
	}

	if (event & EV_WRITE) {
		if (imsgbuf_write(ibuf) == -1) {
			if (errno == EPIPE) {
				/* this pipe is dead, remove the handler */
				log_debug("%s: pipe dead (EV_WRITE)", __func__);
				event_del(&iev->ev);
				event_loopexit(NULL);
				return;
			}
			fatal("%s: imsgbuf_write", __func__);
		}
	}

	for (;;) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("%s: imsg_get", __func__);
		if (n == 0)
			break;

		switch (imsg.hdr.type) {
		case IMSG_DEVOP_HOSTMAC:
			IMSG_SIZE_CHECK(&imsg, vionet->hostmac);
			memcpy(vionet->hostmac, imsg.data,
			    sizeof(vionet->hostmac));
			log_debug("%s: set hostmac", __func__);
			break;
		case IMSG_VMDOP_PAUSE_VM:
			log_debug("%s: pausing", __func__);
			vm_pipe_send(&pipe_rx, VIRTIO_THREAD_PAUSE);
			break;
		case IMSG_VMDOP_UNPAUSE_VM:
			log_debug("%s: unpausing", __func__);
			if (rx_enabled)
				vm_pipe_send(&pipe_rx, VIRTIO_THREAD_START);
			break;
		case IMSG_CTL_VERBOSE:
			IMSG_SIZE_CHECK(&imsg, &verbose);
			memcpy(&verbose, imsg.data, sizeof(verbose));
			log_setverbose(verbose);
			break;
		}
		imsg_free(&imsg);
	}
	imsg_event_add2(iev, ev_base_main);
}

/*
 * Synchronous IO handler.
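 *
 * Services viodev_msg requests arriving from the vm process on the sync
 * channel: register reads (which are answered with a reply message),
 * register writes, device dumps, and shutdown.
 */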
static void
handle_sync_io(int fd, short event, void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev *)arg;
	struct imsgev *iev = &dev->sync_iev;
	struct imsgbuf *ibuf = &iev->ibuf;
	struct viodev_msg msg;
	struct imsg imsg;
	ssize_t n;
	int8_t intr = INTR_STATE_NOOP;

	if (event & EV_READ) {
		if ((n = imsgbuf_read(ibuf)) == -1)
			fatal("%s: imsgbuf_read", __func__);
		if (n == 0) {
			/* this pipe is dead, so remove the event handler */
			log_debug("%s: pipe dead (EV_READ)", __func__);
			event_del(&iev->ev);
			event_base_loopexit(ev_base_main, NULL);
			return;
		}
	}

	if (event & EV_WRITE) {
		if (imsgbuf_write(ibuf) == -1) {
			if (errno == EPIPE) {
				/* this pipe is dead, remove the handler */
				log_debug("%s: pipe dead (EV_WRITE)", __func__);
				event_del(&iev->ev);
				event_loopexit(NULL);
				return;
			}
			fatal("%s: imsgbuf_write", __func__);
		}
	}

	for (;;) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatalx("%s: imsg_get (n=%ld)", __func__, n);
		if (n == 0)
			break;

		/* Unpack our message. They ALL should be dev messages! */
		IMSG_SIZE_CHECK(&imsg, &msg);
		memcpy(&msg, imsg.data, sizeof(msg));
		imsg_free(&imsg);

		switch (msg.type) {
		case VIODEV_MSG_DUMP:
			/* Dump device */
			n = atomicio(vwrite, dev->sync_fd, dev, sizeof(*dev));
			if (n != sizeof(*dev)) {
				log_warnx("%s: failed to dump vionet device",
				    __func__);
				break;
			}
		case VIODEV_MSG_IO_READ:
			/* Read IO: make sure to send a reply */
			msg.data = handle_io_read(&msg, dev, &intr);
			msg.data_valid = 1;
			msg.state = intr;
			imsg_compose_event2(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
			    sizeof(msg), ev_base_main);
			break;
		case VIODEV_MSG_IO_WRITE:
			/* Write IO: no reply needed */
			handle_io_write(&msg, dev);
			break;
		case VIODEV_MSG_SHUTDOWN:
			event_del(&dev->sync_iev.ev);
			event_base_loopbreak(ev_base_main);
			return;
		default:
			fatalx("%s: invalid msg type %d", __func__, msg.type);
		}
	}
	imsg_event_add2(iev, ev_base_main);
}

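/*
 * Emulate a guest write to one of the legacy virtio PCI config registers.
 * A write of zero to the device status register begins a reset: both worker
 * threads are paused and, once each has acknowledged via pipe_main, the
 * device config is cleared in read_pipe_main().
 */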
static void
handle_io_write(struct viodev_msg *msg, struct virtio_dev *dev)
{
	struct vionet_dev *vionet = &dev->vionet;
	uint32_t data = msg->data;
	int pause_devices = 0;

	pthread_rwlock_wrlock(&lock);

	switch (msg->reg) {
	case VIRTIO_CONFIG_DEVICE_FEATURES:
	case VIRTIO_CONFIG_QUEUE_SIZE:
	case VIRTIO_CONFIG_ISR_STATUS:
		log_warnx("%s: illegal write %x to %s", __progname, data,
		    virtio_reg_name(msg->reg));
		break;
	case VIRTIO_CONFIG_GUEST_FEATURES:
		vionet->cfg.guest_feature = data;
		break;
	case VIRTIO_CONFIG_QUEUE_PFN:
		vionet->cfg.queue_pfn = data;
		vionet_update_qa(vionet);
		break;
	case VIRTIO_CONFIG_QUEUE_SELECT:
		vionet->cfg.queue_select = data;
		vionet_update_qs(vionet);
		break;
	case VIRTIO_CONFIG_QUEUE_NOTIFY:
		vionet->cfg.queue_notify = data;
		vionet_notifyq(dev);
		break;
	case VIRTIO_CONFIG_DEVICE_STATUS:
		if (data == 0) {
			resetting = 2;	/* Wait on two acks: rx & tx */
			pause_devices = 1;
		} else {
			// XXX is this correct?
			vionet->cfg.device_status = data;
		}
		break;
	}

	pthread_rwlock_unlock(&lock);
	if (pause_devices) {
		rx_enabled = 0;
		vionet_deassert_pic_irq(dev);
		vm_pipe_send(&pipe_rx, VIRTIO_THREAD_PAUSE);
		vm_pipe_send(&pipe_tx, VIRTIO_THREAD_PAUSE);
	}
}

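/*
 * Emulate a guest read of one of the legacy virtio PCI config registers.
 * Reading the ISR status register has the side effect of clearing it and
 * requesting deassertion of the irq, which is why the read lock is dropped
 * and the write lock taken for that case.
 */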
static uint32_t
handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr)
{
	struct vionet_dev *vionet = &dev->vionet;
	uint32_t data;

	pthread_rwlock_rdlock(&lock);

	switch (msg->reg) {
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1:
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2:
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
		data = vionet->mac[msg->reg -
		    VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI];
		break;
	case VIRTIO_CONFIG_DEVICE_FEATURES:
		data = vionet->cfg.device_feature;
		break;
	case VIRTIO_CONFIG_GUEST_FEATURES:
		data = vionet->cfg.guest_feature;
		break;
	case VIRTIO_CONFIG_QUEUE_PFN:
		data = vionet->cfg.queue_pfn;
		break;
	case VIRTIO_CONFIG_QUEUE_SIZE:
		data = vionet->cfg.queue_size;
		break;
	case VIRTIO_CONFIG_QUEUE_SELECT:
		data = vionet->cfg.queue_select;
		break;
	case VIRTIO_CONFIG_QUEUE_NOTIFY:
		data = vionet->cfg.queue_notify;
		break;
	case VIRTIO_CONFIG_DEVICE_STATUS:
		data = vionet->cfg.device_status;
		break;
	case VIRTIO_CONFIG_ISR_STATUS:
		pthread_rwlock_unlock(&lock);
		pthread_rwlock_wrlock(&lock);
		data = vionet->cfg.isr_status;
		vionet->cfg.isr_status = 0;
		if (intr != NULL)
			*intr = INTR_STATE_DEASSERT;
		break;
	default:
		data = 0xFFFFFFFF;
	}

	pthread_rwlock_unlock(&lock);
	return (data);
}

/*
 * Handle the rx side processing, communicating to the main thread via pipe.
 */
static void *
rx_run_loop(void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev *)arg;
	struct vionet_dev *vionet = &dev->vionet;
	int ret;

	ev_base_rx = event_base_new();

	/* Wire up event handling for the tap fd. */
	event_set(&ev_tap, vionet->data_fd, EV_READ | EV_PERSIST,
	    vionet_rx_event, dev);
	event_base_set(ev_base_rx, &ev_tap);

	/* Wire up event handling for the packet injection pipe. */
	event_set(&ev_inject, pipe_inject[READ], EV_READ | EV_PERSIST,
	    vionet_rx_event, dev);
	event_base_set(ev_base_rx, &ev_inject);

	/* Wire up event handling for our inter-thread communication channel. */
	event_base_set(ev_base_rx, &pipe_rx.read_ev);
	event_add(&pipe_rx.read_ev, NULL);

	/* Begin our event loop with our channel event active. */
	ret = event_base_dispatch(ev_base_rx);
	event_base_free(ev_base_rx);

	log_debug("%s: exiting (%d)", __func__, ret);

	close_fd(pipe_rx.read);
	close_fd(pipe_inject[READ]);

	return (NULL);
}

/*
 * Handle the tx side processing, communicating to the main thread via pipe.
 */
static void *
tx_run_loop(void *arg)
{
	int ret;

	ev_base_tx = event_base_new();

	/* Wire up event handling for our inter-thread communication channel. */
	event_base_set(ev_base_tx, &pipe_tx.read_ev);
	event_add(&pipe_tx.read_ev, NULL);

	/* Begin our event loop with our channel event active. */
	ret = event_base_dispatch(ev_base_tx);
	event_base_free(ev_base_tx);

	log_debug("%s: exiting (%d)", __func__, ret);

	close_fd(pipe_tx.read);

	return (NULL);
}

/*
 * Read events sent by the main thread to the rx thread.
 */
static void
read_pipe_rx(int fd, short event, void *arg)
{
	enum pipe_msg_type msg;

	if (!(event & EV_READ))
		fatalx("%s: invalid event type", __func__);

	msg = vm_pipe_recv(&pipe_rx);

	switch (msg) {
	case VIRTIO_NOTIFY:
	case VIRTIO_THREAD_START:
		event_add(&ev_tap, NULL);
		event_add(&ev_inject, NULL);
		break;
	case VIRTIO_THREAD_PAUSE:
		event_del(&ev_tap);
		event_del(&ev_inject);
		vm_pipe_send(&pipe_main, VIRTIO_THREAD_ACK);
		break;
	case VIRTIO_THREAD_STOP:
		event_del(&ev_tap);
		event_del(&ev_inject);
		event_base_loopexit(ev_base_rx, NULL);
		break;
	default:
		fatalx("%s: invalid channel message: %d", __func__, msg);
	}
}

/*
 * Read events sent by the main thread to the tx thread.
 */
static void
read_pipe_tx(int fd, short event, void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev*)arg;
	struct vionet_dev *vionet = &dev->vionet;
	enum pipe_msg_type msg;
	int ret = 0;

	if (!(event & EV_READ))
		fatalx("%s: invalid event type", __func__);

	msg = vm_pipe_recv(&pipe_tx);

	switch (msg) {
	case VIRTIO_NOTIFY:
		pthread_rwlock_rdlock(&lock);
		ret = vionet_tx(dev);
		pthread_rwlock_unlock(&lock);
		break;
	case VIRTIO_THREAD_START:
		/* Ignore Start messages. */
		break;
	case VIRTIO_THREAD_PAUSE:
		/*
		 * Nothing to do when pausing on the tx side, but ACK so main
		 * thread knows we're not transmitting.
		 */
		vm_pipe_send(&pipe_main, VIRTIO_THREAD_ACK);
		break;
	case VIRTIO_THREAD_STOP:
		event_base_loopexit(ev_base_tx, NULL);
		break;
	default:
		fatalx("%s: invalid channel message: %d", __func__, msg);
	}

	if (ret == 0) {
		/* No notification needed. Return early. */
		return;
	}

	pthread_rwlock_wrlock(&lock);
	if (ret == 1) {
		/* Notify the driver. */
		vionet->cfg.isr_status |= 1;
	} else {
		/* Need a reset. Something went wrong. */
		log_warnx("%s: requesting device reset", __func__);
		vionet->cfg.device_status |= DEVICE_NEEDS_RESET;
		vionet->cfg.isr_status |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
	}
	pthread_rwlock_unlock(&lock);

	vm_pipe_send(&pipe_main, VIRTIO_RAISE_IRQ);
}

/*
 * Read events sent by the rx/tx threads to the main thread.
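 *
 * VIRTIO_RAISE_IRQ asks the vm process to assert the device's irq.
 * VIRTIO_THREAD_ACK counts down the two acknowledgements expected after a
 * reset request; once both the rx and tx threads have acked, the device
 * config is cleared.
 */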
static void
read_pipe_main(int fd, short event, void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev*)arg;
	struct vionet_dev *vionet = &dev->vionet;
	enum pipe_msg_type msg;

	if (!(event & EV_READ))
		fatalx("%s: invalid event type", __func__);

	msg = vm_pipe_recv(&pipe_main);
	switch (msg) {
	case VIRTIO_RAISE_IRQ:
		vionet_assert_pic_irq(dev);
		break;
	case VIRTIO_THREAD_ACK:
		resetting--;
		if (resetting == 0) {
			log_debug("%s: resetting virtio network device %d",
			    __func__, vionet->idx);

			pthread_rwlock_wrlock(&lock);
			vionet->cfg.device_status = 0;
			vionet->cfg.guest_feature = 0;
			vionet->cfg.queue_pfn = 0;
			vionet_update_qa(vionet);
			vionet->cfg.queue_size = 0;
			vionet_update_qs(vionet);
			vionet->cfg.queue_select = 0;
			vionet->cfg.queue_notify = 0;
			vionet->cfg.isr_status = 0;
			vionet->vq[RXQ].last_avail = 0;
			vionet->vq[RXQ].notified_avail = 0;
			vionet->vq[TXQ].last_avail = 0;
			vionet->vq[TXQ].notified_avail = 0;
			pthread_rwlock_unlock(&lock);
		}
		break;
	default:
		fatalx("%s: invalid channel msg: %d", __func__, msg);
	}
}

/*
 * Message the vm process asking to raise the irq. Must be called from the main
 * thread.
 */
static void
vionet_assert_pic_irq(struct virtio_dev *dev)
{
	struct viodev_msg msg;
	int ret;

	memset(&msg, 0, sizeof(msg));
	msg.irq = dev->irq;
	msg.vcpu = 0;	// XXX
	msg.type = VIODEV_MSG_KICK;
	msg.state = INTR_STATE_ASSERT;

	ret = imsg_compose_event2(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
	    &msg, sizeof(msg), ev_base_main);
	if (ret == -1)
		log_warnx("%s: failed to assert irq %d", __func__, dev->irq);
}

/*
 * Message the vm process asking to lower the irq. Must be called from the main
 * thread.
 */
static void
vionet_deassert_pic_irq(struct virtio_dev *dev)
{
	struct viodev_msg msg;
	int ret;

	memset(&msg, 0, sizeof(msg));
	msg.irq = dev->irq;
	msg.vcpu = 0;	// XXX
	msg.type = VIODEV_MSG_KICK;
	msg.state = INTR_STATE_DEASSERT;

	ret = imsg_compose_event2(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
	    &msg, sizeof(msg), ev_base_main);
	if (ret == -1)
		log_warnx("%s: failed to deassert irq %d", __func__, dev->irq);
}