/*	$OpenBSD: virtio.c,v 1.99 2022/12/28 21:30:19 jmc Exp $	*/

/*
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>	/* PAGE_SIZE */
#include <sys/socket.h>

#include <machine/vmmvar.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcidevs.h>
#include <dev/pv/virtioreg.h>
#include <dev/pci/virtio_pcireg.h>
#include <dev/pv/vioblkreg.h>
#include <dev/pv/vioscsireg.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>

#include <errno.h>
#include <event.h>
#include <poll.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "atomicio.h"
#include "pci.h"
#include "vioscsi.h"
#include "virtio.h"
#include "vmd.h"
#include "vmm.h"

extern char *__progname;
struct viornd_dev viornd;
struct vioblk_dev *vioblk;
struct vionet_dev *vionet;
struct vioscsi_dev *vioscsi;
struct vmmci_dev vmmci;

int nr_vionet;
int nr_vioblk;

#define MAXPHYS	(64 * 1024)	/* max raw I/O transfer size */

#define VIRTIO_NET_F_MAC	(1<<5)

#define VMMCI_F_TIMESYNC	(1<<0)
#define VMMCI_F_ACK		(1<<1)
#define VMMCI_F_SYNCRTC		(1<<2)

#define RXQ	0
#define TXQ	1

const char *
vioblk_cmd_name(uint32_t type)
{
	switch (type) {
	case VIRTIO_BLK_T_IN: return "read";
	case VIRTIO_BLK_T_OUT: return "write";
	case VIRTIO_BLK_T_SCSI_CMD: return "scsi read";
	case VIRTIO_BLK_T_SCSI_CMD_OUT: return "scsi write";
	case VIRTIO_BLK_T_FLUSH: return "flush";
	case VIRTIO_BLK_T_FLUSH_OUT: return "flush out";
	case VIRTIO_BLK_T_GET_ID: return "get id";
	default: return "unknown";
	}
}

static const char *
virtio_reg_name(uint8_t reg)
{
	switch (reg) {
	case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature";
	case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature";
	case VIRTIO_CONFIG_QUEUE_PFN: return "queue address";
	case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size";
	case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select";
	case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify";
	case VIRTIO_CONFIG_DEVICE_STATUS: return "device status";
	case VIRTIO_CONFIG_ISR_STATUS: return "isr status";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: return "device config 0";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: return "device config 1";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: return "device config 2";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: return "device config 3";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: return "device config 4";
	default: return "unknown";
	}
}

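/*
 * Worked example (illustrative only; not tied to a particular vmd queue
 * size): for a 256-entry split ring, the descriptor table is 256 * 16
 * bytes and the avail ring 2 * (2 + 256) bytes, which rounds up to 8192
 * bytes at VIRTIO_PAGE_SIZE alignment; the used ring (2 * 2 + 8 * 256
 * bytes) rounds up to another 4096, so vring_size(256) == 12288.
 */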
uint32_t
vring_size(uint32_t vq_size)
{
	uint32_t allocsize1, allocsize2;

	/* allocsize1: descriptor table + avail ring + pad */
	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
	    + sizeof(uint16_t) * (2 + vq_size));
	/* allocsize2: used ring + pad */
	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * 2
	    + sizeof(struct vring_used_elem) * vq_size);

	return allocsize1 + allocsize2;
}

/* Update queue select */
void
viornd_update_qs(void)
{
	struct virtio_vq_info *vq_info;

	/* Invalid queue? */
	if (viornd.cfg.queue_select > 0) {
		viornd.cfg.queue_size = 0;
		return;
	}

	vq_info = &viornd.vq[viornd.cfg.queue_select];

	/* Update queue pfn/size based on queue select */
	viornd.cfg.queue_pfn = vq_info->q_gpa >> 12;
	viornd.cfg.queue_size = vq_info->qs;
}

/* Update queue address */
void
viornd_update_qa(void)
{
	struct virtio_vq_info *vq_info;
	void *hva = NULL;

	/* Invalid queue? */
	if (viornd.cfg.queue_select > 0)
		return;

	vq_info = &viornd.vq[viornd.cfg.queue_select];
	vq_info->q_gpa = (uint64_t)viornd.cfg.queue_pfn * VIRTIO_PAGE_SIZE;

	hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIORND_QUEUE_SIZE));
	if (hva == NULL)
		fatal("viornd_update_qa");
	vq_info->q_hva = hva;
}

int
viornd_notifyq(void)
{
	size_t sz;
	int dxx, ret;
	uint16_t aidx, uidx;
	char *vr, *rnd_data;
	struct vring_desc *desc;
	struct vring_avail *avail;
	struct vring_used *used;
	struct virtio_vq_info *vq_info;

	ret = 0;

	/* Invalid queue? */
	if (viornd.cfg.queue_notify > 0)
		return (0);

	vq_info = &viornd.vq[viornd.cfg.queue_notify];
	vr = vq_info->q_hva;
	if (vr == NULL)
		fatalx("%s: null vring", __func__);

	desc = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
	used = (struct vring_used *)(vr + vq_info->vq_usedoffset);

	aidx = avail->idx & VIORND_QUEUE_MASK;
	uidx = used->idx & VIORND_QUEUE_MASK;

	dxx = avail->ring[aidx] & VIORND_QUEUE_MASK;

	sz = desc[dxx].len;
	if (sz > MAXPHYS)
		fatalx("viornd descriptor size too large (%zu)", sz);

	rnd_data = malloc(sz);

	if (rnd_data != NULL) {
		arc4random_buf(rnd_data, sz);
		if (write_mem(desc[dxx].addr, rnd_data, sz)) {
			log_warnx("viornd: can't write random data @ "
			    "0x%llx", desc[dxx].addr);
		} else {
			/* ret == 1 -> interrupt needed */
			/* XXX check VIRTIO_F_NO_INTR */
			ret = 1;
			viornd.cfg.isr_status = 1;
			used->ring[uidx].id = dxx;
			used->ring[uidx].len = sz;
			__sync_synchronize();
			used->idx++;
		}
		free(rnd_data);
	} else
		fatal("memory allocation error for viornd data");

	return (ret);
}

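/*
 * virtio_rnd_io
 *
 * I/O handler for the entropy device's legacy virtio registers.
 * dir == 0 is a register write from the guest (including the queue
 * notification that triggers viornd_notifyq()); any other value is a
 * register read.
 */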
int
virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *unused, uint8_t sz)
{
	*intr = 0xFF;

	if (dir == 0) {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
		case VIRTIO_CONFIG_QUEUE_SIZE:
		case VIRTIO_CONFIG_ISR_STATUS:
			log_warnx("%s: illegal write %x to %s",
			    __progname, *data, virtio_reg_name(reg));
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			viornd.cfg.guest_feature = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_PFN:
			viornd.cfg.queue_pfn = *data;
			viornd_update_qa();
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			viornd.cfg.queue_select = *data;
			viornd_update_qs();
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			viornd.cfg.queue_notify = *data;
			if (viornd_notifyq())
				*intr = 1;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			viornd.cfg.device_status = *data;
			break;
		}
	} else {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
			*data = viornd.cfg.device_feature;
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			*data = viornd.cfg.guest_feature;
			break;
		case VIRTIO_CONFIG_QUEUE_PFN:
			*data = viornd.cfg.queue_pfn;
			break;
		case VIRTIO_CONFIG_QUEUE_SIZE:
			*data = viornd.cfg.queue_size;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			*data = viornd.cfg.queue_select;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			*data = viornd.cfg.queue_notify;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			*data = viornd.cfg.device_status;
			break;
		case VIRTIO_CONFIG_ISR_STATUS:
			*data = viornd.cfg.isr_status;
			viornd.cfg.isr_status = 0;
			vcpu_deassert_pic_irq(viornd.vm_id, 0, viornd.irq);
			break;
		}
	}
	return (0);
}

void
vioblk_update_qa(struct vioblk_dev *dev)
{
	struct virtio_vq_info *vq_info;
	void *hva = NULL;

	/* Invalid queue? */
	if (dev->cfg.queue_select > 0)
		return;

	vq_info = &dev->vq[dev->cfg.queue_select];
	vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;

	hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIOBLK_QUEUE_SIZE));
	if (hva == NULL)
		fatal("vioblk_update_qa");
	vq_info->q_hva = hva;
}

void
vioblk_update_qs(struct vioblk_dev *dev)
{
	struct virtio_vq_info *vq_info;

	/* Invalid queue? */
	if (dev->cfg.queue_select > 0) {
		dev->cfg.queue_size = 0;
		return;
	}

	vq_info = &dev->vq[dev->cfg.queue_select];

	/* Update queue pfn/size based on queue select */
	dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
	dev->cfg.queue_size = vq_info->qs;
}

static void
vioblk_free_info(struct ioinfo *info)
{
	if (!info)
		return;
	free(info->buf);
	free(info);
}

static struct ioinfo *
vioblk_start_read(struct vioblk_dev *dev, off_t sector, size_t sz)
{
	struct ioinfo *info;

	/* Limit to 64M for now */
	if (sz > (1 << 26)) {
		log_warnx("%s: read size exceeded 64M", __func__);
		return (NULL);
	}

	info = calloc(1, sizeof(*info));
	if (!info)
		goto nomem;
	info->buf = malloc(sz);
	if (info->buf == NULL)
		goto nomem;
	info->len = sz;
	info->offset = sector * VIRTIO_BLK_SECTOR_SIZE;
	info->file = &dev->file;

	return info;

nomem:
	free(info);
	log_warn("malloc error vioblk read");
	return (NULL);
}

static const uint8_t *
vioblk_finish_read(struct ioinfo *info)
{
	struct virtio_backing *file;

	file = info->file;
	if (file->pread(file->p, info->buf, info->len, info->offset) != info->len) {
		info->error = errno;
		log_warn("vioblk read error");
		return NULL;
	}

	return info->buf;
}

static struct ioinfo *
vioblk_start_write(struct vioblk_dev *dev, off_t sector,
    paddr_t addr, size_t len)
{
	struct ioinfo *info;

	/* Limit to 64M for now */
	if (len > (1 << 26)) {
		log_warnx("%s: write size exceeded 64M", __func__);
		return (NULL);
	}

	info = calloc(1, sizeof(*info));
	if (!info)
		goto nomem;

	info->buf = malloc(len);
	if (info->buf == NULL)
		goto nomem;
	info->len = len;
	info->offset = sector * VIRTIO_BLK_SECTOR_SIZE;
	info->file = &dev->file;

	if (read_mem(addr, info->buf, info->len)) {
		vioblk_free_info(info);
		return NULL;
	}

	return info;

nomem:
	free(info);
	log_warn("malloc error vioblk write");
	return (NULL);
}

static int
vioblk_finish_write(struct ioinfo *info)
{
	struct virtio_backing *file;

	file = info->file;
	if (file->pwrite(file->p, info->buf, info->len, info->offset) != info->len) {
		log_warn("vioblk write error");
		return EIO;
	}
	return 0;
}

/*
 * XXX in various cases, ds should be set to VIRTIO_BLK_S_IOERR, if we can
 */
int
vioblk_notifyq(struct vioblk_dev *dev)
{
	uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx;
	uint8_t ds;
	int cnt;
	off_t secbias;
	char *vr;
	struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc;
	struct vring_avail *avail;
	struct vring_used *used;
	struct virtio_blk_req_hdr cmd;
	struct virtio_vq_info *vq_info;

	/* Invalid queue? */
	if (dev->cfg.queue_notify > 0)
		return (0);

	vq_info = &dev->vq[dev->cfg.queue_notify];
	vr = vq_info->q_hva;
	if (vr == NULL)
		fatalx("%s: null vring", __func__);

	/* Compute offsets in ring of descriptors, avail ring, and used ring */
	desc = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
	used = (struct vring_used *)(vr + vq_info->vq_usedoffset);

	idx = vq_info->last_avail & VIOBLK_QUEUE_MASK;

	if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) {
		log_debug("%s - nothing to do?", __func__);
		return (0);
	}

	while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) {

		ds = VIRTIO_BLK_S_IOERR;
		cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK;
		cmd_desc = &desc[cmd_desc_idx];

		if ((cmd_desc->flags & VRING_DESC_F_NEXT) == 0) {
			log_warnx("unchained vioblk cmd descriptor received "
			    "(idx %d)", cmd_desc_idx);
			goto out;
		}

		/* Read command from descriptor ring */
		if (cmd_desc->flags & VRING_DESC_F_WRITE) {
			log_warnx("vioblk: unexpected writable cmd descriptor "
			    "%d", cmd_desc_idx);
			goto out;
		}
		if (read_mem(cmd_desc->addr, &cmd, sizeof(cmd))) {
			log_warnx("vioblk: command read_mem error @ 0x%llx",
			    cmd_desc->addr);
			goto out;
		}

		switch (cmd.type) {
		case VIRTIO_BLK_T_IN:
			/* first descriptor */
			secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
			secdata_desc = &desc[secdata_desc_idx];

			if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) {
				log_warnx("unchained vioblk data descriptor "
				    "received (idx %d)", cmd_desc_idx);
				goto out;
			}

			cnt = 0;
			secbias = 0;
			do {
				struct ioinfo *info;
				const uint8_t *secdata;

				if ((secdata_desc->flags & VRING_DESC_F_WRITE)
				    == 0) {
					log_warnx("vioblk: unwritable data "
					    "descriptor %d", secdata_desc_idx);
					goto out;
				}

				info = vioblk_start_read(dev,
				    cmd.sector + secbias, secdata_desc->len);

				if (info == NULL) {
					log_warnx("vioblk: can't start read");
					goto out;
				}

				/* read the data, use current data descriptor */
				secdata = vioblk_finish_read(info);
				if (secdata == NULL) {
					vioblk_free_info(info);
					log_warnx("vioblk: block read error, "
					    "sector %lld", cmd.sector);
					goto out;
				}

				if (write_mem(secdata_desc->addr, secdata,
				    secdata_desc->len)) {
					log_warnx("can't write sector "
					    "data to gpa @ 0x%llx",
					    secdata_desc->addr);
					vioblk_free_info(info);
					goto out;
				}

				vioblk_free_info(info);

				secbias += (secdata_desc->len /
				    VIRTIO_BLK_SECTOR_SIZE);
				secdata_desc_idx = secdata_desc->next &
				    VIOBLK_QUEUE_MASK;
				secdata_desc = &desc[secdata_desc_idx];

				/* Guard against infinite chains */
				if (++cnt >= VIOBLK_QUEUE_SIZE) {
					log_warnx("%s: descriptor table "
					    "invalid", __func__);
					goto out;
				}
			} while (secdata_desc->flags & VRING_DESC_F_NEXT);

			ds_desc_idx = secdata_desc_idx;
			ds_desc = secdata_desc;

			ds = VIRTIO_BLK_S_OK;
			break;
		case VIRTIO_BLK_T_OUT:
			secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
			secdata_desc = &desc[secdata_desc_idx];

			if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) {
				log_warnx("wr vioblk: unchained vioblk data "
				    "descriptor received (idx %d)",
				    cmd_desc_idx);
				goto out;
			}

			if (secdata_desc->len > dev->max_xfer) {
				log_warnx("%s: invalid read size %d requested",
				    __func__, secdata_desc->len);
				goto out;
			}

			cnt = 0;
			secbias = 0;
			do {
				struct ioinfo *info;

				if (secdata_desc->flags & VRING_DESC_F_WRITE) {
					log_warnx("wr vioblk: unexpected "
					    "writable data descriptor %d",
					    secdata_desc_idx);
					goto out;
				}

				info = vioblk_start_write(dev,
				    cmd.sector + secbias,
				    secdata_desc->addr, secdata_desc->len);

				if (info == NULL) {
					log_warnx("wr vioblk: can't read "
					    "sector data @ 0x%llx",
					    secdata_desc->addr);
					goto out;
				}

				if (vioblk_finish_write(info)) {
					log_warnx("wr vioblk: disk write "
					    "error");
					vioblk_free_info(info);
					goto out;
				}

				vioblk_free_info(info);

				secbias += secdata_desc->len /
				    VIRTIO_BLK_SECTOR_SIZE;

				secdata_desc_idx = secdata_desc->next &
				    VIOBLK_QUEUE_MASK;
				secdata_desc = &desc[secdata_desc_idx];

				/* Guard against infinite chains */
				if (++cnt >= VIOBLK_QUEUE_SIZE) {
					log_warnx("%s: descriptor table "
					    "invalid", __func__);
					goto out;
				}
			} while (secdata_desc->flags & VRING_DESC_F_NEXT);

			ds_desc_idx = secdata_desc_idx;
			ds_desc = secdata_desc;

			ds = VIRTIO_BLK_S_OK;
			break;
		case VIRTIO_BLK_T_FLUSH:
		case VIRTIO_BLK_T_FLUSH_OUT:
			ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
			ds_desc = &desc[ds_desc_idx];

			ds = VIRTIO_BLK_S_UNSUPP;
			break;
		case VIRTIO_BLK_T_GET_ID:
			secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
			secdata_desc = &desc[secdata_desc_idx];

			/*
			 * We don't support this command yet. While it's not
			 * officially part of the virtio spec (will be in v1.2)
			 * there's no feature to negotiate. Linux drivers will
			 * often send this command regardless.
			 *
			 * When the command is received, it should appear as a
			 * chain of 3 descriptors, similar to the IN/OUT
			 * commands. The middle descriptor should have a
			 * length of VIRTIO_BLK_ID_BYTES bytes.
			 */
			if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) {
				log_warnx("id vioblk: unchained vioblk data "
				    "descriptor received (idx %d)",
				    cmd_desc_idx);
				goto out;
			}

			/*
			 * Skip the data descriptor.
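			 * A complete implementation would copy a
			 * VIRTIO_BLK_ID_BYTES serial string into it; vmd only
			 * reports VIRTIO_BLK_S_UNSUPP in the status byte.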
			 */
			ds_desc_idx = secdata_desc->next & VIOBLK_QUEUE_MASK;
			ds_desc = &desc[ds_desc_idx];

			ds = VIRTIO_BLK_S_UNSUPP;
			break;
		default:
			log_warnx("%s: unsupported command 0x%x", __func__,
			    cmd.type);
			ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
			ds_desc = &desc[ds_desc_idx];

			ds = VIRTIO_BLK_S_UNSUPP;
			break;
		}

		if ((ds_desc->flags & VRING_DESC_F_WRITE) == 0) {
			log_warnx("%s: ds descriptor %d unwritable", __func__,
			    ds_desc_idx);
			goto out;
		}
		if (write_mem(ds_desc->addr, &ds, sizeof(ds))) {
			log_warnx("%s: can't write device status data @ 0x%llx",
			    __func__, ds_desc->addr);
			goto out;
		}

		dev->cfg.isr_status = 1;
		used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
		used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len;
		__sync_synchronize();
		used->idx++;

		vq_info->last_avail = avail->idx & VIOBLK_QUEUE_MASK;
		idx = (idx + 1) & VIOBLK_QUEUE_MASK;
	}
out:
	return (1);
}

int
virtio_blk_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *cookie, uint8_t sz)
{
	struct vioblk_dev *dev = (struct vioblk_dev *)cookie;

	*intr = 0xFF;

	if (dir == 0) {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
		case VIRTIO_CONFIG_QUEUE_SIZE:
		case VIRTIO_CONFIG_ISR_STATUS:
			log_warnx("%s: illegal write %x to %s",
			    __progname, *data, virtio_reg_name(reg));
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			dev->cfg.guest_feature = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_PFN:
			dev->cfg.queue_pfn = *data;
			vioblk_update_qa(dev);
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			dev->cfg.queue_select = *data;
			vioblk_update_qs(dev);
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			dev->cfg.queue_notify = *data;
			if (vioblk_notifyq(dev))
				*intr = 1;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			dev->cfg.device_status = *data;
			if (dev->cfg.device_status == 0) {
				log_debug("%s: device reset", __func__);
				dev->cfg.guest_feature = 0;
				dev->cfg.queue_pfn = 0;
				vioblk_update_qa(dev);
				dev->cfg.queue_size = 0;
				vioblk_update_qs(dev);
				dev->cfg.queue_select = 0;
				dev->cfg.queue_notify = 0;
				dev->cfg.isr_status = 0;
				dev->vq[0].last_avail = 0;
				vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq);
			}
			break;
		default:
			break;
		}
	} else {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
			switch (sz) {
			case 4:
				*data = (uint32_t)(dev->sz);
				break;
			case 2:
				*data &= 0xFFFF0000;
				*data |= (uint32_t)(dev->sz) & 0xFFFF;
				break;
			case 1:
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->sz) & 0xFF;
				break;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->sz >> 8) & 0xFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->sz >> 16) & 0xFF;
			} else if (sz == 2) {
				*data &= 0xFFFF0000;
				*data |= (uint32_t)(dev->sz >> 16) & 0xFFFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->sz >> 24) & 0xFF;
			}
			/* XXX handle invalid sz */
			break;
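		/*
		 * Config offsets 4-7 hold the high 32 bits of the capacity
		 * (in 512-byte sectors), readable at byte, word or dword
		 * granularity just like the low half above.
		 */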
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
			switch (sz) {
			case 4:
				*data = (uint32_t)(dev->sz >> 32);
				break;
			case 2:
				*data &= 0xFFFF0000;
				*data |= (uint32_t)(dev->sz >> 32) & 0xFFFF;
				break;
			case 1:
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->sz >> 32) & 0xFF;
				break;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->sz >> 40) & 0xFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 6:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->sz >> 48) & 0xFF;
			} else if (sz == 2) {
				*data &= 0xFFFF0000;
				*data |= (uint32_t)(dev->sz >> 48) & 0xFFFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 7:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->sz >> 56) & 0xFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8:
			switch (sz) {
			case 4:
				*data = (uint32_t)(dev->max_xfer);
				break;
			case 2:
				*data &= 0xFFFF0000;
				*data |= (uint32_t)(dev->max_xfer) & 0xFFFF;
				break;
			case 1:
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->max_xfer) & 0xFF;
				break;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 9:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->max_xfer >> 8) & 0xFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 10:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->max_xfer >> 16) & 0xFF;
			} else if (sz == 2) {
				*data &= 0xFFFF0000;
				*data |= (uint32_t)(dev->max_xfer >> 16)
				    & 0xFFFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 11:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->max_xfer >> 24) & 0xFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_FEATURES:
			*data = dev->cfg.device_feature;
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			*data = dev->cfg.guest_feature;
			break;
		case VIRTIO_CONFIG_QUEUE_PFN:
			*data = dev->cfg.queue_pfn;
			break;
		case VIRTIO_CONFIG_QUEUE_SIZE:
			if (sz == 4)
				*data = dev->cfg.queue_size;
			else if (sz == 2) {
				*data &= 0xFFFF0000;
				*data |= (uint16_t)dev->cfg.queue_size;
			} else if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint8_t)dev->cfg.queue_size;
			}
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			*data = dev->cfg.queue_select;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			*data = dev->cfg.queue_notify;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			if (sz == 4)
				*data = dev->cfg.device_status;
			else if (sz == 2) {
				*data &= 0xFFFF0000;
				*data |= (uint16_t)dev->cfg.device_status;
			} else if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint8_t)dev->cfg.device_status;
			}
			break;
		case VIRTIO_CONFIG_ISR_STATUS:
			*data = dev->cfg.isr_status;
			dev->cfg.isr_status = 0;
			vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq);
			break;
		}
	}
	return (0);
}

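/*
 * virtio_net_io
 *
 * I/O handler for the network device.  All register accesses run with
 * dev->mutex held, since the rx event thread (vionet_rx_event) updates
 * the same device state.
 */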
int
virtio_net_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *cookie, uint8_t sz)
{
	struct vionet_dev *dev = (struct vionet_dev *)cookie;

	*intr = 0xFF;
	mutex_lock(&dev->mutex);

	if (dir == 0) {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
		case VIRTIO_CONFIG_QUEUE_SIZE:
		case VIRTIO_CONFIG_ISR_STATUS:
			log_warnx("%s: illegal write %x to %s",
			    __progname, *data, virtio_reg_name(reg));
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			dev->cfg.guest_feature = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_PFN:
			dev->cfg.queue_pfn = *data;
			vionet_update_qa(dev);
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			dev->cfg.queue_select = *data;
			vionet_update_qs(dev);
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			dev->cfg.queue_notify = *data;
			if (vionet_notifyq(dev))
				*intr = 1;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			dev->cfg.device_status = *data;
			if (dev->cfg.device_status == 0) {
				log_debug("%s: device reset", __func__);
				dev->cfg.guest_feature = 0;
				dev->cfg.queue_pfn = 0;
				vionet_update_qa(dev);
				dev->cfg.queue_size = 0;
				vionet_update_qs(dev);
				dev->cfg.queue_select = 0;
				dev->cfg.queue_notify = 0;
				dev->cfg.isr_status = 0;
				dev->vq[RXQ].last_avail = 0;
				dev->vq[RXQ].notified_avail = 0;
				dev->vq[TXQ].last_avail = 0;
				dev->vq[TXQ].notified_avail = 0;
				vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq);
			}
			break;
		default:
			break;
		}
	} else {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1:
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2:
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
			*data = dev->mac[reg -
			    VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI];
			break;
		case VIRTIO_CONFIG_DEVICE_FEATURES:
			*data = dev->cfg.device_feature;
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			*data = dev->cfg.guest_feature;
			break;
		case VIRTIO_CONFIG_QUEUE_PFN:
			*data = dev->cfg.queue_pfn;
			break;
		case VIRTIO_CONFIG_QUEUE_SIZE:
			*data = dev->cfg.queue_size;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			*data = dev->cfg.queue_select;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			*data = dev->cfg.queue_notify;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			*data = dev->cfg.device_status;
			break;
		case VIRTIO_CONFIG_ISR_STATUS:
			*data = dev->cfg.isr_status;
			dev->cfg.isr_status = 0;
			vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq);
			break;
		}
	}

	mutex_unlock(&dev->mutex);
	return (0);
}

/*
 * Must be called with dev->mutex acquired.
 */
void
vionet_update_qa(struct vionet_dev *dev)
{
	struct virtio_vq_info *vq_info;
	void *hva = NULL;

	/* Invalid queue? */
	if (dev->cfg.queue_select > 1)
		return;

	vq_info = &dev->vq[dev->cfg.queue_select];
	vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;

	hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIONET_QUEUE_SIZE));
	if (hva == NULL)
		fatal("vionet_update_qa");
	vq_info->q_hva = hva;
}

/*
 * Must be called with dev->mutex acquired.
 */
void
vionet_update_qs(struct vionet_dev *dev)
{
	struct virtio_vq_info *vq_info;

	/* Invalid queue? */
	if (dev->cfg.queue_select > 1) {
		dev->cfg.queue_size = 0;
		return;
	}

	vq_info = &dev->vq[dev->cfg.queue_select];

	/* Update queue pfn/size based on queue select */
	dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
	dev->cfg.queue_size = vq_info->qs;
}

/*
 * vionet_enq_rx
 *
 * Take a given packet from the host-side tap and copy it into the guest's
 * buffers utilizing the rx virtio ring. If the packet length is invalid
 * (too small or too large) or if there are not enough buffers available,
 * the packet is dropped.
 *
 * Must be called with dev->mutex acquired.
 */
int
vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_t sz, int *spc)
{
	uint16_t dxx, idx, hdr_desc_idx, chain_hdr_idx;
	char *vr = NULL;
	size_t bufsz = 0, off = 0, pkt_offset = 0, chunk_size = 0;
	size_t chain_len = 0;
	struct vring_desc *desc, *pkt_desc, *hdr_desc;
	struct vring_avail *avail;
	struct vring_used *used;
	struct virtio_vq_info *vq_info;
	struct virtio_net_hdr hdr;
	size_t hdr_sz;

	if (sz < VIONET_MIN_TXLEN || sz > VIONET_MAX_TXLEN) {
		log_warn("%s: invalid packet size", __func__);
		return (0);
	}

	hdr_sz = sizeof(hdr);

	if (!(dev->cfg.device_status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK))
		return (0);

	vq_info = &dev->vq[RXQ];
	vr = vq_info->q_hva;
	if (vr == NULL)
		fatalx("%s: null vring", __func__);

	/* Compute offsets in ring of descriptors, avail ring, and used ring */
	desc = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
	used = (struct vring_used *)(vr + vq_info->vq_usedoffset);

	idx = vq_info->last_avail & VIONET_QUEUE_MASK;
	if ((vq_info->notified_avail & VIONET_QUEUE_MASK) == idx) {
		log_debug("%s: insufficient available buffer capacity, "
		    "dropping packet.", __func__);
		return (0);
	}

	hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK;
	hdr_desc = &desc[hdr_desc_idx];

	dxx = hdr_desc_idx;
	chain_hdr_idx = dxx;
	chain_len = 0;

	/* Process the descriptor and walk any potential chain. */
	do {
		off = 0;
		pkt_desc = &desc[dxx];
		if (!(pkt_desc->flags & VRING_DESC_F_WRITE)) {
			log_warnx("%s: invalid descriptor, not writable",
			    __func__);
			return (0);
		}

		/* How much data do we get to write? */
		if (sz - bufsz > pkt_desc->len)
			chunk_size = pkt_desc->len;
		else
			chunk_size = sz - bufsz;

		if (chain_len == 0) {
			off = hdr_sz;
			if (chunk_size == pkt_desc->len)
				chunk_size -= off;
		}

		/* Write a chunk of data if we need to */
		if (chunk_size && write_mem(pkt_desc->addr + off,
		    pkt + pkt_offset, chunk_size)) {
			log_warnx("%s: failed to write to buffer 0x%llx",
			    __func__, pkt_desc->addr);
			return (0);
		}

		chain_len += chunk_size + off;
		bufsz += chunk_size;
		pkt_offset += chunk_size;

		dxx = pkt_desc->next & VIONET_QUEUE_MASK;
	} while (bufsz < sz && pkt_desc->flags & VRING_DESC_F_NEXT);

	/* Move our marker in the ring...*/
	vq_info->last_avail = (vq_info->last_avail + 1) &
	    VIONET_QUEUE_MASK;

	/*
	 * Prepend the virtio net header in the first buffer.
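	 * The device offers no offload features (only VIRTIO_NET_F_MAC), so
	 * every header field other than hdr_len is left zeroed.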
	 */
	memset(&hdr, 0, sizeof(hdr));
	hdr.hdr_len = hdr_sz;
	if (write_mem(hdr_desc->addr, &hdr, hdr_sz)) {
		log_warnx("vionet: rx enq header write_mem error @ 0x%llx",
		    hdr_desc->addr);
		return (0);
	}

	/* Update the index field in the used ring. This must be done last. */
	dev->cfg.isr_status = 1;
	*spc = (vq_info->notified_avail - vq_info->last_avail)
	    & VIONET_QUEUE_MASK;

	/* Update the list of used buffers. */
	used->ring[used->idx & VIONET_QUEUE_MASK].id = chain_hdr_idx;
	used->ring[used->idx & VIONET_QUEUE_MASK].len = chain_len;
	__sync_synchronize();
	used->idx++;

	return (1);
}

/*
 * vionet_rx
 *
 * Enqueue data that was received on a tap file descriptor
 * to the vionet device queue.
 *
 * Must be called with dev->mutex acquired.
 */
static int
vionet_rx(struct vionet_dev *dev)
{
	char buf[PAGE_SIZE];
	int num_enq = 0, spc = 0;
	struct ether_header *eh;
	ssize_t sz;

	do {
		sz = read(dev->fd, buf, sizeof(buf));
		if (sz == -1) {
			/*
			 * If we get EAGAIN, no data is currently available;
			 * do not treat this as an error.
			 */
			if (errno != EAGAIN)
				log_warn("unexpected read error on vionet "
				    "device");
		} else if (sz > 0) {
			eh = (struct ether_header *)buf;
			if (!dev->lockedmac ||
			    ETHER_IS_MULTICAST(eh->ether_dhost) ||
			    memcmp(eh->ether_dhost, dev->mac,
			    sizeof(eh->ether_dhost)) == 0)
				num_enq += vionet_enq_rx(dev, buf, sz, &spc);
		} else if (sz == 0) {
			log_debug("process_rx: no data");
			break;
		}
	} while (spc > 0 && sz > 0);

	return (num_enq);
}

/*
 * vionet_rx_event
 *
 * Called from the event handling thread when new data can be
 * received on the tap fd of a vionet device.
 */
static void
vionet_rx_event(int fd, short kind, void *arg)
{
	struct vionet_dev *dev = arg;

	mutex_lock(&dev->mutex);

	if (vionet_rx(dev) > 0) {
		/* XXX: vcpu_id */
		vcpu_assert_pic_irq(dev->vm_id, 0, dev->irq);
	}

	mutex_unlock(&dev->mutex);
}

/*
 * Must be called with dev->mutex acquired.
 */
void
vionet_notify_rx(struct vionet_dev *dev)
{
	char *vr;
	struct vring_avail *avail;
	struct virtio_vq_info *vq_info;

	vq_info = &dev->vq[RXQ];
	vr = vq_info->q_hva;
	if (vr == NULL)
		fatalx("%s: null vring", __func__);

	/* Compute offset into avail ring */
	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
	vq_info->notified_avail = avail->idx - 1;
}

/*
 * Must be called with dev->mutex acquired.
 */
int
vionet_notifyq(struct vionet_dev *dev)
{
	int ret = 0;

	switch (dev->cfg.queue_notify) {
	case RXQ:
		vionet_notify_rx(dev);
		break;
	case TXQ:
		ret = vionet_notify_tx(dev);
		break;
	default:
		/*
		 * Catch the unimplemented queue ID 2 (control queue) as
		 * well as any bogus queue IDs.
		 */
		log_debug("%s: notify for unimplemented queue ID %d",
		    __func__, dev->cfg.queue_notify);
		break;
	}

	return (ret);
}

/*
 * Must be called with dev->mutex acquired.
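 *
 * vionet_notify_tx drains the tx virtqueue: each packet is copied out of
 * guest memory and either written to the tap fd or, for a "local"
 * interface, answered by the built-in DHCP responder.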
 */
int
vionet_notify_tx(struct vionet_dev *dev)
{
	uint16_t idx, pkt_desc_idx, hdr_desc_idx, dxx, cnt;
	size_t pktsz, chunk_size = 0;
	ssize_t dhcpsz = 0;
	int num_enq, ofs, spc = 0;
	char *vr = NULL, *pkt = NULL, *dhcppkt = NULL;
	struct vring_desc *desc, *pkt_desc, *hdr_desc;
	struct vring_avail *avail;
	struct vring_used *used;
	struct virtio_vq_info *vq_info;
	struct ether_header *eh;

	vq_info = &dev->vq[TXQ];
	vr = vq_info->q_hva;
	if (vr == NULL)
		fatalx("%s: null vring", __func__);

	/* Compute offsets in ring of descriptors, avail ring, and used ring */
	desc = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
	used = (struct vring_used *)(vr + vq_info->vq_usedoffset);

	num_enq = 0;

	idx = vq_info->last_avail & VIONET_QUEUE_MASK;

	if ((avail->idx & VIONET_QUEUE_MASK) == idx) {
		log_debug("%s - nothing to do?", __func__);
		return (0);
	}

	while ((avail->idx & VIONET_QUEUE_MASK) != idx) {
		hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK;
		hdr_desc = &desc[hdr_desc_idx];
		pktsz = 0;

		cnt = 0;
		dxx = hdr_desc_idx;
		do {
			pktsz += desc[dxx].len;
			dxx = desc[dxx].next & VIONET_QUEUE_MASK;

			/*
			 * Virtio 1.0, cs04, section 2.4.5:
			 *  "The number of descriptors in the table is defined
			 *   by the queue size for this virtqueue: this is the
			 *   maximum possible descriptor chain length."
			 */
			if (++cnt >= VIONET_QUEUE_SIZE) {
				log_warnx("%s: descriptor table invalid",
				    __func__);
				goto out;
			}
		} while (desc[dxx].flags & VRING_DESC_F_NEXT);

		pktsz += desc[dxx].len;

		/* Remove virtio header descriptor len */
		pktsz -= hdr_desc->len;

		/* Drop packets violating device MTU-based limits */
		if (pktsz < VIONET_MIN_TXLEN || pktsz > VIONET_MAX_TXLEN) {
			log_warnx("%s: invalid packet size %lu", __func__,
			    pktsz);
			goto drop_packet;
		}
		pkt = malloc(pktsz);
		if (pkt == NULL) {
			log_warn("malloc error alloc packet buf");
			goto out;
		}

		ofs = 0;
		pkt_desc_idx = hdr_desc->next & VIONET_QUEUE_MASK;
		pkt_desc = &desc[pkt_desc_idx];

		while (pkt_desc->flags & VRING_DESC_F_NEXT) {
			/* must be not writable */
			if (pkt_desc->flags & VRING_DESC_F_WRITE) {
				log_warnx("unexpected writable tx desc "
				    "%d", pkt_desc_idx);
				goto out;
			}

			/* Check we don't read beyond allocated pktsz */
			if (pkt_desc->len > pktsz - ofs) {
				log_warnx("%s: descriptor len past pkt len",
				    __func__);
				chunk_size = pktsz - ofs;
			} else
				chunk_size = pkt_desc->len;

			/* Read packet from descriptor ring */
			if (read_mem(pkt_desc->addr, pkt + ofs, chunk_size)) {
				log_warnx("vionet: packet read_mem error "
				    "@ 0x%llx", pkt_desc->addr);
				goto out;
			}

			ofs += pkt_desc->len;
			pkt_desc_idx = pkt_desc->next & VIONET_QUEUE_MASK;
			pkt_desc = &desc[pkt_desc_idx];
		}

		/* Now handle tail descriptor - must be not writable */
		if (pkt_desc->flags & VRING_DESC_F_WRITE) {
			log_warnx("unexpected writable tx descriptor %d",
			    pkt_desc_idx);
			goto out;
		}

		/* Check we don't read beyond allocated pktsz */
		if (pkt_desc->len > pktsz - ofs) {
			log_warnx("%s: descriptor len past pkt len", __func__);
			chunk_size = pktsz - ofs;
		} else
			chunk_size = pkt_desc->len;

		/* Read packet from descriptor ring */
		if (read_mem(pkt_desc->addr, pkt + ofs, chunk_size)) {
			log_warnx("vionet: packet read_mem error @ "
			    "0x%llx", pkt_desc->addr);
			goto out;
		}

		/* reject other source addresses */
		if (dev->lockedmac && pktsz >= ETHER_HDR_LEN &&
		    (eh = (struct ether_header *)pkt) &&
		    memcmp(eh->ether_shost, dev->mac,
		    sizeof(eh->ether_shost)) != 0)
			log_debug("vionet: wrong source address %s for vm %d",
			    ether_ntoa((struct ether_addr *)
			    eh->ether_shost), dev->vm_id);
		else if (dev->local &&
		    (dhcpsz = dhcp_request(dev, pkt, pktsz, &dhcppkt)) != -1) {
			log_debug("vionet: dhcp request,"
			    " local response size %zd", dhcpsz);

		/* XXX signed vs unsigned here, funky cast */
		} else if (write(dev->fd, pkt, pktsz) != (int)pktsz) {
			log_warnx("vionet: tx failed writing to tap: "
			    "%d", errno);
			goto out;
		}

	drop_packet:
		dev->cfg.isr_status = 1;
		used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_desc_idx;
		used->ring[used->idx & VIONET_QUEUE_MASK].len = hdr_desc->len;
		__sync_synchronize();
		used->idx++;

		vq_info->last_avail = avail->idx & VIONET_QUEUE_MASK;
		idx = (idx + 1) & VIONET_QUEUE_MASK;

		num_enq++;

		free(pkt);
		pkt = NULL;
	}

	if (dhcpsz > 0)
		vionet_enq_rx(dev, dhcppkt, dhcpsz, &spc);

out:
	free(pkt);
	free(dhcppkt);

	return (1);
}

int
vmmci_ctl(unsigned int cmd)
{
	struct timeval tv = { 0, 0 };

	if ((vmmci.cfg.device_status &
	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) == 0)
		return (-1);

	if (cmd == vmmci.cmd)
		return (0);

	switch (cmd) {
	case VMMCI_NONE:
		break;
	case VMMCI_SHUTDOWN:
	case VMMCI_REBOOT:
		/* Update command */
		vmmci.cmd = cmd;

		/*
		 * vmm VMs do not support powerdown, send a reboot request
		 * instead and turn it off after the triple fault.
		 */
		if (cmd == VMMCI_SHUTDOWN)
			cmd = VMMCI_REBOOT;

		/* Trigger interrupt */
		vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
		vcpu_assert_pic_irq(vmmci.vm_id, 0, vmmci.irq);

		/* Add ACK timeout */
		tv.tv_sec = VMMCI_TIMEOUT;
		evtimer_add(&vmmci.timeout, &tv);
		break;
	case VMMCI_SYNCRTC:
		if (vmmci.cfg.guest_feature & VMMCI_F_SYNCRTC) {
			/* RTC updated, request guest VM resync of its RTC */
			vmmci.cmd = cmd;

			vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
			vcpu_assert_pic_irq(vmmci.vm_id, 0, vmmci.irq);
		} else {
			log_debug("%s: RTC sync skipped (guest does not "
			    "support RTC sync)\n", __func__);
		}
		break;
	default:
		fatalx("invalid vmmci command: %d", cmd);
	}

	return (0);
}

void
vmmci_ack(unsigned int cmd)
{
	struct timeval tv = { 0, 0 };

	switch (cmd) {
	case VMMCI_NONE:
		break;
	case VMMCI_SHUTDOWN:
		/*
		 * The shutdown was requested by the VM if we don't have
		 * a pending shutdown request. In this case add a short
		 * timeout to give the VM a chance to reboot before the
		 * timer expires.
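		 * If it never does, the vmmci_timeout() handler fires after
		 * VMMCI_TIMEOUT seconds and shuts the VM down forcefully.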
		 */
		if (vmmci.cmd == 0) {
			log_debug("%s: vm %u requested shutdown", __func__,
			    vmmci.vm_id);
			tv.tv_sec = VMMCI_TIMEOUT;
			evtimer_add(&vmmci.timeout, &tv);
			return;
		}
		/* FALLTHROUGH */
	case VMMCI_REBOOT:
		/*
		 * If the VM acknowledged our shutdown request, give it
		 * enough time to shutdown or reboot gracefully. This
		 * might take a considerable amount of time (running
		 * rc.shutdown on the VM), so increase the timeout before
		 * killing it forcefully.
		 */
		if (cmd == vmmci.cmd &&
		    evtimer_pending(&vmmci.timeout, NULL)) {
			log_debug("%s: vm %u acknowledged shutdown request",
			    __func__, vmmci.vm_id);
			tv.tv_sec = VMMCI_SHUTDOWN_TIMEOUT;
			evtimer_add(&vmmci.timeout, &tv);
		}
		break;
	case VMMCI_SYNCRTC:
		log_debug("%s: vm %u acknowledged RTC sync request",
		    __func__, vmmci.vm_id);
		vmmci.cmd = VMMCI_NONE;
		break;
	default:
		log_warnx("%s: illegal request %u", __func__, cmd);
		break;
	}
}

void
vmmci_timeout(int fd, short type, void *arg)
{
	log_debug("%s: vm %u shutdown", __progname, vmmci.vm_id);
	vm_shutdown(vmmci.cmd == VMMCI_REBOOT ? VMMCI_REBOOT : VMMCI_SHUTDOWN);
}

int
vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *unused, uint8_t sz)
{
	*intr = 0xFF;

	if (dir == 0) {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
		case VIRTIO_CONFIG_QUEUE_SIZE:
		case VIRTIO_CONFIG_ISR_STATUS:
			log_warnx("%s: illegal write %x to %s",
			    __progname, *data, virtio_reg_name(reg));
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			vmmci.cfg.guest_feature = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_PFN:
			vmmci.cfg.queue_pfn = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			vmmci.cfg.queue_select = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			vmmci.cfg.queue_notify = *data;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			vmmci.cfg.device_status = *data;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
			vmmci_ack(*data);
			break;
		}
	} else {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
			*data = vmmci.cmd;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
			/* Update time once when reading the first register */
			gettimeofday(&vmmci.time, NULL);
			*data = (uint64_t)vmmci.time.tv_sec;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8:
			*data = (uint64_t)vmmci.time.tv_sec >> 32;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12:
			*data = (uint64_t)vmmci.time.tv_usec;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16:
			*data = (uint64_t)vmmci.time.tv_usec >> 32;
			break;
		case VIRTIO_CONFIG_DEVICE_FEATURES:
			*data = vmmci.cfg.device_feature;
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			*data = vmmci.cfg.guest_feature;
			break;
		case VIRTIO_CONFIG_QUEUE_PFN:
			*data = vmmci.cfg.queue_pfn;
			break;
		case VIRTIO_CONFIG_QUEUE_SIZE:
			*data = vmmci.cfg.queue_size;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			*data = vmmci.cfg.queue_select;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			*data = vmmci.cfg.queue_notify;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			*data = vmmci.cfg.device_status;
			break;
		case VIRTIO_CONFIG_ISR_STATUS:
			*data = vmmci.cfg.isr_status;
			vmmci.cfg.isr_status = 0;
			vcpu_deassert_pic_irq(vmmci.vm_id, 0, vmmci.irq);
			break;
		}
	}
	return (0);
}

int
virtio_get_base(int fd, char *path, size_t npath, int type, const char *dpath)
{
	switch (type) {
	case VMDF_RAW:
		return 0;
	case VMDF_QCOW2:
		return virtio_qcow2_get_base(fd, path, npath, dpath);
	}
	log_warnx("%s: invalid disk format", __func__);
	return -1;
}

/*
 * Initializes a struct virtio_backing using the list of fds.
 */
static int
virtio_init_disk(struct virtio_backing *file, off_t *sz,
    int *fd, size_t nfd, int type)
{
	/*
	 * probe disk types in order of preference, first one to work wins.
	 * TODO: provide a way of specifying the type and options.
	 */
	switch (type) {
	case VMDF_RAW:
		return virtio_raw_init(file, sz, fd, nfd);
	case VMDF_QCOW2:
		return virtio_qcow2_init(file, sz, fd, nfd);
	}
	log_warnx("%s: invalid disk format", __func__);
	return -1;
}

void
virtio_init(struct vmd_vm *vm, int child_cdrom,
    int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	uint8_t id;
	uint8_t i;
	int ret;

	/* Virtio entropy device */
	if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
	    PCI_PRODUCT_QUMRANET_VIO_RNG, PCI_CLASS_SYSTEM,
	    PCI_SUBCLASS_SYSTEM_MISC,
	    PCI_VENDOR_OPENBSD,
	    PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL)) {
		log_warnx("%s: can't add PCI virtio rng device",
		    __progname);
		return;
	}

	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_rnd_io, NULL)) {
		log_warnx("%s: can't add bar for virtio rng device",
		    __progname);
		return;
	}

	memset(&viornd, 0, sizeof(viornd));
	viornd.vq[0].qs = VIORND_QUEUE_SIZE;
	viornd.vq[0].vq_availoffset = sizeof(struct vring_desc) *
	    VIORND_QUEUE_SIZE;
	viornd.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
	    sizeof(struct vring_desc) * VIORND_QUEUE_SIZE
	    + sizeof(uint16_t) * (2 + VIORND_QUEUE_SIZE));
	viornd.pci_id = id;
	viornd.irq = pci_get_dev_irq(id);
	viornd.vm_id = vcp->vcp_id;

	if (vcp->vcp_nnics > 0) {
		vionet = calloc(vcp->vcp_nnics, sizeof(struct vionet_dev));
		if (vionet == NULL) {
			log_warn("%s: calloc failure allocating vionets",
			    __progname);
			return;
		}

		nr_vionet = vcp->vcp_nnics;
		/* Virtio network */
		for (i = 0; i < vcp->vcp_nnics; i++) {
			if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
			    PCI_PRODUCT_QUMRANET_VIO_NET, PCI_CLASS_SYSTEM,
			    PCI_SUBCLASS_SYSTEM_MISC,
			    PCI_VENDOR_OPENBSD,
			    PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL)) {
				log_warnx("%s: can't add PCI virtio net device",
				    __progname);
				return;
			}

			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_net_io,
			    &vionet[i])) {
				log_warnx("%s: can't add bar for virtio net "
				    "device", __progname);
				return;
			}

			ret = pthread_mutex_init(&vionet[i].mutex, NULL);
			if (ret) {
				errno = ret;
				log_warn("%s: could not initialize mutex "
				    "for vionet device", __progname);
				return;
			}

			vionet[i].vq[RXQ].qs = VIONET_QUEUE_SIZE;
			vionet[i].vq[RXQ].vq_availoffset =
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
			vionet[i].vq[RXQ].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
			vionet[i].vq[RXQ].last_avail = 0;
			vionet[i].vq[RXQ].notified_avail = 0;

			vionet[i].vq[TXQ].qs = VIONET_QUEUE_SIZE;
			vionet[i].vq[TXQ].vq_availoffset =
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
			vionet[i].vq[TXQ].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
			vionet[i].vq[TXQ].last_avail = 0;
			vionet[i].vq[TXQ].notified_avail = 0;
			vionet[i].fd = child_taps[i];
			vionet[i].vm_id = vcp->vcp_id;
			vionet[i].vm_vmid = vm->vm_vmid;
			vionet[i].irq = pci_get_dev_irq(id);

			event_set(&vionet[i].event, vionet[i].fd,
			    EV_READ | EV_PERSIST, vionet_rx_event, &vionet[i]);
			if (event_add(&vionet[i].event, NULL)) {
				log_warn("could not initialize vionet event "
				    "handler");
				return;
			}

			/* MAC address has been assigned by the parent */
			memcpy(&vionet[i].mac, &vcp->vcp_macs[i], 6);
			vionet[i].cfg.device_feature = VIRTIO_NET_F_MAC;

			vionet[i].lockedmac =
			    vmc->vmc_ifflags[i] & VMIFF_LOCKED ? 1 : 0;
			vionet[i].local =
			    vmc->vmc_ifflags[i] & VMIFF_LOCAL ? 1 : 0;
			if (i == 0 && vmc->vmc_bootdevice & VMBOOTDEV_NET)
				vionet[i].pxeboot = 1;
			vionet[i].idx = i;
			vionet[i].pci_id = id;

			log_debug("%s: vm \"%s\" vio%u lladdr %s%s%s%s",
			    __func__, vcp->vcp_name, i,
			    ether_ntoa((void *)vionet[i].mac),
			    vionet[i].lockedmac ? ", locked" : "",
			    vionet[i].local ? ", local" : "",
			    vionet[i].pxeboot ? ", pxeboot" : "");
		}
	}

	if (vcp->vcp_ndisks > 0) {
		nr_vioblk = vcp->vcp_ndisks;
		vioblk = calloc(vcp->vcp_ndisks, sizeof(struct vioblk_dev));
		if (vioblk == NULL) {
			log_warn("%s: calloc failure allocating vioblks",
			    __progname);
			return;
		}

		/* One virtio block device for each disk defined in vcp */
		for (i = 0; i < vcp->vcp_ndisks; i++) {
			if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
			    PCI_PRODUCT_QUMRANET_VIO_BLOCK,
			    PCI_CLASS_MASS_STORAGE,
			    PCI_SUBCLASS_MASS_STORAGE_SCSI,
			    PCI_VENDOR_OPENBSD,
			    PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL)) {
				log_warnx("%s: can't add PCI virtio block "
				    "device", __progname);
				return;
			}
			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_blk_io,
			    &vioblk[i])) {
				log_warnx("%s: can't add bar for virtio block "
				    "device", __progname);
				return;
			}
			vioblk[i].vq[0].qs = VIOBLK_QUEUE_SIZE;
			vioblk[i].vq[0].vq_availoffset =
			    sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE;
			vioblk[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE));
			vioblk[i].vq[0].last_avail = 0;
			vioblk[i].cfg.device_feature = VIRTIO_BLK_F_SIZE_MAX;
			vioblk[i].max_xfer = 1048576;
			vioblk[i].pci_id = id;
			vioblk[i].vm_id = vcp->vcp_id;
			vioblk[i].irq = pci_get_dev_irq(id);
			if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz,
			    child_disks[i], vmc->vmc_diskbases[i],
			    vmc->vmc_disktypes[i]) == -1) {
				log_warnx("%s: unable to determine disk format",
				    __func__);
				return;
			}
			vioblk[i].sz /= 512;
		}
	}

	/* vioscsi cdrom */
	if (strlen(vcp->vcp_cdrom)) {
		vioscsi = calloc(1, sizeof(struct vioscsi_dev));
		if (vioscsi == NULL) {
			log_warn("%s: calloc failure allocating vioscsi",
			    __progname);
			return;
		}

		if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
		    PCI_PRODUCT_QUMRANET_VIO_SCSI,
		    PCI_CLASS_MASS_STORAGE,
		    PCI_SUBCLASS_MASS_STORAGE_SCSI,
		    PCI_VENDOR_OPENBSD,
		    PCI_PRODUCT_VIRTIO_SCSI, 1, NULL)) {
			log_warnx("%s: can't add PCI vioscsi device",
			    __progname);
			return;
		}

		if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vioscsi_io, vioscsi)) {
			log_warnx("%s: can't add bar for vioscsi device",
			    __progname);
			return;
		}

		for (i = 0; i < VIRTIO_MAX_QUEUES; i++) {
			vioscsi->vq[i].qs = VIOSCSI_QUEUE_SIZE;
			vioscsi->vq[i].vq_availoffset =
			    sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE;
			vioscsi->vq[i].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIOSCSI_QUEUE_SIZE));
			vioscsi->vq[i].last_avail = 0;
		}
		if (virtio_init_disk(&vioscsi->file, &vioscsi->sz,
		    &child_cdrom, 1, VMDF_RAW) == -1) {
			log_warnx("%s: unable to determine iso format",
			    __func__);
			return;
		}
		vioscsi->locked = 0;
		vioscsi->lba = 0;
		/* num of 2048-byte blocks in file */
		vioscsi->n_blocks = vioscsi->sz >> 11;
		vioscsi->max_xfer = VIOSCSI_BLOCK_SIZE_CDROM;
		vioscsi->pci_id = id;
		vioscsi->vm_id = vcp->vcp_id;
		vioscsi->irq = pci_get_dev_irq(id);
	}

	/* virtio control device */
	if (pci_add_device(&id, PCI_VENDOR_OPENBSD,
	    PCI_PRODUCT_OPENBSD_CONTROL,
	    PCI_CLASS_COMMUNICATIONS,
	    PCI_SUBCLASS_COMMUNICATIONS_MISC,
	    PCI_VENDOR_OPENBSD,
	    PCI_PRODUCT_VIRTIO_VMMCI, 1, NULL)) {
		log_warnx("%s: can't add PCI vmm control device",
		    __progname);
		return;
	}

	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vmmci_io, NULL)) {
		log_warnx("%s: can't add bar for vmm control device",
		    __progname);
		return;
	}

	memset(&vmmci, 0, sizeof(vmmci));
	vmmci.cfg.device_feature = VMMCI_F_TIMESYNC | VMMCI_F_ACK |
	    VMMCI_F_SYNCRTC;
	vmmci.vm_id = vcp->vcp_id;
	vmmci.irq = pci_get_dev_irq(id);
	vmmci.pci_id = id;

	evtimer_set(&vmmci.timeout, vmmci_timeout, NULL);
}

/*
 * vionet_set_hostmac
 *
 * Sets the hardware address for the host-side tap(4) on a vionet_dev.
 *
 * This should only be called from the event-loop thread.
 *
 * vm: pointer to the current vmd_vm instance
 * idx: index into the array of vionet_dev's for the target vionet_dev
 * addr: ethernet address to set
 */
void
vionet_set_hostmac(struct vmd_vm *vm, unsigned int idx, uint8_t *addr)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	struct vionet_dev *dev;

	if (idx >= vcp->vcp_nnics)
		fatalx("vionet_set_hostmac");

	dev = &vionet[idx];
	memcpy(dev->hostmac, addr, sizeof(dev->hostmac));
}

void
virtio_shutdown(struct vmd_vm *vm)
{
	int i;

	/* ensure that our disks are synced */
	if (vioscsi != NULL)
		vioscsi->file.close(vioscsi->file.p, 0);

	for (i = 0; i < nr_vioblk; i++)
		vioblk[i].file.close(vioblk[i].file.p, 0);
}

int
vmmci_restore(int fd, uint32_t vm_id)
{
	log_debug("%s: receiving vmmci", __func__);
	if (atomicio(read, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
		log_warnx("%s: error reading vmmci from fd", __func__);
		return (-1);
	}

	if (pci_set_bar_fn(vmmci.pci_id, 0, vmmci_io, NULL)) {
		log_warnx("%s: can't set bar fn for vmm control device",
		    __progname);
		return (-1);
	}
	vmmci.vm_id = vm_id;
	vmmci.irq = pci_get_dev_irq(vmmci.pci_id);
	memset(&vmmci.timeout, 0, sizeof(struct event));
	evtimer_set(&vmmci.timeout, vmmci_timeout, NULL);
	return (0);
}

int
viornd_restore(int fd, struct vm_create_params *vcp)
{
	log_debug("%s: receiving viornd", __func__);
	if (atomicio(read, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
		log_warnx("%s: error reading viornd from fd", __func__);
		return (-1);
	}
	if (pci_set_bar_fn(viornd.pci_id, 0, virtio_rnd_io, NULL)) {
		log_warnx("%s: can't set bar fn for virtio rng device",
		    __progname);
		return (-1);
	}
	viornd.vm_id = vcp->vcp_id;
	viornd.irq = pci_get_dev_irq(viornd.pci_id);

	return (0);
}

int
vionet_restore(int fd, struct vmd_vm *vm, int *child_taps)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	uint8_t i;
	int ret;

	nr_vionet = vcp->vcp_nnics;
	if (vcp->vcp_nnics > 0) {
		vionet = calloc(vcp->vcp_nnics, sizeof(struct vionet_dev));
		if (vionet == NULL) {
			log_warn("%s: calloc failure allocating vionets",
			    __progname);
			return (-1);
		}
		log_debug("%s: receiving vionet", __func__);
		if (atomicio(read, fd, vionet,
		    vcp->vcp_nnics * sizeof(struct vionet_dev)) !=
		    vcp->vcp_nnics * sizeof(struct vionet_dev)) {
			log_warnx("%s: error reading vionet from fd",
			    __func__);
			return (-1);
		}

		/* Virtio network */
		for (i = 0; i < vcp->vcp_nnics; i++) {
			if (pci_set_bar_fn(vionet[i].pci_id, 0, virtio_net_io,
			    &vionet[i])) {
				log_warnx("%s: can't set bar fn for virtio net "
				    "device", __progname);
				return (-1);
			}

			memset(&vionet[i].mutex, 0, sizeof(pthread_mutex_t));
			ret = pthread_mutex_init(&vionet[i].mutex, NULL);

			if (ret) {
				errno = ret;
				log_warn("%s: could not initialize mutex "
				    "for vionet device", __progname);
				return (-1);
			}
			vionet[i].fd = child_taps[i];
			vionet[i].vm_id = vcp->vcp_id;
			vionet[i].vm_vmid = vm->vm_vmid;
			vionet[i].irq = pci_get_dev_irq(vionet[i].pci_id);

			memset(&vionet[i].event, 0, sizeof(struct event));
			event_set(&vionet[i].event, vionet[i].fd,
			    EV_READ | EV_PERSIST, vionet_rx_event, &vionet[i]);
		}
	}
	return (0);
}

int
vioblk_restore(int fd, struct vmop_create_params *vmc,
    int child_disks[][VM_MAX_BASE_PER_DISK])
{
	struct vm_create_params *vcp = &vmc->vmc_params;
	uint8_t i;

	nr_vioblk = vcp->vcp_ndisks;
	vioblk = calloc(vcp->vcp_ndisks, sizeof(struct vioblk_dev));
	if (vioblk == NULL) {
		log_warn("%s: calloc failure allocating vioblks", __progname);
		return (-1);
	}
	log_debug("%s: receiving vioblk", __func__);
	if (atomicio(read, fd, vioblk,
	    nr_vioblk * sizeof(struct vioblk_dev)) !=
	    nr_vioblk * sizeof(struct vioblk_dev)) {
		log_warnx("%s: error reading vioblk from fd", __func__);
		return (-1);
	}
	for (i = 0; i < vcp->vcp_ndisks; i++) {
		if (pci_set_bar_fn(vioblk[i].pci_id, 0, virtio_blk_io,
		    &vioblk[i])) {
			log_warnx("%s: can't set bar fn for virtio block "
			    "device", __progname);
			return (-1);
		}
		if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz,
		    child_disks[i], vmc->vmc_diskbases[i],
		    vmc->vmc_disktypes[i]) == -1) {
			log_warnx("%s: unable to determine disk format",
			    __func__);
			return (-1);
		}
		vioblk[i].vm_id = vcp->vcp_id;
		vioblk[i].irq = pci_get_dev_irq(vioblk[i].pci_id);
	}
	return (0);
}

int
vioscsi_restore(int fd, struct vm_create_params *vcp, int child_cdrom)
{
	if (!strlen(vcp->vcp_cdrom))
		return (0);

	vioscsi = calloc(1, sizeof(struct vioscsi_dev));
	if (vioscsi == NULL) {
		log_warn("%s: calloc failure allocating vioscsi", __progname);
		return (-1);
	}

	log_debug("%s: receiving vioscsi", __func__);

	if (atomicio(read, fd, vioscsi, sizeof(struct vioscsi_dev)) !=
	    sizeof(struct vioscsi_dev)) {
		log_warnx("%s: error reading vioscsi from fd", __func__);
		return (-1);
	}

	if (pci_set_bar_fn(vioscsi->pci_id, 0, vioscsi_io, vioscsi)) {
		log_warnx("%s: can't set bar fn for vioscsi device",
		    __progname);
		return (-1);
	}

	if (virtio_init_disk(&vioscsi->file, &vioscsi->sz, &child_cdrom, 1,
	    VMDF_RAW) == -1) {
		log_warnx("%s: unable to determine iso format", __func__);
		return (-1);
	}
	vioscsi->vm_id = vcp->vcp_id;
	vioscsi->irq = pci_get_dev_irq(vioscsi->pci_id);

	return (0);
}

int
virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom,
    int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	int ret;

	if ((ret = viornd_restore(fd, vcp)) == -1)
		return ret;

	if ((ret = vioblk_restore(fd, vmc, child_disks)) == -1)
		return ret;

	if ((ret = vioscsi_restore(fd, vcp, child_cdrom)) == -1)
		return ret;

	if ((ret = vionet_restore(fd, vm, child_taps)) == -1)
		return ret;

	if ((ret = vmmci_restore(fd, vcp->vcp_id)) == -1)
		return ret;

	return (0);
}

int
viornd_dump(int fd)
{
	log_debug("%s: sending viornd", __func__);
	if (atomicio(vwrite, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
		log_warnx("%s: error writing viornd to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vmmci_dump(int fd)
{
	log_debug("%s: sending vmmci", __func__);
	if (atomicio(vwrite, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
		log_warnx("%s: error writing vmmci to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vionet_dump(int fd)
{
	log_debug("%s: sending vionet", __func__);
	if (atomicio(vwrite, fd, vionet,
	    nr_vionet * sizeof(struct vionet_dev)) !=
	    nr_vionet * sizeof(struct vionet_dev)) {
		log_warnx("%s: error writing vionet to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vioblk_dump(int fd)
{
	log_debug("%s: sending vioblk", __func__);
	if (atomicio(vwrite, fd, vioblk,
	    nr_vioblk * sizeof(struct vioblk_dev)) !=
	    nr_vioblk * sizeof(struct vioblk_dev)) {
		log_warnx("%s: error writing vioblk to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vioscsi_dump(int fd)
{
	if (vioscsi == NULL)
		return (0);

	log_debug("%s: sending vioscsi", __func__);
	if (atomicio(vwrite, fd, vioscsi, sizeof(struct vioscsi_dev)) !=
	    sizeof(struct vioscsi_dev)) {
		log_warnx("%s: error writing vioscsi to fd", __func__);
		return (-1);
	}
	return (0);
}

int
virtio_dump(int fd)
{
	int ret;

	if ((ret = viornd_dump(fd)) == -1)
		return ret;

	if ((ret = vioblk_dump(fd)) == -1)
		return ret;

	if ((ret = vioscsi_dump(fd)) == -1)
		return ret;

	if ((ret = vionet_dump(fd)) == -1)
		return ret;

	if ((ret = vmmci_dump(fd)) == -1)
		return ret;

	return (0);
}

void
virtio_stop(struct vm_create_params *vcp)
{
	uint8_t i;
	for (i = 0; i < vcp->vcp_nnics; i++) {
		if (event_del(&vionet[i].event)) {
			log_warn("could not remove vionet event "
			    "handler");
			return;
		}
	}
}

void
virtio_start(struct vm_create_params *vcp)
{
	uint8_t i;
	for (i = 0; i < vcp->vcp_nnics; i++) {
		if (event_add(&vionet[i].event, NULL)) {
			log_warn("could not initialize vionet event "
			    "handler");
			return;
		}
	}
}