1 /* $OpenBSD: virtio.c,v 1.86 2021/04/22 18:40:21 dv Exp $ */ 2 3 /* 4 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/param.h> /* PAGE_SIZE */ 20 #include <sys/socket.h> 21 22 #include <machine/vmmvar.h> 23 #include <dev/pci/pcireg.h> 24 #include <dev/pci/pcidevs.h> 25 #include <dev/pv/virtioreg.h> 26 #include <dev/pci/virtio_pcireg.h> 27 #include <dev/pv/vioblkreg.h> 28 #include <dev/pv/vioscsireg.h> 29 30 #include <net/if.h> 31 #include <netinet/in.h> 32 #include <netinet/if_ether.h> 33 #include <netinet/ip.h> 34 35 #include <errno.h> 36 #include <event.h> 37 #include <poll.h> 38 #include <stddef.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <unistd.h> 42 43 #include "pci.h" 44 #include "vmd.h" 45 #include "vmm.h" 46 #include "virtio.h" 47 #include "vioscsi.h" 48 #include "loadfile.h" 49 #include "atomicio.h" 50 51 extern char *__progname; 52 struct viornd_dev viornd; 53 struct vioblk_dev *vioblk; 54 struct vionet_dev *vionet; 55 struct vioscsi_dev *vioscsi; 56 struct vmmci_dev vmmci; 57 58 int nr_vionet; 59 int nr_vioblk; 60 61 #define MAXPHYS (64 * 1024) /* max raw I/O transfer size */ 62 63 #define VIRTIO_NET_F_MAC (1<<5) 64 65 #define VMMCI_F_TIMESYNC (1<<0) 66 #define VMMCI_F_ACK (1<<1) 67 #define VMMCI_F_SYNCRTC (1<<2) 68 69 #define RXQ 0 70 #define TXQ 1 71 72 const char * 73 vioblk_cmd_name(uint32_t type) 74 { 75 switch (type) { 76 case VIRTIO_BLK_T_IN: return "read"; 77 case VIRTIO_BLK_T_OUT: return "write"; 78 case VIRTIO_BLK_T_SCSI_CMD: return "scsi read"; 79 case VIRTIO_BLK_T_SCSI_CMD_OUT: return "scsi write"; 80 case VIRTIO_BLK_T_FLUSH: return "flush"; 81 case VIRTIO_BLK_T_FLUSH_OUT: return "flush out"; 82 case VIRTIO_BLK_T_GET_ID: return "get id"; 83 default: return "unknown"; 84 } 85 } 86 87 static void 88 dump_descriptor_chain(struct vring_desc *desc, int16_t dxx) 89 { 90 unsigned int cnt = 0; 91 92 log_debug("descriptor chain @ %d", dxx); 93 do { 94 log_debug("desc @%d addr/len/flags/next = 0x%llx / 0x%x " 95 "/ 0x%x / 0x%x", 96 dxx, 97 desc[dxx].addr, 98 desc[dxx].len, 99 desc[dxx].flags, 100 desc[dxx].next); 101 dxx = desc[dxx].next; 102 103 /* 104 * Dump up to the max number of descriptor for the largest 105 * queue we support, which currently is VIONET_QUEUE_SIZE. 
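 * A chain longer than that cannot be valid, so warn and stop walking
 * instead of looping forever on a corrupt next pointer.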
106 */ 107 if (++cnt >= VIONET_QUEUE_SIZE) { 108 log_warnx("%s: descriptor table invalid", __func__); 109 return; 110 } 111 } while (desc[dxx].flags & VRING_DESC_F_NEXT); 112 113 log_debug("desc @%d addr/len/flags/next = 0x%llx / 0x%x / 0x%x " 114 "/ 0x%x", 115 dxx, 116 desc[dxx].addr, 117 desc[dxx].len, 118 desc[dxx].flags, 119 desc[dxx].next); 120 } 121 122 static const char * 123 virtio_reg_name(uint8_t reg) 124 { 125 switch (reg) { 126 case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature"; 127 case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature"; 128 case VIRTIO_CONFIG_QUEUE_ADDRESS: return "queue address"; 129 case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size"; 130 case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select"; 131 case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify"; 132 case VIRTIO_CONFIG_DEVICE_STATUS: return "device status"; 133 case VIRTIO_CONFIG_ISR_STATUS: return "isr status"; 134 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: return "device config 0"; 135 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: return "device config 1"; 136 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: return "device config 2"; 137 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: return "device config 3"; 138 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: return "device config 4"; 139 default: return "unknown"; 140 } 141 } 142 143 uint32_t 144 vring_size(uint32_t vq_size) 145 { 146 uint32_t allocsize1, allocsize2; 147 148 /* allocsize1: descriptor table + avail ring + pad */ 149 allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size 150 + sizeof(uint16_t) * (2 + vq_size)); 151 /* allocsize2: used ring + pad */ 152 allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * 2 153 + sizeof(struct vring_used_elem) * vq_size); 154 155 return allocsize1 + allocsize2; 156 } 157 158 /* Update queue select */ 159 void 160 viornd_update_qs(void) 161 { 162 /* Invalid queue? */ 163 if (viornd.cfg.queue_select > 0) { 164 viornd.cfg.queue_size = 0; 165 return; 166 } 167 168 /* Update queue address/size based on queue select */ 169 viornd.cfg.queue_address = viornd.vq[viornd.cfg.queue_select].qa; 170 viornd.cfg.queue_size = viornd.vq[viornd.cfg.queue_select].qs; 171 } 172 173 /* Update queue address */ 174 void 175 viornd_update_qa(void) 176 { 177 /* Invalid queue? */ 178 if (viornd.cfg.queue_select > 0) 179 return; 180 181 viornd.vq[viornd.cfg.queue_select].qa = viornd.cfg.queue_address; 182 } 183 184 int 185 viornd_notifyq(void) 186 { 187 uint64_t q_gpa; 188 uint32_t vr_sz; 189 size_t sz; 190 int ret; 191 uint16_t aidx, uidx; 192 char *buf, *rnd_data; 193 struct vring_desc *desc; 194 struct vring_avail *avail; 195 struct vring_used *used; 196 197 ret = 0; 198 199 /* Invalid queue? 
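 * viornd exposes a single virtqueue, so any notify index other than 0 is
 * ignored. The whole ring (descriptor table, avail ring and used ring, as
 * laid out by vring_size() above) is copied in, one guest buffer is filled
 * with entropy, and the ring is written back to guest memory.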
*/ 200 if (viornd.cfg.queue_notify > 0) 201 return (0); 202 203 vr_sz = vring_size(VIORND_QUEUE_SIZE); 204 q_gpa = viornd.vq[viornd.cfg.queue_notify].qa; 205 q_gpa = q_gpa * VIRTIO_PAGE_SIZE; 206 207 buf = calloc(1, vr_sz); 208 if (buf == NULL) { 209 log_warn("calloc error getting viornd ring"); 210 return (0); 211 } 212 213 if (read_mem(q_gpa, buf, vr_sz)) { 214 free(buf); 215 return (0); 216 } 217 218 desc = (struct vring_desc *)(buf); 219 avail = (struct vring_avail *)(buf + 220 viornd.vq[viornd.cfg.queue_notify].vq_availoffset); 221 used = (struct vring_used *)(buf + 222 viornd.vq[viornd.cfg.queue_notify].vq_usedoffset); 223 224 aidx = avail->idx & VIORND_QUEUE_MASK; 225 uidx = used->idx & VIORND_QUEUE_MASK; 226 227 sz = desc[avail->ring[aidx]].len; 228 if (sz > MAXPHYS) 229 fatal("viornd descriptor size too large (%zu)", sz); 230 231 rnd_data = malloc(sz); 232 233 if (rnd_data != NULL) { 234 arc4random_buf(rnd_data, desc[avail->ring[aidx]].len); 235 if (write_mem(desc[avail->ring[aidx]].addr, 236 rnd_data, desc[avail->ring[aidx]].len)) { 237 log_warnx("viornd: can't write random data @ " 238 "0x%llx", 239 desc[avail->ring[aidx]].addr); 240 } else { 241 /* ret == 1 -> interrupt needed */ 242 /* XXX check VIRTIO_F_NO_INTR */ 243 ret = 1; 244 viornd.cfg.isr_status = 1; 245 used->ring[uidx].id = avail->ring[aidx] & 246 VIORND_QUEUE_MASK; 247 used->ring[uidx].len = desc[avail->ring[aidx]].len; 248 used->idx++; 249 250 if (write_mem(q_gpa, buf, vr_sz)) { 251 log_warnx("viornd: error writing vio ring"); 252 } 253 } 254 free(rnd_data); 255 } else 256 fatal("memory allocation error for viornd data"); 257 258 free(buf); 259 260 return (ret); 261 } 262 263 int 264 virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, 265 void *unused, uint8_t sz) 266 { 267 *intr = 0xFF; 268 269 if (dir == 0) { 270 switch (reg) { 271 case VIRTIO_CONFIG_DEVICE_FEATURES: 272 case VIRTIO_CONFIG_QUEUE_SIZE: 273 case VIRTIO_CONFIG_ISR_STATUS: 274 log_warnx("%s: illegal write %x to %s", 275 __progname, *data, virtio_reg_name(reg)); 276 break; 277 case VIRTIO_CONFIG_GUEST_FEATURES: 278 viornd.cfg.guest_feature = *data; 279 break; 280 case VIRTIO_CONFIG_QUEUE_ADDRESS: 281 viornd.cfg.queue_address = *data; 282 viornd_update_qa(); 283 break; 284 case VIRTIO_CONFIG_QUEUE_SELECT: 285 viornd.cfg.queue_select = *data; 286 viornd_update_qs(); 287 break; 288 case VIRTIO_CONFIG_QUEUE_NOTIFY: 289 viornd.cfg.queue_notify = *data; 290 if (viornd_notifyq()) 291 *intr = 1; 292 break; 293 case VIRTIO_CONFIG_DEVICE_STATUS: 294 viornd.cfg.device_status = *data; 295 break; 296 } 297 } else { 298 switch (reg) { 299 case VIRTIO_CONFIG_DEVICE_FEATURES: 300 *data = viornd.cfg.device_feature; 301 break; 302 case VIRTIO_CONFIG_GUEST_FEATURES: 303 *data = viornd.cfg.guest_feature; 304 break; 305 case VIRTIO_CONFIG_QUEUE_ADDRESS: 306 *data = viornd.cfg.queue_address; 307 break; 308 case VIRTIO_CONFIG_QUEUE_SIZE: 309 *data = viornd.cfg.queue_size; 310 break; 311 case VIRTIO_CONFIG_QUEUE_SELECT: 312 *data = viornd.cfg.queue_select; 313 break; 314 case VIRTIO_CONFIG_QUEUE_NOTIFY: 315 *data = viornd.cfg.queue_notify; 316 break; 317 case VIRTIO_CONFIG_DEVICE_STATUS: 318 *data = viornd.cfg.device_status; 319 break; 320 case VIRTIO_CONFIG_ISR_STATUS: 321 *data = viornd.cfg.isr_status; 322 viornd.cfg.isr_status = 0; 323 vcpu_deassert_pic_irq(viornd.vm_id, 0, viornd.irq); 324 break; 325 } 326 } 327 return (0); 328 } 329 330 void 331 vioblk_update_qa(struct vioblk_dev *dev) 332 { 333 /* Invalid queue? 
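 * vioblk exposes a single virtqueue, so only queue select 0 is accepted
 * here; the same bound guards vioblk_update_qs() and vioblk_notifyq().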
*/ 334 if (dev->cfg.queue_select > 0) 335 return; 336 337 dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address; 338 } 339 340 void 341 vioblk_update_qs(struct vioblk_dev *dev) 342 { 343 /* Invalid queue? */ 344 if (dev->cfg.queue_select > 0) { 345 dev->cfg.queue_size = 0; 346 return; 347 } 348 349 /* Update queue address/size based on queue select */ 350 dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa; 351 dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs; 352 } 353 354 static void 355 vioblk_free_info(struct ioinfo *info) 356 { 357 if (!info) 358 return; 359 free(info->buf); 360 free(info); 361 } 362 363 static struct ioinfo * 364 vioblk_start_read(struct vioblk_dev *dev, off_t sector, size_t sz) 365 { 366 struct ioinfo *info; 367 368 info = calloc(1, sizeof(*info)); 369 if (!info) 370 goto nomem; 371 info->buf = malloc(sz); 372 if (info->buf == NULL) 373 goto nomem; 374 info->len = sz; 375 info->offset = sector * VIRTIO_BLK_SECTOR_SIZE; 376 info->file = &dev->file; 377 378 return info; 379 380 nomem: 381 free(info); 382 log_warn("malloc error vioblk read"); 383 return (NULL); 384 } 385 386 387 static const uint8_t * 388 vioblk_finish_read(struct ioinfo *info) 389 { 390 struct virtio_backing *file; 391 392 file = info->file; 393 if (file->pread(file->p, info->buf, info->len, info->offset) != info->len) { 394 info->error = errno; 395 log_warn("vioblk read error"); 396 return NULL; 397 } 398 399 return info->buf; 400 } 401 402 static struct ioinfo * 403 vioblk_start_write(struct vioblk_dev *dev, off_t sector, 404 paddr_t addr, size_t len) 405 { 406 struct ioinfo *info; 407 408 info = calloc(1, sizeof(*info)); 409 if (!info) 410 goto nomem; 411 info->buf = malloc(len); 412 if (info->buf == NULL) 413 goto nomem; 414 info->len = len; 415 info->offset = sector * VIRTIO_BLK_SECTOR_SIZE; 416 info->file = &dev->file; 417 418 if (read_mem(addr, info->buf, len)) { 419 vioblk_free_info(info); 420 return NULL; 421 } 422 423 return info; 424 425 nomem: 426 free(info); 427 log_warn("malloc error vioblk write"); 428 return (NULL); 429 } 430 431 static int 432 vioblk_finish_write(struct ioinfo *info) 433 { 434 struct virtio_backing *file; 435 436 file = info->file; 437 if (file->pwrite(file->p, info->buf, info->len, info->offset) != info->len) { 438 log_warn("vioblk write error"); 439 return EIO; 440 } 441 return 0; 442 } 443 444 /* 445 * XXX in various cases, ds should be set to VIRTIO_BLK_S_IOERR, if we can 446 * XXX cant trust ring data from VM, be extra cautious. 447 */ 448 int 449 vioblk_notifyq(struct vioblk_dev *dev) 450 { 451 uint64_t q_gpa; 452 uint32_t vr_sz; 453 uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx; 454 uint8_t ds; 455 int cnt, ret; 456 off_t secbias; 457 char *vr; 458 struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc; 459 struct vring_avail *avail; 460 struct vring_used *used; 461 struct virtio_blk_req_hdr cmd; 462 463 ret = 0; 464 465 /* Invalid queue? 
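 * Only queue index 0 exists for vioblk; notifies for anything else are
 * ignored.
 *
 * A sketch of the descriptor chain this handler expects per request
 * (derived from the code below, not quoted from the virtio spec):
 *
 *   desc[cmd]    -> struct virtio_blk_req_hdr (type, sector, ...)
 *   desc[data]   -> one or more data buffers, chained via the next field
 *   desc[status] -> a single status byte that we set (VIRTIO_BLK_S_OK,
 *                   VIRTIO_BLK_S_UNSUPP, ...)
 *
 * Every handled request gets a used-ring entry and bumps used->idx.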
*/ 466 if (dev->cfg.queue_notify > 0) 467 return (0); 468 469 vr_sz = vring_size(VIOBLK_QUEUE_SIZE); 470 q_gpa = dev->vq[dev->cfg.queue_notify].qa; 471 q_gpa = q_gpa * VIRTIO_PAGE_SIZE; 472 473 vr = calloc(1, vr_sz); 474 if (vr == NULL) { 475 log_warn("calloc error getting vioblk ring"); 476 return (0); 477 } 478 479 if (read_mem(q_gpa, vr, vr_sz)) { 480 log_warnx("error reading gpa 0x%llx", q_gpa); 481 goto out; 482 } 483 484 /* Compute offsets in ring of descriptors, avail ring, and used ring */ 485 desc = (struct vring_desc *)(vr); 486 avail = (struct vring_avail *)(vr + 487 dev->vq[dev->cfg.queue_notify].vq_availoffset); 488 used = (struct vring_used *)(vr + 489 dev->vq[dev->cfg.queue_notify].vq_usedoffset); 490 491 idx = dev->vq[dev->cfg.queue_notify].last_avail & VIOBLK_QUEUE_MASK; 492 493 if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) { 494 log_warnx("vioblk queue notify - nothing to do?"); 495 goto out; 496 } 497 498 while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) { 499 500 cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK; 501 cmd_desc = &desc[cmd_desc_idx]; 502 503 if ((cmd_desc->flags & VRING_DESC_F_NEXT) == 0) { 504 log_warnx("unchained vioblk cmd descriptor received " 505 "(idx %d)", cmd_desc_idx); 506 goto out; 507 } 508 509 /* Read command from descriptor ring */ 510 if (read_mem(cmd_desc->addr, &cmd, cmd_desc->len)) { 511 log_warnx("vioblk: command read_mem error @ 0x%llx", 512 cmd_desc->addr); 513 goto out; 514 } 515 516 switch (cmd.type) { 517 case VIRTIO_BLK_T_IN: 518 /* first descriptor */ 519 secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 520 secdata_desc = &desc[secdata_desc_idx]; 521 522 if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) { 523 log_warnx("unchained vioblk data descriptor " 524 "received (idx %d)", cmd_desc_idx); 525 goto out; 526 } 527 528 cnt = 0; 529 secbias = 0; 530 do { 531 struct ioinfo *info; 532 const uint8_t *secdata; 533 534 info = vioblk_start_read(dev, 535 cmd.sector + secbias, secdata_desc->len); 536 537 /* read the data, use current data descriptor */ 538 secdata = vioblk_finish_read(info); 539 if (secdata == NULL) { 540 vioblk_free_info(info); 541 log_warnx("vioblk: block read error, " 542 "sector %lld", cmd.sector); 543 goto out; 544 } 545 546 if (write_mem(secdata_desc->addr, secdata, 547 secdata_desc->len)) { 548 log_warnx("can't write sector " 549 "data to gpa @ 0x%llx", 550 secdata_desc->addr); 551 dump_descriptor_chain(desc, 552 cmd_desc_idx); 553 vioblk_free_info(info); 554 goto out; 555 } 556 557 vioblk_free_info(info); 558 559 secbias += (secdata_desc->len / 560 VIRTIO_BLK_SECTOR_SIZE); 561 secdata_desc_idx = secdata_desc->next & 562 VIOBLK_QUEUE_MASK; 563 secdata_desc = &desc[secdata_desc_idx]; 564 565 /* Guard against infinite chains */ 566 if (++cnt >= VIOBLK_QUEUE_SIZE) { 567 log_warnx("%s: descriptor table " 568 "invalid", __func__); 569 goto out; 570 } 571 } while (secdata_desc->flags & VRING_DESC_F_NEXT); 572 573 ds_desc_idx = secdata_desc_idx; 574 ds_desc = secdata_desc; 575 576 ds = VIRTIO_BLK_S_OK; 577 if (write_mem(ds_desc->addr, &ds, ds_desc->len)) { 578 log_warnx("can't write device status data @ " 579 "0x%llx", ds_desc->addr); 580 dump_descriptor_chain(desc, cmd_desc_idx); 581 goto out; 582 } 583 584 ret = 1; 585 dev->cfg.isr_status = 1; 586 used->ring[used->idx & VIOBLK_QUEUE_MASK].id = 587 cmd_desc_idx; 588 used->ring[used->idx & VIOBLK_QUEUE_MASK].len = 589 cmd_desc->len; 590 used->idx++; 591 592 dev->vq[dev->cfg.queue_notify].last_avail = avail->idx & 593 VIOBLK_QUEUE_MASK; 594 595 if (write_mem(q_gpa, 
vr, vr_sz)) { 596 log_warnx("vioblk: error writing vio ring"); 597 } 598 break; 599 case VIRTIO_BLK_T_OUT: 600 secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 601 secdata_desc = &desc[secdata_desc_idx]; 602 603 if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) { 604 log_warnx("wr vioblk: unchained vioblk data " 605 "descriptor received (idx %d)", 606 cmd_desc_idx); 607 goto out; 608 } 609 610 if (secdata_desc->len > dev->max_xfer) { 611 log_warnx("%s: invalid read size %d requested", 612 __func__, secdata_desc->len); 613 goto out; 614 } 615 616 cnt = 0; 617 secbias = 0; 618 do { 619 struct ioinfo *info; 620 621 info = vioblk_start_write(dev, 622 cmd.sector + secbias, 623 secdata_desc->addr, secdata_desc->len); 624 625 if (info == NULL) { 626 log_warnx("wr vioblk: can't read " 627 "sector data @ 0x%llx", 628 secdata_desc->addr); 629 dump_descriptor_chain(desc, 630 cmd_desc_idx); 631 goto out; 632 } 633 634 if (vioblk_finish_write(info)) { 635 log_warnx("wr vioblk: disk write " 636 "error"); 637 vioblk_free_info(info); 638 goto out; 639 } 640 641 vioblk_free_info(info); 642 643 secbias += secdata_desc->len / 644 VIRTIO_BLK_SECTOR_SIZE; 645 646 secdata_desc_idx = secdata_desc->next & 647 VIOBLK_QUEUE_MASK; 648 secdata_desc = &desc[secdata_desc_idx]; 649 650 /* Guard against infinite chains */ 651 if (++cnt >= VIOBLK_QUEUE_SIZE) { 652 log_warnx("%s: descriptor table " 653 "invalid", __func__); 654 goto out; 655 } 656 } while (secdata_desc->flags & VRING_DESC_F_NEXT); 657 658 ds_desc_idx = secdata_desc_idx; 659 ds_desc = secdata_desc; 660 661 ds = VIRTIO_BLK_S_OK; 662 if (write_mem(ds_desc->addr, &ds, ds_desc->len)) { 663 log_warnx("wr vioblk: can't write device " 664 "status data @ 0x%llx", ds_desc->addr); 665 dump_descriptor_chain(desc, cmd_desc_idx); 666 goto out; 667 } 668 669 ret = 1; 670 dev->cfg.isr_status = 1; 671 used->ring[used->idx & VIOBLK_QUEUE_MASK].id = 672 cmd_desc_idx; 673 used->ring[used->idx & VIOBLK_QUEUE_MASK].len = 674 cmd_desc->len; 675 used->idx++; 676 677 dev->vq[dev->cfg.queue_notify].last_avail = avail->idx & 678 VIOBLK_QUEUE_MASK; 679 if (write_mem(q_gpa, vr, vr_sz)) 680 log_warnx("wr vioblk: error writing vio ring"); 681 break; 682 case VIRTIO_BLK_T_FLUSH: 683 case VIRTIO_BLK_T_FLUSH_OUT: 684 ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 685 ds_desc = &desc[ds_desc_idx]; 686 687 ds = VIRTIO_BLK_S_OK; 688 if (write_mem(ds_desc->addr, &ds, ds_desc->len)) { 689 log_warnx("fl vioblk: " 690 "can't write device status " 691 "data @ 0x%llx", ds_desc->addr); 692 dump_descriptor_chain(desc, cmd_desc_idx); 693 goto out; 694 } 695 696 ret = 1; 697 dev->cfg.isr_status = 1; 698 used->ring[used->idx & VIOBLK_QUEUE_MASK].id = 699 cmd_desc_idx; 700 used->ring[used->idx & VIOBLK_QUEUE_MASK].len = 701 cmd_desc->len; 702 used->idx++; 703 704 dev->vq[dev->cfg.queue_notify].last_avail = avail->idx & 705 VIOBLK_QUEUE_MASK; 706 if (write_mem(q_gpa, vr, vr_sz)) { 707 log_warnx("fl vioblk: error writing vio ring"); 708 } 709 break; 710 default: 711 log_warnx("%s: unsupported command 0x%x", __func__, 712 cmd.type); 713 714 ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 715 ds_desc = &desc[ds_desc_idx]; 716 717 ds = VIRTIO_BLK_S_UNSUPP; 718 if (write_mem(ds_desc->addr, &ds, ds_desc->len)) { 719 log_warnx("%s: get id : can't write device " 720 "status data @ 0x%llx", __func__, 721 ds_desc->addr); 722 dump_descriptor_chain(desc, cmd_desc_idx); 723 goto out; 724 } 725 726 ret = 1; 727 dev->cfg.isr_status = 1; 728 used->ring[used->idx & VIOBLK_QUEUE_MASK].id = 729 cmd_desc_idx; 
730 used->ring[used->idx & VIOBLK_QUEUE_MASK].len = 731 cmd_desc->len; 732 used->idx++; 733 734 dev->vq[dev->cfg.queue_notify].last_avail = avail->idx & 735 VIOBLK_QUEUE_MASK; 736 if (write_mem(q_gpa, vr, vr_sz)) { 737 log_warnx("%s: get id : error writing vio ring", 738 __func__); 739 } 740 break; 741 } 742 743 idx = (idx + 1) & VIOBLK_QUEUE_MASK; 744 } 745 out: 746 free(vr); 747 return (ret); 748 } 749 750 int 751 virtio_blk_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, 752 void *cookie, uint8_t sz) 753 { 754 struct vioblk_dev *dev = (struct vioblk_dev *)cookie; 755 756 *intr = 0xFF; 757 758 759 if (dir == 0) { 760 switch (reg) { 761 case VIRTIO_CONFIG_DEVICE_FEATURES: 762 case VIRTIO_CONFIG_QUEUE_SIZE: 763 case VIRTIO_CONFIG_ISR_STATUS: 764 log_warnx("%s: illegal write %x to %s", 765 __progname, *data, virtio_reg_name(reg)); 766 break; 767 case VIRTIO_CONFIG_GUEST_FEATURES: 768 dev->cfg.guest_feature = *data; 769 break; 770 case VIRTIO_CONFIG_QUEUE_ADDRESS: 771 dev->cfg.queue_address = *data; 772 vioblk_update_qa(dev); 773 break; 774 case VIRTIO_CONFIG_QUEUE_SELECT: 775 dev->cfg.queue_select = *data; 776 vioblk_update_qs(dev); 777 break; 778 case VIRTIO_CONFIG_QUEUE_NOTIFY: 779 dev->cfg.queue_notify = *data; 780 if (vioblk_notifyq(dev)) 781 *intr = 1; 782 break; 783 case VIRTIO_CONFIG_DEVICE_STATUS: 784 dev->cfg.device_status = *data; 785 if (dev->cfg.device_status == 0) { 786 log_debug("%s: device reset", __func__); 787 dev->cfg.guest_feature = 0; 788 dev->cfg.queue_address = 0; 789 vioblk_update_qa(dev); 790 dev->cfg.queue_size = 0; 791 vioblk_update_qs(dev); 792 dev->cfg.queue_select = 0; 793 dev->cfg.queue_notify = 0; 794 dev->cfg.isr_status = 0; 795 dev->vq[0].last_avail = 0; 796 vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq); 797 } 798 break; 799 default: 800 break; 801 } 802 } else { 803 switch (reg) { 804 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: 805 switch (sz) { 806 case 4: 807 *data = (uint32_t)(dev->sz); 808 break; 809 case 2: 810 *data &= 0xFFFF0000; 811 *data |= (uint32_t)(dev->sz) & 0xFFFF; 812 break; 813 case 1: 814 *data &= 0xFFFFFF00; 815 *data |= (uint32_t)(dev->sz) & 0xFF; 816 break; 817 } 818 /* XXX handle invalid sz */ 819 break; 820 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1: 821 if (sz == 1) { 822 *data &= 0xFFFFFF00; 823 *data |= (uint32_t)(dev->sz >> 8) & 0xFF; 824 } 825 /* XXX handle invalid sz */ 826 break; 827 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2: 828 if (sz == 1) { 829 *data &= 0xFFFFFF00; 830 *data |= (uint32_t)(dev->sz >> 16) & 0xFF; 831 } else if (sz == 2) { 832 *data &= 0xFFFF0000; 833 *data |= (uint32_t)(dev->sz >> 16) & 0xFFFF; 834 } 835 /* XXX handle invalid sz */ 836 break; 837 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3: 838 if (sz == 1) { 839 *data &= 0xFFFFFF00; 840 *data |= (uint32_t)(dev->sz >> 24) & 0xFF; 841 } 842 /* XXX handle invalid sz */ 843 break; 844 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: 845 switch (sz) { 846 case 4: 847 *data = (uint32_t)(dev->sz >> 32); 848 break; 849 case 2: 850 *data &= 0xFFFF0000; 851 *data |= (uint32_t)(dev->sz >> 32) & 0xFFFF; 852 break; 853 case 1: 854 *data &= 0xFFFFFF00; 855 *data |= (uint32_t)(dev->sz >> 32) & 0xFF; 856 break; 857 } 858 /* XXX handle invalid sz */ 859 break; 860 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5: 861 if (sz == 1) { 862 *data &= 0xFFFFFF00; 863 *data |= (uint32_t)(dev->sz >> 40) & 0xFF; 864 } 865 /* XXX handle invalid sz */ 866 break; 867 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 6: 868 if (sz == 1) { 869 *data &= 0xFFFFFF00; 870 *data |= (uint32_t)(dev->sz >> 48) & 
0xFF; 871 } else if (sz == 2) { 872 *data &= 0xFFFF0000; 873 *data |= (uint32_t)(dev->sz >> 48) & 0xFFFF; 874 } 875 /* XXX handle invalid sz */ 876 break; 877 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 7: 878 if (sz == 1) { 879 *data &= 0xFFFFFF00; 880 *data |= (uint32_t)(dev->sz >> 56) & 0xFF; 881 } 882 /* XXX handle invalid sz */ 883 break; 884 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: 885 switch (sz) { 886 case 4: 887 *data = (uint32_t)(dev->max_xfer); 888 break; 889 case 2: 890 *data &= 0xFFFF0000; 891 *data |= (uint32_t)(dev->max_xfer) & 0xFFFF; 892 break; 893 case 1: 894 *data &= 0xFFFFFF00; 895 *data |= (uint32_t)(dev->max_xfer) & 0xFF; 896 break; 897 } 898 /* XXX handle invalid sz */ 899 break; 900 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 9: 901 if (sz == 1) { 902 *data &= 0xFFFFFF00; 903 *data |= (uint32_t)(dev->max_xfer >> 8) & 0xFF; 904 } 905 /* XXX handle invalid sz */ 906 break; 907 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 10: 908 if (sz == 1) { 909 *data &= 0xFFFFFF00; 910 *data |= (uint32_t)(dev->max_xfer >> 16) & 0xFF; 911 } else if (sz == 2) { 912 *data &= 0xFFFF0000; 913 *data |= (uint32_t)(dev->max_xfer >> 16) 914 & 0xFFFF; 915 } 916 /* XXX handle invalid sz */ 917 break; 918 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 11: 919 if (sz == 1) { 920 *data &= 0xFFFFFF00; 921 *data |= (uint32_t)(dev->max_xfer >> 24) & 0xFF; 922 } 923 /* XXX handle invalid sz */ 924 break; 925 case VIRTIO_CONFIG_DEVICE_FEATURES: 926 *data = dev->cfg.device_feature; 927 break; 928 case VIRTIO_CONFIG_GUEST_FEATURES: 929 *data = dev->cfg.guest_feature; 930 break; 931 case VIRTIO_CONFIG_QUEUE_ADDRESS: 932 *data = dev->cfg.queue_address; 933 break; 934 case VIRTIO_CONFIG_QUEUE_SIZE: 935 if (sz == 4) 936 *data = dev->cfg.queue_size; 937 else if (sz == 2) { 938 *data &= 0xFFFF0000; 939 *data |= (uint16_t)dev->cfg.queue_size; 940 } else if (sz == 1) { 941 *data &= 0xFFFFFF00; 942 *data |= (uint8_t)dev->cfg.queue_size; 943 } 944 break; 945 case VIRTIO_CONFIG_QUEUE_SELECT: 946 *data = dev->cfg.queue_select; 947 break; 948 case VIRTIO_CONFIG_QUEUE_NOTIFY: 949 *data = dev->cfg.queue_notify; 950 break; 951 case VIRTIO_CONFIG_DEVICE_STATUS: 952 if (sz == 4) 953 *data = dev->cfg.device_status; 954 else if (sz == 2) { 955 *data &= 0xFFFF0000; 956 *data |= (uint16_t)dev->cfg.device_status; 957 } else if (sz == 1) { 958 *data &= 0xFFFFFF00; 959 *data |= (uint8_t)dev->cfg.device_status; 960 } 961 break; 962 case VIRTIO_CONFIG_ISR_STATUS: 963 *data = dev->cfg.isr_status; 964 dev->cfg.isr_status = 0; 965 vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq); 966 break; 967 } 968 } 969 return (0); 970 } 971 972 int 973 virtio_net_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, 974 void *cookie, uint8_t sz) 975 { 976 struct vionet_dev *dev = (struct vionet_dev *)cookie; 977 978 *intr = 0xFF; 979 mutex_lock(&dev->mutex); 980 981 if (dir == 0) { 982 switch (reg) { 983 case VIRTIO_CONFIG_DEVICE_FEATURES: 984 case VIRTIO_CONFIG_QUEUE_SIZE: 985 case VIRTIO_CONFIG_ISR_STATUS: 986 log_warnx("%s: illegal write %x to %s", 987 __progname, *data, virtio_reg_name(reg)); 988 break; 989 case VIRTIO_CONFIG_GUEST_FEATURES: 990 dev->cfg.guest_feature = *data; 991 break; 992 case VIRTIO_CONFIG_QUEUE_ADDRESS: 993 dev->cfg.queue_address = *data; 994 vionet_update_qa(dev); 995 break; 996 case VIRTIO_CONFIG_QUEUE_SELECT: 997 dev->cfg.queue_select = *data; 998 vionet_update_qs(dev); 999 break; 1000 case VIRTIO_CONFIG_QUEUE_NOTIFY: 1001 dev->cfg.queue_notify = *data; 1002 if (vionet_notifyq(dev)) 1003 *intr = 1; 1004 break; 1005 case 
VIRTIO_CONFIG_DEVICE_STATUS: 1006 dev->cfg.device_status = *data; 1007 if (dev->cfg.device_status == 0) { 1008 log_debug("%s: device reset", __func__); 1009 dev->cfg.guest_feature = 0; 1010 dev->cfg.queue_address = 0; 1011 vionet_update_qa(dev); 1012 dev->cfg.queue_size = 0; 1013 vionet_update_qs(dev); 1014 dev->cfg.queue_select = 0; 1015 dev->cfg.queue_notify = 0; 1016 dev->cfg.isr_status = 0; 1017 dev->vq[RXQ].last_avail = 0; 1018 dev->vq[RXQ].notified_avail = 0; 1019 dev->vq[TXQ].last_avail = 0; 1020 dev->vq[TXQ].notified_avail = 0; 1021 vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq); 1022 } 1023 break; 1024 default: 1025 break; 1026 } 1027 } else { 1028 switch (reg) { 1029 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: 1030 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1: 1031 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2: 1032 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3: 1033 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: 1034 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5: 1035 *data = dev->mac[reg - 1036 VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI]; 1037 break; 1038 case VIRTIO_CONFIG_DEVICE_FEATURES: 1039 *data = dev->cfg.device_feature; 1040 break; 1041 case VIRTIO_CONFIG_GUEST_FEATURES: 1042 *data = dev->cfg.guest_feature; 1043 break; 1044 case VIRTIO_CONFIG_QUEUE_ADDRESS: 1045 *data = dev->cfg.queue_address; 1046 break; 1047 case VIRTIO_CONFIG_QUEUE_SIZE: 1048 *data = dev->cfg.queue_size; 1049 break; 1050 case VIRTIO_CONFIG_QUEUE_SELECT: 1051 *data = dev->cfg.queue_select; 1052 break; 1053 case VIRTIO_CONFIG_QUEUE_NOTIFY: 1054 *data = dev->cfg.queue_notify; 1055 break; 1056 case VIRTIO_CONFIG_DEVICE_STATUS: 1057 *data = dev->cfg.device_status; 1058 break; 1059 case VIRTIO_CONFIG_ISR_STATUS: 1060 *data = dev->cfg.isr_status; 1061 dev->cfg.isr_status = 0; 1062 vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq); 1063 break; 1064 } 1065 } 1066 1067 mutex_unlock(&dev->mutex); 1068 return (0); 1069 } 1070 1071 /* 1072 * Must be called with dev->mutex acquired. 1073 */ 1074 void 1075 vionet_update_qa(struct vionet_dev *dev) 1076 { 1077 /* Invalid queue? */ 1078 if (dev->cfg.queue_select > 1) 1079 return; 1080 1081 dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address; 1082 } 1083 1084 /* 1085 * Must be called with dev->mutex acquired. 1086 */ 1087 void 1088 vionet_update_qs(struct vionet_dev *dev) 1089 { 1090 /* Invalid queue? */ 1091 if (dev->cfg.queue_select > 1) { 1092 dev->cfg.queue_size = 0; 1093 return; 1094 } 1095 1096 /* Update queue address/size based on queue select */ 1097 dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa; 1098 dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs; 1099 } 1100 1101 /* 1102 * Must be called with dev->mutex acquired. 
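 *
 * Copies one received packet into the guest's RX ring, prepending a
 * struct virtio_net_hdr (only hdr_len is filled in). Returns 1 when a
 * used-ring entry was added, so the caller knows to assert an interrupt,
 * and stores the remaining RX ring capacity in *spc.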
1103 */ 1104 int 1105 vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_t sz, int *spc) 1106 { 1107 uint64_t q_gpa; 1108 uint32_t vr_sz; 1109 uint16_t idx, pkt_desc_idx, hdr_desc_idx; 1110 ptrdiff_t off; 1111 int ret; 1112 char *vr; 1113 size_t rem; 1114 struct vring_desc *desc, *pkt_desc, *hdr_desc; 1115 struct vring_avail *avail; 1116 struct vring_used *used; 1117 struct vring_used_elem *ue; 1118 struct virtio_net_hdr hdr; 1119 1120 ret = 0; 1121 1122 if (sz < 1) { 1123 log_warn("%s: invalid packet size", __func__); 1124 return (0); 1125 } 1126 1127 if (!(dev->cfg.device_status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK)) 1128 return ret; 1129 1130 vr_sz = vring_size(VIONET_QUEUE_SIZE); 1131 q_gpa = dev->vq[RXQ].qa; 1132 q_gpa = q_gpa * VIRTIO_PAGE_SIZE; 1133 1134 vr = calloc(1, vr_sz); 1135 if (vr == NULL) { 1136 log_warn("rx enq: calloc error getting vionet ring"); 1137 return (0); 1138 } 1139 1140 if (read_mem(q_gpa, vr, vr_sz)) { 1141 log_warnx("rx enq: error reading gpa 0x%llx", q_gpa); 1142 goto out; 1143 } 1144 1145 /* Compute offsets in ring of descriptors, avail ring, and used ring */ 1146 desc = (struct vring_desc *)(vr); 1147 avail = (struct vring_avail *)(vr + dev->vq[RXQ].vq_availoffset); 1148 used = (struct vring_used *)(vr + dev->vq[RXQ].vq_usedoffset); 1149 1150 idx = dev->vq[RXQ].last_avail & VIONET_QUEUE_MASK; 1151 1152 if ((dev->vq[RXQ].notified_avail & VIONET_QUEUE_MASK) == idx) { 1153 log_debug("vionet queue notify - no space, dropping packet"); 1154 goto out; 1155 } 1156 1157 hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK; 1158 hdr_desc = &desc[hdr_desc_idx]; 1159 1160 pkt_desc_idx = hdr_desc->next & VIONET_QUEUE_MASK; 1161 pkt_desc = &desc[pkt_desc_idx]; 1162 1163 /* Set up the virtio header (written first, before the packet data) */ 1164 memset(&hdr, 0, sizeof(struct virtio_net_hdr)); 1165 hdr.hdr_len = sizeof(struct virtio_net_hdr); 1166 1167 /* Check size of header descriptor */ 1168 if (hdr_desc->len < sizeof(struct virtio_net_hdr)) { 1169 log_warnx("%s: invalid header descriptor (too small)", 1170 __func__); 1171 goto out; 1172 } 1173 1174 /* Write out virtio header */ 1175 if (write_mem(hdr_desc->addr, &hdr, sizeof(struct virtio_net_hdr))) { 1176 log_warnx("vionet: rx enq header write_mem error @ " 1177 "0x%llx", hdr_desc->addr); 1178 goto out; 1179 } 1180 1181 /* 1182 * Compute remaining space in the first (header) descriptor, and 1183 * copy the packet data after if space is available. Otherwise, 1184 * copy to the pkt_desc descriptor. 
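 * For example, a driver that posts a header-only first descriptor
 * (hdr_desc->len == sizeof(struct virtio_net_hdr)) leaves rem == 0, so
 * the packet data always lands in the separate pkt_desc buffer.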
1185 */ 1186 rem = hdr_desc->len - sizeof(struct virtio_net_hdr); 1187 1188 if (rem >= sz) { 1189 if (write_mem(hdr_desc->addr + sizeof(struct virtio_net_hdr), 1190 pkt, sz)) { 1191 log_warnx("vionet: rx enq packet write_mem error @ " 1192 "0x%llx", pkt_desc->addr); 1193 goto out; 1194 } 1195 } else { 1196 /* Fallback to pkt_desc descriptor */ 1197 if (pkt_desc->len >= sz) { 1198 /* Must be not readable */ 1199 if ((pkt_desc->flags & VRING_DESC_F_WRITE) == 0) { 1200 log_warnx("unexpected readable rx desc %d", 1201 pkt_desc_idx); 1202 goto out; 1203 } 1204 1205 /* Write packet to descriptor ring */ 1206 if (write_mem(pkt_desc->addr, pkt, sz)) { 1207 log_warnx("vionet: rx enq packet write_mem " 1208 "error @ 0x%llx", pkt_desc->addr); 1209 goto out; 1210 } 1211 } else { 1212 log_warnx("%s: descriptor too small for packet data", 1213 __func__); 1214 goto out; 1215 } 1216 } 1217 1218 ret = 1; 1219 dev->cfg.isr_status = 1; 1220 ue = &used->ring[used->idx & VIONET_QUEUE_MASK]; 1221 ue->id = hdr_desc_idx; 1222 ue->len = sz + sizeof(struct virtio_net_hdr); 1223 used->idx++; 1224 dev->vq[RXQ].last_avail++; 1225 *spc = dev->vq[RXQ].notified_avail - dev->vq[RXQ].last_avail; 1226 1227 off = (char *)ue - vr; 1228 if (write_mem(q_gpa + off, ue, sizeof *ue)) 1229 log_warnx("vionet: error writing vio ring"); 1230 else { 1231 off = (char *)&used->idx - vr; 1232 if (write_mem(q_gpa + off, &used->idx, sizeof used->idx)) 1233 log_warnx("vionet: error writing vio ring"); 1234 } 1235 out: 1236 free(vr); 1237 return (ret); 1238 } 1239 1240 /* 1241 * vionet_rx 1242 * 1243 * Enqueue data that was received on a tap file descriptor 1244 * to the vionet device queue. 1245 * 1246 * Must be called with dev->mutex acquired. 1247 */ 1248 static int 1249 vionet_rx(struct vionet_dev *dev) 1250 { 1251 char buf[PAGE_SIZE]; 1252 int hasdata, num_enq = 0, spc = 0; 1253 struct ether_header *eh; 1254 ssize_t sz; 1255 1256 do { 1257 sz = read(dev->fd, buf, sizeof buf); 1258 if (sz == -1) { 1259 /* 1260 * If we get EAGAIN, No data is currently available. 1261 * Do not treat this as an error. 1262 */ 1263 if (errno != EAGAIN) 1264 log_warn("unexpected read error on vionet " 1265 "device"); 1266 } else if (sz > 0) { 1267 eh = (struct ether_header *)buf; 1268 if (!dev->lockedmac || sz < ETHER_HDR_LEN || 1269 ETHER_IS_MULTICAST(eh->ether_dhost) || 1270 memcmp(eh->ether_dhost, dev->mac, 1271 sizeof(eh->ether_dhost)) == 0) 1272 num_enq += vionet_enq_rx(dev, buf, sz, &spc); 1273 } else if (sz == 0) { 1274 log_debug("process_rx: no data"); 1275 hasdata = 0; 1276 break; 1277 } 1278 1279 hasdata = fd_hasdata(dev->fd); 1280 } while (spc && hasdata); 1281 1282 dev->rx_pending = hasdata; 1283 return (num_enq); 1284 } 1285 1286 /* 1287 * vionet_rx_event 1288 * 1289 * Called from the event handling thread when new data can be 1290 * received on the tap fd of a vionet device. 1291 */ 1292 static void 1293 vionet_rx_event(int fd, short kind, void *arg) 1294 { 1295 struct vionet_dev *dev = arg; 1296 1297 mutex_lock(&dev->mutex); 1298 1299 /* 1300 * We already have other data pending to be received. The data that 1301 * has become available now will be enqueued to the vionet_dev 1302 * later. 
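 * "Later" means the next call to vionet_process_rx(), which runs on VCPU
 * exit and drains devices that still have rx_pending set.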
1303 */ 1304 if (dev->rx_pending) { 1305 mutex_unlock(&dev->mutex); 1306 return; 1307 } 1308 1309 if (vionet_rx(dev) > 0) { 1310 /* XXX: vcpu_id */ 1311 vcpu_assert_pic_irq(dev->vm_id, 0, dev->irq); 1312 } 1313 1314 mutex_unlock(&dev->mutex); 1315 } 1316 1317 /* 1318 * vionet_process_rx 1319 * 1320 * Processes any remaining pending receivable data for a vionet device. 1321 * Called on VCPU exit. Although we poll on the tap file descriptor of 1322 * a vionet_dev in a separate thread, this function still needs to be 1323 * called on VCPU exit: it can happen that not all data fits into the 1324 * receive queue of the vionet_dev immediately. So any outstanding data 1325 * is handled here. 1326 * 1327 * Parameters: 1328 * vm_id: VM ID of the VM for which to process vionet events 1329 */ 1330 void 1331 vionet_process_rx(uint32_t vm_id) 1332 { 1333 int i; 1334 1335 for (i = 0 ; i < nr_vionet; i++) { 1336 mutex_lock(&vionet[i].mutex); 1337 if (!vionet[i].rx_added) { 1338 mutex_unlock(&vionet[i].mutex); 1339 continue; 1340 } 1341 1342 if (vionet[i].rx_pending) { 1343 if (vionet_rx(&vionet[i])) { 1344 vcpu_assert_pic_irq(vm_id, 0, vionet[i].irq); 1345 } 1346 } 1347 mutex_unlock(&vionet[i].mutex); 1348 } 1349 } 1350 1351 /* 1352 * Must be called with dev->mutex acquired. 1353 */ 1354 void 1355 vionet_notify_rx(struct vionet_dev *dev) 1356 { 1357 uint64_t q_gpa; 1358 uint32_t vr_sz; 1359 char *vr; 1360 struct vring_avail *avail; 1361 1362 vr_sz = vring_size(VIONET_QUEUE_SIZE); 1363 q_gpa = dev->vq[RXQ].qa; 1364 q_gpa = q_gpa * VIRTIO_PAGE_SIZE; 1365 1366 vr = malloc(vr_sz); 1367 if (vr == NULL) { 1368 log_warn("malloc error getting vionet ring"); 1369 return; 1370 } 1371 1372 if (read_mem(q_gpa, vr, vr_sz)) { 1373 log_warnx("error reading gpa 0x%llx", q_gpa); 1374 free(vr); 1375 return; 1376 } 1377 1378 /* Compute offset into avail ring */ 1379 avail = (struct vring_avail *)(vr + dev->vq[RXQ].vq_availoffset); 1380 1381 dev->rx_added = 1; 1382 dev->vq[RXQ].notified_avail = avail->idx - 1; 1383 1384 free(vr); 1385 } 1386 1387 /* 1388 * Must be called with dev->mutex acquired. 1389 */ 1390 int 1391 vionet_notifyq(struct vionet_dev *dev) 1392 { 1393 int ret; 1394 1395 switch (dev->cfg.queue_notify) { 1396 case RXQ: 1397 vionet_notify_rx(dev); 1398 ret = 0; 1399 break; 1400 case TXQ: 1401 ret = vionet_notify_tx(dev); 1402 break; 1403 default: 1404 /* 1405 * Catch the unimplemented queue ID 2 (control queue) as 1406 * well as any bogus queue IDs. 1407 */ 1408 log_debug("%s: notify for unimplemented queue ID %d", 1409 __func__, dev->cfg.queue_notify); 1410 ret = 0; 1411 break; 1412 } 1413 1414 return (ret); 1415 } 1416 1417 /* 1418 * Must be called with dev->mutex acquired. 1419 * 1420 * XXX cant trust ring data from VM, be extra cautious. 
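 *
 * Outline: walk the avail ring from last_avail, linearize each descriptor
 * chain into a bounce buffer (bounded by IP_MAXPACKET + ETHER_HDR_LEN),
 * optionally filter on source MAC or answer DHCP locally, then write the
 * frame to the tap fd and post a used-ring entry.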
1421 */ 1422 int 1423 vionet_notify_tx(struct vionet_dev *dev) 1424 { 1425 uint64_t q_gpa; 1426 uint32_t vr_sz; 1427 uint16_t idx, pkt_desc_idx, hdr_desc_idx, dxx, cnt; 1428 size_t pktsz; 1429 ssize_t dhcpsz; 1430 int ret, num_enq, ofs, spc; 1431 char *vr, *pkt, *dhcppkt; 1432 struct vring_desc *desc, *pkt_desc, *hdr_desc; 1433 struct vring_avail *avail; 1434 struct vring_used *used; 1435 struct ether_header *eh; 1436 1437 dhcpsz = 0; 1438 vr = pkt = dhcppkt = NULL; 1439 ret = spc = 0; 1440 1441 vr_sz = vring_size(VIONET_QUEUE_SIZE); 1442 q_gpa = dev->vq[TXQ].qa; 1443 q_gpa = q_gpa * VIRTIO_PAGE_SIZE; 1444 1445 vr = calloc(1, vr_sz); 1446 if (vr == NULL) { 1447 log_warn("calloc error getting vionet ring"); 1448 goto out; 1449 } 1450 1451 if (read_mem(q_gpa, vr, vr_sz)) { 1452 log_warnx("error reading gpa 0x%llx", q_gpa); 1453 goto out; 1454 } 1455 1456 /* Compute offsets in ring of descriptors, avail ring, and used ring */ 1457 desc = (struct vring_desc *)(vr); 1458 avail = (struct vring_avail *)(vr + dev->vq[TXQ].vq_availoffset); 1459 used = (struct vring_used *)(vr + dev->vq[TXQ].vq_usedoffset); 1460 1461 num_enq = 0; 1462 1463 idx = dev->vq[TXQ].last_avail & VIONET_QUEUE_MASK; 1464 1465 if ((avail->idx & VIONET_QUEUE_MASK) == idx) { 1466 log_warnx("vionet tx queue notify - nothing to do?"); 1467 goto out; 1468 } 1469 1470 while ((avail->idx & VIONET_QUEUE_MASK) != idx) { 1471 hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK; 1472 hdr_desc = &desc[hdr_desc_idx]; 1473 pktsz = 0; 1474 1475 cnt = 0; 1476 dxx = hdr_desc_idx; 1477 do { 1478 pktsz += desc[dxx].len; 1479 dxx = desc[dxx].next; 1480 1481 /* 1482 * Virtio 1.0, cs04, section 2.4.5: 1483 * "The number of descriptors in the table is defined 1484 * by the queue size for this virtqueue: this is the 1485 * maximum possible descriptor chain length." 
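 * A chain that exceeds the queue size therefore loops back on itself or
 * is corrupt, so treat it as invalid and stop processing.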
1486 */ 1487 if (++cnt >= VIONET_QUEUE_SIZE) { 1488 log_warnx("%s: descriptor table invalid", 1489 __func__); 1490 goto out; 1491 } 1492 } while (desc[dxx].flags & VRING_DESC_F_NEXT); 1493 1494 pktsz += desc[dxx].len; 1495 1496 /* Remove virtio header descriptor len */ 1497 pktsz -= hdr_desc->len; 1498 1499 /* Only allow buffer len < max IP packet + Ethernet header */ 1500 if (pktsz > IP_MAXPACKET + ETHER_HDR_LEN) { 1501 log_warnx("%s: invalid packet size %lu", __func__, 1502 pktsz); 1503 goto out; 1504 } 1505 pkt = malloc(pktsz); 1506 if (pkt == NULL) { 1507 log_warn("malloc error alloc packet buf"); 1508 goto out; 1509 } 1510 1511 ofs = 0; 1512 pkt_desc_idx = hdr_desc->next & VIONET_QUEUE_MASK; 1513 pkt_desc = &desc[pkt_desc_idx]; 1514 1515 while (pkt_desc->flags & VRING_DESC_F_NEXT) { 1516 /* must be not writable */ 1517 if (pkt_desc->flags & VRING_DESC_F_WRITE) { 1518 log_warnx("unexpected writable tx desc " 1519 "%d", pkt_desc_idx); 1520 goto out; 1521 } 1522 1523 /* Read packet from descriptor ring */ 1524 if (read_mem(pkt_desc->addr, pkt + ofs, 1525 pkt_desc->len)) { 1526 log_warnx("vionet: packet read_mem error " 1527 "@ 0x%llx", pkt_desc->addr); 1528 goto out; 1529 } 1530 1531 ofs += pkt_desc->len; 1532 pkt_desc_idx = pkt_desc->next & VIONET_QUEUE_MASK; 1533 pkt_desc = &desc[pkt_desc_idx]; 1534 } 1535 1536 /* Now handle tail descriptor - must be not writable */ 1537 if (pkt_desc->flags & VRING_DESC_F_WRITE) { 1538 log_warnx("unexpected writable tx descriptor %d", 1539 pkt_desc_idx); 1540 goto out; 1541 } 1542 1543 /* Read packet from descriptor ring */ 1544 if (read_mem(pkt_desc->addr, pkt + ofs, 1545 pkt_desc->len)) { 1546 log_warnx("vionet: packet read_mem error @ " 1547 "0x%llx", pkt_desc->addr); 1548 goto out; 1549 } 1550 1551 /* reject other source addresses */ 1552 if (dev->lockedmac && pktsz >= ETHER_HDR_LEN && 1553 (eh = (struct ether_header *)pkt) && 1554 memcmp(eh->ether_shost, dev->mac, 1555 sizeof(eh->ether_shost)) != 0) 1556 log_debug("vionet: wrong source address %s for vm %d", 1557 ether_ntoa((struct ether_addr *) 1558 eh->ether_shost), dev->vm_id); 1559 else if (dev->local && 1560 (dhcpsz = dhcp_request(dev, pkt, pktsz, &dhcppkt)) != -1) { 1561 log_debug("vionet: dhcp request," 1562 " local response size %zd", dhcpsz); 1563 1564 /* XXX signed vs unsigned here, funky cast */ 1565 } else if (write(dev->fd, pkt, pktsz) != (int)pktsz) { 1566 log_warnx("vionet: tx failed writing to tap: " 1567 "%d", errno); 1568 goto out; 1569 } 1570 1571 ret = 1; 1572 dev->cfg.isr_status = 1; 1573 used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_desc_idx; 1574 used->ring[used->idx & VIONET_QUEUE_MASK].len = hdr_desc->len; 1575 used->idx++; 1576 1577 dev->vq[TXQ].last_avail++; 1578 num_enq++; 1579 1580 idx = dev->vq[TXQ].last_avail & VIONET_QUEUE_MASK; 1581 1582 free(pkt); 1583 pkt = NULL; 1584 } 1585 1586 if (write_mem(q_gpa, vr, vr_sz)) { 1587 log_warnx("vionet: tx error writing vio ring"); 1588 } 1589 1590 if (dhcpsz > 0) { 1591 if (vionet_enq_rx(dev, dhcppkt, dhcpsz, &spc)) 1592 ret = 1; 1593 } 1594 1595 out: 1596 free(vr); 1597 free(pkt); 1598 free(dhcppkt); 1599 1600 return (ret); 1601 } 1602 1603 int 1604 vmmci_ctl(unsigned int cmd) 1605 { 1606 struct timeval tv = { 0, 0 }; 1607 1608 if ((vmmci.cfg.device_status & 1609 VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) == 0) 1610 return (-1); 1611 1612 if (cmd == vmmci.cmd) 1613 return (0); 1614 1615 switch (cmd) { 1616 case VMMCI_NONE: 1617 break; 1618 case VMMCI_SHUTDOWN: 1619 case VMMCI_REBOOT: 1620 /* Update command */ 1621 vmmci.cmd = 
cmd; 1622 1623 /* 1624 * vmm VMs do not support powerdown, send a reboot request 1625 * instead and turn it off after the triple fault. 1626 */ 1627 if (cmd == VMMCI_SHUTDOWN) 1628 cmd = VMMCI_REBOOT; 1629 1630 /* Trigger interrupt */ 1631 vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE; 1632 vcpu_assert_pic_irq(vmmci.vm_id, 0, vmmci.irq); 1633 1634 /* Add ACK timeout */ 1635 tv.tv_sec = VMMCI_TIMEOUT; 1636 evtimer_add(&vmmci.timeout, &tv); 1637 break; 1638 case VMMCI_SYNCRTC: 1639 if (vmmci.cfg.guest_feature & VMMCI_F_SYNCRTC) { 1640 /* RTC updated, request guest VM resync of its RTC */ 1641 vmmci.cmd = cmd; 1642 1643 vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE; 1644 vcpu_assert_pic_irq(vmmci.vm_id, 0, vmmci.irq); 1645 } else { 1646 log_debug("%s: RTC sync skipped (guest does not " 1647 "support RTC sync)\n", __func__); 1648 } 1649 break; 1650 default: 1651 fatalx("invalid vmmci command: %d", cmd); 1652 } 1653 1654 return (0); 1655 } 1656 1657 void 1658 vmmci_ack(unsigned int cmd) 1659 { 1660 struct timeval tv = { 0, 0 }; 1661 1662 switch (cmd) { 1663 case VMMCI_NONE: 1664 break; 1665 case VMMCI_SHUTDOWN: 1666 /* 1667 * The shutdown was requested by the VM if we don't have 1668 * a pending shutdown request. In this case add a short 1669 * timeout to give the VM a chance to reboot before the 1670 * timer is expired. 1671 */ 1672 if (vmmci.cmd == 0) { 1673 log_debug("%s: vm %u requested shutdown", __func__, 1674 vmmci.vm_id); 1675 tv.tv_sec = VMMCI_TIMEOUT; 1676 evtimer_add(&vmmci.timeout, &tv); 1677 return; 1678 } 1679 /* FALLTHROUGH */ 1680 case VMMCI_REBOOT: 1681 /* 1682 * If the VM acknowleged our shutdown request, give it 1683 * enough time to shutdown or reboot gracefully. This 1684 * might take a considerable amount of time (running 1685 * rc.shutdown on the VM), so increase the timeout before 1686 * killing it forcefully. 1687 */ 1688 if (cmd == vmmci.cmd && 1689 evtimer_pending(&vmmci.timeout, NULL)) { 1690 log_debug("%s: vm %u acknowledged shutdown request", 1691 __func__, vmmci.vm_id); 1692 tv.tv_sec = VMMCI_SHUTDOWN_TIMEOUT; 1693 evtimer_add(&vmmci.timeout, &tv); 1694 } 1695 break; 1696 case VMMCI_SYNCRTC: 1697 log_debug("%s: vm %u acknowledged RTC sync request", 1698 __func__, vmmci.vm_id); 1699 vmmci.cmd = VMMCI_NONE; 1700 break; 1701 default: 1702 log_warnx("%s: illegal request %u", __func__, cmd); 1703 break; 1704 } 1705 } 1706 1707 void 1708 vmmci_timeout(int fd, short type, void *arg) 1709 { 1710 log_debug("%s: vm %u shutdown", __progname, vmmci.vm_id); 1711 vm_shutdown(vmmci.cmd == VMMCI_REBOOT ? 
VMMCI_REBOOT : VMMCI_SHUTDOWN); 1712 } 1713 1714 int 1715 vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, 1716 void *unused, uint8_t sz) 1717 { 1718 *intr = 0xFF; 1719 1720 if (dir == 0) { 1721 switch (reg) { 1722 case VIRTIO_CONFIG_DEVICE_FEATURES: 1723 case VIRTIO_CONFIG_QUEUE_SIZE: 1724 case VIRTIO_CONFIG_ISR_STATUS: 1725 log_warnx("%s: illegal write %x to %s", 1726 __progname, *data, virtio_reg_name(reg)); 1727 break; 1728 case VIRTIO_CONFIG_GUEST_FEATURES: 1729 vmmci.cfg.guest_feature = *data; 1730 break; 1731 case VIRTIO_CONFIG_QUEUE_ADDRESS: 1732 vmmci.cfg.queue_address = *data; 1733 break; 1734 case VIRTIO_CONFIG_QUEUE_SELECT: 1735 vmmci.cfg.queue_select = *data; 1736 break; 1737 case VIRTIO_CONFIG_QUEUE_NOTIFY: 1738 vmmci.cfg.queue_notify = *data; 1739 break; 1740 case VIRTIO_CONFIG_DEVICE_STATUS: 1741 vmmci.cfg.device_status = *data; 1742 break; 1743 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: 1744 vmmci_ack(*data); 1745 break; 1746 } 1747 } else { 1748 switch (reg) { 1749 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: 1750 *data = vmmci.cmd; 1751 break; 1752 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: 1753 /* Update time once when reading the first register */ 1754 gettimeofday(&vmmci.time, NULL); 1755 *data = (uint64_t)vmmci.time.tv_sec; 1756 break; 1757 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: 1758 *data = (uint64_t)vmmci.time.tv_sec << 32; 1759 break; 1760 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: 1761 *data = (uint64_t)vmmci.time.tv_usec; 1762 break; 1763 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: 1764 *data = (uint64_t)vmmci.time.tv_usec << 32; 1765 break; 1766 case VIRTIO_CONFIG_DEVICE_FEATURES: 1767 *data = vmmci.cfg.device_feature; 1768 break; 1769 case VIRTIO_CONFIG_GUEST_FEATURES: 1770 *data = vmmci.cfg.guest_feature; 1771 break; 1772 case VIRTIO_CONFIG_QUEUE_ADDRESS: 1773 *data = vmmci.cfg.queue_address; 1774 break; 1775 case VIRTIO_CONFIG_QUEUE_SIZE: 1776 *data = vmmci.cfg.queue_size; 1777 break; 1778 case VIRTIO_CONFIG_QUEUE_SELECT: 1779 *data = vmmci.cfg.queue_select; 1780 break; 1781 case VIRTIO_CONFIG_QUEUE_NOTIFY: 1782 *data = vmmci.cfg.queue_notify; 1783 break; 1784 case VIRTIO_CONFIG_DEVICE_STATUS: 1785 *data = vmmci.cfg.device_status; 1786 break; 1787 case VIRTIO_CONFIG_ISR_STATUS: 1788 *data = vmmci.cfg.isr_status; 1789 vmmci.cfg.isr_status = 0; 1790 vcpu_deassert_pic_irq(vmmci.vm_id, 0, vmmci.irq); 1791 break; 1792 } 1793 } 1794 return (0); 1795 } 1796 1797 int 1798 virtio_get_base(int fd, char *path, size_t npath, int type, const char *dpath) 1799 { 1800 switch (type) { 1801 case VMDF_RAW: 1802 return 0; 1803 case VMDF_QCOW2: 1804 return virtio_qcow2_get_base(fd, path, npath, dpath); 1805 } 1806 log_warnx("%s: invalid disk format", __func__); 1807 return -1; 1808 } 1809 1810 /* 1811 * Initializes a struct virtio_backing using the list of fds. 1812 */ 1813 static int 1814 virtio_init_disk(struct virtio_backing *file, off_t *sz, 1815 int *fd, size_t nfd, int type) 1816 { 1817 /* 1818 * probe disk types in order of preference, first one to work wins. 1819 * TODO: provide a way of specifying the type and options. 
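 * (At the moment the format is selected by the caller via the type
 * argument, VMDF_RAW or VMDF_QCOW2, rather than probed.)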
1820 */ 1821 switch (type) { 1822 case VMDF_RAW: 1823 return virtio_raw_init(file, sz, fd, nfd); 1824 case VMDF_QCOW2: 1825 return virtio_qcow2_init(file, sz, fd, nfd); 1826 } 1827 log_warnx("%s: invalid disk format", __func__); 1828 return -1; 1829 } 1830 1831 void 1832 virtio_init(struct vmd_vm *vm, int child_cdrom, 1833 int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps) 1834 { 1835 struct vmop_create_params *vmc = &vm->vm_params; 1836 struct vm_create_params *vcp = &vmc->vmc_params; 1837 uint8_t id; 1838 uint8_t i; 1839 int ret; 1840 1841 /* Virtio entropy device */ 1842 if (pci_add_device(&id, PCI_VENDOR_QUMRANET, 1843 PCI_PRODUCT_QUMRANET_VIO_RNG, PCI_CLASS_SYSTEM, 1844 PCI_SUBCLASS_SYSTEM_MISC, 1845 PCI_VENDOR_OPENBSD, 1846 PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL)) { 1847 log_warnx("%s: can't add PCI virtio rng device", 1848 __progname); 1849 return; 1850 } 1851 1852 if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_rnd_io, NULL)) { 1853 log_warnx("%s: can't add bar for virtio rng device", 1854 __progname); 1855 return; 1856 } 1857 1858 memset(&viornd, 0, sizeof(viornd)); 1859 viornd.vq[0].qs = VIORND_QUEUE_SIZE; 1860 viornd.vq[0].vq_availoffset = sizeof(struct vring_desc) * 1861 VIORND_QUEUE_SIZE; 1862 viornd.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN( 1863 sizeof(struct vring_desc) * VIORND_QUEUE_SIZE 1864 + sizeof(uint16_t) * (2 + VIORND_QUEUE_SIZE)); 1865 viornd.pci_id = id; 1866 viornd.irq = pci_get_dev_irq(id); 1867 viornd.vm_id = vcp->vcp_id; 1868 1869 if (vcp->vcp_nnics > 0) { 1870 vionet = calloc(vcp->vcp_nnics, sizeof(struct vionet_dev)); 1871 if (vionet == NULL) { 1872 log_warn("%s: calloc failure allocating vionets", 1873 __progname); 1874 return; 1875 } 1876 1877 nr_vionet = vcp->vcp_nnics; 1878 /* Virtio network */ 1879 for (i = 0; i < vcp->vcp_nnics; i++) { 1880 if (pci_add_device(&id, PCI_VENDOR_QUMRANET, 1881 PCI_PRODUCT_QUMRANET_VIO_NET, PCI_CLASS_SYSTEM, 1882 PCI_SUBCLASS_SYSTEM_MISC, 1883 PCI_VENDOR_OPENBSD, 1884 PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL)) { 1885 log_warnx("%s: can't add PCI virtio net device", 1886 __progname); 1887 return; 1888 } 1889 1890 if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_net_io, 1891 &vionet[i])) { 1892 log_warnx("%s: can't add bar for virtio net " 1893 "device", __progname); 1894 return; 1895 } 1896 1897 ret = pthread_mutex_init(&vionet[i].mutex, NULL); 1898 if (ret) { 1899 errno = ret; 1900 log_warn("%s: could not initialize mutex " 1901 "for vionet device", __progname); 1902 return; 1903 } 1904 1905 vionet[i].vq[RXQ].qs = VIONET_QUEUE_SIZE; 1906 vionet[i].vq[RXQ].vq_availoffset = 1907 sizeof(struct vring_desc) * VIONET_QUEUE_SIZE; 1908 vionet[i].vq[RXQ].vq_usedoffset = VIRTQUEUE_ALIGN( 1909 sizeof(struct vring_desc) * VIONET_QUEUE_SIZE 1910 + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE)); 1911 vionet[i].vq[RXQ].last_avail = 0; 1912 vionet[i].vq[TXQ].qs = VIONET_QUEUE_SIZE; 1913 vionet[i].vq[TXQ].vq_availoffset = 1914 sizeof(struct vring_desc) * VIONET_QUEUE_SIZE; 1915 vionet[i].vq[TXQ].vq_usedoffset = VIRTQUEUE_ALIGN( 1916 sizeof(struct vring_desc) * VIONET_QUEUE_SIZE 1917 + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE)); 1918 vionet[i].vq[TXQ].last_avail = 0; 1919 vionet[i].vq[TXQ].notified_avail = 0; 1920 vionet[i].fd = child_taps[i]; 1921 vionet[i].rx_pending = 0; 1922 vionet[i].vm_id = vcp->vcp_id; 1923 vionet[i].vm_vmid = vm->vm_vmid; 1924 vionet[i].irq = pci_get_dev_irq(id); 1925 1926 event_set(&vionet[i].event, vionet[i].fd, 1927 EV_READ | EV_PERSIST, vionet_rx_event, &vionet[i]); 1928 if (event_add(&vionet[i].event, NULL)) { 1929 
log_warn("could not initialize vionet event " 1930 "handler"); 1931 return; 1932 } 1933 1934 /* MAC address has been assigned by the parent */ 1935 memcpy(&vionet[i].mac, &vcp->vcp_macs[i], 6); 1936 vionet[i].cfg.device_feature = VIRTIO_NET_F_MAC; 1937 1938 vionet[i].lockedmac = 1939 vmc->vmc_ifflags[i] & VMIFF_LOCKED ? 1 : 0; 1940 vionet[i].local = 1941 vmc->vmc_ifflags[i] & VMIFF_LOCAL ? 1 : 0; 1942 if (i == 0 && vmc->vmc_bootdevice & VMBOOTDEV_NET) 1943 vionet[i].pxeboot = 1; 1944 vionet[i].idx = i; 1945 vionet[i].pci_id = id; 1946 1947 log_debug("%s: vm \"%s\" vio%u lladdr %s%s%s%s", 1948 __func__, vcp->vcp_name, i, 1949 ether_ntoa((void *)vionet[i].mac), 1950 vionet[i].lockedmac ? ", locked" : "", 1951 vionet[i].local ? ", local" : "", 1952 vionet[i].pxeboot ? ", pxeboot" : ""); 1953 } 1954 } 1955 1956 if (vcp->vcp_ndisks > 0) { 1957 nr_vioblk = vcp->vcp_ndisks; 1958 vioblk = calloc(vcp->vcp_ndisks, sizeof(struct vioblk_dev)); 1959 if (vioblk == NULL) { 1960 log_warn("%s: calloc failure allocating vioblks", 1961 __progname); 1962 return; 1963 } 1964 1965 /* One virtio block device for each disk defined in vcp */ 1966 for (i = 0; i < vcp->vcp_ndisks; i++) { 1967 if (pci_add_device(&id, PCI_VENDOR_QUMRANET, 1968 PCI_PRODUCT_QUMRANET_VIO_BLOCK, 1969 PCI_CLASS_MASS_STORAGE, 1970 PCI_SUBCLASS_MASS_STORAGE_SCSI, 1971 PCI_VENDOR_OPENBSD, 1972 PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL)) { 1973 log_warnx("%s: can't add PCI virtio block " 1974 "device", __progname); 1975 return; 1976 } 1977 if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_blk_io, 1978 &vioblk[i])) { 1979 log_warnx("%s: can't add bar for virtio block " 1980 "device", __progname); 1981 return; 1982 } 1983 vioblk[i].vq[0].qs = VIOBLK_QUEUE_SIZE; 1984 vioblk[i].vq[0].vq_availoffset = 1985 sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE; 1986 vioblk[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN( 1987 sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE 1988 + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE)); 1989 vioblk[i].vq[0].last_avail = 0; 1990 vioblk[i].cfg.device_feature = VIRTIO_BLK_F_SIZE_MAX; 1991 vioblk[i].max_xfer = 1048576; 1992 vioblk[i].pci_id = id; 1993 vioblk[i].vm_id = vcp->vcp_id; 1994 vioblk[i].irq = pci_get_dev_irq(id); 1995 if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz, 1996 child_disks[i], vmc->vmc_diskbases[i], 1997 vmc->vmc_disktypes[i]) == -1) { 1998 log_warnx("%s: unable to determine disk format", 1999 __func__); 2000 return; 2001 } 2002 vioblk[i].sz /= 512; 2003 } 2004 } 2005 2006 /* vioscsi cdrom */ 2007 if (strlen(vcp->vcp_cdrom)) { 2008 vioscsi = calloc(1, sizeof(struct vioscsi_dev)); 2009 if (vioscsi == NULL) { 2010 log_warn("%s: calloc failure allocating vioscsi", 2011 __progname); 2012 return; 2013 } 2014 2015 if (pci_add_device(&id, PCI_VENDOR_QUMRANET, 2016 PCI_PRODUCT_QUMRANET_VIO_SCSI, 2017 PCI_CLASS_MASS_STORAGE, 2018 PCI_SUBCLASS_MASS_STORAGE_SCSI, 2019 PCI_VENDOR_OPENBSD, 2020 PCI_PRODUCT_VIRTIO_SCSI, 1, NULL)) { 2021 log_warnx("%s: can't add PCI vioscsi device", 2022 __progname); 2023 return; 2024 } 2025 2026 if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vioscsi_io, vioscsi)) { 2027 log_warnx("%s: can't add bar for vioscsi device", 2028 __progname); 2029 return; 2030 } 2031 2032 for ( i = 0; i < VIRTIO_MAX_QUEUES; i++) { 2033 vioscsi->vq[i].qs = VIOSCSI_QUEUE_SIZE; 2034 vioscsi->vq[i].vq_availoffset = 2035 sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE; 2036 vioscsi->vq[i].vq_usedoffset = VIRTQUEUE_ALIGN( 2037 sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE 2038 + sizeof(uint16_t) * (2 + VIOSCSI_QUEUE_SIZE)); 2039 
vioscsi->vq[i].last_avail = 0; 2040 } 2041 if (virtio_init_disk(&vioscsi->file, &vioscsi->sz, 2042 &child_cdrom, 1, VMDF_RAW) == -1) { 2043 log_warnx("%s: unable to determine iso format", 2044 __func__); 2045 return; 2046 } 2047 vioscsi->locked = 0; 2048 vioscsi->lba = 0; 2049 vioscsi->n_blocks = vioscsi->sz >> 11; /* num of 2048 blocks in file */ 2050 vioscsi->max_xfer = VIOSCSI_BLOCK_SIZE_CDROM; 2051 vioscsi->pci_id = id; 2052 vioscsi->vm_id = vcp->vcp_id; 2053 vioscsi->irq = pci_get_dev_irq(id); 2054 } 2055 2056 /* virtio control device */ 2057 if (pci_add_device(&id, PCI_VENDOR_OPENBSD, 2058 PCI_PRODUCT_OPENBSD_CONTROL, 2059 PCI_CLASS_COMMUNICATIONS, 2060 PCI_SUBCLASS_COMMUNICATIONS_MISC, 2061 PCI_VENDOR_OPENBSD, 2062 PCI_PRODUCT_VIRTIO_VMMCI, 1, NULL)) { 2063 log_warnx("%s: can't add PCI vmm control device", 2064 __progname); 2065 return; 2066 } 2067 2068 if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vmmci_io, NULL)) { 2069 log_warnx("%s: can't add bar for vmm control device", 2070 __progname); 2071 return; 2072 } 2073 2074 memset(&vmmci, 0, sizeof(vmmci)); 2075 vmmci.cfg.device_feature = VMMCI_F_TIMESYNC | VMMCI_F_ACK | 2076 VMMCI_F_SYNCRTC; 2077 vmmci.vm_id = vcp->vcp_id; 2078 vmmci.irq = pci_get_dev_irq(id); 2079 vmmci.pci_id = id; 2080 2081 evtimer_set(&vmmci.timeout, vmmci_timeout, NULL); 2082 } 2083 2084 /* 2085 * vionet_set_hostmac 2086 * 2087 * Sets the hardware address for the host-side tap(4) on a vionet_dev. 2088 * 2089 * This should only be called from the event-loop thread 2090 * 2091 * vm: pointer to the current vmd_vm instance 2092 * idx: index into the array of vionet_dev's for the target vionet_dev 2093 * addr: ethernet address to set 2094 */ 2095 void 2096 vionet_set_hostmac(struct vmd_vm *vm, unsigned int idx, uint8_t *addr) 2097 { 2098 struct vmop_create_params *vmc = &vm->vm_params; 2099 struct vm_create_params *vcp = &vmc->vmc_params; 2100 struct vionet_dev *dev; 2101 2102 if (idx > vcp->vcp_nnics) 2103 fatalx("vionet_set_hostmac"); 2104 2105 dev = &vionet[idx]; 2106 memcpy(dev->hostmac, addr, sizeof(dev->hostmac)); 2107 } 2108 2109 void 2110 virtio_shutdown(struct vmd_vm *vm) 2111 { 2112 int i; 2113 2114 /* ensure that our disks are synced */ 2115 if (vioscsi != NULL) 2116 vioscsi->file.close(vioscsi->file.p, 0); 2117 2118 for (i = 0; i < nr_vioblk; i++) 2119 vioblk[i].file.close(vioblk[i].file.p, 0); 2120 } 2121 2122 int 2123 vmmci_restore(int fd, uint32_t vm_id) 2124 { 2125 log_debug("%s: receiving vmmci", __func__); 2126 if (atomicio(read, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) { 2127 log_warnx("%s: error reading vmmci from fd", __func__); 2128 return (-1); 2129 } 2130 2131 if (pci_set_bar_fn(vmmci.pci_id, 0, vmmci_io, NULL)) { 2132 log_warnx("%s: can't set bar fn for vmm control device", 2133 __progname); 2134 return (-1); 2135 } 2136 vmmci.vm_id = vm_id; 2137 vmmci.irq = pci_get_dev_irq(vmmci.pci_id); 2138 memset(&vmmci.timeout, 0, sizeof(struct event)); 2139 evtimer_set(&vmmci.timeout, vmmci_timeout, NULL); 2140 return (0); 2141 } 2142 2143 int 2144 viornd_restore(int fd, struct vm_create_params *vcp) 2145 { 2146 log_debug("%s: receiving viornd", __func__); 2147 if (atomicio(read, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) { 2148 log_warnx("%s: error reading viornd from fd", __func__); 2149 return (-1); 2150 } 2151 if (pci_set_bar_fn(viornd.pci_id, 0, virtio_rnd_io, NULL)) { 2152 log_warnx("%s: can't set bar fn for virtio rng device", 2153 __progname); 2154 return (-1); 2155 } 2156 viornd.vm_id = vcp->vcp_id; 2157 viornd.irq = 
	return (0);
}

int
vionet_restore(int fd, struct vmd_vm *vm, int *child_taps)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	uint8_t i;
	int ret;

	nr_vionet = vcp->vcp_nnics;
	if (vcp->vcp_nnics > 0) {
		vionet = calloc(vcp->vcp_nnics, sizeof(struct vionet_dev));
		if (vionet == NULL) {
			log_warn("%s: calloc failure allocating vionets",
			    __progname);
			return (-1);
		}
		log_debug("%s: receiving vionet", __func__);
		if (atomicio(read, fd, vionet,
		    vcp->vcp_nnics * sizeof(struct vionet_dev)) !=
		    vcp->vcp_nnics * sizeof(struct vionet_dev)) {
			log_warnx("%s: error reading vionet from fd",
			    __func__);
			return (-1);
		}

		/* Virtio network */
		for (i = 0; i < vcp->vcp_nnics; i++) {
			if (pci_set_bar_fn(vionet[i].pci_id, 0, virtio_net_io,
			    &vionet[i])) {
				log_warnx("%s: can't set bar fn for virtio net "
				    "device", __progname);
				return (-1);
			}

			memset(&vionet[i].mutex, 0, sizeof(pthread_mutex_t));
			ret = pthread_mutex_init(&vionet[i].mutex, NULL);

			if (ret) {
				errno = ret;
				log_warn("%s: could not initialize mutex "
				    "for vionet device", __progname);
				return (-1);
			}
			vionet[i].fd = child_taps[i];
			vionet[i].rx_pending = 0;
			vionet[i].vm_id = vcp->vcp_id;
			vionet[i].vm_vmid = vm->vm_vmid;
			vionet[i].irq = pci_get_dev_irq(vionet[i].pci_id);

			memset(&vionet[i].event, 0, sizeof(struct event));
			event_set(&vionet[i].event, vionet[i].fd,
			    EV_READ | EV_PERSIST, vionet_rx_event, &vionet[i]);
		}
	}
	return (0);
}

int
vioblk_restore(int fd, struct vmop_create_params *vmc,
    int child_disks[][VM_MAX_BASE_PER_DISK])
{
	struct vm_create_params *vcp = &vmc->vmc_params;
	uint8_t i;

	nr_vioblk = vcp->vcp_ndisks;
	vioblk = calloc(vcp->vcp_ndisks, sizeof(struct vioblk_dev));
	if (vioblk == NULL) {
		log_warn("%s: calloc failure allocating vioblks", __progname);
		return (-1);
	}
	log_debug("%s: receiving vioblk", __func__);
	if (atomicio(read, fd, vioblk,
	    nr_vioblk * sizeof(struct vioblk_dev)) !=
	    nr_vioblk * sizeof(struct vioblk_dev)) {
		log_warnx("%s: error reading vioblk from fd", __func__);
		return (-1);
	}
	for (i = 0; i < vcp->vcp_ndisks; i++) {
		if (pci_set_bar_fn(vioblk[i].pci_id, 0, virtio_blk_io,
		    &vioblk[i])) {
			log_warnx("%s: can't set bar fn for virtio block "
			    "device", __progname);
			return (-1);
		}
		if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz,
		    child_disks[i], vmc->vmc_diskbases[i],
		    vmc->vmc_disktypes[i]) == -1) {
			log_warnx("%s: unable to determine disk format",
			    __func__);
			return (-1);
		}
		vioblk[i].vm_id = vcp->vcp_id;
		vioblk[i].irq = pci_get_dev_irq(vioblk[i].pci_id);
	}
	return (0);
}

int
vioscsi_restore(int fd, struct vm_create_params *vcp, int child_cdrom)
{
	if (!strlen(vcp->vcp_cdrom))
		return (0);

	vioscsi = calloc(1, sizeof(struct vioscsi_dev));
	if (vioscsi == NULL) {
		log_warn("%s: calloc failure allocating vioscsi", __progname);
		return (-1);
	}

	log_debug("%s: receiving vioscsi", __func__);

	if (atomicio(read, fd, vioscsi, sizeof(struct vioscsi_dev)) !=
	    sizeof(struct vioscsi_dev)) {
		log_warnx("%s: error reading vioscsi from fd", __func__);
		return (-1);
	}

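	/*
	 * Only the raw structure contents were restored above; host-side
	 * state that cannot be serialized (the BAR I/O handler, the open
	 * cdrom image and the assigned interrupt) is re-established below.
	 */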
	if (pci_set_bar_fn(vioscsi->pci_id, 0, vioscsi_io, vioscsi)) {
		log_warnx("%s: can't set bar fn for vioscsi device",
		    __progname);
		return (-1);
	}

	if (virtio_init_disk(&vioscsi->file, &vioscsi->sz, &child_cdrom, 1,
	    VMDF_RAW) == -1) {
		log_warnx("%s: unable to determine iso format", __func__);
		return (-1);
	}
	vioscsi->vm_id = vcp->vcp_id;
	vioscsi->irq = pci_get_dev_irq(vioscsi->pci_id);

	return (0);
}

int
virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom,
    int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	int ret;

	if ((ret = viornd_restore(fd, vcp)) == -1)
		return ret;

	if ((ret = vioblk_restore(fd, vmc, child_disks)) == -1)
		return ret;

	if ((ret = vioscsi_restore(fd, vcp, child_cdrom)) == -1)
		return ret;

	if ((ret = vionet_restore(fd, vm, child_taps)) == -1)
		return ret;

	if ((ret = vmmci_restore(fd, vcp->vcp_id)) == -1)
		return ret;

	return (0);
}

int
viornd_dump(int fd)
{
	log_debug("%s: sending viornd", __func__);
	if (atomicio(vwrite, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
		log_warnx("%s: error writing viornd to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vmmci_dump(int fd)
{
	log_debug("%s: sending vmmci", __func__);
	if (atomicio(vwrite, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
		log_warnx("%s: error writing vmmci to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vionet_dump(int fd)
{
	log_debug("%s: sending vionet", __func__);
	if (atomicio(vwrite, fd, vionet,
	    nr_vionet * sizeof(struct vionet_dev)) !=
	    nr_vionet * sizeof(struct vionet_dev)) {
		log_warnx("%s: error writing vionet to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vioblk_dump(int fd)
{
	log_debug("%s: sending vioblk", __func__);
	if (atomicio(vwrite, fd, vioblk,
	    nr_vioblk * sizeof(struct vioblk_dev)) !=
	    nr_vioblk * sizeof(struct vioblk_dev)) {
		log_warnx("%s: error writing vioblk to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vioscsi_dump(int fd)
{
	if (vioscsi == NULL)
		return (0);

	log_debug("%s: sending vioscsi", __func__);
	if (atomicio(vwrite, fd, vioscsi, sizeof(struct vioscsi_dev)) !=
	    sizeof(struct vioscsi_dev)) {
		log_warnx("%s: error writing vioscsi to fd", __func__);
		return (-1);
	}
	return (0);
}

int
virtio_dump(int fd)
{
	int ret;

	if ((ret = viornd_dump(fd)) == -1)
		return ret;

	if ((ret = vioblk_dump(fd)) == -1)
		return ret;

	if ((ret = vioscsi_dump(fd)) == -1)
		return ret;

	if ((ret = vionet_dump(fd)) == -1)
		return ret;

	if ((ret = vmmci_dump(fd)) == -1)
		return ret;

	return (0);
}

void
virtio_stop(struct vm_create_params *vcp)
{
	uint8_t i;
	for (i = 0; i < vcp->vcp_nnics; i++) {
		if (event_del(&vionet[i].event)) {
			log_warn("could not delete vionet event "
			    "handler");
			return;
		}
	}
}

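/*
 * Counterpart to virtio_stop() above: re-add the tap(4) read events for
 * every vionet device so receive processing resumes.
 */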
void
virtio_start(struct vm_create_params *vcp)
{
	uint8_t i;
	for (i = 0; i < vcp->vcp_nnics; i++) {
		if (event_add(&vionet[i].event, NULL)) {
			log_warn("could not add vionet event "
			    "handler");
			return;
		}
	}
}
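
/*
 * Illustrative sketch only (not part of vmd): virtio_stop() and
 * virtio_start() are meant to be used as a pair, for example to quiesce
 * guest receive traffic while the VM is otherwise paused.  The wrapper
 * name below is hypothetical; only the two calls it makes are defined
 * in this file.
 */
#if 0
static void
quiesce_vionet_example(struct vm_create_params *vcp)
{
	virtio_stop(vcp);	/* stop delivering tap(4) read events */
	/* ... VM remains paused here ... */
	virtio_start(vcp);	/* resume rx event delivery */
}
#endif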