1*7ccb23ddSdv /* $OpenBSD: virtio.c,v 1.123 2025/01/08 15:46:10 dv Exp $ */ 215caf263Sreyk 3f3c0184aSmlarkin /* 4f3c0184aSmlarkin * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> 5f3c0184aSmlarkin * 6f3c0184aSmlarkin * Permission to use, copy, modify, and distribute this software for any 7f3c0184aSmlarkin * purpose with or without fee is hereby granted, provided that the above 8f3c0184aSmlarkin * copyright notice and this permission notice appear in all copies. 9f3c0184aSmlarkin * 10f3c0184aSmlarkin * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11f3c0184aSmlarkin * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12f3c0184aSmlarkin * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13f3c0184aSmlarkin * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14f3c0184aSmlarkin * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15f3c0184aSmlarkin * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16f3c0184aSmlarkin * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17f3c0184aSmlarkin */ 18f3c0184aSmlarkin 1987302766Sderaadt #include <sys/param.h> /* PAGE_SIZE */ 202fca0731Sreyk #include <sys/socket.h> 213481ecdfSdv #include <sys/wait.h> 2287302766Sderaadt 2387302766Sderaadt #include <dev/pci/pcireg.h> 2487302766Sderaadt #include <dev/pci/pcidevs.h> 251350bd58Sreyk #include <dev/pv/virtioreg.h> 2611b9cb3bSsf #include <dev/pci/virtio_pcireg.h> 271350bd58Sreyk #include <dev/pv/vioblkreg.h> 28ba66f564Sdv #include <dev/vmm/vmm.h> 2987302766Sderaadt 302fca0731Sreyk #include <net/if.h> 312fca0731Sreyk #include <netinet/in.h> 322fca0731Sreyk #include <netinet/if_ether.h> 332fca0731Sreyk 34f3c0184aSmlarkin #include <errno.h> 35c396b7b1Sstefan #include <event.h> 36f3c0184aSmlarkin #include <stdlib.h> 37f3c0184aSmlarkin #include <string.h> 38f3c0184aSmlarkin #include <unistd.h> 3987302766Sderaadt 406eb4c859Sdv #include "atomicio.h" 41f3c0184aSmlarkin #include "pci.h" 426eb4c859Sdv #include "vioscsi.h" 436eb4c859Sdv #include "virtio.h" 44f3c0184aSmlarkin #include "vmd.h" 45f3c0184aSmlarkin 463481ecdfSdv extern struct vmd *env; 47f3c0184aSmlarkin extern char *__progname; 483481ecdfSdv 49f3c0184aSmlarkin struct viornd_dev viornd; 5095ab188fSccardenas struct vioscsi_dev *vioscsi; 51f84d5d33Sreyk struct vmmci_dev vmmci; 52f3c0184aSmlarkin 533481ecdfSdv /* Devices emulated in subprocesses are inserted into this list. */ 543481ecdfSdv SLIST_HEAD(virtio_dev_head, virtio_dev) virtio_devs; 55f3c0184aSmlarkin 56f3c0184aSmlarkin #define MAXPHYS (64 * 1024) /* max raw I/O transfer size */ 57f3c0184aSmlarkin 58f3c0184aSmlarkin #define VIRTIO_NET_F_MAC (1<<5) 59f3c0184aSmlarkin 60981cad08Sreyk #define VMMCI_F_TIMESYNC (1<<0) 613320a88dSreyk #define VMMCI_F_ACK (1<<1) 62e82d5294Smlarkin #define VMMCI_F_SYNCRTC (1<<2) 63981cad08Sreyk 640686b443Smlarkin #define RXQ 0 650686b443Smlarkin #define TXQ 1 660686b443Smlarkin 673481ecdfSdv static int virtio_dev_launch(struct vmd_vm *, struct virtio_dev *); 683481ecdfSdv static void virtio_dispatch_dev(int, short, void *); 693481ecdfSdv static int handle_dev_msg(struct viodev_msg *, struct virtio_dev *); 70b3bc6112Sdv static int virtio_dev_closefds(struct virtio_dev *); 71*7ccb23ddSdv static void vmmci_pipe_dispatch(int, short, void *); 72f3c0184aSmlarkin 733481ecdfSdv const char * 74f3c0184aSmlarkin virtio_reg_name(uint8_t reg) 75f3c0184aSmlarkin { 76f3c0184aSmlarkin switch (reg) { 77f3c0184aSmlarkin case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature"; 78f3c0184aSmlarkin case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature"; 790bd10b9fSdv case VIRTIO_CONFIG_QUEUE_PFN: return "queue address"; 80f3c0184aSmlarkin case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size"; 81f3c0184aSmlarkin case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select"; 82f3c0184aSmlarkin case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify"; 83f3c0184aSmlarkin case VIRTIO_CONFIG_DEVICE_STATUS: return "device status"; 84f3c0184aSmlarkin case VIRTIO_CONFIG_ISR_STATUS: return "isr status"; 853481ecdfSdv case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI...VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3: 863481ecdfSdv return "device config 0"; 873481ecdfSdv case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: 883481ecdfSdv case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5: 893481ecdfSdv return "device config 1"; 90f3c0184aSmlarkin case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: return "device config 2"; 91981cad08Sreyk case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: return "device config 3"; 92981cad08Sreyk case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: return "device config 4"; 93f3c0184aSmlarkin default: return "unknown"; 94f3c0184aSmlarkin } 95f3c0184aSmlarkin } 96f3c0184aSmlarkin 97f3c0184aSmlarkin uint32_t 98f3c0184aSmlarkin vring_size(uint32_t vq_size) 99f3c0184aSmlarkin { 100f3c0184aSmlarkin uint32_t allocsize1, allocsize2; 101f3c0184aSmlarkin 102f3c0184aSmlarkin /* allocsize1: descriptor table + avail ring + pad */ 103f3c0184aSmlarkin allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size 104f3c0184aSmlarkin + sizeof(uint16_t) * (2 + vq_size)); 105f3c0184aSmlarkin /* allocsize2: used ring + pad */ 106f3c0184aSmlarkin allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * 2 107f3c0184aSmlarkin + sizeof(struct vring_used_elem) * vq_size); 108f3c0184aSmlarkin 109f3c0184aSmlarkin return allocsize1 + allocsize2; 110f3c0184aSmlarkin } 111f3c0184aSmlarkin 112f3c0184aSmlarkin /* Update queue select */ 113f3c0184aSmlarkin void 114f3c0184aSmlarkin viornd_update_qs(void) 115f3c0184aSmlarkin { 1160bd10b9fSdv struct virtio_vq_info *vq_info; 1170bd10b9fSdv 118f3c0184aSmlarkin /* Invalid queue? */ 119169cf2a2Smlarkin if (viornd.cfg.queue_select > 0) { 120169cf2a2Smlarkin viornd.cfg.queue_size = 0; 121f3c0184aSmlarkin return; 122169cf2a2Smlarkin } 123f3c0184aSmlarkin 1240bd10b9fSdv vq_info = &viornd.vq[viornd.cfg.queue_select]; 1250bd10b9fSdv 1260bd10b9fSdv /* Update queue pfn/size based on queue select */ 1270bd10b9fSdv viornd.cfg.queue_pfn = vq_info->q_gpa >> 12; 1280bd10b9fSdv viornd.cfg.queue_size = vq_info->qs; 129f3c0184aSmlarkin } 130f3c0184aSmlarkin 131f3c0184aSmlarkin /* Update queue address */ 132f3c0184aSmlarkin void 133f3c0184aSmlarkin viornd_update_qa(void) 134f3c0184aSmlarkin { 1350bd10b9fSdv struct virtio_vq_info *vq_info; 1360bd10b9fSdv void *hva = NULL; 1370bd10b9fSdv 138f3c0184aSmlarkin /* Invalid queue? */ 139f3c0184aSmlarkin if (viornd.cfg.queue_select > 0) 140f3c0184aSmlarkin return; 141f3c0184aSmlarkin 1420bd10b9fSdv vq_info = &viornd.vq[viornd.cfg.queue_select]; 1430bd10b9fSdv vq_info->q_gpa = (uint64_t)viornd.cfg.queue_pfn * VIRTIO_PAGE_SIZE; 1440bd10b9fSdv 1450bd10b9fSdv hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIORND_QUEUE_SIZE)); 1460bd10b9fSdv if (hva == NULL) 1473481ecdfSdv fatalx("viornd_update_qa"); 1480bd10b9fSdv vq_info->q_hva = hva; 149f3c0184aSmlarkin } 150f3c0184aSmlarkin 151f3c0184aSmlarkin int 152f3c0184aSmlarkin viornd_notifyq(void) 153f3c0184aSmlarkin { 15492e4f4b6Smlarkin size_t sz; 155321f3ee3Sdv int dxx, ret; 156207349e7Smlarkin uint16_t aidx, uidx; 1570bd10b9fSdv char *vr, *rnd_data; 158f3c0184aSmlarkin struct vring_desc *desc; 159f3c0184aSmlarkin struct vring_avail *avail; 160f3c0184aSmlarkin struct vring_used *used; 1610bd10b9fSdv struct virtio_vq_info *vq_info; 162f3c0184aSmlarkin 163f3c0184aSmlarkin ret = 0; 164f3c0184aSmlarkin 165f3c0184aSmlarkin /* Invalid queue? */ 166f3c0184aSmlarkin if (viornd.cfg.queue_notify > 0) 167f3c0184aSmlarkin return (0); 168f3c0184aSmlarkin 1690bd10b9fSdv vq_info = &viornd.vq[viornd.cfg.queue_notify]; 1700bd10b9fSdv vr = vq_info->q_hva; 1710bd10b9fSdv if (vr == NULL) 1720bd10b9fSdv fatalx("%s: null vring", __func__); 173f3c0184aSmlarkin 1740bd10b9fSdv desc = (struct vring_desc *)(vr); 1750bd10b9fSdv avail = (struct vring_avail *)(vr + vq_info->vq_availoffset); 1760bd10b9fSdv used = (struct vring_used *)(vr + vq_info->vq_usedoffset); 177f3c0184aSmlarkin 178207349e7Smlarkin aidx = avail->idx & VIORND_QUEUE_MASK; 179207349e7Smlarkin uidx = used->idx & VIORND_QUEUE_MASK; 180207349e7Smlarkin 181321f3ee3Sdv dxx = avail->ring[aidx] & VIORND_QUEUE_MASK; 182321f3ee3Sdv 183321f3ee3Sdv sz = desc[dxx].len; 18492e4f4b6Smlarkin if (sz > MAXPHYS) 185321f3ee3Sdv fatalx("viornd descriptor size too large (%zu)", sz); 18692e4f4b6Smlarkin 18792e4f4b6Smlarkin rnd_data = malloc(sz); 188f3c0184aSmlarkin 189f3c0184aSmlarkin if (rnd_data != NULL) { 190321f3ee3Sdv arc4random_buf(rnd_data, sz); 191321f3ee3Sdv if (write_mem(desc[dxx].addr, rnd_data, sz)) { 1927da934edSreyk log_warnx("viornd: can't write random data @ " 1937da934edSreyk "0x%llx", 194321f3ee3Sdv desc[dxx].addr); 195f3c0184aSmlarkin } else { 196f3c0184aSmlarkin /* ret == 1 -> interrupt needed */ 197f3c0184aSmlarkin /* XXX check VIRTIO_F_NO_INTR */ 198f3c0184aSmlarkin ret = 1; 199f3c0184aSmlarkin viornd.cfg.isr_status = 1; 200321f3ee3Sdv used->ring[uidx].id = dxx; 201321f3ee3Sdv used->ring[uidx].len = sz; 2020bd10b9fSdv __sync_synchronize(); 203f3c0184aSmlarkin used->idx++; 204f3c0184aSmlarkin } 205f3c0184aSmlarkin free(rnd_data); 20692e4f4b6Smlarkin } else 20792e4f4b6Smlarkin fatal("memory allocation error for viornd data"); 208f3c0184aSmlarkin 209f3c0184aSmlarkin return (ret); 210f3c0184aSmlarkin } 211f3c0184aSmlarkin 212f3c0184aSmlarkin int 213f3c0184aSmlarkin virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, 214eef1411cSmlarkin void *unused, uint8_t sz) 215f3c0184aSmlarkin { 216f3c0184aSmlarkin *intr = 0xFF; 217f3c0184aSmlarkin 218f3c0184aSmlarkin if (dir == 0) { 219f3c0184aSmlarkin switch (reg) { 220f3c0184aSmlarkin case VIRTIO_CONFIG_DEVICE_FEATURES: 221f3c0184aSmlarkin case VIRTIO_CONFIG_QUEUE_SIZE: 222f3c0184aSmlarkin case VIRTIO_CONFIG_ISR_STATUS: 2237da934edSreyk log_warnx("%s: illegal write %x to %s", 224f3c0184aSmlarkin __progname, *data, virtio_reg_name(reg)); 225f3c0184aSmlarkin break; 226f3c0184aSmlarkin case VIRTIO_CONFIG_GUEST_FEATURES: 227f3c0184aSmlarkin viornd.cfg.guest_feature = *data; 228f3c0184aSmlarkin break; 2290bd10b9fSdv case VIRTIO_CONFIG_QUEUE_PFN: 2300bd10b9fSdv viornd.cfg.queue_pfn = *data; 231f3c0184aSmlarkin viornd_update_qa(); 232f3c0184aSmlarkin break; 233f3c0184aSmlarkin case VIRTIO_CONFIG_QUEUE_SELECT: 234f3c0184aSmlarkin viornd.cfg.queue_select = *data; 235f3c0184aSmlarkin viornd_update_qs(); 236f3c0184aSmlarkin break; 237f3c0184aSmlarkin case VIRTIO_CONFIG_QUEUE_NOTIFY: 238f3c0184aSmlarkin viornd.cfg.queue_notify = *data; 239f3c0184aSmlarkin if (viornd_notifyq()) 240f3c0184aSmlarkin *intr = 1; 241f3c0184aSmlarkin break; 242f3c0184aSmlarkin case VIRTIO_CONFIG_DEVICE_STATUS: 243f3c0184aSmlarkin viornd.cfg.device_status = *data; 244f3c0184aSmlarkin break; 245f3c0184aSmlarkin } 246f3c0184aSmlarkin } else { 247f3c0184aSmlarkin switch (reg) { 248f3c0184aSmlarkin case VIRTIO_CONFIG_DEVICE_FEATURES: 249f3c0184aSmlarkin *data = viornd.cfg.device_feature; 250f3c0184aSmlarkin break; 251f3c0184aSmlarkin case VIRTIO_CONFIG_GUEST_FEATURES: 252f3c0184aSmlarkin *data = viornd.cfg.guest_feature; 253f3c0184aSmlarkin break; 2540bd10b9fSdv case VIRTIO_CONFIG_QUEUE_PFN: 2550bd10b9fSdv *data = viornd.cfg.queue_pfn; 256f3c0184aSmlarkin break; 257f3c0184aSmlarkin case VIRTIO_CONFIG_QUEUE_SIZE: 258f3c0184aSmlarkin *data = viornd.cfg.queue_size; 259f3c0184aSmlarkin break; 260f3c0184aSmlarkin case VIRTIO_CONFIG_QUEUE_SELECT: 261f3c0184aSmlarkin *data = viornd.cfg.queue_select; 262f3c0184aSmlarkin break; 263f3c0184aSmlarkin case VIRTIO_CONFIG_QUEUE_NOTIFY: 264f3c0184aSmlarkin *data = viornd.cfg.queue_notify; 265f3c0184aSmlarkin break; 266f3c0184aSmlarkin case VIRTIO_CONFIG_DEVICE_STATUS: 267f3c0184aSmlarkin *data = viornd.cfg.device_status; 268f3c0184aSmlarkin break; 269f3c0184aSmlarkin case VIRTIO_CONFIG_ISR_STATUS: 270f3c0184aSmlarkin *data = viornd.cfg.isr_status; 27107520ef5Smlarkin viornd.cfg.isr_status = 0; 272c4fd4c5bSdv vcpu_deassert_irq(viornd.vm_id, 0, viornd.irq); 273f3c0184aSmlarkin break; 274f3c0184aSmlarkin } 275f3c0184aSmlarkin } 276f3c0184aSmlarkin return (0); 277f3c0184aSmlarkin } 278f3c0184aSmlarkin 279*7ccb23ddSdv /* 280*7ccb23ddSdv * vmmci_ctl 281*7ccb23ddSdv * 282*7ccb23ddSdv * Inject a command into the vmmci device, potentially delivering interrupt. 283*7ccb23ddSdv * 284*7ccb23ddSdv * Called by the vm process's event(3) loop. 285*7ccb23ddSdv */ 286f84d5d33Sreyk int 287f84d5d33Sreyk vmmci_ctl(unsigned int cmd) 288f84d5d33Sreyk { 289*7ccb23ddSdv int ret = 0; 2903320a88dSreyk struct timeval tv = { 0, 0 }; 2913320a88dSreyk 292*7ccb23ddSdv mutex_lock(&vmmci.mutex); 293*7ccb23ddSdv 294f84d5d33Sreyk if ((vmmci.cfg.device_status & 295*7ccb23ddSdv VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) == 0) { 296*7ccb23ddSdv ret = -1; 297*7ccb23ddSdv goto unlock; 298*7ccb23ddSdv } 299f84d5d33Sreyk 300f84d5d33Sreyk if (cmd == vmmci.cmd) 301*7ccb23ddSdv goto unlock; 302f84d5d33Sreyk 303f84d5d33Sreyk switch (cmd) { 304f84d5d33Sreyk case VMMCI_NONE: 3053320a88dSreyk break; 306f84d5d33Sreyk case VMMCI_SHUTDOWN: 307f84d5d33Sreyk case VMMCI_REBOOT: 308f84d5d33Sreyk /* Update command */ 309f84d5d33Sreyk vmmci.cmd = cmd; 310f84d5d33Sreyk 311a4ba3913Sreyk /* 312a4ba3913Sreyk * vmm VMs do not support powerdown, send a reboot request 313a4ba3913Sreyk * instead and turn it off after the triple fault. 314a4ba3913Sreyk */ 315a4ba3913Sreyk if (cmd == VMMCI_SHUTDOWN) 316a4ba3913Sreyk cmd = VMMCI_REBOOT; 317a4ba3913Sreyk 318f84d5d33Sreyk /* Trigger interrupt */ 319f84d5d33Sreyk vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE; 320c4fd4c5bSdv vcpu_assert_irq(vmmci.vm_id, 0, vmmci.irq); 3213320a88dSreyk 3223320a88dSreyk /* Add ACK timeout */ 323*7ccb23ddSdv tv.tv_sec = VMMCI_TIMEOUT_SHORT; 3243320a88dSreyk evtimer_add(&vmmci.timeout, &tv); 325f84d5d33Sreyk break; 326e82d5294Smlarkin case VMMCI_SYNCRTC: 327e82d5294Smlarkin if (vmmci.cfg.guest_feature & VMMCI_F_SYNCRTC) { 328e82d5294Smlarkin /* RTC updated, request guest VM resync of its RTC */ 329e82d5294Smlarkin vmmci.cmd = cmd; 330e82d5294Smlarkin 331e82d5294Smlarkin vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE; 332c4fd4c5bSdv vcpu_assert_irq(vmmci.vm_id, 0, vmmci.irq); 333e82d5294Smlarkin } else { 334e82d5294Smlarkin log_debug("%s: RTC sync skipped (guest does not " 335e82d5294Smlarkin "support RTC sync)\n", __func__); 336e82d5294Smlarkin } 337e82d5294Smlarkin break; 338f84d5d33Sreyk default: 339f84d5d33Sreyk fatalx("invalid vmmci command: %d", cmd); 340f84d5d33Sreyk } 341f84d5d33Sreyk 342*7ccb23ddSdv unlock: 343*7ccb23ddSdv mutex_unlock(&vmmci.mutex); 344*7ccb23ddSdv 345*7ccb23ddSdv return (ret); 346f84d5d33Sreyk } 347f84d5d33Sreyk 348*7ccb23ddSdv /* 349*7ccb23ddSdv * vmmci_ack 350*7ccb23ddSdv * 351*7ccb23ddSdv * Process a write to the command register. 352*7ccb23ddSdv * 353*7ccb23ddSdv * Called by the vcpu thread. Must be called with the mutex held. 354*7ccb23ddSdv */ 3553320a88dSreyk void 3563320a88dSreyk vmmci_ack(unsigned int cmd) 3573320a88dSreyk { 3583320a88dSreyk switch (cmd) { 3593320a88dSreyk case VMMCI_NONE: 3603320a88dSreyk break; 3613320a88dSreyk case VMMCI_SHUTDOWN: 3623320a88dSreyk /* 3633320a88dSreyk * The shutdown was requested by the VM if we don't have 3643320a88dSreyk * a pending shutdown request. In this case add a short 3653320a88dSreyk * timeout to give the VM a chance to reboot before the 3663320a88dSreyk * timer is expired. 3673320a88dSreyk */ 3683320a88dSreyk if (vmmci.cmd == 0) { 3693320a88dSreyk log_debug("%s: vm %u requested shutdown", __func__, 3703320a88dSreyk vmmci.vm_id); 371*7ccb23ddSdv vm_pipe_send(&vmmci.dev_pipe, VMMCI_SET_TIMEOUT_SHORT); 3723320a88dSreyk return; 3733320a88dSreyk } 3743320a88dSreyk /* FALLTHROUGH */ 3753320a88dSreyk case VMMCI_REBOOT: 3763320a88dSreyk /* 3773a50f0a9Sjmc * If the VM acknowledged our shutdown request, give it 3783320a88dSreyk * enough time to shutdown or reboot gracefully. This 3793320a88dSreyk * might take a considerable amount of time (running 3803320a88dSreyk * rc.shutdown on the VM), so increase the timeout before 3813320a88dSreyk * killing it forcefully. 3823320a88dSreyk */ 383*7ccb23ddSdv if (cmd == vmmci.cmd) { 3843320a88dSreyk log_debug("%s: vm %u acknowledged shutdown request", 3853320a88dSreyk __func__, vmmci.vm_id); 386*7ccb23ddSdv vm_pipe_send(&vmmci.dev_pipe, VMMCI_SET_TIMEOUT_LONG); 3873320a88dSreyk } 3883320a88dSreyk break; 389e82d5294Smlarkin case VMMCI_SYNCRTC: 390e82d5294Smlarkin log_debug("%s: vm %u acknowledged RTC sync request", 391e82d5294Smlarkin __func__, vmmci.vm_id); 392e82d5294Smlarkin vmmci.cmd = VMMCI_NONE; 393e82d5294Smlarkin break; 3943320a88dSreyk default: 3953320a88dSreyk log_warnx("%s: illegal request %u", __func__, cmd); 3963320a88dSreyk break; 3973320a88dSreyk } 3983320a88dSreyk } 3993320a88dSreyk 4003320a88dSreyk void 4013320a88dSreyk vmmci_timeout(int fd, short type, void *arg) 4023320a88dSreyk { 4033320a88dSreyk log_debug("%s: vm %u shutdown", __progname, vmmci.vm_id); 4043320a88dSreyk vm_shutdown(vmmci.cmd == VMMCI_REBOOT ? VMMCI_REBOOT : VMMCI_SHUTDOWN); 4053320a88dSreyk } 4063320a88dSreyk 407f84d5d33Sreyk int 408f84d5d33Sreyk vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, 409eef1411cSmlarkin void *unused, uint8_t sz) 410f84d5d33Sreyk { 411f84d5d33Sreyk *intr = 0xFF; 412f84d5d33Sreyk 413*7ccb23ddSdv mutex_lock(&vmmci.mutex); 414f84d5d33Sreyk if (dir == 0) { 415f84d5d33Sreyk switch (reg) { 416f84d5d33Sreyk case VIRTIO_CONFIG_DEVICE_FEATURES: 417f84d5d33Sreyk case VIRTIO_CONFIG_QUEUE_SIZE: 418f84d5d33Sreyk case VIRTIO_CONFIG_ISR_STATUS: 419f84d5d33Sreyk log_warnx("%s: illegal write %x to %s", 420f84d5d33Sreyk __progname, *data, virtio_reg_name(reg)); 421f84d5d33Sreyk break; 422f84d5d33Sreyk case VIRTIO_CONFIG_GUEST_FEATURES: 423f84d5d33Sreyk vmmci.cfg.guest_feature = *data; 424f84d5d33Sreyk break; 4250bd10b9fSdv case VIRTIO_CONFIG_QUEUE_PFN: 4260bd10b9fSdv vmmci.cfg.queue_pfn = *data; 427f84d5d33Sreyk break; 428f84d5d33Sreyk case VIRTIO_CONFIG_QUEUE_SELECT: 429f84d5d33Sreyk vmmci.cfg.queue_select = *data; 430f84d5d33Sreyk break; 431f84d5d33Sreyk case VIRTIO_CONFIG_QUEUE_NOTIFY: 432f84d5d33Sreyk vmmci.cfg.queue_notify = *data; 433f84d5d33Sreyk break; 434f84d5d33Sreyk case VIRTIO_CONFIG_DEVICE_STATUS: 435f84d5d33Sreyk vmmci.cfg.device_status = *data; 436f84d5d33Sreyk break; 4373320a88dSreyk case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: 4383320a88dSreyk vmmci_ack(*data); 4393320a88dSreyk break; 440f84d5d33Sreyk } 441f84d5d33Sreyk } else { 442f84d5d33Sreyk switch (reg) { 443f84d5d33Sreyk case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: 444f84d5d33Sreyk *data = vmmci.cmd; 445f84d5d33Sreyk break; 446981cad08Sreyk case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: 447981cad08Sreyk /* Update time once when reading the first register */ 448981cad08Sreyk gettimeofday(&vmmci.time, NULL); 449981cad08Sreyk *data = (uint64_t)vmmci.time.tv_sec; 450981cad08Sreyk break; 451981cad08Sreyk case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: 452981cad08Sreyk *data = (uint64_t)vmmci.time.tv_sec << 32; 453981cad08Sreyk break; 454981cad08Sreyk case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: 455981cad08Sreyk *data = (uint64_t)vmmci.time.tv_usec; 456981cad08Sreyk break; 457981cad08Sreyk case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: 458981cad08Sreyk *data = (uint64_t)vmmci.time.tv_usec << 32; 459981cad08Sreyk break; 460f84d5d33Sreyk case VIRTIO_CONFIG_DEVICE_FEATURES: 461f84d5d33Sreyk *data = vmmci.cfg.device_feature; 462f84d5d33Sreyk break; 463f84d5d33Sreyk case VIRTIO_CONFIG_GUEST_FEATURES: 464f84d5d33Sreyk *data = vmmci.cfg.guest_feature; 465f84d5d33Sreyk break; 4660bd10b9fSdv case VIRTIO_CONFIG_QUEUE_PFN: 4670bd10b9fSdv *data = vmmci.cfg.queue_pfn; 468f84d5d33Sreyk break; 469f84d5d33Sreyk case VIRTIO_CONFIG_QUEUE_SIZE: 470f84d5d33Sreyk *data = vmmci.cfg.queue_size; 471f84d5d33Sreyk break; 472f84d5d33Sreyk case VIRTIO_CONFIG_QUEUE_SELECT: 473f84d5d33Sreyk *data = vmmci.cfg.queue_select; 474f84d5d33Sreyk break; 475f84d5d33Sreyk case VIRTIO_CONFIG_QUEUE_NOTIFY: 476f84d5d33Sreyk *data = vmmci.cfg.queue_notify; 477f84d5d33Sreyk break; 478f84d5d33Sreyk case VIRTIO_CONFIG_DEVICE_STATUS: 479f84d5d33Sreyk *data = vmmci.cfg.device_status; 480f84d5d33Sreyk break; 481f84d5d33Sreyk case VIRTIO_CONFIG_ISR_STATUS: 482f84d5d33Sreyk *data = vmmci.cfg.isr_status; 48307520ef5Smlarkin vmmci.cfg.isr_status = 0; 484c4fd4c5bSdv vcpu_deassert_irq(vmmci.vm_id, 0, vmmci.irq); 485f84d5d33Sreyk break; 486f84d5d33Sreyk } 487f84d5d33Sreyk } 488*7ccb23ddSdv mutex_unlock(&vmmci.mutex); 489*7ccb23ddSdv 490f84d5d33Sreyk return (0); 491f84d5d33Sreyk } 492f84d5d33Sreyk 49373613953Sreyk int 4944d2a1fb2Sreyk virtio_get_base(int fd, char *path, size_t npath, int type, const char *dpath) 49573613953Sreyk { 49673613953Sreyk switch (type) { 49773613953Sreyk case VMDF_RAW: 49873613953Sreyk return 0; 49973613953Sreyk case VMDF_QCOW2: 5004d2a1fb2Sreyk return virtio_qcow2_get_base(fd, path, npath, dpath); 50173613953Sreyk } 50273613953Sreyk log_warnx("%s: invalid disk format", __func__); 50373613953Sreyk return -1; 50473613953Sreyk } 50573613953Sreyk 506*7ccb23ddSdv static void 507*7ccb23ddSdv vmmci_pipe_dispatch(int fd, short event, void *arg) 508*7ccb23ddSdv { 509*7ccb23ddSdv enum pipe_msg_type msg; 510*7ccb23ddSdv struct timeval tv = { 0, 0 }; 511*7ccb23ddSdv 512*7ccb23ddSdv msg = vm_pipe_recv(&vmmci.dev_pipe); 513*7ccb23ddSdv switch (msg) { 514*7ccb23ddSdv case VMMCI_SET_TIMEOUT_SHORT: 515*7ccb23ddSdv tv.tv_sec = VMMCI_TIMEOUT_SHORT; 516*7ccb23ddSdv evtimer_add(&vmmci.timeout, &tv); 517*7ccb23ddSdv break; 518*7ccb23ddSdv case VMMCI_SET_TIMEOUT_LONG: 519*7ccb23ddSdv tv.tv_sec = VMMCI_TIMEOUT_LONG; 520*7ccb23ddSdv evtimer_add(&vmmci.timeout, &tv); 521*7ccb23ddSdv break; 522*7ccb23ddSdv default: 523*7ccb23ddSdv log_warnx("%s: invalid pipe message type %d", __func__, msg); 524*7ccb23ddSdv } 525*7ccb23ddSdv } 526*7ccb23ddSdv 527f3c0184aSmlarkin void 52873613953Sreyk virtio_init(struct vmd_vm *vm, int child_cdrom, 52973613953Sreyk int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps) 530f3c0184aSmlarkin { 531470adcf5Sreyk struct vmop_create_params *vmc = &vm->vm_params; 5322b2a5f0dSreyk struct vm_create_params *vcp = &vmc->vmc_params; 5333481ecdfSdv struct virtio_dev *dev; 534f3c0184aSmlarkin uint8_t id; 5353481ecdfSdv uint8_t i, j; 536*7ccb23ddSdv int ret = 0; 537f3c0184aSmlarkin 538f3c0184aSmlarkin /* Virtio entropy device */ 539f3c0184aSmlarkin if (pci_add_device(&id, PCI_VENDOR_QUMRANET, 540f3c0184aSmlarkin PCI_PRODUCT_QUMRANET_VIO_RNG, PCI_CLASS_SYSTEM, 541f3c0184aSmlarkin PCI_SUBCLASS_SYSTEM_MISC, 542f3c0184aSmlarkin PCI_VENDOR_OPENBSD, 543f3c0184aSmlarkin PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL)) { 5447da934edSreyk log_warnx("%s: can't add PCI virtio rng device", 545f3c0184aSmlarkin __progname); 546f3c0184aSmlarkin return; 547f3c0184aSmlarkin } 548f3c0184aSmlarkin 549f3c0184aSmlarkin if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_rnd_io, NULL)) { 5507da934edSreyk log_warnx("%s: can't add bar for virtio rng device", 551f3c0184aSmlarkin __progname); 552f3c0184aSmlarkin return; 553f3c0184aSmlarkin } 554f3c0184aSmlarkin 555cb0fa87cSmlarkin memset(&viornd, 0, sizeof(viornd)); 556f3c0184aSmlarkin viornd.vq[0].qs = VIORND_QUEUE_SIZE; 557f3c0184aSmlarkin viornd.vq[0].vq_availoffset = sizeof(struct vring_desc) * 558f3c0184aSmlarkin VIORND_QUEUE_SIZE; 559f3c0184aSmlarkin viornd.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN( 560f3c0184aSmlarkin sizeof(struct vring_desc) * VIORND_QUEUE_SIZE 561f3c0184aSmlarkin + sizeof(uint16_t) * (2 + VIORND_QUEUE_SIZE)); 562813e3047Spd viornd.pci_id = id; 5637b7c4b51Smlarkin viornd.irq = pci_get_dev_irq(id); 5647b36e131Smlarkin viornd.vm_id = vcp->vcp_id; 565f3c0184aSmlarkin 5663481ecdfSdv SLIST_INIT(&virtio_devs); 5673481ecdfSdv 56873a98491Sdv if (vmc->vmc_nnics > 0) { 5693481ecdfSdv for (i = 0; i < vmc->vmc_nnics; i++) { 5703481ecdfSdv dev = calloc(1, sizeof(struct virtio_dev)); 5713481ecdfSdv if (dev == NULL) { 5723481ecdfSdv log_warn("%s: calloc failure allocating vionet", 573f3c0184aSmlarkin __progname); 574f3c0184aSmlarkin return; 575f3c0184aSmlarkin } 576f3c0184aSmlarkin /* Virtio network */ 5773481ecdfSdv dev->dev_type = VMD_DEVTYPE_NET; 5783481ecdfSdv 579f3c0184aSmlarkin if (pci_add_device(&id, PCI_VENDOR_QUMRANET, 580f3c0184aSmlarkin PCI_PRODUCT_QUMRANET_VIO_NET, PCI_CLASS_SYSTEM, 5813481ecdfSdv PCI_SUBCLASS_SYSTEM_MISC, PCI_VENDOR_OPENBSD, 582f3c0184aSmlarkin PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL)) { 5837da934edSreyk log_warnx("%s: can't add PCI virtio net device", 584f3c0184aSmlarkin __progname); 585f3c0184aSmlarkin return; 586f3c0184aSmlarkin } 5873481ecdfSdv dev->pci_id = id; 5883481ecdfSdv dev->sync_fd = -1; 5893481ecdfSdv dev->async_fd = -1; 5903481ecdfSdv dev->vm_id = vcp->vcp_id; 5913481ecdfSdv dev->vm_vmid = vm->vm_vmid; 5923481ecdfSdv dev->irq = pci_get_dev_irq(id); 593f3c0184aSmlarkin 5943481ecdfSdv /* The vionet pci bar function is called by the vcpu. */ 5953481ecdfSdv if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io, 5963481ecdfSdv dev)) { 5977da934edSreyk log_warnx("%s: can't add bar for virtio net " 5987da934edSreyk "device", __progname); 599f3c0184aSmlarkin return; 600f3c0184aSmlarkin } 601f3c0184aSmlarkin 6023481ecdfSdv dev->vionet.vq[RXQ].qs = VIONET_QUEUE_SIZE; 6033481ecdfSdv dev->vionet.vq[RXQ].vq_availoffset = 6042fee2047Smlarkin sizeof(struct vring_desc) * VIONET_QUEUE_SIZE; 6053481ecdfSdv dev->vionet.vq[RXQ].vq_usedoffset = VIRTQUEUE_ALIGN( 606f3c0184aSmlarkin sizeof(struct vring_desc) * VIONET_QUEUE_SIZE 607f3c0184aSmlarkin + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE)); 6083481ecdfSdv dev->vionet.vq[RXQ].last_avail = 0; 6093481ecdfSdv dev->vionet.vq[RXQ].notified_avail = 0; 6106c31e103Sdv 6113481ecdfSdv dev->vionet.vq[TXQ].qs = VIONET_QUEUE_SIZE; 6123481ecdfSdv dev->vionet.vq[TXQ].vq_availoffset = 6132fee2047Smlarkin sizeof(struct vring_desc) * VIONET_QUEUE_SIZE; 6143481ecdfSdv dev->vionet.vq[TXQ].vq_usedoffset = VIRTQUEUE_ALIGN( 615f3c0184aSmlarkin sizeof(struct vring_desc) * VIONET_QUEUE_SIZE 616f3c0184aSmlarkin + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE)); 6173481ecdfSdv dev->vionet.vq[TXQ].last_avail = 0; 6183481ecdfSdv dev->vionet.vq[TXQ].notified_avail = 0; 619c396b7b1Sstefan 6203481ecdfSdv dev->vionet.data_fd = child_taps[i]; 621f3c0184aSmlarkin 6223a0c3a66Sreyk /* MAC address has been assigned by the parent */ 6233481ecdfSdv memcpy(&dev->vionet.mac, &vmc->vmc_macs[i], 6); 6243481ecdfSdv dev->vionet.cfg.device_feature = VIRTIO_NET_F_MAC; 62547ccdcefSreyk 6263481ecdfSdv dev->vionet.lockedmac = 6272b2a5f0dSreyk vmc->vmc_ifflags[i] & VMIFF_LOCKED ? 1 : 0; 6283481ecdfSdv dev->vionet.local = 629470adcf5Sreyk vmc->vmc_ifflags[i] & VMIFF_LOCAL ? 1 : 0; 630cc104512Sclaudio if (i == 0 && vmc->vmc_bootdevice & VMBOOTDEV_NET) 6313481ecdfSdv dev->vionet.pxeboot = 1; 6322272e586Sdv memcpy(&dev->vionet.local_prefix, 6332272e586Sdv &env->vmd_cfg.cfg_localprefix, 6342272e586Sdv sizeof(dev->vionet.local_prefix)); 635cc104512Sclaudio log_debug("%s: vm \"%s\" vio%u lladdr %s%s%s%s", 6362fca0731Sreyk __func__, vcp->vcp_name, i, 6373481ecdfSdv ether_ntoa((void *)dev->vionet.mac), 6383481ecdfSdv dev->vionet.lockedmac ? ", locked" : "", 6393481ecdfSdv dev->vionet.local ? ", local" : "", 6403481ecdfSdv dev->vionet.pxeboot ? ", pxeboot" : ""); 6413481ecdfSdv 6423481ecdfSdv /* Add the vionet to our device list. */ 6433481ecdfSdv dev->vionet.idx = i; 6443481ecdfSdv SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next); 645f3c0184aSmlarkin } 646f3c0184aSmlarkin } 647f84d5d33Sreyk 64873a98491Sdv if (vmc->vmc_ndisks > 0) { 6493481ecdfSdv for (i = 0; i < vmc->vmc_ndisks; i++) { 6503481ecdfSdv dev = calloc(1, sizeof(struct virtio_dev)); 6513481ecdfSdv if (dev == NULL) { 6523481ecdfSdv log_warn("%s: calloc failure allocating vioblk", 65393622bc9Smlarkin __progname); 65493622bc9Smlarkin return; 65593622bc9Smlarkin } 65693622bc9Smlarkin 6573481ecdfSdv /* One vioblk device for each disk defined in vcp */ 6583481ecdfSdv dev->dev_type = VMD_DEVTYPE_DISK; 6593481ecdfSdv 66093622bc9Smlarkin if (pci_add_device(&id, PCI_VENDOR_QUMRANET, 66193622bc9Smlarkin PCI_PRODUCT_QUMRANET_VIO_BLOCK, 66293622bc9Smlarkin PCI_CLASS_MASS_STORAGE, 66393622bc9Smlarkin PCI_SUBCLASS_MASS_STORAGE_SCSI, 66493622bc9Smlarkin PCI_VENDOR_OPENBSD, 66593622bc9Smlarkin PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL)) { 66693622bc9Smlarkin log_warnx("%s: can't add PCI virtio block " 66793622bc9Smlarkin "device", __progname); 66893622bc9Smlarkin return; 66993622bc9Smlarkin } 6703481ecdfSdv dev->pci_id = id; 6713481ecdfSdv dev->sync_fd = -1; 6723481ecdfSdv dev->async_fd = -1; 6733481ecdfSdv dev->vm_id = vcp->vcp_id; 6743481ecdfSdv dev->vm_vmid = vm->vm_vmid; 6753481ecdfSdv dev->irq = pci_get_dev_irq(id); 6763481ecdfSdv 6773481ecdfSdv if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io, 6783481ecdfSdv &dev->vioblk)) { 67993622bc9Smlarkin log_warnx("%s: can't add bar for virtio block " 68093622bc9Smlarkin "device", __progname); 68193622bc9Smlarkin return; 68293622bc9Smlarkin } 6833481ecdfSdv dev->vioblk.vq[0].qs = VIOBLK_QUEUE_SIZE; 6843481ecdfSdv dev->vioblk.vq[0].vq_availoffset = 68593622bc9Smlarkin sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE; 6863481ecdfSdv dev->vioblk.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN( 68793622bc9Smlarkin sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE 68893622bc9Smlarkin + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE)); 6893481ecdfSdv dev->vioblk.vq[0].last_avail = 0; 6903481ecdfSdv dev->vioblk.cfg.device_feature = 69120e554f8Sdv VIRTIO_BLK_F_SEG_MAX; 69220e554f8Sdv dev->vioblk.seg_max = VIOBLK_SEG_MAX; 6933481ecdfSdv 6943481ecdfSdv /* 6953481ecdfSdv * Initialize disk fds to an invalid fd (-1), then 6963481ecdfSdv * set any child disk fds. 6973481ecdfSdv */ 6983481ecdfSdv memset(&dev->vioblk.disk_fd, -1, 6993481ecdfSdv sizeof(dev->vioblk.disk_fd)); 7003481ecdfSdv dev->vioblk.ndisk_fd = vmc->vmc_diskbases[i]; 7013481ecdfSdv for (j = 0; j < dev->vioblk.ndisk_fd; j++) 7023481ecdfSdv dev->vioblk.disk_fd[j] = child_disks[i][j]; 7033481ecdfSdv 7043481ecdfSdv dev->vioblk.idx = i; 7053481ecdfSdv SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next); 70693622bc9Smlarkin } 70793622bc9Smlarkin } 7083481ecdfSdv 7093481ecdfSdv /* 7103481ecdfSdv * Launch virtio devices that support subprocess execution. 7113481ecdfSdv */ 7123481ecdfSdv SLIST_FOREACH(dev, &virtio_devs, dev_next) { 7133481ecdfSdv if (virtio_dev_launch(vm, dev) != 0) 7143481ecdfSdv fatalx("failed to launch virtio device"); 71593622bc9Smlarkin } 71693622bc9Smlarkin 71795ab188fSccardenas /* vioscsi cdrom */ 71873a98491Sdv if (strlen(vmc->vmc_cdrom)) { 71995ab188fSccardenas vioscsi = calloc(1, sizeof(struct vioscsi_dev)); 72095ab188fSccardenas if (vioscsi == NULL) { 72195ab188fSccardenas log_warn("%s: calloc failure allocating vioscsi", 72295ab188fSccardenas __progname); 72395ab188fSccardenas return; 72495ab188fSccardenas } 72595ab188fSccardenas 72695ab188fSccardenas if (pci_add_device(&id, PCI_VENDOR_QUMRANET, 72795ab188fSccardenas PCI_PRODUCT_QUMRANET_VIO_SCSI, 72895ab188fSccardenas PCI_CLASS_MASS_STORAGE, 72995ab188fSccardenas PCI_SUBCLASS_MASS_STORAGE_SCSI, 73095ab188fSccardenas PCI_VENDOR_OPENBSD, 73195ab188fSccardenas PCI_PRODUCT_VIRTIO_SCSI, 1, NULL)) { 73295ab188fSccardenas log_warnx("%s: can't add PCI vioscsi device", 73395ab188fSccardenas __progname); 73495ab188fSccardenas return; 73595ab188fSccardenas } 73695ab188fSccardenas 73795ab188fSccardenas if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vioscsi_io, vioscsi)) { 73895ab188fSccardenas log_warnx("%s: can't add bar for vioscsi device", 73995ab188fSccardenas __progname); 74095ab188fSccardenas return; 74195ab188fSccardenas } 74295ab188fSccardenas 74395ab188fSccardenas for (i = 0; i < VIRTIO_MAX_QUEUES; i++) { 74495ab188fSccardenas vioscsi->vq[i].qs = VIOSCSI_QUEUE_SIZE; 74595ab188fSccardenas vioscsi->vq[i].vq_availoffset = 74695ab188fSccardenas sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE; 74795ab188fSccardenas vioscsi->vq[i].vq_usedoffset = VIRTQUEUE_ALIGN( 74895ab188fSccardenas sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE 74995ab188fSccardenas + sizeof(uint16_t) * (2 + VIOSCSI_QUEUE_SIZE)); 75095ab188fSccardenas vioscsi->vq[i].last_avail = 0; 75195ab188fSccardenas } 7523481ecdfSdv if (virtio_raw_init(&vioscsi->file, &vioscsi->sz, &child_cdrom, 7533481ecdfSdv 1) == -1) { 7540e71c2c7Sccardenas log_warnx("%s: unable to determine iso format", 7550e71c2c7Sccardenas __func__); 7569617633bSccardenas return; 7572919bad8Sccardenas } 75895ab188fSccardenas vioscsi->locked = 0; 75995ab188fSccardenas vioscsi->lba = 0; 7602495c0aeSdv vioscsi->n_blocks = vioscsi->sz / VIOSCSI_BLOCK_SIZE_CDROM; 76195ab188fSccardenas vioscsi->max_xfer = VIOSCSI_BLOCK_SIZE_CDROM; 76295ab188fSccardenas vioscsi->pci_id = id; 7637b36e131Smlarkin vioscsi->vm_id = vcp->vcp_id; 7647b7c4b51Smlarkin vioscsi->irq = pci_get_dev_irq(id); 76595ab188fSccardenas } 76695ab188fSccardenas 767f84d5d33Sreyk /* virtio control device */ 768f84d5d33Sreyk if (pci_add_device(&id, PCI_VENDOR_OPENBSD, 769f84d5d33Sreyk PCI_PRODUCT_OPENBSD_CONTROL, 770f84d5d33Sreyk PCI_CLASS_COMMUNICATIONS, 771f84d5d33Sreyk PCI_SUBCLASS_COMMUNICATIONS_MISC, 772f84d5d33Sreyk PCI_VENDOR_OPENBSD, 773f84d5d33Sreyk PCI_PRODUCT_VIRTIO_VMMCI, 1, NULL)) { 774f84d5d33Sreyk log_warnx("%s: can't add PCI vmm control device", 775f84d5d33Sreyk __progname); 776f84d5d33Sreyk return; 777f84d5d33Sreyk } 778f84d5d33Sreyk 779f84d5d33Sreyk if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vmmci_io, NULL)) { 780f84d5d33Sreyk log_warnx("%s: can't add bar for vmm control device", 781f84d5d33Sreyk __progname); 782f84d5d33Sreyk return; 783f84d5d33Sreyk } 784f84d5d33Sreyk 7853320a88dSreyk memset(&vmmci, 0, sizeof(vmmci)); 786e82d5294Smlarkin vmmci.cfg.device_feature = VMMCI_F_TIMESYNC | VMMCI_F_ACK | 787e82d5294Smlarkin VMMCI_F_SYNCRTC; 788f84d5d33Sreyk vmmci.vm_id = vcp->vcp_id; 789f84d5d33Sreyk vmmci.irq = pci_get_dev_irq(id); 790813e3047Spd vmmci.pci_id = id; 791*7ccb23ddSdv ret = pthread_mutex_init(&vmmci.mutex, NULL); 792*7ccb23ddSdv if (ret) { 793*7ccb23ddSdv errno = ret; 794*7ccb23ddSdv fatal("could not initialize vmmci mutex"); 795*7ccb23ddSdv } 7963320a88dSreyk 7973320a88dSreyk evtimer_set(&vmmci.timeout, vmmci_timeout, NULL); 798*7ccb23ddSdv vm_pipe_init(&vmmci.dev_pipe, vmmci_pipe_dispatch); 799*7ccb23ddSdv event_add(&vmmci.dev_pipe.read_ev, NULL); 8002fee2047Smlarkin } 801149417b6Sreyk 80297f33f1dSdv /* 80397f33f1dSdv * vionet_set_hostmac 80497f33f1dSdv * 80597f33f1dSdv * Sets the hardware address for the host-side tap(4) on a vionet_dev. 80697f33f1dSdv * 80797f33f1dSdv * This should only be called from the event-loop thread 80897f33f1dSdv * 80997f33f1dSdv * vm: pointer to the current vmd_vm instance 81097f33f1dSdv * idx: index into the array of vionet_dev's for the target vionet_dev 81197f33f1dSdv * addr: ethernet address to set 81297f33f1dSdv */ 81397f33f1dSdv void 81497f33f1dSdv vionet_set_hostmac(struct vmd_vm *vm, unsigned int idx, uint8_t *addr) 81597f33f1dSdv { 81697f33f1dSdv struct vmop_create_params *vmc = &vm->vm_params; 8173481ecdfSdv struct virtio_dev *dev; 8183481ecdfSdv struct vionet_dev *vionet = NULL; 8193481ecdfSdv int ret; 82097f33f1dSdv 82173a98491Sdv if (idx > vmc->vmc_nnics) 8223481ecdfSdv fatalx("%s: invalid vionet index: %u", __func__, idx); 82397f33f1dSdv 8243481ecdfSdv SLIST_FOREACH(dev, &virtio_devs, dev_next) { 8253481ecdfSdv if (dev->dev_type == VMD_DEVTYPE_NET 8263481ecdfSdv && dev->vionet.idx == idx) { 8273481ecdfSdv vionet = &dev->vionet; 8283481ecdfSdv break; 8293481ecdfSdv } 8303481ecdfSdv } 8313481ecdfSdv if (vionet == NULL) 8323481ecdfSdv fatalx("%s: dev == NULL, idx = %u", __func__, idx); 8333481ecdfSdv 8343481ecdfSdv /* Set the local vm process copy. */ 8353481ecdfSdv memcpy(vionet->hostmac, addr, sizeof(vionet->hostmac)); 8363481ecdfSdv 8373481ecdfSdv /* Send the information to the device process. */ 8383481ecdfSdv ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_HOSTMAC, 0, 0, -1, 8393481ecdfSdv vionet->hostmac, sizeof(vionet->hostmac)); 8403481ecdfSdv if (ret == -1) { 8413481ecdfSdv log_warnx("%s: failed to queue hostmac to vionet dev %u", 8423481ecdfSdv __func__, idx); 8433481ecdfSdv return; 8443481ecdfSdv } 84597f33f1dSdv } 84697f33f1dSdv 84750bebf2cSccardenas void 84850bebf2cSccardenas virtio_shutdown(struct vmd_vm *vm) 84950bebf2cSccardenas { 8503481ecdfSdv int ret, status; 8513481ecdfSdv pid_t pid = 0; 8523481ecdfSdv struct virtio_dev *dev, *tmp; 8533481ecdfSdv struct viodev_msg msg; 8543481ecdfSdv struct imsgbuf *ibuf; 85550bebf2cSccardenas 8563481ecdfSdv /* Ensure that our disks are synced. */ 857a95a03dbSccardenas if (vioscsi != NULL) 858f6c09be3Sreyk vioscsi->file.close(vioscsi->file.p, 0); 859a95a03dbSccardenas 8603481ecdfSdv /* 8613481ecdfSdv * Broadcast shutdown to child devices. We need to do this 8623481ecdfSdv * synchronously as we have already stopped the async event thread. 8633481ecdfSdv */ 8643481ecdfSdv SLIST_FOREACH(dev, &virtio_devs, dev_next) { 8653481ecdfSdv memset(&msg, 0, sizeof(msg)); 8663481ecdfSdv msg.type = VIODEV_MSG_SHUTDOWN; 8673481ecdfSdv ibuf = &dev->sync_iev.ibuf; 8683481ecdfSdv ret = imsg_compose(ibuf, VIODEV_MSG_SHUTDOWN, 0, 0, -1, 8693481ecdfSdv &msg, sizeof(msg)); 8703481ecdfSdv if (ret == -1) 8713481ecdfSdv fatalx("%s: failed to send shutdown to device", 8723481ecdfSdv __func__); 873dd7efffeSclaudio if (imsgbuf_flush(ibuf) == -1) 874dd7efffeSclaudio fatalx("%s: imsgbuf_flush", __func__); 8753481ecdfSdv } 8763481ecdfSdv 8773481ecdfSdv /* 8783481ecdfSdv * Wait for all children to shutdown using a simple approach of 8793481ecdfSdv * iterating over known child devices and waiting for them to die. 8803481ecdfSdv */ 8813481ecdfSdv SLIST_FOREACH_SAFE(dev, &virtio_devs, dev_next, tmp) { 8823481ecdfSdv log_debug("%s: waiting on device pid %d", __func__, 8833481ecdfSdv dev->dev_pid); 8843481ecdfSdv do { 8853481ecdfSdv pid = waitpid(dev->dev_pid, &status, WNOHANG); 8863481ecdfSdv } while (pid == 0 || (pid == -1 && errno == EINTR)); 8873481ecdfSdv if (pid == dev->dev_pid) 8883481ecdfSdv log_debug("%s: device for pid %d is stopped", 8893481ecdfSdv __func__, pid); 8903481ecdfSdv else 8913481ecdfSdv log_warnx("%s: unexpected pid %d", __func__, pid); 8923481ecdfSdv free(dev); 8933481ecdfSdv } 89450bebf2cSccardenas } 89550bebf2cSccardenas 896149417b6Sreyk int 897149417b6Sreyk vmmci_restore(int fd, uint32_t vm_id) 898149417b6Sreyk { 899149417b6Sreyk log_debug("%s: receiving vmmci", __func__); 900149417b6Sreyk if (atomicio(read, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) { 901149417b6Sreyk log_warnx("%s: error reading vmmci from fd", __func__); 902149417b6Sreyk return (-1); 903149417b6Sreyk } 904149417b6Sreyk 905813e3047Spd if (pci_set_bar_fn(vmmci.pci_id, 0, vmmci_io, NULL)) { 906813e3047Spd log_warnx("%s: can't set bar fn for vmm control device", 907149417b6Sreyk __progname); 908149417b6Sreyk return (-1); 909149417b6Sreyk } 910149417b6Sreyk vmmci.vm_id = vm_id; 911353e9815Spd vmmci.irq = pci_get_dev_irq(vmmci.pci_id); 912149417b6Sreyk memset(&vmmci.timeout, 0, sizeof(struct event)); 913149417b6Sreyk evtimer_set(&vmmci.timeout, vmmci_timeout, NULL); 914149417b6Sreyk return (0); 915149417b6Sreyk } 916149417b6Sreyk 917149417b6Sreyk int 91873a98491Sdv viornd_restore(int fd, struct vmd_vm *vm) 919149417b6Sreyk { 92068534885Sdv void *hva = NULL; 92168534885Sdv 922149417b6Sreyk log_debug("%s: receiving viornd", __func__); 923149417b6Sreyk if (atomicio(read, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) { 924149417b6Sreyk log_warnx("%s: error reading viornd from fd", __func__); 925149417b6Sreyk return (-1); 926149417b6Sreyk } 927813e3047Spd if (pci_set_bar_fn(viornd.pci_id, 0, virtio_rnd_io, NULL)) { 928813e3047Spd log_warnx("%s: can't set bar fn for virtio rng device", 929149417b6Sreyk __progname); 930149417b6Sreyk return (-1); 931149417b6Sreyk } 93273a98491Sdv viornd.vm_id = vm->vm_params.vmc_params.vcp_id; 933353e9815Spd viornd.irq = pci_get_dev_irq(viornd.pci_id); 9347b7c4b51Smlarkin 93568534885Sdv hva = hvaddr_mem(viornd.vq[0].q_gpa, vring_size(VIORND_QUEUE_SIZE)); 93668534885Sdv if (hva == NULL) 93768534885Sdv fatal("failed to restore viornd virtqueue"); 93868534885Sdv viornd.vq[0].q_hva = hva; 93968534885Sdv 940149417b6Sreyk return (0); 941149417b6Sreyk } 942149417b6Sreyk 943149417b6Sreyk int 944149417b6Sreyk vionet_restore(int fd, struct vmd_vm *vm, int *child_taps) 945149417b6Sreyk { 946149417b6Sreyk struct vmop_create_params *vmc = &vm->vm_params; 947149417b6Sreyk struct vm_create_params *vcp = &vmc->vmc_params; 9483481ecdfSdv struct virtio_dev *dev; 949813e3047Spd uint8_t i; 950149417b6Sreyk 9513481ecdfSdv if (vmc->vmc_nnics == 0) 9523481ecdfSdv return (0); 9533481ecdfSdv 9543481ecdfSdv for (i = 0; i < vmc->vmc_nnics; i++) { 9553481ecdfSdv dev = calloc(1, sizeof(struct virtio_dev)); 9563481ecdfSdv if (dev == NULL) { 9573481ecdfSdv log_warn("%s: calloc failure allocating vionet", 958149417b6Sreyk __progname); 959149417b6Sreyk return (-1); 960149417b6Sreyk } 9613481ecdfSdv 9623481ecdfSdv log_debug("%s: receiving virtio network device", __func__); 9633481ecdfSdv if (atomicio(read, fd, dev, sizeof(struct virtio_dev)) 9643481ecdfSdv != sizeof(struct virtio_dev)) { 965149417b6Sreyk log_warnx("%s: error reading vionet from fd", 966149417b6Sreyk __func__); 967149417b6Sreyk return (-1); 968149417b6Sreyk } 969149417b6Sreyk 970149417b6Sreyk /* Virtio network */ 9713481ecdfSdv if (dev->dev_type != VMD_DEVTYPE_NET) { 9723481ecdfSdv log_warnx("%s: invalid device type", __func__); 9733481ecdfSdv return (-1); 9743481ecdfSdv } 9753481ecdfSdv 9763481ecdfSdv dev->sync_fd = -1; 9773481ecdfSdv dev->async_fd = -1; 9783481ecdfSdv dev->vm_id = vcp->vcp_id; 9793481ecdfSdv dev->vm_vmid = vm->vm_vmid; 9803481ecdfSdv dev->irq = pci_get_dev_irq(dev->pci_id); 9813481ecdfSdv 9823481ecdfSdv if (pci_set_bar_fn(dev->pci_id, 0, virtio_pci_io, dev)) { 983813e3047Spd log_warnx("%s: can't set bar fn for virtio net " 984149417b6Sreyk "device", __progname); 985149417b6Sreyk return (-1); 986149417b6Sreyk } 987149417b6Sreyk 9883481ecdfSdv dev->vionet.data_fd = child_taps[i]; 9893481ecdfSdv dev->vionet.idx = i; 990149417b6Sreyk 9913481ecdfSdv SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next); 992149417b6Sreyk } 993149417b6Sreyk 994149417b6Sreyk return (0); 995149417b6Sreyk } 996149417b6Sreyk 997149417b6Sreyk int 99873a98491Sdv vioblk_restore(int fd, struct vmd_vm *vm, 99973613953Sreyk int child_disks[][VM_MAX_BASE_PER_DISK]) 1000149417b6Sreyk { 10013481ecdfSdv struct vmop_create_params *vmc = &vm->vm_params; 10023481ecdfSdv struct virtio_dev *dev; 10033481ecdfSdv uint8_t i, j; 1004149417b6Sreyk 10053481ecdfSdv if (vmc->vmc_ndisks == 0) 10063481ecdfSdv return (0); 10073481ecdfSdv 10083481ecdfSdv for (i = 0; i < vmc->vmc_ndisks; i++) { 10093481ecdfSdv dev = calloc(1, sizeof(struct virtio_dev)); 10103481ecdfSdv if (dev == NULL) { 10113481ecdfSdv log_warn("%s: calloc failure allocating vioblks", 10123481ecdfSdv __progname); 1013149417b6Sreyk return (-1); 1014149417b6Sreyk } 10153481ecdfSdv 1016149417b6Sreyk log_debug("%s: receiving vioblk", __func__); 10173481ecdfSdv if (atomicio(read, fd, dev, sizeof(struct virtio_dev)) 10183481ecdfSdv != sizeof(struct virtio_dev)) { 1019149417b6Sreyk log_warnx("%s: error reading vioblk from fd", __func__); 1020149417b6Sreyk return (-1); 1021149417b6Sreyk } 10223481ecdfSdv if (dev->dev_type != VMD_DEVTYPE_DISK) { 10233481ecdfSdv log_warnx("%s: invalid device type", __func__); 10243481ecdfSdv return (-1); 10253481ecdfSdv } 10263481ecdfSdv 10273481ecdfSdv dev->sync_fd = -1; 10283481ecdfSdv dev->async_fd = -1; 10293481ecdfSdv 10303481ecdfSdv if (pci_set_bar_fn(dev->pci_id, 0, virtio_pci_io, dev)) { 1031813e3047Spd log_warnx("%s: can't set bar fn for virtio block " 1032149417b6Sreyk "device", __progname); 1033149417b6Sreyk return (-1); 1034149417b6Sreyk } 10353481ecdfSdv dev->vm_id = vmc->vmc_params.vcp_id; 10363481ecdfSdv dev->irq = pci_get_dev_irq(dev->pci_id); 103768534885Sdv 10383481ecdfSdv memset(&dev->vioblk.disk_fd, -1, sizeof(dev->vioblk.disk_fd)); 10393481ecdfSdv dev->vioblk.ndisk_fd = vmc->vmc_diskbases[i]; 10403481ecdfSdv for (j = 0; j < dev->vioblk.ndisk_fd; j++) 10413481ecdfSdv dev->vioblk.disk_fd[j] = child_disks[i][j]; 10423481ecdfSdv 10433481ecdfSdv dev->vioblk.idx = i; 10443481ecdfSdv SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next); 1045149417b6Sreyk } 1046149417b6Sreyk return (0); 1047149417b6Sreyk } 1048149417b6Sreyk 1049149417b6Sreyk int 105073a98491Sdv vioscsi_restore(int fd, struct vmd_vm *vm, int child_cdrom) 105195ab188fSccardenas { 105268534885Sdv void *hva = NULL; 105368534885Sdv unsigned int i; 105468534885Sdv 105573a98491Sdv if (!strlen(vm->vm_params.vmc_cdrom)) 105684dba573Spd return (0); 105784dba573Spd 105895ab188fSccardenas vioscsi = calloc(1, sizeof(struct vioscsi_dev)); 105995ab188fSccardenas if (vioscsi == NULL) { 106095ab188fSccardenas log_warn("%s: calloc failure allocating vioscsi", __progname); 106195ab188fSccardenas return (-1); 106295ab188fSccardenas } 106395ab188fSccardenas 106495ab188fSccardenas log_debug("%s: receiving vioscsi", __func__); 106595ab188fSccardenas 106695ab188fSccardenas if (atomicio(read, fd, vioscsi, sizeof(struct vioscsi_dev)) != 106795ab188fSccardenas sizeof(struct vioscsi_dev)) { 106895ab188fSccardenas log_warnx("%s: error reading vioscsi from fd", __func__); 106995ab188fSccardenas return (-1); 107095ab188fSccardenas } 107195ab188fSccardenas 107284dba573Spd if (pci_set_bar_fn(vioscsi->pci_id, 0, vioscsi_io, vioscsi)) { 107395ab188fSccardenas log_warnx("%s: can't set bar fn for vmm control device", 107495ab188fSccardenas __progname); 107595ab188fSccardenas return (-1); 107695ab188fSccardenas } 107795ab188fSccardenas 107873a98491Sdv vioscsi->vm_id = vm->vm_params.vmc_params.vcp_id; 1079353e9815Spd vioscsi->irq = pci_get_dev_irq(vioscsi->pci_id); 108095ab188fSccardenas 108168534885Sdv /* vioscsi uses 3 virtqueues. */ 108268534885Sdv for (i = 0; i < 3; i++) { 108368534885Sdv hva = hvaddr_mem(vioscsi->vq[i].q_gpa, 108468534885Sdv vring_size(VIOSCSI_QUEUE_SIZE)); 108568534885Sdv if (hva == NULL) 108668534885Sdv fatal("failed to restore vioscsi virtqueue"); 108768534885Sdv vioscsi->vq[i].q_hva = hva; 108868534885Sdv } 108968534885Sdv 109095ab188fSccardenas return (0); 109195ab188fSccardenas } 109295ab188fSccardenas 109395ab188fSccardenas int 109473613953Sreyk virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom, 109573613953Sreyk int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps) 1096149417b6Sreyk { 10973481ecdfSdv struct virtio_dev *dev; 1098149417b6Sreyk int ret; 1099149417b6Sreyk 11003481ecdfSdv SLIST_INIT(&virtio_devs); 11013481ecdfSdv 110273a98491Sdv if ((ret = viornd_restore(fd, vm)) == -1) 11033481ecdfSdv return (ret); 1104149417b6Sreyk 110573a98491Sdv if ((ret = vioblk_restore(fd, vm, child_disks)) == -1) 11063481ecdfSdv return (ret); 1107149417b6Sreyk 110873a98491Sdv if ((ret = vioscsi_restore(fd, vm, child_cdrom)) == -1) 11093481ecdfSdv return (ret); 111095ab188fSccardenas 1111149417b6Sreyk if ((ret = vionet_restore(fd, vm, child_taps)) == -1) 11123481ecdfSdv return (ret); 1113149417b6Sreyk 111473a98491Sdv if ((ret = vmmci_restore(fd, vm->vm_params.vmc_params.vcp_id)) == -1) 11153481ecdfSdv return (ret); 11163481ecdfSdv 11173481ecdfSdv SLIST_FOREACH(dev, &virtio_devs, dev_next) { 11183481ecdfSdv if (virtio_dev_launch(vm, dev) != 0) 11193481ecdfSdv fatalx("%s: failed to restore virtio dev", __func__); 11203481ecdfSdv } 1121149417b6Sreyk 1122149417b6Sreyk return (0); 1123149417b6Sreyk } 1124149417b6Sreyk 1125149417b6Sreyk int 1126149417b6Sreyk viornd_dump(int fd) 1127149417b6Sreyk { 1128149417b6Sreyk log_debug("%s: sending viornd", __func__); 112968534885Sdv 113068534885Sdv viornd.vq[0].q_hva = NULL; 113168534885Sdv 1132149417b6Sreyk if (atomicio(vwrite, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) { 1133149417b6Sreyk log_warnx("%s: error writing viornd to fd", __func__); 1134149417b6Sreyk return (-1); 1135149417b6Sreyk } 1136149417b6Sreyk return (0); 1137149417b6Sreyk } 1138149417b6Sreyk 1139149417b6Sreyk int 1140149417b6Sreyk vmmci_dump(int fd) 1141149417b6Sreyk { 1142149417b6Sreyk log_debug("%s: sending vmmci", __func__); 114368534885Sdv 1144149417b6Sreyk if (atomicio(vwrite, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) { 1145149417b6Sreyk log_warnx("%s: error writing vmmci to fd", __func__); 1146149417b6Sreyk return (-1); 1147149417b6Sreyk } 1148149417b6Sreyk return (0); 1149149417b6Sreyk } 1150149417b6Sreyk 1151149417b6Sreyk int 1152149417b6Sreyk vionet_dump(int fd) 1153149417b6Sreyk { 11543481ecdfSdv struct virtio_dev *dev, temp; 11553481ecdfSdv struct viodev_msg msg; 11563481ecdfSdv struct imsg imsg; 11573481ecdfSdv struct imsgbuf *ibuf = NULL; 11583481ecdfSdv size_t sz; 11593481ecdfSdv int ret; 116068534885Sdv 11613481ecdfSdv log_debug("%s: dumping vionet", __func__); 116268534885Sdv 11633481ecdfSdv SLIST_FOREACH(dev, &virtio_devs, dev_next) { 11643481ecdfSdv if (dev->dev_type != VMD_DEVTYPE_NET) 11653481ecdfSdv continue; 11663481ecdfSdv 11673481ecdfSdv memset(&msg, 0, sizeof(msg)); 11683481ecdfSdv memset(&imsg, 0, sizeof(imsg)); 11693481ecdfSdv 11703481ecdfSdv ibuf = &dev->sync_iev.ibuf; 11713481ecdfSdv msg.type = VIODEV_MSG_DUMP; 11723481ecdfSdv 11733481ecdfSdv ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg, 11743481ecdfSdv sizeof(msg)); 11753481ecdfSdv if (ret == -1) { 11763481ecdfSdv log_warnx("%s: failed requesting dump of vionet[%d]", 11773481ecdfSdv __func__, dev->vionet.idx); 11783481ecdfSdv return (-1); 11793481ecdfSdv } 1180dd7efffeSclaudio if (imsgbuf_flush(ibuf) == -1) { 1181dd7efffeSclaudio log_warnx("%s: imsgbuf_flush", __func__); 11823481ecdfSdv return (-1); 118368534885Sdv } 118468534885Sdv 11853481ecdfSdv sz = atomicio(read, dev->sync_fd, &temp, sizeof(temp)); 11863481ecdfSdv if (sz != sizeof(temp)) { 11873481ecdfSdv log_warnx("%s: failed to dump vionet[%d]", __func__, 11883481ecdfSdv dev->vionet.idx); 11893481ecdfSdv return (-1); 11903481ecdfSdv } 11913481ecdfSdv 119208d0da61Sdv /* Clear volatile state. Will reinitialize on restore. */ 11933481ecdfSdv temp.vionet.vq[RXQ].q_hva = NULL; 11943481ecdfSdv temp.vionet.vq[TXQ].q_hva = NULL; 11953481ecdfSdv temp.async_fd = -1; 11963481ecdfSdv temp.sync_fd = -1; 11973481ecdfSdv memset(&temp.async_iev, 0, sizeof(temp.async_iev)); 11983481ecdfSdv memset(&temp.sync_iev, 0, sizeof(temp.sync_iev)); 11993481ecdfSdv 12003481ecdfSdv if (atomicio(vwrite, fd, &temp, sizeof(temp)) != sizeof(temp)) { 1201149417b6Sreyk log_warnx("%s: error writing vionet to fd", __func__); 1202149417b6Sreyk return (-1); 1203149417b6Sreyk } 12043481ecdfSdv } 12053481ecdfSdv 1206149417b6Sreyk return (0); 1207149417b6Sreyk } 1208149417b6Sreyk 1209149417b6Sreyk int 1210149417b6Sreyk vioblk_dump(int fd) 1211149417b6Sreyk { 12123481ecdfSdv struct virtio_dev *dev, temp; 12133481ecdfSdv struct viodev_msg msg; 12143481ecdfSdv struct imsg imsg; 12153481ecdfSdv struct imsgbuf *ibuf = NULL; 12163481ecdfSdv size_t sz; 12173481ecdfSdv int ret; 121868534885Sdv 12193481ecdfSdv log_debug("%s: dumping vioblk", __func__); 122068534885Sdv 12213481ecdfSdv SLIST_FOREACH(dev, &virtio_devs, dev_next) { 12223481ecdfSdv if (dev->dev_type != VMD_DEVTYPE_DISK) 12233481ecdfSdv continue; 122468534885Sdv 12253481ecdfSdv memset(&msg, 0, sizeof(msg)); 12263481ecdfSdv memset(&imsg, 0, sizeof(imsg)); 12273481ecdfSdv 12283481ecdfSdv ibuf = &dev->sync_iev.ibuf; 12293481ecdfSdv msg.type = VIODEV_MSG_DUMP; 12303481ecdfSdv 12313481ecdfSdv ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg, 12323481ecdfSdv sizeof(msg)); 12333481ecdfSdv if (ret == -1) { 12343481ecdfSdv log_warnx("%s: failed requesting dump of vioblk[%d]", 12353481ecdfSdv __func__, dev->vioblk.idx); 12363481ecdfSdv return (-1); 12373481ecdfSdv } 1238dd7efffeSclaudio if (imsgbuf_flush(ibuf) == -1) { 1239dd7efffeSclaudio log_warnx("%s: imsgbuf_flush", __func__); 12403481ecdfSdv return (-1); 12413481ecdfSdv } 12423481ecdfSdv 12433481ecdfSdv 12443481ecdfSdv sz = atomicio(read, dev->sync_fd, &temp, sizeof(temp)); 12453481ecdfSdv if (sz != sizeof(temp)) { 12463481ecdfSdv log_warnx("%s: failed to dump vioblk[%d]", __func__, 12473481ecdfSdv dev->vioblk.idx); 12483481ecdfSdv return (-1); 12493481ecdfSdv } 12503481ecdfSdv 125108d0da61Sdv /* Clear volatile state. Will reinitialize on restore. */ 12523481ecdfSdv temp.vioblk.vq[0].q_hva = NULL; 12533481ecdfSdv temp.async_fd = -1; 12543481ecdfSdv temp.sync_fd = -1; 12553481ecdfSdv memset(&temp.async_iev, 0, sizeof(temp.async_iev)); 12563481ecdfSdv memset(&temp.sync_iev, 0, sizeof(temp.sync_iev)); 12573481ecdfSdv 12583481ecdfSdv if (atomicio(vwrite, fd, &temp, sizeof(temp)) != sizeof(temp)) { 1259149417b6Sreyk log_warnx("%s: error writing vioblk to fd", __func__); 1260149417b6Sreyk return (-1); 1261149417b6Sreyk } 12623481ecdfSdv } 12633481ecdfSdv 1264149417b6Sreyk return (0); 1265149417b6Sreyk } 1266149417b6Sreyk 1267149417b6Sreyk int 126895ab188fSccardenas vioscsi_dump(int fd) 126995ab188fSccardenas { 127068534885Sdv unsigned int i; 127168534885Sdv 127284dba573Spd if (vioscsi == NULL) 127384dba573Spd return (0); 127484dba573Spd 127595ab188fSccardenas log_debug("%s: sending vioscsi", __func__); 127668534885Sdv 127768534885Sdv for (i = 0; i < 3; i++) 127868534885Sdv vioscsi->vq[i].q_hva = NULL; 127968534885Sdv 128084dba573Spd if (atomicio(vwrite, fd, vioscsi, sizeof(struct vioscsi_dev)) != 128184dba573Spd sizeof(struct vioscsi_dev)) { 128295ab188fSccardenas log_warnx("%s: error writing vioscsi to fd", __func__); 128395ab188fSccardenas return (-1); 128495ab188fSccardenas } 128595ab188fSccardenas return (0); 128695ab188fSccardenas } 128795ab188fSccardenas 128895ab188fSccardenas int 1289149417b6Sreyk virtio_dump(int fd) 1290149417b6Sreyk { 1291149417b6Sreyk int ret; 1292149417b6Sreyk 1293149417b6Sreyk if ((ret = viornd_dump(fd)) == -1) 1294149417b6Sreyk return ret; 1295149417b6Sreyk 1296149417b6Sreyk if ((ret = vioblk_dump(fd)) == -1) 1297149417b6Sreyk return ret; 1298149417b6Sreyk 129995ab188fSccardenas if ((ret = vioscsi_dump(fd)) == -1) 130095ab188fSccardenas return ret; 130195ab188fSccardenas 1302149417b6Sreyk if ((ret = vionet_dump(fd)) == -1) 1303149417b6Sreyk return ret; 1304149417b6Sreyk 1305ebcaa090Smlarkin if ((ret = vmmci_dump(fd)) == -1) 1306149417b6Sreyk return ret; 1307149417b6Sreyk 1308149417b6Sreyk return (0); 1309149417b6Sreyk } 1310548054a9Spd 131108d0da61Sdv void virtio_broadcast_imsg(struct vmd_vm *vm, uint16_t type, void *data, 131208d0da61Sdv uint16_t datalen) 1313548054a9Spd { 13143481ecdfSdv struct virtio_dev *dev; 13153481ecdfSdv int ret; 13163481ecdfSdv 13173481ecdfSdv SLIST_FOREACH(dev, &virtio_devs, dev_next) { 131808d0da61Sdv ret = imsg_compose_event(&dev->async_iev, type, 0, 0, -1, data, 131908d0da61Sdv datalen); 13203481ecdfSdv if (ret == -1) { 132108d0da61Sdv log_warnx("%s: failed to broadcast imsg type %u", 132208d0da61Sdv __func__, type); 1323548054a9Spd } 1324548054a9Spd } 132508d0da61Sdv 132608d0da61Sdv } 132708d0da61Sdv 132808d0da61Sdv void 132908d0da61Sdv virtio_stop(struct vmd_vm *vm) 133008d0da61Sdv { 133108d0da61Sdv return virtio_broadcast_imsg(vm, IMSG_VMDOP_PAUSE_VM, NULL, 0); 1332548054a9Spd } 1333548054a9Spd 1334548054a9Spd void 133573a98491Sdv virtio_start(struct vmd_vm *vm) 1336548054a9Spd { 133708d0da61Sdv return virtio_broadcast_imsg(vm, IMSG_VMDOP_UNPAUSE_VM, NULL, 0); 13383481ecdfSdv } 13393481ecdfSdv 13403481ecdfSdv /* 13413481ecdfSdv * Fork+exec a child virtio device. Returns 0 on success. 13423481ecdfSdv */ 13433481ecdfSdv static int 13443481ecdfSdv virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev *dev) 13453481ecdfSdv { 134608d0da61Sdv char *nargv[12], num[32], vmm_fd[32], vm_name[VM_NAME_MAX], t[2]; 13473481ecdfSdv pid_t dev_pid; 134878979b66Sdv int sync_fds[2], async_fds[2], ret = 0; 13497f22b52aSbluhm size_t i, sz = 0; 13503481ecdfSdv struct viodev_msg msg; 1351b3bc6112Sdv struct virtio_dev *dev_entry; 13523481ecdfSdv struct imsg imsg; 13533481ecdfSdv struct imsgev *iev = &dev->sync_iev; 13543481ecdfSdv 13553481ecdfSdv switch (dev->dev_type) { 13563481ecdfSdv case VMD_DEVTYPE_NET: 135708d0da61Sdv log_debug("%s: launching vionet%d", 13583481ecdfSdv vm->vm_params.vmc_params.vcp_name, dev->vionet.idx); 13593481ecdfSdv break; 13603481ecdfSdv case VMD_DEVTYPE_DISK: 136108d0da61Sdv log_debug("%s: launching vioblk%d", 13623481ecdfSdv vm->vm_params.vmc_params.vcp_name, dev->vioblk.idx); 13633481ecdfSdv break; 13643481ecdfSdv /* NOTREACHED */ 13653481ecdfSdv default: 13663481ecdfSdv log_warn("%s: invalid device type", __func__); 13673481ecdfSdv return (EINVAL); 13683481ecdfSdv } 13693481ecdfSdv 13703481ecdfSdv /* We need two channels: one synchronous (IO reads) and one async. */ 1371b3bc6112Sdv if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC, 1372b3bc6112Sdv sync_fds) == -1) { 13733481ecdfSdv log_warn("failed to create socketpair"); 13743481ecdfSdv return (errno); 13753481ecdfSdv } 1376b3bc6112Sdv if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC, 1377b3bc6112Sdv async_fds) == -1) { 13783481ecdfSdv log_warn("failed to create async socketpair"); 13793481ecdfSdv return (errno); 13803481ecdfSdv } 13813481ecdfSdv 13823481ecdfSdv /* Fork... */ 13833481ecdfSdv dev_pid = fork(); 13843481ecdfSdv if (dev_pid == -1) { 13853481ecdfSdv ret = errno; 13863481ecdfSdv log_warn("%s: fork failed", __func__); 13873481ecdfSdv goto err; 13883481ecdfSdv } 13893481ecdfSdv 13903481ecdfSdv if (dev_pid > 0) { 13913481ecdfSdv /* Parent */ 13923481ecdfSdv close_fd(sync_fds[1]); 13933481ecdfSdv close_fd(async_fds[1]); 13943481ecdfSdv 13953481ecdfSdv /* Save the child's pid to help with cleanup. */ 13963481ecdfSdv dev->dev_pid = dev_pid; 13973481ecdfSdv 13983481ecdfSdv /* Set the channel fds to the child's before sending. */ 13993481ecdfSdv dev->sync_fd = sync_fds[1]; 14003481ecdfSdv dev->async_fd = async_fds[1]; 14013481ecdfSdv 14023481ecdfSdv /* 1. Send over our configured device. */ 14033481ecdfSdv log_debug("%s: sending '%c' type device struct", __func__, 14043481ecdfSdv dev->dev_type); 14053481ecdfSdv sz = atomicio(vwrite, sync_fds[0], dev, sizeof(*dev)); 14063481ecdfSdv if (sz != sizeof(*dev)) { 14073481ecdfSdv log_warnx("%s: failed to send device", __func__); 14083481ecdfSdv ret = EIO; 14093481ecdfSdv goto err; 14103481ecdfSdv } 14113481ecdfSdv 141278979b66Sdv /* Close data fds. Only the child device needs them now. */ 141378979b66Sdv if (virtio_dev_closefds(dev) == -1) { 141478979b66Sdv log_warnx("%s: failed to close device data fds", 141578979b66Sdv __func__); 141678979b66Sdv goto err; 141778979b66Sdv } 141878979b66Sdv 14193481ecdfSdv /* 2. Send over details on the VM (including memory fds). */ 14203481ecdfSdv log_debug("%s: sending vm message for '%s'", __func__, 14213481ecdfSdv vm->vm_params.vmc_params.vcp_name); 14223481ecdfSdv sz = atomicio(vwrite, sync_fds[0], vm, sizeof(*vm)); 14233481ecdfSdv if (sz != sizeof(*vm)) { 14243481ecdfSdv log_warnx("%s: failed to send vm details", __func__); 14253481ecdfSdv ret = EIO; 14263481ecdfSdv goto err; 14273481ecdfSdv } 14283481ecdfSdv 14293481ecdfSdv /* 14303481ecdfSdv * Initialize our imsg channel to the child device. The initial 14313481ecdfSdv * communication will be synchronous. We expect the child to 14323481ecdfSdv * report itself "ready" to confirm the launch was a success. 14333481ecdfSdv */ 14340a9d031fSclaudio if (imsgbuf_init(&iev->ibuf, sync_fds[0]) == -1) { 14350a9d031fSclaudio log_warn("%s: failed to init imsgbuf", __func__); 14360a9d031fSclaudio goto err; 14370a9d031fSclaudio } 14380a9d031fSclaudio imsgbuf_allow_fdpass(&iev->ibuf); 1439d12ef5f3Sclaudio ret = imsgbuf_read_one(&iev->ibuf, &imsg); 14403481ecdfSdv if (ret == 0 || ret == -1) { 14413481ecdfSdv log_warnx("%s: failed to receive ready message from " 14423481ecdfSdv "'%c' type device", __func__, dev->dev_type); 14433481ecdfSdv ret = EIO; 14443481ecdfSdv goto err; 14453481ecdfSdv } 14463481ecdfSdv ret = 0; 14473481ecdfSdv 14483481ecdfSdv IMSG_SIZE_CHECK(&imsg, &msg); 14493481ecdfSdv memcpy(&msg, imsg.data, sizeof(msg)); 14503481ecdfSdv imsg_free(&imsg); 14513481ecdfSdv 14523481ecdfSdv if (msg.type != VIODEV_MSG_READY) { 14533481ecdfSdv log_warnx("%s: expected ready message, got type %d", 14543481ecdfSdv __func__, msg.type); 14553481ecdfSdv ret = EINVAL; 14563481ecdfSdv goto err; 14573481ecdfSdv } 14583481ecdfSdv log_debug("%s: device reports ready via sync channel", 14593481ecdfSdv __func__); 14603481ecdfSdv 14613481ecdfSdv /* 14623481ecdfSdv * Wire in the async event handling, but after reverting back 14633481ecdfSdv * to the parent's fd's. 14643481ecdfSdv */ 14653481ecdfSdv dev->sync_fd = sync_fds[0]; 14663481ecdfSdv dev->async_fd = async_fds[0]; 1467a246f7a0Sdv vm_device_pipe(dev, virtio_dispatch_dev, NULL); 14683481ecdfSdv } else { 14693481ecdfSdv /* Child */ 14703481ecdfSdv close_fd(async_fds[0]); 14713481ecdfSdv close_fd(sync_fds[0]); 14723481ecdfSdv 1473b3bc6112Sdv /* Close pty. Virtio devices do not need it. */ 1474b3bc6112Sdv close_fd(vm->vm_tty); 1475b3bc6112Sdv vm->vm_tty = -1; 1476b3bc6112Sdv 1477b3bc6112Sdv if (vm->vm_cdrom != -1) { 1478b3bc6112Sdv close_fd(vm->vm_cdrom); 1479b3bc6112Sdv vm->vm_cdrom = -1; 14803481ecdfSdv } 1481b3bc6112Sdv 1482b3bc6112Sdv /* Keep data file descriptors open after exec. */ 1483b3bc6112Sdv SLIST_FOREACH(dev_entry, &virtio_devs, dev_next) { 1484b3bc6112Sdv if (dev_entry == dev) 1485b3bc6112Sdv continue; 1486b3bc6112Sdv if (virtio_dev_closefds(dev_entry) == -1) 1487b3bc6112Sdv fatalx("unable to close other virtio devs"); 14883481ecdfSdv } 14893481ecdfSdv 14903481ecdfSdv memset(num, 0, sizeof(num)); 14913481ecdfSdv snprintf(num, sizeof(num), "%d", sync_fds[1]); 14923c817da7Sdv memset(vmm_fd, 0, sizeof(vmm_fd)); 14933c817da7Sdv snprintf(vmm_fd, sizeof(vmm_fd), "%d", env->vmd_fd); 149408d0da61Sdv memset(vm_name, 0, sizeof(vm_name)); 149508d0da61Sdv snprintf(vm_name, sizeof(vm_name), "%s", 149608d0da61Sdv vm->vm_params.vmc_params.vcp_name); 14973481ecdfSdv 14983481ecdfSdv t[0] = dev->dev_type; 14993481ecdfSdv t[1] = '\0'; 15003481ecdfSdv 15017f22b52aSbluhm i = 0; 15027f22b52aSbluhm nargv[i++] = env->argv0; 15037f22b52aSbluhm nargv[i++] = "-X"; 15047f22b52aSbluhm nargv[i++] = num; 15057f22b52aSbluhm nargv[i++] = "-t"; 15067f22b52aSbluhm nargv[i++] = t; 15077f22b52aSbluhm nargv[i++] = "-i"; 15087f22b52aSbluhm nargv[i++] = vmm_fd; 15097f22b52aSbluhm nargv[i++] = "-p"; 15107f22b52aSbluhm nargv[i++] = vm_name; 15117f22b52aSbluhm if (env->vmd_debug) 15127f22b52aSbluhm nargv[i++] = "-d"; 15137f22b52aSbluhm if (env->vmd_verbose == 1) 15147f22b52aSbluhm nargv[i++] = "-v"; 15157f22b52aSbluhm else if (env->vmd_verbose > 1) 15167f22b52aSbluhm nargv[i++] = "-vv"; 15177f22b52aSbluhm nargv[i++] = NULL; 15187f22b52aSbluhm if (i > sizeof(nargv) / sizeof(nargv[0])) 15197f22b52aSbluhm fatalx("%s: nargv overflow", __func__); 152061f4cd73Sdv 15213481ecdfSdv /* Control resumes in vmd.c:main(). */ 15223481ecdfSdv execvp(nargv[0], nargv); 15233481ecdfSdv 15243481ecdfSdv ret = errno; 15253481ecdfSdv log_warn("%s: failed to exec device", __func__); 15263481ecdfSdv _exit(ret); 15273481ecdfSdv /* NOTREACHED */ 15283481ecdfSdv } 15293481ecdfSdv 15303481ecdfSdv return (ret); 15313481ecdfSdv 15323481ecdfSdv err: 15333481ecdfSdv close_fd(sync_fds[0]); 15343481ecdfSdv close_fd(sync_fds[1]); 15353481ecdfSdv close_fd(async_fds[0]); 15363481ecdfSdv close_fd(async_fds[1]); 15373481ecdfSdv return (ret); 15383481ecdfSdv } 15393481ecdfSdv 15403481ecdfSdv /* 15413481ecdfSdv * Initialize an async imsg channel for a virtio device. 15423481ecdfSdv */ 15433481ecdfSdv int 1544a246f7a0Sdv vm_device_pipe(struct virtio_dev *dev, void (*cb)(int, short, void *), 1545a246f7a0Sdv struct event_base *ev_base) 15463481ecdfSdv { 15473481ecdfSdv struct imsgev *iev = &dev->async_iev; 15483481ecdfSdv int fd = dev->async_fd; 15493481ecdfSdv 15503481ecdfSdv log_debug("%s: initializing '%c' device pipe (fd=%d)", __func__, 15513481ecdfSdv dev->dev_type, fd); 15523481ecdfSdv 15530a9d031fSclaudio if (imsgbuf_init(&iev->ibuf, fd) == -1) 15540a9d031fSclaudio fatal("imsgbuf_init"); 15550a9d031fSclaudio imsgbuf_allow_fdpass(&iev->ibuf); 15563481ecdfSdv iev->handler = cb; 15573481ecdfSdv iev->data = dev; 15583481ecdfSdv iev->events = EV_READ; 1559a246f7a0Sdv imsg_event_add2(iev, ev_base); 15603481ecdfSdv 15613481ecdfSdv return (0); 15623481ecdfSdv } 15633481ecdfSdv 15643481ecdfSdv void 15653481ecdfSdv virtio_dispatch_dev(int fd, short event, void *arg) 15663481ecdfSdv { 15673481ecdfSdv struct virtio_dev *dev = (struct virtio_dev*)arg; 15683481ecdfSdv struct imsgev *iev = &dev->async_iev; 15693481ecdfSdv struct imsgbuf *ibuf = &iev->ibuf; 15703481ecdfSdv struct imsg imsg; 15713481ecdfSdv struct viodev_msg msg; 15723481ecdfSdv ssize_t n = 0; 15733481ecdfSdv 15743481ecdfSdv if (event & EV_READ) { 1575d12ef5f3Sclaudio if ((n = imsgbuf_read(ibuf)) == -1) 1576dd7efffeSclaudio fatal("%s: imsgbuf_read", __func__); 15773481ecdfSdv if (n == 0) { 15783481ecdfSdv /* this pipe is dead, so remove the event handler */ 15793481ecdfSdv log_debug("%s: pipe dead (EV_READ)", __func__); 15803481ecdfSdv event_del(&iev->ev); 15813481ecdfSdv event_loopexit(NULL); 1582548054a9Spd return; 1583548054a9Spd } 1584548054a9Spd } 15853481ecdfSdv 15863481ecdfSdv if (event & EV_WRITE) { 1587dd7efffeSclaudio if (imsgbuf_write(ibuf) == -1) { 1588c1aa9554Sclaudio if (errno == EPIPE) { 1589c1aa9554Sclaudio /* this pipe is dead, remove the handler */ 15903481ecdfSdv log_debug("%s: pipe dead (EV_WRITE)", __func__); 15913481ecdfSdv event_del(&iev->ev); 15923481ecdfSdv event_loopexit(NULL); 15933481ecdfSdv return; 15943481ecdfSdv } 1595dd7efffeSclaudio fatal("%s: imsgbuf_write", __func__); 1596c1aa9554Sclaudio } 15973481ecdfSdv } 15983481ecdfSdv 15993481ecdfSdv for (;;) { 16003481ecdfSdv if ((n = imsg_get(ibuf, &imsg)) == -1) 16013481ecdfSdv fatal("%s: imsg_get", __func__); 16023481ecdfSdv if (n == 0) 16033481ecdfSdv break; 16043481ecdfSdv 16053481ecdfSdv switch (imsg.hdr.type) { 16063481ecdfSdv case IMSG_DEVOP_MSG: 16073481ecdfSdv IMSG_SIZE_CHECK(&imsg, &msg); 16083481ecdfSdv memcpy(&msg, imsg.data, sizeof(msg)); 16093481ecdfSdv handle_dev_msg(&msg, dev); 16103481ecdfSdv break; 16113481ecdfSdv default: 16123481ecdfSdv log_warnx("%s: got non devop imsg %d", __func__, 16133481ecdfSdv imsg.hdr.type); 16143481ecdfSdv break; 16153481ecdfSdv } 16163481ecdfSdv imsg_free(&imsg); 16173481ecdfSdv } 16183481ecdfSdv imsg_event_add(iev); 16193481ecdfSdv } 16203481ecdfSdv 16213481ecdfSdv 16223481ecdfSdv static int 16233481ecdfSdv handle_dev_msg(struct viodev_msg *msg, struct virtio_dev *gdev) 16243481ecdfSdv { 16253481ecdfSdv uint32_t vm_id = gdev->vm_id; 16263481ecdfSdv int irq = gdev->irq; 16273481ecdfSdv 16283481ecdfSdv switch (msg->type) { 16293481ecdfSdv case VIODEV_MSG_KICK: 16303481ecdfSdv if (msg->state == INTR_STATE_ASSERT) 1631c4fd4c5bSdv vcpu_assert_irq(vm_id, msg->vcpu, irq); 16323481ecdfSdv else if (msg->state == INTR_STATE_DEASSERT) 1633c4fd4c5bSdv vcpu_deassert_irq(vm_id, msg->vcpu, irq); 16343481ecdfSdv break; 16353481ecdfSdv case VIODEV_MSG_READY: 16363481ecdfSdv log_debug("%s: device reports ready", __func__); 16373481ecdfSdv break; 16383481ecdfSdv case VIODEV_MSG_ERROR: 16393481ecdfSdv log_warnx("%s: device reported error", __func__); 16403481ecdfSdv break; 16413481ecdfSdv case VIODEV_MSG_INVALID: 16423481ecdfSdv case VIODEV_MSG_IO_READ: 16433481ecdfSdv case VIODEV_MSG_IO_WRITE: 16443481ecdfSdv /* FALLTHROUGH */ 16453481ecdfSdv default: 16463481ecdfSdv log_warnx("%s: unsupported device message type %d", __func__, 16473481ecdfSdv msg->type); 16483481ecdfSdv return (1); 16493481ecdfSdv } 16503481ecdfSdv 16513481ecdfSdv return (0); 16523481ecdfSdv }; 16533481ecdfSdv 16543481ecdfSdv /* 16553481ecdfSdv * Called by the VM process while processing IO from the VCPU thread. 16563481ecdfSdv * 16573481ecdfSdv * N.b. Since the VCPU thread calls this function, we cannot mutate the event 16583481ecdfSdv * system. All ipc messages must be sent manually and cannot be queued for 16593481ecdfSdv * the event loop to push them. (We need to perform a synchronous read, so 16603481ecdfSdv * this isn't really a big deal.) 16613481ecdfSdv */ 16623481ecdfSdv int 16633481ecdfSdv virtio_pci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, 16643481ecdfSdv void *cookie, uint8_t sz) 16653481ecdfSdv { 16663481ecdfSdv struct virtio_dev *dev = (struct virtio_dev *)cookie; 16673481ecdfSdv struct imsgbuf *ibuf = &dev->sync_iev.ibuf; 16683481ecdfSdv struct imsg imsg; 16693481ecdfSdv struct viodev_msg msg; 16703481ecdfSdv int ret = 0; 16713481ecdfSdv 16723481ecdfSdv memset(&msg, 0, sizeof(msg)); 16733481ecdfSdv msg.reg = reg; 16743481ecdfSdv msg.io_sz = sz; 16753481ecdfSdv 16763481ecdfSdv if (dir == 0) { 16773481ecdfSdv msg.type = VIODEV_MSG_IO_WRITE; 16783481ecdfSdv msg.data = *data; 16793481ecdfSdv msg.data_valid = 1; 16803481ecdfSdv } else 16813481ecdfSdv msg.type = VIODEV_MSG_IO_READ; 16823481ecdfSdv 16833481ecdfSdv if (msg.type == VIODEV_MSG_IO_WRITE) { 16843481ecdfSdv /* 16853481ecdfSdv * Write request. No reply expected. 16863481ecdfSdv */ 16873481ecdfSdv ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg, 16883481ecdfSdv sizeof(msg)); 16893481ecdfSdv if (ret == -1) { 16904cd4f486Sdv log_warn("%s: failed to send async io event to virtio" 16913481ecdfSdv " device", __func__); 16923481ecdfSdv return (ret); 16933481ecdfSdv } 1694dd7efffeSclaudio if (imsgbuf_flush(ibuf) == -1) { 1695dd7efffeSclaudio log_warnx("%s: imsgbuf_flush (write)", __func__); 16963481ecdfSdv return (-1); 16973481ecdfSdv } 16983481ecdfSdv } else { 16993481ecdfSdv /* 17003481ecdfSdv * Read request. Requires waiting for a reply. 17013481ecdfSdv */ 17023481ecdfSdv ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg, 17033481ecdfSdv sizeof(msg)); 17043481ecdfSdv if (ret == -1) { 17054cd4f486Sdv log_warnx("%s: failed to send sync io event to virtio" 17063481ecdfSdv " device", __func__); 17073481ecdfSdv return (ret); 17083481ecdfSdv } 1709dd7efffeSclaudio if (imsgbuf_flush(ibuf) == -1) { 1710dd7efffeSclaudio log_warnx("%s: imsgbuf_flush (read)", __func__); 17113481ecdfSdv return (-1); 17123481ecdfSdv } 17133481ecdfSdv 17143481ecdfSdv /* Read our reply. */ 1715d12ef5f3Sclaudio ret = imsgbuf_read_one(ibuf, &imsg); 1716d12ef5f3Sclaudio if (ret == 0 || ret == -1) { 1717d12ef5f3Sclaudio log_warn("%s: imsgbuf_read (n=%d)", __func__, ret); 17183481ecdfSdv return (-1); 17193481ecdfSdv } 17203481ecdfSdv IMSG_SIZE_CHECK(&imsg, &msg); 17213481ecdfSdv memcpy(&msg, imsg.data, sizeof(msg)); 17223481ecdfSdv imsg_free(&imsg); 17233481ecdfSdv 17243481ecdfSdv if (msg.type == VIODEV_MSG_IO_READ && msg.data_valid) { 172561f4cd73Sdv #if DEBUG 17263481ecdfSdv log_debug("%s: got sync read response (reg=%s)", 17273481ecdfSdv __func__, virtio_reg_name(msg.reg)); 172861f4cd73Sdv #endif /* DEBUG */ 17293481ecdfSdv *data = msg.data; 17303481ecdfSdv /* 17313481ecdfSdv * It's possible we're asked to {de,}assert after the 17323481ecdfSdv * device performs a register read. 17333481ecdfSdv */ 17343481ecdfSdv if (msg.state == INTR_STATE_ASSERT) 1735c4fd4c5bSdv vcpu_assert_irq(dev->vm_id, msg.vcpu, msg.irq); 17363481ecdfSdv else if (msg.state == INTR_STATE_DEASSERT) 1737c4fd4c5bSdv vcpu_deassert_irq(dev->vm_id, msg.vcpu, msg.irq); 17383481ecdfSdv } else { 17393481ecdfSdv log_warnx("%s: expected IO_READ, got %d", __func__, 17403481ecdfSdv msg.type); 17413481ecdfSdv return (-1); 17423481ecdfSdv } 17433481ecdfSdv } 17443481ecdfSdv 17453481ecdfSdv return (0); 17463481ecdfSdv } 17473481ecdfSdv 17483481ecdfSdv void 1749c4fd4c5bSdv virtio_assert_irq(struct virtio_dev *dev, int vcpu) 17503481ecdfSdv { 17513481ecdfSdv struct viodev_msg msg; 17523481ecdfSdv int ret; 17533481ecdfSdv 17543481ecdfSdv memset(&msg, 0, sizeof(msg)); 17553481ecdfSdv msg.irq = dev->irq; 17563481ecdfSdv msg.vcpu = vcpu; 17573481ecdfSdv msg.type = VIODEV_MSG_KICK; 17583481ecdfSdv msg.state = INTR_STATE_ASSERT; 17593481ecdfSdv 17603481ecdfSdv ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1, 17613481ecdfSdv &msg, sizeof(msg)); 17623481ecdfSdv if (ret == -1) 17633481ecdfSdv log_warnx("%s: failed to assert irq %d", __func__, dev->irq); 17643481ecdfSdv } 17653481ecdfSdv 17663481ecdfSdv void 1767c4fd4c5bSdv virtio_deassert_irq(struct virtio_dev *dev, int vcpu) 17683481ecdfSdv { 17693481ecdfSdv struct viodev_msg msg; 17703481ecdfSdv int ret; 17713481ecdfSdv 17723481ecdfSdv memset(&msg, 0, sizeof(msg)); 17733481ecdfSdv msg.irq = dev->irq; 17743481ecdfSdv msg.vcpu = vcpu; 17753481ecdfSdv msg.type = VIODEV_MSG_KICK; 17763481ecdfSdv msg.state = INTR_STATE_DEASSERT; 17773481ecdfSdv 17783481ecdfSdv ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1, 17793481ecdfSdv &msg, sizeof(msg)); 17803481ecdfSdv if (ret == -1) 17813481ecdfSdv log_warnx("%s: failed to deassert irq %d", __func__, dev->irq); 1782548054a9Spd } 1783b3bc6112Sdv 1784b3bc6112Sdv /* 1785b3bc6112Sdv * Close all underlying file descriptors for a given virtio device. 1786b3bc6112Sdv */ 1787b3bc6112Sdv static int 1788b3bc6112Sdv virtio_dev_closefds(struct virtio_dev *dev) 1789b3bc6112Sdv { 1790b3bc6112Sdv size_t i; 1791b3bc6112Sdv 1792b3bc6112Sdv switch (dev->dev_type) { 1793b3bc6112Sdv case VMD_DEVTYPE_DISK: 1794b3bc6112Sdv for (i = 0; i < dev->vioblk.ndisk_fd; i++) { 1795b3bc6112Sdv close_fd(dev->vioblk.disk_fd[i]); 1796b3bc6112Sdv dev->vioblk.disk_fd[i] = -1; 1797b3bc6112Sdv } 1798b3bc6112Sdv break; 1799b3bc6112Sdv case VMD_DEVTYPE_NET: 1800b3bc6112Sdv close_fd(dev->vionet.data_fd); 1801b3bc6112Sdv dev->vionet.data_fd = -1; 1802b3bc6112Sdv break; 1803b3bc6112Sdv default: 1804b3bc6112Sdv log_warnx("%s: invalid device type", __func__); 1805b3bc6112Sdv return (-1); 1806b3bc6112Sdv } 1807b3bc6112Sdv 180878979b66Sdv close_fd(dev->async_fd); 180978979b66Sdv dev->async_fd = -1; 181078979b66Sdv close_fd(dev->sync_fd); 181178979b66Sdv dev->sync_fd = -1; 181278979b66Sdv 1813b3bc6112Sdv return (0); 1814b3bc6112Sdv } 1815