1 /* $OpenBSD: vmd.c,v 1.163 2024/11/06 14:26:20 bluhm Exp $ */ 2 3 /* 4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 #include <sys/queue.h> 21 #include <sys/stat.h> 22 #include <sys/sysctl.h> 23 #include <sys/tty.h> 24 #include <sys/ttycom.h> 25 #include <sys/ioctl.h> 26 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <errno.h> 31 #include <event.h> 32 #include <fcntl.h> 33 #include <pwd.h> 34 #include <signal.h> 35 #include <syslog.h> 36 #include <unistd.h> 37 #include <util.h> 38 #include <ctype.h> 39 #include <grp.h> 40 41 #include <dev/vmm/vmm.h> 42 43 #include "proc.h" 44 #include "atomicio.h" 45 #include "vmd.h" 46 47 __dead void usage(void); 48 49 int main(int, char **); 50 int vmd_configure(void); 51 void vmd_sighdlr(int sig, short event, void *arg); 52 void vmd_shutdown(void); 53 int vmd_dispatch_control(int, struct privsep_proc *, struct imsg *); 54 int vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *); 55 int vmd_dispatch_agentx(int, struct privsep_proc *, struct imsg *); 56 int vmd_dispatch_priv(int, struct privsep_proc *, struct imsg *); 57 int vmd_check_vmh(struct vm_dump_header *); 58 59 int vm_instance(struct privsep *, struct vmd_vm **, 60 struct vmop_create_params *, uid_t); 61 int vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t); 62 int vm_claimid(const char *, int, uint32_t *); 63 void start_vm_batch(int, short, void*); 64 65 static inline void vm_terminate(struct vmd_vm *, const char *); 66 67 struct vmd *env; 68 69 static struct privsep_proc procs[] = { 70 /* Keep "priv" on top as procs[0] */ 71 { "priv", PROC_PRIV, vmd_dispatch_priv, priv }, 72 { "control", PROC_CONTROL, vmd_dispatch_control, control }, 73 { "vmm", PROC_VMM, vmd_dispatch_vmm, vmm, 74 vmm_shutdown, "/" }, 75 { "agentx", PROC_AGENTX, vmd_dispatch_agentx, vm_agentx, 76 vm_agentx_shutdown, "/" } 77 }; 78 79 enum privsep_procid privsep_process; 80 81 struct event staggered_start_timer; 82 83 /* For the privileged process */ 84 static struct privsep_proc *proc_priv = &procs[0]; 85 static struct passwd proc_privpw; 86 static const uint8_t zero_mac[ETHER_ADDR_LEN]; 87 88 const char default_conffile[] = VMD_CONF; 89 const char *conffile = default_conffile; 90 91 int 92 vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg) 93 { 94 struct privsep *ps = p->p_ps; 95 int res = 0, ret = 0, cmd = 0, verbose; 96 int ifd; 97 unsigned int v = 0, flags; 98 struct vmop_create_params vmc; 99 struct vmop_id vid; 100 struct vmop_result vmr; 101 struct vm_dump_header vmh; 102 struct vmd_vm *vm = NULL; 103 char *str = NULL; 104 uint32_t id = 0; 105 struct control_sock *rcs; 106 107 switch (imsg->hdr.type) { 108 case IMSG_VMDOP_START_VM_REQUEST: 109 IMSG_SIZE_CHECK(imsg, &vmc); 110 memcpy(&vmc, imsg->data, sizeof(vmc)); 111 vmc.vmc_kernel = imsg_get_fd(imsg); 112 113 /* Try registering our VM in our list of known VMs. */ 114 if (vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid)) { 115 res = errno; 116 117 /* Did we have a failure during lookup of a parent? */ 118 if (vm == NULL) { 119 cmd = IMSG_VMDOP_START_VM_RESPONSE; 120 break; 121 } 122 123 /* Does the VM already exist? */ 124 if (res == EALREADY) { 125 /* Is it already running? */ 126 if (vm->vm_state & VM_STATE_RUNNING) { 127 cmd = IMSG_VMDOP_START_VM_RESPONSE; 128 break; 129 } 130 131 /* If not running, are our flags ok? */ 132 if (vmc.vmc_flags && 133 vmc.vmc_flags != VMOP_CREATE_KERNEL) { 134 cmd = IMSG_VMDOP_START_VM_RESPONSE; 135 break; 136 } 137 } 138 res = 0; 139 } 140 141 /* Try to start the launch of the VM. */ 142 res = config_setvm(ps, vm, imsg->hdr.peerid, 143 vm->vm_params.vmc_owner.uid); 144 if (res) 145 cmd = IMSG_VMDOP_START_VM_RESPONSE; 146 break; 147 case IMSG_VMDOP_WAIT_VM_REQUEST: 148 case IMSG_VMDOP_TERMINATE_VM_REQUEST: 149 IMSG_SIZE_CHECK(imsg, &vid); 150 memcpy(&vid, imsg->data, sizeof(vid)); 151 flags = vid.vid_flags; 152 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 153 154 if ((id = vid.vid_id) == 0) { 155 /* Lookup vm (id) by name */ 156 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 157 res = ENOENT; 158 break; 159 } 160 id = vm->vm_vmid; 161 } else if ((vm = vm_getbyvmid(id)) == NULL) { 162 res = ENOENT; 163 break; 164 } 165 166 /* Validate curent state of vm */ 167 if ((vm->vm_state & VM_STATE_SHUTDOWN) && 168 (flags & VMOP_FORCE) == 0) { 169 res = EALREADY; 170 break; 171 } else if (!(vm->vm_state & VM_STATE_RUNNING)) { 172 res = EINVAL; 173 break; 174 } else if (vm_checkperm(vm, &vm->vm_params.vmc_owner, vid.vid_uid)) { 175 res = EPERM; 176 break; 177 } 178 179 /* Only relay TERMINATION requests, not WAIT requests */ 180 if (imsg->hdr.type == IMSG_VMDOP_TERMINATE_VM_REQUEST) { 181 memset(&vid, 0, sizeof(vid)); 182 vid.vid_id = id; 183 vid.vid_flags = flags; 184 185 if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 186 imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1) 187 return (-1); 188 } 189 break; 190 case IMSG_VMDOP_GET_INFO_VM_REQUEST: 191 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 192 break; 193 case IMSG_VMDOP_LOAD: 194 IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */ 195 str = get_string((uint8_t *)imsg->data, 196 IMSG_DATA_SIZE(imsg)); 197 case IMSG_VMDOP_RELOAD: 198 if (vmd_reload(0, str) == -1) 199 cmd = IMSG_CTL_FAIL; 200 else 201 cmd = IMSG_CTL_OK; 202 free(str); 203 break; 204 case IMSG_CTL_RESET: 205 IMSG_SIZE_CHECK(imsg, &v); 206 memcpy(&v, imsg->data, sizeof(v)); 207 if (vmd_reload(v, NULL) == -1) 208 cmd = IMSG_CTL_FAIL; 209 else 210 cmd = IMSG_CTL_OK; 211 break; 212 case IMSG_CTL_VERBOSE: 213 IMSG_SIZE_CHECK(imsg, &verbose); 214 memcpy(&verbose, imsg->data, sizeof(verbose)); 215 log_setverbose(verbose); 216 217 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 218 proc_forward_imsg(ps, imsg, PROC_PRIV, -1); 219 cmd = IMSG_CTL_OK; 220 break; 221 case IMSG_VMDOP_PAUSE_VM: 222 case IMSG_VMDOP_UNPAUSE_VM: 223 IMSG_SIZE_CHECK(imsg, &vid); 224 memcpy(&vid, imsg->data, sizeof(vid)); 225 if (vid.vid_id == 0) { 226 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 227 res = ENOENT; 228 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 229 ? IMSG_VMDOP_PAUSE_VM_RESPONSE 230 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 231 break; 232 } else { 233 vid.vid_id = vm->vm_vmid; 234 } 235 } else if ((vm = vm_getbyid(vid.vid_id)) == NULL) { 236 res = ENOENT; 237 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 238 ? IMSG_VMDOP_PAUSE_VM_RESPONSE 239 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 240 break; 241 } 242 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 243 vid.vid_uid) != 0) { 244 res = EPERM; 245 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 246 ? IMSG_VMDOP_PAUSE_VM_RESPONSE 247 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 248 break; 249 } 250 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 251 imsg->hdr.peerid, -1, &vid, sizeof(vid)); 252 break; 253 case IMSG_VMDOP_SEND_VM_REQUEST: 254 IMSG_SIZE_CHECK(imsg, &vid); 255 memcpy(&vid, imsg->data, sizeof(vid)); 256 id = vid.vid_id; 257 ifd = imsg_get_fd(imsg); 258 if (vid.vid_id == 0) { 259 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 260 res = ENOENT; 261 cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 262 close(ifd); 263 break; 264 } else { 265 vid.vid_id = vm->vm_vmid; 266 } 267 } else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL) { 268 res = ENOENT; 269 cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 270 close(ifd); 271 break; 272 } 273 vmr.vmr_id = vid.vid_id; 274 log_debug("%s: sending fd to vmm", __func__); 275 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 276 imsg->hdr.peerid, ifd, &vid, sizeof(vid)); 277 break; 278 case IMSG_VMDOP_RECEIVE_VM_REQUEST: 279 IMSG_SIZE_CHECK(imsg, &vid); 280 memcpy(&vid, imsg->data, sizeof(vid)); 281 ifd = imsg_get_fd(imsg); 282 if (ifd == -1) { 283 log_warnx("%s: invalid fd", __func__); 284 return (-1); 285 } 286 if (atomicio(read, ifd, &vmh, sizeof(vmh)) != sizeof(vmh)) { 287 log_warnx("%s: error reading vmh from received vm", 288 __func__); 289 res = EIO; 290 close(ifd); 291 cmd = IMSG_VMDOP_START_VM_RESPONSE; 292 break; 293 } 294 295 if (vmd_check_vmh(&vmh)) { 296 res = ENOENT; 297 close(ifd); 298 cmd = IMSG_VMDOP_START_VM_RESPONSE; 299 break; 300 } 301 if (atomicio(read, ifd, &vmc, sizeof(vmc)) != sizeof(vmc)) { 302 log_warnx("%s: error reading vmc from received vm", 303 __func__); 304 res = EIO; 305 close(ifd); 306 cmd = IMSG_VMDOP_START_VM_RESPONSE; 307 break; 308 } 309 strlcpy(vmc.vmc_params.vcp_name, vid.vid_name, 310 sizeof(vmc.vmc_params.vcp_name)); 311 vmc.vmc_params.vcp_id = 0; 312 313 ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); 314 if (ret != 0) { 315 res = errno; 316 cmd = IMSG_VMDOP_START_VM_RESPONSE; 317 close(ifd); 318 } else { 319 vm->vm_state |= VM_STATE_RECEIVED; 320 config_setvm(ps, vm, imsg->hdr.peerid, 321 vmc.vmc_owner.uid); 322 log_debug("%s: sending fd to vmm", __func__); 323 proc_compose_imsg(ps, PROC_VMM, -1, 324 IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, ifd, 325 NULL, 0); 326 } 327 break; 328 case IMSG_VMDOP_DONE: 329 control_reset(&ps->ps_csock); 330 TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry) 331 control_reset(rcs); 332 cmd = 0; 333 break; 334 default: 335 return (-1); 336 } 337 338 switch (cmd) { 339 case 0: 340 break; 341 case IMSG_VMDOP_START_VM_RESPONSE: 342 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 343 memset(&vmr, 0, sizeof(vmr)); 344 vmr.vmr_result = res; 345 vmr.vmr_id = id; 346 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 347 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 348 return (-1); 349 break; 350 default: 351 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 352 imsg->hdr.peerid, -1, &res, sizeof(res)) == -1) 353 return (-1); 354 break; 355 } 356 357 return (0); 358 } 359 360 int 361 vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg) 362 { 363 struct vmop_result vmr; 364 struct privsep *ps = p->p_ps; 365 int res = 0; 366 struct vmd_vm *vm; 367 struct vm_create_params *vcp; 368 struct vmop_info_result vir; 369 370 switch (imsg->hdr.type) { 371 case IMSG_VMDOP_PAUSE_VM_RESPONSE: 372 IMSG_SIZE_CHECK(imsg, &vmr); 373 memcpy(&vmr, imsg->data, sizeof(vmr)); 374 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 375 break; 376 proc_compose_imsg(ps, PROC_CONTROL, -1, 377 imsg->hdr.type, imsg->hdr.peerid, -1, 378 imsg->data, sizeof(imsg->data)); 379 log_info("%s: paused vm %d successfully", 380 vm->vm_params.vmc_params.vcp_name, 381 vm->vm_vmid); 382 vm->vm_state |= VM_STATE_PAUSED; 383 break; 384 case IMSG_VMDOP_UNPAUSE_VM_RESPONSE: 385 IMSG_SIZE_CHECK(imsg, &vmr); 386 memcpy(&vmr, imsg->data, sizeof(vmr)); 387 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 388 break; 389 proc_compose_imsg(ps, PROC_CONTROL, -1, 390 imsg->hdr.type, imsg->hdr.peerid, -1, 391 imsg->data, sizeof(imsg->data)); 392 log_info("%s: unpaused vm %d successfully.", 393 vm->vm_params.vmc_params.vcp_name, 394 vm->vm_vmid); 395 vm->vm_state &= ~VM_STATE_PAUSED; 396 break; 397 case IMSG_VMDOP_START_VM_RESPONSE: 398 IMSG_SIZE_CHECK(imsg, &vmr); 399 memcpy(&vmr, imsg->data, sizeof(vmr)); 400 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) 401 break; 402 vm->vm_pid = vmr.vmr_pid; 403 vcp = &vm->vm_params.vmc_params; 404 vcp->vcp_id = vmr.vmr_id; 405 406 /* 407 * If the peerid is not -1, forward the response back to the 408 * the control socket. If it is -1, the request originated 409 * from the parent, not the control socket. 410 */ 411 if (vm->vm_peerid != (uint32_t)-1) { 412 (void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname, 413 sizeof(vmr.vmr_ttyname)); 414 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 415 imsg->hdr.type, vm->vm_peerid, -1, 416 &vmr, sizeof(vmr)) == -1) { 417 errno = vmr.vmr_result; 418 log_warn("%s: failed to forward vm result", 419 vcp->vcp_name); 420 vm_terminate(vm, __func__); 421 return (-1); 422 } 423 } 424 425 if (vmr.vmr_result) { 426 log_warnx("%s: failed to start vm", vcp->vcp_name); 427 vm_terminate(vm, __func__); 428 errno = vmr.vmr_result; 429 break; 430 } 431 432 /* Now configure all the interfaces */ 433 if (vm_priv_ifconfig(ps, vm) == -1) { 434 log_warn("%s: failed to configure vm", vcp->vcp_name); 435 vm_terminate(vm, __func__); 436 break; 437 } 438 439 log_info("started %s (vm %d) successfully, tty %s", 440 vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname); 441 break; 442 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 443 IMSG_SIZE_CHECK(imsg, &vmr); 444 memcpy(&vmr, imsg->data, sizeof(vmr)); 445 446 if (vmr.vmr_result) { 447 DPRINTF("%s: forwarding TERMINATE VM for vm id %d", 448 __func__, vmr.vmr_id); 449 proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); 450 } else { 451 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 452 break; 453 /* Mark VM as shutting down */ 454 vm->vm_state |= VM_STATE_SHUTDOWN; 455 } 456 break; 457 case IMSG_VMDOP_SEND_VM_RESPONSE: 458 IMSG_SIZE_CHECK(imsg, &vmr); 459 memcpy(&vmr, imsg->data, sizeof(vmr)); 460 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 461 break; 462 if (!vmr.vmr_result) { 463 log_info("%s: sent vm %d successfully.", 464 vm->vm_params.vmc_params.vcp_name, 465 vm->vm_vmid); 466 vm_terminate(vm, __func__); 467 } 468 469 /* Send a response if a control client is waiting for it */ 470 if (imsg->hdr.peerid != (uint32_t)-1) { 471 /* the error is meaningless for deferred responses */ 472 vmr.vmr_result = 0; 473 474 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 475 IMSG_VMDOP_SEND_VM_RESPONSE, 476 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 477 return (-1); 478 } 479 break; 480 case IMSG_VMDOP_TERMINATE_VM_EVENT: 481 IMSG_SIZE_CHECK(imsg, &vmr); 482 memcpy(&vmr, imsg->data, sizeof(vmr)); 483 DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d", 484 __func__, vmr.vmr_id, vmr.vmr_result); 485 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) { 486 log_debug("%s: vm %d is no longer available", 487 __func__, vmr.vmr_id); 488 break; 489 } 490 if (vmr.vmr_result != EAGAIN || 491 vm->vm_params.vmc_bootdevice) { 492 vm_terminate(vm, __func__); 493 } else { 494 /* Stop VM instance but keep the tty open */ 495 vm_stop(vm, 1, __func__); 496 config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid); 497 } 498 499 /* The error is meaningless for deferred responses */ 500 vmr.vmr_result = 0; 501 502 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 503 IMSG_VMDOP_TERMINATE_VM_EVENT, 504 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 505 return (-1); 506 break; 507 case IMSG_VMDOP_GET_INFO_VM_DATA: 508 IMSG_SIZE_CHECK(imsg, &vir); 509 memcpy(&vir, imsg->data, sizeof(vir)); 510 if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) { 511 memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname)); 512 if (vm->vm_ttyname[0] != '\0') 513 strlcpy(vir.vir_ttyname, vm->vm_ttyname, 514 sizeof(vir.vir_ttyname)); 515 log_debug("%s: running vm: %d, vm_state: 0x%x", 516 __func__, vm->vm_vmid, vm->vm_state); 517 vir.vir_state = vm->vm_state; 518 /* get the user id who started the vm */ 519 vir.vir_uid = vm->vm_uid; 520 vir.vir_gid = vm->vm_params.vmc_owner.gid; 521 } 522 if (proc_compose_imsg(ps, 523 imsg->hdr.peerid == IMSG_AGENTX_PEERID ? 524 PROC_AGENTX : PROC_CONTROL, -1, imsg->hdr.type, 525 imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) { 526 if (vm) 527 vm_terminate(vm, __func__); 528 return (-1); 529 } 530 break; 531 case IMSG_VMDOP_GET_INFO_VM_END_DATA: 532 /* 533 * PROC_VMM has responded with the *running* VMs, now we 534 * append the others. These use the special value 0 for their 535 * kernel id to indicate that they are not running. 536 */ 537 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 538 if (!(vm->vm_state & VM_STATE_RUNNING)) { 539 memset(&vir, 0, sizeof(vir)); 540 vir.vir_info.vir_id = vm->vm_vmid; 541 strlcpy(vir.vir_info.vir_name, 542 vm->vm_params.vmc_params.vcp_name, 543 VMM_MAX_NAME_LEN); 544 vir.vir_info.vir_memory_size = 545 vm->vm_params.vmc_params. 546 vcp_memranges[0].vmr_size; 547 vir.vir_info.vir_ncpus = 548 vm->vm_params.vmc_params.vcp_ncpus; 549 /* get the configured user id for this vm */ 550 vir.vir_uid = vm->vm_params.vmc_owner.uid; 551 vir.vir_gid = vm->vm_params.vmc_owner.gid; 552 log_debug("%s: vm: %d, vm_state: 0x%x", 553 __func__, vm->vm_vmid, vm->vm_state); 554 vir.vir_state = vm->vm_state; 555 if (proc_compose_imsg(ps, 556 imsg->hdr.peerid == IMSG_AGENTX_PEERID ? 557 PROC_AGENTX : PROC_CONTROL, -1, 558 IMSG_VMDOP_GET_INFO_VM_DATA, 559 imsg->hdr.peerid, -1, &vir, 560 sizeof(vir)) == -1) { 561 log_debug("%s: GET_INFO_VM_END failed", 562 __func__); 563 vm_terminate(vm, __func__); 564 return (-1); 565 } 566 } 567 } 568 IMSG_SIZE_CHECK(imsg, &res); 569 proc_forward_imsg(ps, imsg, 570 imsg->hdr.peerid == IMSG_AGENTX_PEERID ? 571 PROC_AGENTX : PROC_CONTROL, -1); 572 break; 573 default: 574 return (-1); 575 } 576 577 return (0); 578 } 579 580 int 581 vmd_dispatch_agentx(int fd, struct privsep_proc *p, struct imsg *imsg) 582 { 583 struct privsep *ps = p->p_ps; 584 585 switch (imsg->hdr.type) { 586 case IMSG_VMDOP_GET_INFO_VM_REQUEST: 587 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 588 return (0); 589 default: 590 break; 591 } 592 return (-1); 593 } 594 595 int 596 vmd_dispatch_priv(int fd, struct privsep_proc *p, struct imsg *imsg) 597 { 598 struct vmop_addr_result var; 599 600 switch (imsg->hdr.type) { 601 case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE: 602 IMSG_SIZE_CHECK(imsg, &var); 603 memcpy(&var, imsg->data, sizeof(var)); 604 proc_forward_imsg(p->p_ps, imsg, PROC_VMM, -1); 605 break; 606 default: 607 return (-1); 608 } 609 610 return (0); 611 } 612 613 614 void 615 vmd_sighdlr(int sig, short event, void *arg) 616 { 617 if (privsep_process != PROC_PARENT) 618 return; 619 log_debug("%s: handling signal", __func__); 620 621 switch (sig) { 622 case SIGHUP: 623 log_info("%s: reload requested with SIGHUP", __func__); 624 625 /* 626 * This is safe because libevent uses async signal handlers 627 * that run in the event loop and not in signal context. 628 */ 629 (void)vmd_reload(0, NULL); 630 break; 631 case SIGPIPE: 632 log_info("%s: ignoring SIGPIPE", __func__); 633 break; 634 case SIGUSR1: 635 log_info("%s: ignoring SIGUSR1", __func__); 636 break; 637 case SIGTERM: 638 case SIGINT: 639 vmd_shutdown(); 640 break; 641 default: 642 fatalx("unexpected signal"); 643 } 644 } 645 646 __dead void 647 usage(void) 648 { 649 extern char *__progname; 650 fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n", 651 __progname); 652 exit(1); 653 } 654 655 int 656 main(int argc, char **argv) 657 { 658 struct privsep *ps; 659 int ch; 660 enum privsep_procid proc_id = PROC_PARENT; 661 int proc_instance = 0, vm_launch = 0; 662 int vmm_fd = -1, vm_fd = -1, psp_fd = -1; 663 const char *errp, *title = NULL; 664 int argc0 = argc; 665 char dev_type = '\0'; 666 667 log_init(0, LOG_DAEMON); 668 669 if ((env = calloc(1, sizeof(*env))) == NULL) 670 fatal("calloc: env"); 671 env->vmd_fd = -1; 672 env->vmd_fd6 = -1; 673 674 while ((ch = getopt(argc, argv, "D:P:I:V:X:df:i:j:nt:vp:")) != -1) { 675 switch (ch) { 676 case 'D': 677 if (cmdline_symset(optarg) < 0) 678 log_warnx("could not parse macro definition %s", 679 optarg); 680 break; 681 case 'd': 682 env->vmd_debug = 2; 683 break; 684 case 'f': 685 conffile = optarg; 686 break; 687 case 'v': 688 env->vmd_verbose++; 689 break; 690 /* vmd fork/exec */ 691 case 'n': 692 env->vmd_noaction = 1; 693 break; 694 case 'P': 695 title = optarg; 696 proc_id = proc_getid(procs, nitems(procs), title); 697 if (proc_id == PROC_MAX) 698 fatalx("invalid process name"); 699 break; 700 case 'I': 701 proc_instance = strtonum(optarg, 0, 702 PROC_MAX_INSTANCES, &errp); 703 if (errp) 704 fatalx("invalid process instance"); 705 break; 706 /* child vm and device fork/exec */ 707 case 'p': 708 title = optarg; 709 break; 710 case 'V': 711 vm_launch = VMD_LAUNCH_VM; 712 vm_fd = strtonum(optarg, 0, 128, &errp); 713 if (errp) 714 fatalx("invalid vm fd"); 715 break; 716 case 'X': 717 vm_launch = VMD_LAUNCH_DEV; 718 vm_fd = strtonum(optarg, 0, 128, &errp); 719 if (errp) 720 fatalx("invalid device fd"); 721 break; 722 case 't': 723 dev_type = *optarg; 724 switch (dev_type) { 725 case VMD_DEVTYPE_NET: 726 case VMD_DEVTYPE_DISK: 727 break; 728 default: fatalx("invalid device type"); 729 } 730 break; 731 case 'i': 732 vmm_fd = strtonum(optarg, 0, 128, &errp); 733 if (errp) 734 fatalx("invalid vmm fd"); 735 break; 736 case 'j': 737 /* -1 means no PSP available */ 738 psp_fd = strtonum(optarg, -1, 128, &errp); 739 if (errp) 740 fatalx("invalid psp fd"); 741 break; 742 default: 743 usage(); 744 } 745 } 746 747 argc -= optind; 748 if (argc > 0) 749 usage(); 750 751 if (env->vmd_noaction && !env->vmd_debug) 752 env->vmd_debug = 1; 753 754 log_init(env->vmd_debug, LOG_DAEMON); 755 log_setverbose(env->vmd_verbose); 756 757 /* Re-exec from the vmm child process requires an absolute path. */ 758 if (proc_id == PROC_PARENT && *argv[0] != '/' && !env->vmd_noaction) 759 fatalx("re-exec requires execution with an absolute path"); 760 env->argv0 = argv[0]; 761 762 /* check for root privileges */ 763 if (env->vmd_noaction == 0 && !vm_launch) { 764 if (geteuid()) 765 fatalx("need root privileges"); 766 } 767 768 ps = &env->vmd_ps; 769 ps->ps_env = env; 770 env->vmd_psp_fd = psp_fd; 771 772 if (config_init(env) == -1) 773 fatal("failed to initialize configuration"); 774 775 if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL) 776 fatal("unknown user %s", VMD_USER); 777 778 /* First proc runs as root without pledge but in default chroot */ 779 proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */ 780 proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */ 781 782 /* 783 * If we're launching a new vm or its device, we short out here. 784 */ 785 if (vm_launch == VMD_LAUNCH_VM) { 786 vm_main(vm_fd, vmm_fd); 787 /* NOTREACHED */ 788 } else if (vm_launch == VMD_LAUNCH_DEV) { 789 if (dev_type == VMD_DEVTYPE_NET) { 790 log_procinit("vm/%s/vionet", title); 791 vionet_main(vm_fd, vmm_fd); 792 /* NOTREACHED */ 793 } else if (dev_type == VMD_DEVTYPE_DISK) { 794 log_procinit("vm/%s/vioblk", title); 795 vioblk_main(vm_fd, vmm_fd); 796 /* NOTREACHED */ 797 } 798 fatalx("unsupported device type '%c'", dev_type); 799 } 800 801 /* Open /dev/vmm early. */ 802 if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) { 803 env->vmd_fd = open(VMM_NODE, O_RDWR | O_CLOEXEC); 804 if (env->vmd_fd == -1) 805 fatal("%s", VMM_NODE); 806 } 807 808 /* Configure the control socket */ 809 ps->ps_csock.cs_name = SOCKET_NAME; 810 TAILQ_INIT(&ps->ps_rcsocks); 811 812 /* Configuration will be parsed after forking the children */ 813 env->vmd_conffile = conffile; 814 815 if (env->vmd_noaction) 816 ps->ps_noaction = 1; 817 ps->ps_instance = proc_instance; 818 if (title != NULL) 819 ps->ps_title[proc_id] = title; 820 821 /* only the parent returns */ 822 proc_init(ps, procs, nitems(procs), env->vmd_debug, argc0, argv, 823 proc_id); 824 825 if (ps->ps_noaction == 0) 826 log_info("startup"); 827 828 event_init(); 829 830 signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps); 831 signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps); 832 signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps); 833 signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps); 834 signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps); 835 836 signal_add(&ps->ps_evsigint, NULL); 837 signal_add(&ps->ps_evsigterm, NULL); 838 signal_add(&ps->ps_evsighup, NULL); 839 signal_add(&ps->ps_evsigpipe, NULL); 840 signal_add(&ps->ps_evsigusr1, NULL); 841 842 if (!env->vmd_noaction) 843 proc_connect(ps); 844 845 env->vmd_psp_fd = -1; 846 #ifdef __amd64__ 847 if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) 848 psp_setup(); 849 #endif 850 851 if (vmd_configure() == -1) 852 fatalx("configuration failed"); 853 854 event_dispatch(); 855 856 log_debug("exiting"); 857 858 return (0); 859 } 860 861 void 862 start_vm_batch(int fd, short type, void *args) 863 { 864 int i = 0; 865 struct vmd_vm *vm; 866 867 log_debug("%s: starting batch of %d vms", __func__, 868 env->vmd_cfg.parallelism); 869 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 870 if (!(vm->vm_state & VM_STATE_WAITING)) { 871 log_debug("%s: not starting vm %s (disabled)", 872 __func__, 873 vm->vm_params.vmc_params.vcp_name); 874 continue; 875 } 876 i++; 877 if (i > env->vmd_cfg.parallelism) { 878 evtimer_add(&staggered_start_timer, 879 &env->vmd_cfg.delay); 880 break; 881 } 882 vm->vm_state &= ~VM_STATE_WAITING; 883 config_setvm(&env->vmd_ps, vm, -1, vm->vm_params.vmc_owner.uid); 884 } 885 log_debug("%s: done starting vms", __func__); 886 } 887 888 int 889 vmd_configure(void) 890 { 891 int ncpus; 892 struct vmd_switch *vsw; 893 int ncpu_mib[] = {CTL_HW, HW_NCPUONLINE}; 894 size_t ncpus_sz = sizeof(ncpus); 895 896 /* 897 * pledge in the parent process: 898 * stdio - for malloc and basic I/O including events. 899 * rpath - for reload to open and read the configuration files. 900 * wpath - for opening disk images and tap devices. 901 * tty - for openpty and TIOCUCNTL. 902 * proc - run kill to terminate its children safely. 903 * sendfd - for disks, interfaces and other fds. 904 * recvfd - for send and receive. 905 * getpw - lookup user or group id by name. 906 * chown, fattr - change tty ownership 907 * flock - locking disk files 908 */ 909 if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw" 910 " chown fattr flock", NULL) == -1) 911 fatal("pledge"); 912 913 if ((env->vmd_ptmfd = getptmfd()) == -1) 914 fatal("getptmfd %s", PATH_PTMDEV); 915 916 if (parse_config(env->vmd_conffile) == -1) { 917 proc_kill(&env->vmd_ps); 918 exit(1); 919 } 920 921 if (env->vmd_noaction) { 922 fprintf(stderr, "configuration OK\n"); 923 proc_kill(&env->vmd_ps); 924 exit(0); 925 } 926 927 /* Send VMM device fd to vmm proc. */ 928 proc_compose_imsg(&env->vmd_ps, PROC_VMM, -1, 929 IMSG_VMDOP_RECEIVE_VMM_FD, -1, env->vmd_fd, NULL, 0); 930 931 /* Send PSP device fd to vmm proc. */ 932 if (env->vmd_psp_fd != -1) { 933 proc_compose_imsg(&env->vmd_ps, PROC_VMM, -1, 934 IMSG_VMDOP_RECEIVE_PSP_FD, -1, env->vmd_psp_fd, NULL, 0); 935 } 936 937 /* Send shared global configuration to all children */ 938 if (config_setconfig(env) == -1) 939 return (-1); 940 941 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 942 if (vsw->sw_running) 943 continue; 944 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 945 log_warn("%s: failed to create switch %s", 946 __func__, vsw->sw_name); 947 switch_remove(vsw); 948 return (-1); 949 } 950 } 951 952 if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) { 953 env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY; 954 if (sysctl(ncpu_mib, nitems(ncpu_mib), &ncpus, &ncpus_sz, NULL, 0) == -1) 955 ncpus = 1; 956 env->vmd_cfg.parallelism = ncpus; 957 log_debug("%s: setting staggered start configuration to " 958 "parallelism: %d and delay: %lld", 959 __func__, ncpus, (long long) env->vmd_cfg.delay.tv_sec); 960 } 961 962 log_debug("%s: starting vms in staggered fashion", __func__); 963 evtimer_set(&staggered_start_timer, start_vm_batch, NULL); 964 /* start first batch */ 965 start_vm_batch(0, 0, NULL); 966 967 return (0); 968 } 969 970 int 971 vmd_reload(unsigned int reset, const char *filename) 972 { 973 struct vmd_vm *vm, *next_vm; 974 struct vmd_switch *vsw; 975 int reload = 0; 976 977 /* Switch back to the default config file */ 978 if (filename == NULL || *filename == '\0') { 979 filename = env->vmd_conffile; 980 reload = 1; 981 } 982 983 log_debug("%s: level %d config file %s", __func__, reset, filename); 984 985 if (reset) { 986 /* Purge the configuration */ 987 config_purge(env, reset); 988 config_setreset(env, reset); 989 } else { 990 /* 991 * Load or reload the configuration. 992 * 993 * Reloading removes all non-running VMs before processing the 994 * config file, whereas loading only adds to the existing list 995 * of VMs. 996 */ 997 998 if (reload) { 999 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, 1000 next_vm) { 1001 if (!(vm->vm_state & VM_STATE_RUNNING)) { 1002 DPRINTF("%s: calling vm_remove", 1003 __func__); 1004 vm_remove(vm, __func__); 1005 } 1006 } 1007 } 1008 1009 if (parse_config(filename) == -1) { 1010 log_debug("%s: failed to load config file %s", 1011 __func__, filename); 1012 return (-1); 1013 } 1014 1015 if (reload) { 1016 /* Update shared global configuration in all children */ 1017 if (config_setconfig(env) == -1) 1018 return (-1); 1019 } 1020 1021 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1022 if (vsw->sw_running) 1023 continue; 1024 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 1025 log_warn("%s: failed to create switch %s", 1026 __func__, vsw->sw_name); 1027 switch_remove(vsw); 1028 return (-1); 1029 } 1030 } 1031 1032 log_debug("%s: starting vms in staggered fashion", __func__); 1033 evtimer_set(&staggered_start_timer, start_vm_batch, NULL); 1034 /* start first batch */ 1035 start_vm_batch(0, 0, NULL); 1036 1037 } 1038 1039 return (0); 1040 } 1041 1042 void 1043 vmd_shutdown(void) 1044 { 1045 struct vmd_vm *vm, *vm_next; 1046 1047 log_debug("%s: performing shutdown", __func__); 1048 1049 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) { 1050 vm_remove(vm, __func__); 1051 } 1052 1053 proc_kill(&env->vmd_ps); 1054 free(env); 1055 1056 log_warnx("terminating"); 1057 exit(0); 1058 } 1059 1060 struct vmd_vm * 1061 vm_getbyvmid(uint32_t vmid) 1062 { 1063 struct vmd_vm *vm; 1064 1065 if (vmid == 0) 1066 return (NULL); 1067 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1068 if (vm->vm_vmid == vmid) 1069 return (vm); 1070 } 1071 1072 return (NULL); 1073 } 1074 1075 struct vmd_vm * 1076 vm_getbyid(uint32_t id) 1077 { 1078 struct vmd_vm *vm; 1079 1080 if (id == 0) 1081 return (NULL); 1082 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1083 if (vm->vm_params.vmc_params.vcp_id == id) 1084 return (vm); 1085 } 1086 1087 return (NULL); 1088 } 1089 1090 uint32_t 1091 vm_id2vmid(uint32_t id, struct vmd_vm *vm) 1092 { 1093 if (vm == NULL && (vm = vm_getbyid(id)) == NULL) 1094 return (0); 1095 DPRINTF("%s: vmm id %u is vmid %u", __func__, 1096 id, vm->vm_vmid); 1097 return (vm->vm_vmid); 1098 } 1099 1100 uint32_t 1101 vm_vmid2id(uint32_t vmid, struct vmd_vm *vm) 1102 { 1103 if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL) 1104 return (0); 1105 DPRINTF("%s: vmid %u is vmm id %u", __func__, 1106 vmid, vm->vm_params.vmc_params.vcp_id); 1107 return (vm->vm_params.vmc_params.vcp_id); 1108 } 1109 1110 struct vmd_vm * 1111 vm_getbyname(const char *name) 1112 { 1113 struct vmd_vm *vm; 1114 1115 if (name == NULL) 1116 return (NULL); 1117 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1118 if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0) 1119 return (vm); 1120 } 1121 1122 return (NULL); 1123 } 1124 1125 struct vmd_vm * 1126 vm_getbypid(pid_t pid) 1127 { 1128 struct vmd_vm *vm; 1129 1130 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1131 if (vm->vm_pid == pid) 1132 return (vm); 1133 } 1134 1135 return (NULL); 1136 } 1137 1138 void 1139 vm_stop(struct vmd_vm *vm, int keeptty, const char *caller) 1140 { 1141 struct privsep *ps = &env->vmd_ps; 1142 unsigned int i, j; 1143 1144 if (vm == NULL) 1145 return; 1146 1147 log_debug("%s: %s %s stopping vm %d%s", 1148 __func__, ps->ps_title[privsep_process], caller, 1149 vm->vm_vmid, keeptty ? ", keeping tty open" : ""); 1150 1151 vm->vm_state &= ~(VM_STATE_RECEIVED | VM_STATE_RUNNING 1152 | VM_STATE_SHUTDOWN); 1153 1154 if (vm->vm_iev.ibuf.fd != -1) { 1155 event_del(&vm->vm_iev.ev); 1156 close(vm->vm_iev.ibuf.fd); 1157 } 1158 for (i = 0; i < VM_MAX_DISKS_PER_VM; i++) { 1159 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 1160 if (vm->vm_disks[i][j] != -1) { 1161 close(vm->vm_disks[i][j]); 1162 vm->vm_disks[i][j] = -1; 1163 } 1164 } 1165 } 1166 for (i = 0; i < VM_MAX_NICS_PER_VM; i++) { 1167 if (vm->vm_ifs[i].vif_fd != -1) { 1168 close(vm->vm_ifs[i].vif_fd); 1169 vm->vm_ifs[i].vif_fd = -1; 1170 } 1171 free(vm->vm_ifs[i].vif_name); 1172 free(vm->vm_ifs[i].vif_switch); 1173 free(vm->vm_ifs[i].vif_group); 1174 vm->vm_ifs[i].vif_name = NULL; 1175 vm->vm_ifs[i].vif_switch = NULL; 1176 vm->vm_ifs[i].vif_group = NULL; 1177 } 1178 if (vm->vm_kernel != -1) { 1179 close(vm->vm_kernel); 1180 vm->vm_kernel = -1; 1181 } 1182 if (vm->vm_cdrom != -1) { 1183 close(vm->vm_cdrom); 1184 vm->vm_cdrom = -1; 1185 } 1186 if (!keeptty) { 1187 vm_closetty(vm); 1188 vm->vm_uid = 0; 1189 } 1190 } 1191 1192 void 1193 vm_remove(struct vmd_vm *vm, const char *caller) 1194 { 1195 struct privsep *ps = &env->vmd_ps; 1196 1197 if (vm == NULL) 1198 return; 1199 1200 log_debug("%s: %s %s removing vm %d from running config", 1201 __func__, ps->ps_title[privsep_process], caller, 1202 vm->vm_vmid); 1203 1204 TAILQ_REMOVE(env->vmd_vms, vm, vm_entry); 1205 1206 vm_stop(vm, 0, caller); 1207 if (vm->vm_kernel_path != NULL && !vm->vm_from_config) 1208 free(vm->vm_kernel_path); 1209 free(vm); 1210 } 1211 1212 int 1213 vm_claimid(const char *name, int uid, uint32_t *id) 1214 { 1215 struct name2id *n2i = NULL; 1216 1217 TAILQ_FOREACH(n2i, env->vmd_known, entry) 1218 if (strcmp(n2i->name, name) == 0 && n2i->uid == uid) 1219 goto out; 1220 1221 if (++env->vmd_nvm == 0) { 1222 log_warnx("too many vms"); 1223 return (-1); 1224 } 1225 if ((n2i = calloc(1, sizeof(struct name2id))) == NULL) { 1226 log_warnx("could not alloc vm name"); 1227 return (-1); 1228 } 1229 n2i->id = env->vmd_nvm; 1230 n2i->uid = uid; 1231 if (strlcpy(n2i->name, name, sizeof(n2i->name)) >= sizeof(n2i->name)) { 1232 log_warnx("vm name too long"); 1233 free(n2i); 1234 return (-1); 1235 } 1236 TAILQ_INSERT_TAIL(env->vmd_known, n2i, entry); 1237 1238 out: 1239 *id = n2i->id; 1240 return (0); 1241 } 1242 1243 int 1244 vm_register(struct privsep *ps, struct vmop_create_params *vmc, 1245 struct vmd_vm **ret_vm, uint32_t id, uid_t uid) 1246 { 1247 struct vmd_vm *vm = NULL, *vm_parent = NULL; 1248 struct vm_create_params *vcp = &vmc->vmc_params; 1249 struct vmop_owner *vmo = NULL; 1250 uint32_t nid, rng; 1251 unsigned int i, j; 1252 struct vmd_switch *sw; 1253 char *s; 1254 int ret = 0; 1255 1256 /* Check if this is an instance of another VM */ 1257 if ((ret = vm_instance(ps, &vm_parent, vmc, uid)) != 0) { 1258 errno = ret; /* XXX might set invalid errno */ 1259 return (-1); 1260 } 1261 1262 errno = 0; 1263 *ret_vm = NULL; 1264 1265 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL || 1266 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) { 1267 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 1268 uid) != 0) { 1269 errno = EPERM; 1270 goto fail; 1271 } 1272 vm->vm_kernel = vmc->vmc_kernel; 1273 *ret_vm = vm; 1274 errno = EALREADY; 1275 goto fail; 1276 } 1277 1278 if (vm_parent != NULL) 1279 vmo = &vm_parent->vm_params.vmc_insowner; 1280 1281 /* non-root users can only start existing VMs or instances */ 1282 if (vm_checkperm(NULL, vmo, uid) != 0) { 1283 log_warnx("permission denied"); 1284 errno = EPERM; 1285 goto fail; 1286 } 1287 if (vmc->vmc_flags == 0) { 1288 log_warnx("invalid configuration, no devices"); 1289 errno = VMD_DISK_MISSING; 1290 goto fail; 1291 } 1292 if (vcp->vcp_ncpus == 0) 1293 vcp->vcp_ncpus = 1; 1294 if (vcp->vcp_memranges[0].vmr_size == 0) 1295 vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY; 1296 if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) { 1297 log_warnx("invalid number of CPUs"); 1298 goto fail; 1299 } else if (vmc->vmc_ndisks > VM_MAX_DISKS_PER_VM) { 1300 log_warnx("invalid number of disks"); 1301 goto fail; 1302 } else if (vmc->vmc_nnics > VM_MAX_NICS_PER_VM) { 1303 log_warnx("invalid number of interfaces"); 1304 goto fail; 1305 } else if (vmc->vmc_kernel == -1 && vmc->vmc_ndisks == 0 1306 && strlen(vmc->vmc_cdrom) == 0) { 1307 log_warnx("no kernel or disk/cdrom specified"); 1308 goto fail; 1309 } else if (strlen(vcp->vcp_name) == 0) { 1310 log_warnx("invalid VM name"); 1311 goto fail; 1312 } else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' || 1313 *vcp->vcp_name == '_') { 1314 log_warnx("invalid VM name"); 1315 goto fail; 1316 } else { 1317 for (s = vcp->vcp_name; *s != '\0'; ++s) { 1318 if (!(isalnum((unsigned char)*s) || *s == '.' || \ 1319 *s == '-' || *s == '_')) { 1320 log_warnx("invalid VM name"); 1321 goto fail; 1322 } 1323 } 1324 } 1325 1326 if ((vm = calloc(1, sizeof(*vm))) == NULL) 1327 goto fail; 1328 1329 memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params)); 1330 vmc = &vm->vm_params; 1331 vcp = &vmc->vmc_params; 1332 vm->vm_pid = -1; 1333 vm->vm_tty = -1; 1334 vm->vm_receive_fd = -1; 1335 vm->vm_kernel = -1; 1336 vm->vm_state &= ~VM_STATE_PAUSED; 1337 1338 if (vmc->vmc_kernel > -1) 1339 vm->vm_kernel = vmc->vmc_kernel; 1340 1341 for (i = 0; i < VM_MAX_DISKS_PER_VM; i++) 1342 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 1343 vm->vm_disks[i][j] = -1; 1344 for (i = 0; i < VM_MAX_NICS_PER_VM; i++) 1345 vm->vm_ifs[i].vif_fd = -1; 1346 for (i = 0; i < vmc->vmc_nnics; i++) { 1347 if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) { 1348 /* inherit per-interface flags from the switch */ 1349 vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK); 1350 } 1351 1352 /* 1353 * If the MAC address is zero, always randomize it in vmd(8) 1354 * because we cannot rely on the guest OS to do the right 1355 * thing like OpenBSD does. Based on ether_fakeaddr() 1356 * from the kernel, incremented by one to differentiate 1357 * the source. 1358 */ 1359 if (memcmp(zero_mac, &vmc->vmc_macs[i], ETHER_ADDR_LEN) == 0) { 1360 rng = arc4random(); 1361 vmc->vmc_macs[i][0] = 0xfe; 1362 vmc->vmc_macs[i][1] = 0xe1; 1363 vmc->vmc_macs[i][2] = 0xba + 1; 1364 vmc->vmc_macs[i][3] = 0xd0 | ((i + 1) & 0xf); 1365 vmc->vmc_macs[i][4] = rng; 1366 vmc->vmc_macs[i][5] = rng >> 8; 1367 } 1368 } 1369 vm->vm_cdrom = -1; 1370 vm->vm_iev.ibuf.fd = -1; 1371 1372 /* 1373 * Assign a new internal Id if not specified and we succeed in 1374 * claiming a new Id. 1375 */ 1376 if (id != 0) 1377 vm->vm_vmid = id; 1378 else if (vm_claimid(vcp->vcp_name, uid, &nid) == -1) 1379 goto fail; 1380 else 1381 vm->vm_vmid = nid; 1382 1383 log_debug("%s: registering vm %d", __func__, vm->vm_vmid); 1384 TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry); 1385 1386 *ret_vm = vm; 1387 return (0); 1388 fail: 1389 if (errno == 0) 1390 errno = EINVAL; 1391 return (-1); 1392 } 1393 1394 int 1395 vm_instance(struct privsep *ps, struct vmd_vm **vm_parent, 1396 struct vmop_create_params *vmc, uid_t uid) 1397 { 1398 char *name; 1399 struct vm_create_params *vcp = &vmc->vmc_params; 1400 struct vmop_create_params *vmcp; 1401 struct vm_create_params *vcpp; 1402 unsigned int i, j; 1403 1404 /* return without error if the parent is NULL (nothing to inherit) */ 1405 if ((vmc->vmc_flags & VMOP_CREATE_INSTANCE) == 0 || 1406 vmc->vmc_instance[0] == '\0') 1407 return (0); 1408 1409 if ((*vm_parent = vm_getbyname(vmc->vmc_instance)) == NULL) { 1410 return (VMD_PARENT_INVALID); 1411 } 1412 1413 vmcp = &(*vm_parent)->vm_params; 1414 vcpp = &vmcp->vmc_params; 1415 1416 /* Are we allowed to create an instance from this VM? */ 1417 if (vm_checkperm(NULL, &vmcp->vmc_insowner, uid) != 0) { 1418 log_warnx("vm \"%s\" no permission to create vm instance", 1419 vcpp->vcp_name); 1420 return (ENAMETOOLONG); 1421 } 1422 1423 name = vcp->vcp_name; 1424 1425 if (vm_getbyname(vcp->vcp_name) != NULL || 1426 vm_getbyvmid(vcp->vcp_id) != NULL) { 1427 return (EPROCLIM); 1428 } 1429 1430 /* CPU */ 1431 if (vcp->vcp_ncpus == 0) 1432 vcp->vcp_ncpus = vcpp->vcp_ncpus; 1433 if (vm_checkinsflag(vmcp, VMOP_CREATE_CPU, uid) != 0 && 1434 vcp->vcp_ncpus != vcpp->vcp_ncpus) { 1435 log_warnx("vm \"%s\" no permission to set cpus", name); 1436 return (EPERM); 1437 } 1438 1439 /* memory */ 1440 if (vcp->vcp_memranges[0].vmr_size == 0) 1441 vcp->vcp_memranges[0].vmr_size = 1442 vcpp->vcp_memranges[0].vmr_size; 1443 if (vm_checkinsflag(vmcp, VMOP_CREATE_MEMORY, uid) != 0 && 1444 vcp->vcp_memranges[0].vmr_size != 1445 vcpp->vcp_memranges[0].vmr_size) { 1446 log_warnx("vm \"%s\" no permission to set memory", name); 1447 return (EPERM); 1448 } 1449 1450 /* disks cannot be inherited */ 1451 if (vm_checkinsflag(vmcp, VMOP_CREATE_DISK, uid) != 0 && 1452 vmc->vmc_ndisks) { 1453 log_warnx("vm \"%s\" no permission to set disks", name); 1454 return (EPERM); 1455 } 1456 for (i = 0; i < vmc->vmc_ndisks; i++) { 1457 /* Check if this disk is already used in the parent */ 1458 for (j = 0; j < vmcp->vmc_ndisks; j++) { 1459 if (strcmp(vmc->vmc_disks[i], 1460 vmcp->vmc_disks[j]) == 0) { 1461 log_warnx("vm \"%s\" disk %s cannot be reused", 1462 name, vmc->vmc_disks[i]); 1463 return (EBUSY); 1464 } 1465 } 1466 vmc->vmc_checkaccess |= VMOP_CREATE_DISK; 1467 } 1468 1469 /* interfaces */ 1470 if (vmc->vmc_nnics > 0 && 1471 vm_checkinsflag(vmcp, VMOP_CREATE_NETWORK, uid) != 0 && 1472 vmc->vmc_nnics != vmcp->vmc_nnics) { 1473 log_warnx("vm \"%s\" no permission to set interfaces", name); 1474 return (EPERM); 1475 } 1476 for (i = 0; i < vmcp->vmc_nnics; i++) { 1477 /* Interface got overwritten */ 1478 if (i < vmc->vmc_nnics) 1479 continue; 1480 1481 /* Copy interface from parent */ 1482 vmc->vmc_ifflags[i] = vmcp->vmc_ifflags[i]; 1483 (void)strlcpy(vmc->vmc_ifnames[i], vmcp->vmc_ifnames[i], 1484 sizeof(vmc->vmc_ifnames[i])); 1485 (void)strlcpy(vmc->vmc_ifswitch[i], vmcp->vmc_ifswitch[i], 1486 sizeof(vmc->vmc_ifswitch[i])); 1487 (void)strlcpy(vmc->vmc_ifgroup[i], vmcp->vmc_ifgroup[i], 1488 sizeof(vmc->vmc_ifgroup[i])); 1489 memcpy(vmc->vmc_macs[i], vmcp->vmc_macs[i], 1490 sizeof(vmc->vmc_macs[i])); 1491 vmc->vmc_ifrdomain[i] = vmcp->vmc_ifrdomain[i]; 1492 vmc->vmc_nnics++; 1493 } 1494 for (i = 0; i < vmc->vmc_nnics; i++) { 1495 for (j = 0; j < vmcp->vmc_nnics; j++) { 1496 if (memcmp(zero_mac, vmc->vmc_macs[i], 1497 sizeof(vmc->vmc_macs[i])) != 0 && 1498 memcmp(vmcp->vmc_macs[i], vmc->vmc_macs[i], 1499 sizeof(vmc->vmc_macs[i])) != 0) { 1500 log_warnx("vm \"%s\" lladdr cannot be reused", 1501 name); 1502 return (EBUSY); 1503 } 1504 if (strlen(vmc->vmc_ifnames[i]) && 1505 strcmp(vmc->vmc_ifnames[i], 1506 vmcp->vmc_ifnames[j]) == 0) { 1507 log_warnx("vm \"%s\" %s cannot be reused", 1508 vmc->vmc_ifnames[i], name); 1509 return (EBUSY); 1510 } 1511 } 1512 } 1513 1514 /* kernel */ 1515 if (vmc->vmc_kernel > -1 || ((*vm_parent)->vm_kernel_path != NULL && 1516 strnlen((*vm_parent)->vm_kernel_path, PATH_MAX) < PATH_MAX)) { 1517 if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL, uid) != 0) { 1518 log_warnx("vm \"%s\" no permission to set boot image", 1519 name); 1520 return (EPERM); 1521 } 1522 vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL; 1523 } 1524 1525 /* cdrom */ 1526 if (strlen(vmc->vmc_cdrom) > 0) { 1527 if (vm_checkinsflag(vmcp, VMOP_CREATE_CDROM, uid) != 0) { 1528 log_warnx("vm \"%s\" no permission to set cdrom", name); 1529 return (EPERM); 1530 } 1531 vmc->vmc_checkaccess |= VMOP_CREATE_CDROM; 1532 } else if (strlcpy(vmc->vmc_cdrom, vmcp->vmc_cdrom, 1533 sizeof(vmc->vmc_cdrom)) >= sizeof(vmc->vmc_cdrom)) { 1534 log_warnx("vm \"%s\" cdrom name too long", name); 1535 return (EINVAL); 1536 } 1537 1538 /* user */ 1539 if (vmc->vmc_owner.uid == 0) 1540 vmc->vmc_owner.uid = vmcp->vmc_owner.uid; 1541 else if (vmc->vmc_owner.uid != uid && 1542 vmc->vmc_owner.uid != vmcp->vmc_owner.uid) { 1543 log_warnx("vm \"%s\" user mismatch", name); 1544 return (EPERM); 1545 } 1546 1547 /* group */ 1548 if (vmc->vmc_owner.gid == 0) 1549 vmc->vmc_owner.gid = vmcp->vmc_owner.gid; 1550 else if (vmc->vmc_owner.gid != vmcp->vmc_owner.gid) { 1551 log_warnx("vm \"%s\" group mismatch", name); 1552 return (EPERM); 1553 } 1554 1555 /* child instances */ 1556 if (vmc->vmc_insflags) { 1557 log_warnx("vm \"%s\" cannot change instance permissions", name); 1558 return (EPERM); 1559 } 1560 if (vmcp->vmc_insflags & VMOP_CREATE_INSTANCE) { 1561 vmc->vmc_insowner.gid = vmcp->vmc_insowner.gid; 1562 vmc->vmc_insowner.uid = vmcp->vmc_insowner.gid; 1563 vmc->vmc_insflags = vmcp->vmc_insflags; 1564 } else { 1565 vmc->vmc_insowner.gid = 0; 1566 vmc->vmc_insowner.uid = 0; 1567 vmc->vmc_insflags = 0; 1568 } 1569 1570 /* finished, remove instance flags */ 1571 vmc->vmc_flags &= ~VMOP_CREATE_INSTANCE; 1572 1573 return (0); 1574 } 1575 1576 /* 1577 * vm_checkperm 1578 * 1579 * Checks if the user represented by the 'uid' parameter is allowed to 1580 * manipulate the VM described by the 'vm' parameter (or connect to said VM's 1581 * console.) 1582 * 1583 * Parameters: 1584 * vm: the VM whose permission is to be checked 1585 * vmo: the required uid/gid to be checked 1586 * uid: the user ID of the user making the request 1587 * 1588 * Return values: 1589 * 0: the permission should be granted 1590 * -1: the permission check failed (also returned if vm == null) 1591 */ 1592 int 1593 vm_checkperm(struct vmd_vm *vm, struct vmop_owner *vmo, uid_t uid) 1594 { 1595 struct group *gr; 1596 struct passwd *pw; 1597 char **grmem; 1598 1599 /* root has no restrictions */ 1600 if (uid == 0) 1601 return (0); 1602 1603 if (vmo == NULL) 1604 return (-1); 1605 1606 /* check user */ 1607 if (vm == NULL) { 1608 if (vmo->uid == uid) 1609 return (0); 1610 } else { 1611 /* 1612 * check user of running vm (the owner of a running vm can 1613 * be different to (or more specific than) the configured owner. 1614 */ 1615 if (((vm->vm_state & VM_STATE_RUNNING) && vm->vm_uid == uid) || 1616 (!(vm->vm_state & VM_STATE_RUNNING) && vmo->uid == uid)) 1617 return (0); 1618 } 1619 1620 /* check groups */ 1621 if (vmo->gid != -1) { 1622 if ((pw = getpwuid(uid)) == NULL) 1623 return (-1); 1624 if (pw->pw_gid == vmo->gid) 1625 return (0); 1626 if ((gr = getgrgid(vmo->gid)) != NULL) { 1627 for (grmem = gr->gr_mem; *grmem; grmem++) 1628 if (strcmp(*grmem, pw->pw_name) == 0) 1629 return (0); 1630 } 1631 } 1632 1633 return (-1); 1634 } 1635 1636 /* 1637 * vm_checkinsflag 1638 * 1639 * Checks whether the non-root user is allowed to set an instance option. 1640 * 1641 * Parameters: 1642 * vmc: the VM create parameters 1643 * flag: the flag to be checked 1644 * uid: the user ID of the user making the request 1645 * 1646 * Return values: 1647 * 0: the permission should be granted 1648 * -1: the permission check failed (also returned if vm == null) 1649 */ 1650 int 1651 vm_checkinsflag(struct vmop_create_params *vmc, unsigned int flag, uid_t uid) 1652 { 1653 /* root has no restrictions */ 1654 if (uid == 0) 1655 return (0); 1656 1657 if ((vmc->vmc_insflags & flag) == 0) 1658 return (-1); 1659 1660 return (0); 1661 } 1662 1663 /* 1664 * vm_checkaccess 1665 * 1666 * Checks if the user represented by the 'uid' parameter is allowed to 1667 * access the file described by the 'path' parameter. 1668 * 1669 * Parameters: 1670 * fd: the file descriptor of the opened file 1671 * uflag: check if the userid has access to the file 1672 * uid: the user ID of the user making the request 1673 * amode: the access flags of R_OK and W_OK 1674 * 1675 * Return values: 1676 * 0: the permission should be granted 1677 * -1: the permission check failed 1678 */ 1679 int 1680 vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode) 1681 { 1682 struct group *gr; 1683 struct passwd *pw; 1684 char **grmem; 1685 struct stat st; 1686 mode_t mode; 1687 1688 if (fd == -1) 1689 return (-1); 1690 1691 /* 1692 * File has to be accessible and a regular file 1693 */ 1694 if (fstat(fd, &st) == -1 || !S_ISREG(st.st_mode)) 1695 return (-1); 1696 1697 /* root has no restrictions */ 1698 if (uid == 0 || uflag == 0) 1699 return (0); 1700 1701 /* check other */ 1702 mode = amode & W_OK ? S_IWOTH : 0; 1703 mode |= amode & R_OK ? S_IROTH : 0; 1704 if ((st.st_mode & mode) == mode) 1705 return (0); 1706 1707 /* check user */ 1708 mode = amode & W_OK ? S_IWUSR : 0; 1709 mode |= amode & R_OK ? S_IRUSR : 0; 1710 if (uid == st.st_uid && (st.st_mode & mode) == mode) 1711 return (0); 1712 1713 /* check groups */ 1714 mode = amode & W_OK ? S_IWGRP : 0; 1715 mode |= amode & R_OK ? S_IRGRP : 0; 1716 if ((st.st_mode & mode) != mode) 1717 return (-1); 1718 if ((pw = getpwuid(uid)) == NULL) 1719 return (-1); 1720 if (pw->pw_gid == st.st_gid) 1721 return (0); 1722 if ((gr = getgrgid(st.st_gid)) != NULL) { 1723 for (grmem = gr->gr_mem; *grmem; grmem++) 1724 if (strcmp(*grmem, pw->pw_name) == 0) 1725 return (0); 1726 } 1727 1728 return (-1); 1729 } 1730 1731 int 1732 vm_opentty(struct vmd_vm *vm) 1733 { 1734 struct stat st; 1735 struct group *gr; 1736 uid_t uid; 1737 gid_t gid; 1738 mode_t mode; 1739 int on = 1, tty_slave; 1740 1741 /* 1742 * Open tty with pre-opened PTM fd 1743 */ 1744 if (fdopenpty(env->vmd_ptmfd, &vm->vm_tty, &tty_slave, vm->vm_ttyname, 1745 NULL, NULL) == -1) { 1746 log_warn("fdopenpty"); 1747 return (-1); 1748 } 1749 close(tty_slave); 1750 1751 /* 1752 * We use user ioctl(2) mode to pass break commands. 1753 */ 1754 if (ioctl(vm->vm_tty, TIOCUCNTL, &on) == -1) { 1755 log_warn("could not enable user ioctl mode on %s", 1756 vm->vm_ttyname); 1757 goto fail; 1758 } 1759 1760 uid = vm->vm_uid; 1761 gid = vm->vm_params.vmc_owner.gid; 1762 1763 if (vm->vm_params.vmc_owner.gid != -1) { 1764 mode = 0660; 1765 } else if ((gr = getgrnam("tty")) != NULL) { 1766 gid = gr->gr_gid; 1767 mode = 0620; 1768 } else { 1769 mode = 0600; 1770 gid = 0; 1771 } 1772 1773 log_debug("%s: vm %s tty %s uid %d gid %d mode %o", 1774 __func__, vm->vm_params.vmc_params.vcp_name, 1775 vm->vm_ttyname, uid, gid, mode); 1776 1777 /* 1778 * Change ownership and mode of the tty as required. 1779 * Loosely based on the implementation of sshpty.c 1780 */ 1781 if (fstat(vm->vm_tty, &st) == -1) { 1782 log_warn("fstat failed for %s", vm->vm_ttyname); 1783 goto fail; 1784 } 1785 1786 if (st.st_uid != uid || st.st_gid != gid) { 1787 if (chown(vm->vm_ttyname, uid, gid) == -1) { 1788 log_warn("chown %s %d %d failed, uid %d", 1789 vm->vm_ttyname, uid, gid, getuid()); 1790 1791 /* Ignore failure on read-only filesystems */ 1792 if (!((errno == EROFS) && 1793 (st.st_uid == uid || st.st_uid == 0))) 1794 goto fail; 1795 } 1796 } 1797 1798 if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) { 1799 if (chmod(vm->vm_ttyname, mode) == -1) { 1800 log_warn("chmod %s %o failed, uid %d", 1801 vm->vm_ttyname, mode, getuid()); 1802 1803 /* Ignore failure on read-only filesystems */ 1804 if (!((errno == EROFS) && 1805 (st.st_uid == uid || st.st_uid == 0))) 1806 goto fail; 1807 } 1808 } 1809 1810 return (0); 1811 fail: 1812 vm_closetty(vm); 1813 return (-1); 1814 } 1815 1816 void 1817 vm_closetty(struct vmd_vm *vm) 1818 { 1819 if (vm->vm_tty != -1) { 1820 /* Release and close the tty */ 1821 if (fchown(vm->vm_tty, 0, 0) == -1) 1822 log_warn("chown %s 0 0 failed", vm->vm_ttyname); 1823 if (fchmod(vm->vm_tty, 0666) == -1) 1824 log_warn("chmod %s 0666 failed", vm->vm_ttyname); 1825 close(vm->vm_tty); 1826 vm->vm_tty = -1; 1827 } 1828 memset(&vm->vm_ttyname, 0, sizeof(vm->vm_ttyname)); 1829 } 1830 1831 void 1832 switch_remove(struct vmd_switch *vsw) 1833 { 1834 if (vsw == NULL) 1835 return; 1836 1837 TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry); 1838 1839 free(vsw->sw_group); 1840 free(vsw->sw_name); 1841 free(vsw); 1842 } 1843 1844 struct vmd_switch * 1845 switch_getbyname(const char *name) 1846 { 1847 struct vmd_switch *vsw; 1848 1849 if (name == NULL) 1850 return (NULL); 1851 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1852 if (strcmp(vsw->sw_name, name) == 0) 1853 return (vsw); 1854 } 1855 1856 return (NULL); 1857 } 1858 1859 char * 1860 get_string(uint8_t *ptr, size_t len) 1861 { 1862 size_t i; 1863 1864 for (i = 0; i < len; i++) 1865 if (!isprint((unsigned char)ptr[i])) 1866 break; 1867 1868 return strndup(ptr, i); 1869 } 1870 1871 uint32_t 1872 prefixlen2mask(uint8_t prefixlen) 1873 { 1874 if (prefixlen == 0) 1875 return (0); 1876 1877 if (prefixlen > 32) 1878 prefixlen = 32; 1879 1880 return (htonl(0xffffffff << (32 - prefixlen))); 1881 } 1882 1883 void 1884 prefixlen2mask6(uint8_t prefixlen, struct in6_addr *mask) 1885 { 1886 struct in6_addr s6; 1887 int i; 1888 1889 if (prefixlen > 128) 1890 prefixlen = 128; 1891 1892 memset(&s6, 0, sizeof(s6)); 1893 for (i = 0; i < prefixlen / 8; i++) 1894 s6.s6_addr[i] = 0xff; 1895 i = prefixlen % 8; 1896 if (i) 1897 s6.s6_addr[prefixlen / 8] = 0xff00 >> i; 1898 1899 memcpy(mask, &s6, sizeof(s6)); 1900 } 1901 1902 void 1903 getmonotime(struct timeval *tv) 1904 { 1905 struct timespec ts; 1906 1907 if (clock_gettime(CLOCK_MONOTONIC, &ts)) 1908 fatal("clock_gettime"); 1909 1910 TIMESPEC_TO_TIMEVAL(tv, &ts); 1911 } 1912 1913 static inline void 1914 vm_terminate(struct vmd_vm *vm, const char *caller) 1915 { 1916 if (vm->vm_from_config) 1917 vm_stop(vm, 0, caller); 1918 else { 1919 /* vm_remove calls vm_stop */ 1920 vm_remove(vm, caller); 1921 } 1922 } 1923 1924 /* 1925 * Utility function for closing vm file descriptors. Assumes an fd of -1 was 1926 * already closed or never opened. 1927 * 1928 * Returns 0 on success, otherwise -1 on failure. 1929 */ 1930 int 1931 close_fd(int fd) 1932 { 1933 int ret; 1934 1935 if (fd == -1) 1936 return (0); 1937 1938 #ifdef POSIX_CLOSE_RESTART 1939 do { ret = close(fd); } while (ret == -1 && errno == EINTR); 1940 #else 1941 ret = close(fd); 1942 #endif /* POSIX_CLOSE_RESTART */ 1943 1944 if (ret == -1 && errno == EIO) 1945 log_warn("%s(%d)", __func__, fd); 1946 1947 return (ret); 1948 } 1949