/*
 * Copyright (c) 2018-2021 Maxime Villard, m00nbsd.net
 * All rights reserved.
 *
 * This code is part of the NVMM hypervisor.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>

#include "nvmm.h"

static struct nvmm_capability __capability;

#ifdef __x86_64__
#include "libnvmm_x86.c"
#endif

#ifdef __DragonFly__
#define LIST_FOREACH_SAFE LIST_FOREACH_MUTABLE
#endif

/*
 * Host-side bookkeeping of one GPA->HVA mapping installed with
 * nvmm_gpa_map(), used by nvmm_gpa_to_hva() to translate guest physical
 * addresses without entering the kernel.
 */
typedef struct __area {
        LIST_ENTRY(__area) list;
        gpaddr_t gpa;
        uintptr_t hva;
        size_t size;
        nvmm_prot_t prot;
} area_t;

typedef LIST_HEAD(, __area) area_list_t;

static int nvmm_fd = -1;

/* -------------------------------------------------------------------------- */

static bool
__area_isvalid(struct nvmm_machine *mach, gpaddr_t gpa, size_t size)
{
        area_list_t *areas = mach->areas;
        area_t *ent;

        LIST_FOREACH(ent, areas, list) {
                /* Collision on GPA: reject any overlap with 'ent'. */
                /* Case 1: the new start falls inside 'ent'. */
                if (gpa >= ent->gpa && gpa < ent->gpa + ent->size) {
                        return false;
                }
                /* Case 2: the new end falls inside 'ent'. */
                if (gpa + size > ent->gpa &&
                    gpa + size <= ent->gpa + ent->size) {
                        return false;
                }
                /* Case 3: the new area fully covers 'ent'. */
                if (gpa <= ent->gpa && gpa + size >= ent->gpa + ent->size) {
                        return false;
                }
        }

        return true;
}

static int
__area_add(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa, size_t size,
    int prot)
{
        area_list_t *areas = mach->areas;
        nvmm_prot_t nprot;
        area_t *area;

        /* Convert the mmap(2)-style protection into NVMM protection bits. */
        nprot = 0;
        if (prot & PROT_READ)
                nprot |= NVMM_PROT_READ;
        if (prot & PROT_WRITE)
                nprot |= NVMM_PROT_WRITE;
        if (prot & PROT_EXEC)
                nprot |= NVMM_PROT_EXEC;

        if (!__area_isvalid(mach, gpa, size)) {
                errno = EINVAL;
                return -1;
        }

        area = malloc(sizeof(*area));
        if (area == NULL)
                return -1;
        area->gpa = gpa;
        area->hva = hva;
        area->size = size;
        area->prot = nprot;

        LIST_INSERT_HEAD(areas, area, list);

        return 0;
}

static int
__area_delete(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa,
    size_t size)
{
        area_list_t *areas = mach->areas;
        area_t *ent, *nxt;

        LIST_FOREACH_SAFE(ent, areas, list, nxt) {
                if (hva == ent->hva && gpa == ent->gpa && size == ent->size) {
                        LIST_REMOVE(ent, list);
                        free(ent);
                        return 0;
                }
        }

        return -1;
}

static void
__area_remove_all(struct nvmm_machine *mach)
{
        area_list_t *areas = mach->areas;
        area_t *ent;

        while ((ent = LIST_FIRST(areas)) != NULL) {
                LIST_REMOVE(ent, list);
                free(ent);
        }

        free(areas);
}

/* -------------------------------------------------------------------------- */

/* Open /dev/nvmm read-only and check the kernel/library versions match. */
int
nvmm_init(void)
{
        if (nvmm_fd != -1)
                return 0;
        nvmm_fd = open("/dev/nvmm", O_RDONLY | O_CLOEXEC);
        if (nvmm_fd == -1)
                return -1;
        if (nvmm_capability(&__capability) == -1) {
                close(nvmm_fd);
                nvmm_fd = -1;
                return -1;
        }
        if (__capability.version != NVMM_KERN_VERSION) {
                close(nvmm_fd);
                nvmm_fd = -1;
                errno = EPROGMISMATCH;
                return -1;
        }

        return 0;
}

/* Same as nvmm_init(), but open /dev/nvmm for writing. */
int
nvmm_root_init(void)
{
        if (nvmm_fd != -1)
                return 0;
        nvmm_fd = open("/dev/nvmm", O_WRONLY | O_CLOEXEC);
        if (nvmm_fd == -1)
                return -1;
        if (nvmm_capability(&__capability) == -1) {
                close(nvmm_fd);
                nvmm_fd = -1;
                return -1;
        }
        if (__capability.version != NVMM_KERN_VERSION) {
                close(nvmm_fd);
                nvmm_fd = -1;
                errno = EPROGMISMATCH;
                return -1;
        }

        return 0;
}
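
/*
 * Illustrative usage sketch (not part of the library): a host program calls
 * nvmm_init() once before any other libnvmm function; EPROGMISMATCH from it
 * specifically means the kernel and libnvmm versions disagree. The error
 * handling via err(3) is just one possible choice.
 *
 *      #include <err.h>
 *      #include <stdlib.h>
 *      #include <nvmm.h>
 *
 *      if (nvmm_init() == -1)
 *              err(EXIT_FAILURE, "nvmm_init");
 */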

int
nvmm_capability(struct nvmm_capability *cap)
{
        struct nvmm_ioc_capability args;
        int ret;

        ret = ioctl(nvmm_fd, NVMM_IOC_CAPABILITY, &args);
        if (ret == -1)
                return -1;

        memcpy(cap, &args.cap, sizeof(args.cap));

        return 0;
}

int
nvmm_machine_create(struct nvmm_machine *mach)
{
        struct nvmm_ioc_machine_create args;
        struct nvmm_comm_page **pages;
        area_list_t *areas;
        int ret;

        areas = calloc(1, sizeof(*areas));
        if (areas == NULL)
                return -1;

        pages = calloc(__capability.max_vcpus, sizeof(*pages));
        if (pages == NULL) {
                free(areas);
                return -1;
        }

        ret = ioctl(nvmm_fd, NVMM_IOC_MACHINE_CREATE, &args);
        if (ret == -1) {
                free(areas);
                free(pages);
                return -1;
        }

        LIST_INIT(areas);

        memset(mach, 0, sizeof(*mach));
        mach->machid = args.machid;
        mach->pages = pages;
        mach->areas = areas;

        return 0;
}
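
/*
 * Illustrative usage sketch (not part of the library): the typical machine
 * lifecycle pairs nvmm_machine_create() with nvmm_machine_destroy(), with
 * memory setup and VCPU work in between. Only functions from this file are
 * assumed.
 *
 *      struct nvmm_machine mach;
 *
 *      if (nvmm_machine_create(&mach) == -1)
 *              err(EXIT_FAILURE, "nvmm_machine_create");
 *      // ... nvmm_hva_map()/nvmm_gpa_map(), nvmm_vcpu_create(), run ...
 *      if (nvmm_machine_destroy(&mach) == -1)
 *              err(EXIT_FAILURE, "nvmm_machine_destroy");
 */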

int
nvmm_machine_destroy(struct nvmm_machine *mach)
{
        struct nvmm_ioc_machine_destroy args;
        int ret;

        args.machid = mach->machid;

        ret = ioctl(nvmm_fd, NVMM_IOC_MACHINE_DESTROY, &args);
        if (ret == -1)
                return -1;

        __area_remove_all(mach);
        free(mach->pages);

        return 0;
}

int
nvmm_machine_configure(struct nvmm_machine *mach, uint64_t op, void *conf)
{
        struct nvmm_ioc_machine_configure args;
        int ret;

        args.machid = mach->machid;
        args.op = op;
        args.conf = conf;

        ret = ioctl(nvmm_fd, NVMM_IOC_MACHINE_CONFIGURE, &args);
        if (ret == -1)
                return -1;

        return 0;
}

int
nvmm_vcpu_create(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_vcpu *vcpu)
{
        struct nvmm_ioc_vcpu_create args;
        int ret;

        args.machid = mach->machid;
        args.cpuid = cpuid;
        args.comm = NULL;

        ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_CREATE, &args);
        if (ret == -1)
                return -1;

        mach->pages[cpuid] = args.comm;

        vcpu->cpuid = cpuid;
        vcpu->state = &args.comm->state;
        vcpu->event = &args.comm->event;
        vcpu->exit = malloc(sizeof(*vcpu->exit));

        return 0;
}

int
nvmm_vcpu_destroy(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
        struct nvmm_ioc_vcpu_destroy args;
        struct nvmm_comm_page *comm;
        int ret;

        args.machid = mach->machid;
        args.cpuid = vcpu->cpuid;

        ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_DESTROY, &args);
        if (ret == -1)
                return -1;

        /*
         * The comm page must be unmapped on the user side: when the fd is
         * eventually closed, the kernel is not guaranteed to be in the
         * right address space to do the unmapping itself.
         */
        comm = mach->pages[vcpu->cpuid];
        munmap(comm, __capability.comm_size);

        free(vcpu->exit);

        return 0;
}

int
nvmm_vcpu_configure(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
    uint64_t op, void *conf)
{
        struct nvmm_ioc_vcpu_configure args;
        int ret;

        switch (op) {
        case NVMM_VCPU_CONF_CALLBACKS:
                /* Callbacks live purely in userland, no ioctl needed. */
                memcpy(&vcpu->cbs, conf, sizeof(vcpu->cbs));
                return 0;
        }

        args.machid = mach->machid;
        args.cpuid = vcpu->cpuid;
        args.op = op;
        args.conf = conf;

        ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_CONFIGURE, &args);
        if (ret == -1)
                return -1;

        return 0;
}

int
nvmm_vcpu_setstate(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
    uint64_t flags)
{
        struct nvmm_comm_page *comm;

        comm = mach->pages[vcpu->cpuid];
        comm->state_commit |= flags;
        comm->state_cached |= flags;

        return 0;
}

int
nvmm_vcpu_getstate(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
    uint64_t flags)
{
        struct nvmm_ioc_vcpu_getstate args;
        struct nvmm_comm_page *comm;
        int ret;

        comm = mach->pages[vcpu->cpuid];

        if (__predict_true((flags & ~comm->state_cached) == 0)) {
                return 0;
        }
        comm->state_wanted = flags & ~comm->state_cached;

        args.machid = mach->machid;
        args.cpuid = vcpu->cpuid;

        ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_GETSTATE, &args);
        if (ret == -1)
                return -1;

        return 0;
}
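
/*
 * Note on the two functions above: neither copies register state itself.
 * The state lives in the per-VCPU comm page shared with the kernel;
 * nvmm_vcpu_setstate() only marks the given flags as committed (picked up
 * by the kernel on the next run), and nvmm_vcpu_getstate() only issues an
 * ioctl for fields that are not already cached.
 *
 * Illustrative sketch (not part of the library); the x86-64 names
 * NVMM_X64_STATE_GPRS and NVMM_X64_GPR_RAX are the ones assumed here:
 *
 *      nvmm_vcpu_getstate(&mach, &vcpu, NVMM_X64_STATE_GPRS);
 *      vcpu.state->gprs[NVMM_X64_GPR_RAX] = 0x1;
 *      nvmm_vcpu_setstate(&mach, &vcpu, NVMM_X64_STATE_GPRS);
 */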

int
nvmm_vcpu_inject(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
        struct nvmm_comm_page *comm;

        comm = mach->pages[vcpu->cpuid];
        comm->event_commit = true;

        return 0;
}

int
nvmm_vcpu_run(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
        struct nvmm_ioc_vcpu_run args;
        int ret;

        args.machid = mach->machid;
        args.cpuid = vcpu->cpuid;
        memset(&args.exit, 0, sizeof(args.exit));

        ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_RUN, &args);
        if (ret == -1)
                return -1;

        /* No comm support yet, just copy. */
        memcpy(vcpu->exit, &args.exit, sizeof(args.exit));

        return 0;
}
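
/*
 * Illustrative sketch (not part of the library): a minimal VCPU run loop.
 * The exit reasons (NVMM_VCPU_EXIT_*) and the assist helpers referenced at
 * the bottom of this file are assumed to come from <nvmm.h>.
 *
 *      while (1) {
 *              if (nvmm_vcpu_run(&mach, &vcpu) == -1)
 *                      err(EXIT_FAILURE, "nvmm_vcpu_run");
 *              switch (vcpu.exit->reason) {
 *              case NVMM_VCPU_EXIT_NONE:
 *                      break;          // nothing special, re-enter the guest
 *              case NVMM_VCPU_EXIT_MEMORY:
 *                      // MMIO: typically handled via nvmm_assist_mem()
 *                      break;
 *              case NVMM_VCPU_EXIT_IO:
 *                      // port I/O: typically handled via nvmm_assist_io()
 *                      break;
 *              default:
 *                      // halted, shutdown, etc.
 *                      break;
 *              }
 *      }
 */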

int
nvmm_gpa_map(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa,
    size_t size, int prot)
{
        struct nvmm_ioc_gpa_map args;
        int ret;

        ret = __area_add(mach, hva, gpa, size, prot);
        if (ret == -1)
                return -1;

        args.machid = mach->machid;
        args.hva = hva;
        args.gpa = gpa;
        args.size = size;
        args.prot = prot;

        ret = ioctl(nvmm_fd, NVMM_IOC_GPA_MAP, &args);
        if (ret == -1) {
                /* Can't recover. */
                abort();
        }

        return 0;
}

int
nvmm_gpa_unmap(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa,
    size_t size)
{
        struct nvmm_ioc_gpa_unmap args;
        int ret;

        ret = __area_delete(mach, hva, gpa, size);
        if (ret == -1)
                return -1;

        args.machid = mach->machid;
        args.gpa = gpa;
        args.size = size;

        ret = ioctl(nvmm_fd, NVMM_IOC_GPA_UNMAP, &args);
        if (ret == -1) {
                /* Can't recover. */
                abort();
        }

        return 0;
}

int
nvmm_hva_map(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
        struct nvmm_ioc_hva_map args;
        int ret;

        args.machid = mach->machid;
        args.hva = hva;
        args.size = size;

        ret = ioctl(nvmm_fd, NVMM_IOC_HVA_MAP, &args);
        if (ret == -1)
                return -1;

        return 0;
}

int
nvmm_hva_unmap(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
        struct nvmm_ioc_hva_unmap args;
        int ret;

        args.machid = mach->machid;
        args.hva = hva;
        args.size = size;

        ret = ioctl(nvmm_fd, NVMM_IOC_HVA_UNMAP, &args);
        if (ret == -1)
                return -1;

        return 0;
}
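
/*
 * Illustrative sketch (not part of the library): backing guest RAM. Host
 * memory is allocated, registered with nvmm_hva_map(), then exposed to the
 * guest with nvmm_gpa_map(). Page-aligned sizes are assumed, and the guest
 * physical base (0 here) is arbitrary.
 *
 *      size_t size = 128 * 1024 * 1024;        // 128 MiB of guest RAM
 *      void *mem;
 *
 *      mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *          MAP_ANON | MAP_PRIVATE, -1, 0);
 *      if (mem == MAP_FAILED)
 *              err(EXIT_FAILURE, "mmap");
 *      if (nvmm_hva_map(&mach, (uintptr_t)mem, size) == -1)
 *              err(EXIT_FAILURE, "nvmm_hva_map");
 *      if (nvmm_gpa_map(&mach, (uintptr_t)mem, 0, size,
 *          PROT_READ | PROT_WRITE | PROT_EXEC) == -1)
 *              err(EXIT_FAILURE, "nvmm_gpa_map");
 */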

/*
 * nvmm_gva_to_gpa(): architecture-specific.
 */

int
nvmm_gpa_to_hva(struct nvmm_machine *mach, gpaddr_t gpa, uintptr_t *hva,
    nvmm_prot_t *prot)
{
        area_list_t *areas = mach->areas;
        area_t *ent;

        LIST_FOREACH(ent, areas, list) {
                if (gpa >= ent->gpa && gpa < ent->gpa + ent->size) {
                        *hva = ent->hva + (gpa - ent->gpa);
                        *prot = ent->prot;
                        return 0;
                }
        }

        errno = ENOENT;
        return -1;
}

/*
 * nvmm_assist_io(): architecture-specific.
 */

/*
 * nvmm_assist_mem(): architecture-specific.
 */

int
nvmm_ctl(int op, void *data, size_t size)
{
        struct nvmm_ioc_ctl args;
        int ret;

        args.op = op;
        args.data = data;
        args.size = size;

        ret = ioctl(nvmm_fd, NVMM_IOC_CTL, &args);
        if (ret == -1)
                return -1;

        return 0;
}