1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2021 The DragonFly Project. All rights reserved.
5 *
6 * This code is derived from software contributed to The DragonFly Project
7 * by Aaron LI <aly@aaronly.me>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 * 3. Neither the name of The DragonFly Project nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific, prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/mman.h>
41
42 #include "nvmm.h"
43 #include "nvmm_os.h"
44 #include "nvmm_internal.h"
45
46 MALLOC_DEFINE(M_NVMM, "nvmm", "NVMM data");
47
/*
 * NVMM expects VM functions to return 0 on success, but DragonFly's VM
 * functions return KERN_SUCCESS.  Although KERN_SUCCESS is currently
 * defined to be 0 as well, assert that here for future-proofing.
 */
53 CTASSERT(KERN_SUCCESS == 0);
54
55 os_vmspace_t *
os_vmspace_create(vaddr_t vmin,vaddr_t vmax)56 os_vmspace_create(vaddr_t vmin, vaddr_t vmax)
57 {
58 struct vmspace *vm;
59
60 vm = vmspace_alloc(vmin, vmax);
61
62 /*
63 * Set PMAP_MULTI on the backing pmap for the machine. Only
64 * pmap changes to the backing pmap for the machine affect the
65 * guest. Changes to the host's pmap do not affect the guest's
66 * backing pmap.
67 */
68 pmap_maybethreaded(&vm->vm_pmap);
69
70 return vm;
71 }
72
73 void
os_vmspace_destroy(os_vmspace_t * vm)74 os_vmspace_destroy(os_vmspace_t *vm)
75 {
76 pmap_del_all_cpus(vm);
77 vmspace_rel(vm);
78 }
79
80 int
os_vmspace_fault(os_vmspace_t * vm,vaddr_t va,vm_prot_t prot)81 os_vmspace_fault(os_vmspace_t *vm, vaddr_t va, vm_prot_t prot)
82 {
83 int fault_flags;
84
85 if (prot & VM_PROT_WRITE)
86 fault_flags = VM_FAULT_DIRTY;
87 else
88 fault_flags = VM_FAULT_NORMAL;
89
90 return vm_fault(&vm->vm_map, trunc_page(va), prot, fault_flags);
91 }
92
93 os_vmobj_t *
os_vmobj_create(voff_t size)94 os_vmobj_create(voff_t size)
95 {
96 struct vm_object *object;
97
98 object = default_pager_alloc(NULL, size, VM_PROT_DEFAULT, 0);
99 vm_object_set_flag(object, OBJ_NOSPLIT);
100
101 return object;
102 }
103
104 void
os_vmobj_ref(os_vmobj_t * vmobj)105 os_vmobj_ref(os_vmobj_t *vmobj)
106 {
107 vm_object_hold(vmobj);
108 vm_object_reference_locked(vmobj);
109 vm_object_drop(vmobj);
110 }
111
112 void
os_vmobj_rel(os_vmobj_t * vmobj)113 os_vmobj_rel(os_vmobj_t *vmobj)
114 {
115 vm_object_deallocate(vmobj);
116 }
117
118 int
os_vmobj_map(struct vm_map * map,vaddr_t * addr,vsize_t size,os_vmobj_t * vmobj,voff_t offset,bool wired,bool fixed,bool shared,int prot,int maxprot)119 os_vmobj_map(struct vm_map *map, vaddr_t *addr, vsize_t size, os_vmobj_t *vmobj,
120 voff_t offset, bool wired, bool fixed, bool shared, int prot, int maxprot)
121 {
122 vm_prot_t vmprot, vmmaxprot;
123 vm_inherit_t inherit;
124 vm_offset_t start = *addr;
125 int rv = KERN_SUCCESS;
126 int count;
127
128 /* Convert prot. */
129 vmprot = 0;
130 if (prot & PROT_READ)
131 vmprot |= VM_PROT_READ;
132 if (prot & PROT_WRITE)
133 vmprot |= VM_PROT_WRITE;
134 if (prot & PROT_EXEC)
135 vmprot |= VM_PROT_EXECUTE;
136
137 /* Convert maxprot. */
138 vmmaxprot = 0;
139 if (maxprot & PROT_READ)
140 vmmaxprot |= VM_PROT_READ;
141 if (maxprot & PROT_WRITE)
142 vmmaxprot |= VM_PROT_WRITE;
143 if (maxprot & PROT_EXEC)
144 vmmaxprot |= VM_PROT_EXECUTE;
145
146 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
147 vm_map_lock(map);
148
149 if (fixed) {
150 /*
151 * Remove any existing entries in the range, so the new
152 * mapping can be created at the requested address.
153 */
154 rv = vm_map_delete(map, start, start + size, &count);
155 } else {
156 if (vm_map_findspace(map, start, size, 1, 0, &start))
157 rv = KERN_NO_SPACE;
158 }
159 if (rv != KERN_SUCCESS) {
160 vm_map_unlock(map);
161 vm_map_entry_release(count);
162 return rv;
163 }
164
165 /* Get a reference to the object. */
166 os_vmobj_ref(vmobj);
167
168 /*
169 * Map the object. This consumes the reference on success only. On
170 * failure we must drop the reference manually.
171 */
172 vm_object_hold(vmobj);
173 rv = vm_map_insert(map, &count, vmobj, NULL, offset, NULL,
174 start, start + size, VM_MAPTYPE_NORMAL, VM_SUBSYS_NVMM,
175 vmprot, vmmaxprot, 0);
176 vm_object_drop(vmobj);
177 vm_map_unlock(map);
178 vm_map_entry_release(count);
179 if (rv != KERN_SUCCESS) {
180 /* Drop the ref. */
181 os_vmobj_rel(vmobj);
182 return rv;
183 }
184
185 inherit = shared ? VM_INHERIT_SHARE : VM_INHERIT_NONE;
186 rv = vm_map_inherit(map, start, start + size, inherit);
187 if (rv != KERN_SUCCESS) {
188 os_vmobj_unmap(map, start, start + size, false);
189 return rv;
190 }
191
192 if (wired) {
193 rv = vm_map_kernel_wiring(map, start, start + size, 0);
194 if (rv != KERN_SUCCESS) {
195 os_vmobj_unmap(map, start, start + size, false);
196 return rv;
197 }
198 }
199
200 *addr = start;
201 return 0;
202 }
203
204 void
os_vmobj_unmap(struct vm_map * map,vaddr_t start,vaddr_t end,bool wired)205 os_vmobj_unmap(struct vm_map *map, vaddr_t start, vaddr_t end, bool wired)
206 {
207 if (wired) {
208 /* Unwire kernel mappings before removing. */
209 vm_map_kernel_wiring(map, start, end, KM_PAGEABLE);
210 }
211 vm_map_remove(map, start, end);
212 }
213
214 void *
os_pagemem_zalloc(size_t size)215 os_pagemem_zalloc(size_t size)
216 {
217 void *ret;
218
219 /* NOTE: kmem_alloc() may return 0 ! */
220 ret = (void *)kmem_alloc(kernel_map, roundup(size, PAGE_SIZE),
221 VM_SUBSYS_NVMM);
222
223 OS_ASSERT((uintptr_t)ret % PAGE_SIZE == 0);
224
225 return ret;
226 }
227
228 void
os_pagemem_free(void * ptr,size_t size)229 os_pagemem_free(void *ptr, size_t size)
230 {
231 kmem_free(kernel_map, (vaddr_t)ptr, roundup(size, PAGE_SIZE));
232 }
233
234 paddr_t
os_pa_zalloc(void)235 os_pa_zalloc(void)
236 {
237 struct vm_page *pg;
238
239 pg = vm_page_alloczwq(0,
240 VM_ALLOC_SYSTEM | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
241
242 return VM_PAGE_TO_PHYS(pg);
243 }
244
245 void
os_pa_free(paddr_t pa)246 os_pa_free(paddr_t pa)
247 {
248 vm_page_freezwq(PHYS_TO_VM_PAGE(pa));
249 }
250
251 int
os_contigpa_zalloc(paddr_t * pa,vaddr_t * va,size_t npages)252 os_contigpa_zalloc(paddr_t *pa, vaddr_t *va, size_t npages)
253 {
254 void *addr;
255
256 addr = contigmalloc(npages * PAGE_SIZE, M_NVMM, M_WAITOK | M_ZERO,
257 0, ~0UL, PAGE_SIZE, 0);
258 if (addr == NULL)
259 return ENOMEM;
260
261 *va = (vaddr_t)addr;
262 *pa = vtophys(addr);
263 return 0;
264 }
265
266 void
os_contigpa_free(paddr_t pa __unused,vaddr_t va,size_t npages)267 os_contigpa_free(paddr_t pa __unused, vaddr_t va, size_t npages)
268 {
269 contigfree((void *)va, npages * PAGE_SIZE, M_NVMM);
270 }
271
272 /* -------------------------------------------------------------------------- */
273
274 #include <sys/conf.h>
275 #include <sys/devfs.h>
276 #include <sys/device.h>
277 #include <sys/fcntl.h>
278 #include <sys/module.h>
279
280 static d_open_t dfbsd_nvmm_open;
281 static d_ioctl_t dfbsd_nvmm_ioctl;
282 static d_priv_dtor_t dfbsd_nvmm_dtor;
283
284 static struct dev_ops nvmm_ops = {
285 { "nvmm", 0, D_MPSAFE },
286 .d_open = dfbsd_nvmm_open,
287 .d_ioctl = dfbsd_nvmm_ioctl,
288 };
289
290 static int
dfbsd_nvmm_open(struct dev_open_args * ap)291 dfbsd_nvmm_open(struct dev_open_args *ap)
292 {
293 int flags = ap->a_oflags;
294 struct nvmm_owner *owner;
295 struct file *fp;
296 int error;
297
298 if (__predict_false(nvmm_impl == NULL))
299 return ENXIO;
300 if (!(flags & O_CLOEXEC))
301 return EINVAL;
302
303 if (OFLAGS(flags) & O_WRONLY) {
304 owner = &nvmm_root_owner;
305 } else {
306 owner = os_mem_alloc(sizeof(*owner));
307 owner->pid = curthread->td_proc->p_pid;
308 }
309
310 fp = ap->a_fpp ? *ap->a_fpp : NULL;
311 error = devfs_set_cdevpriv(fp, owner, dfbsd_nvmm_dtor);
312 if (error) {
313 dfbsd_nvmm_dtor(owner);
314 return error;
315 }
316
317 return 0;
318 }
319
320 static void
dfbsd_nvmm_dtor(void * arg)321 dfbsd_nvmm_dtor(void *arg)
322 {
323 struct nvmm_owner *owner = arg;
324
325 OS_ASSERT(owner != NULL);
326 nvmm_kill_machines(owner);
327 if (owner != &nvmm_root_owner) {
328 os_mem_free(owner, sizeof(*owner));
329 }
330 }
331
332 static int
dfbsd_nvmm_ioctl(struct dev_ioctl_args * ap)333 dfbsd_nvmm_ioctl(struct dev_ioctl_args *ap)
334 {
335 unsigned long cmd = ap->a_cmd;
336 void *data = ap->a_data;
337 struct file *fp = ap->a_fp;
338 struct nvmm_owner *owner = NULL;
339
340 devfs_get_cdevpriv(fp, (void **)&owner);
341 OS_ASSERT(owner != NULL);
342
343 return nvmm_ioctl(owner, cmd, data);
344 }
345
346 /* -------------------------------------------------------------------------- */
347
348 static int
nvmm_attach(void)349 nvmm_attach(void)
350 {
351 int error;
352
353 error = nvmm_init();
354 if (error)
355 panic("%s: impossible", __func__);
356 os_printf("nvmm: attached, using backend %s\n", nvmm_impl->name);
357
358 return 0;
359 }
360
361 static int
nvmm_detach(void)362 nvmm_detach(void)
363 {
364 if (os_atomic_load_uint(&nmachines) > 0)
365 return EBUSY;
366
367 nvmm_fini();
368 return 0;
369 }
370
371 static int
nvmm_modevent(module_t mod __unused,int type,void * data __unused)372 nvmm_modevent(module_t mod __unused, int type, void *data __unused)
373 {
374 static cdev_t dev = NULL;
375 int error;
376
377 switch (type) {
378 case MOD_LOAD:
379 if (nvmm_ident() == NULL) {
380 os_printf("nvmm: cpu not supported\n");
381 return ENOTSUP;
382 }
383 error = nvmm_attach();
384 if (error)
385 return error;
386
387 dev = make_dev(&nvmm_ops, 0, UID_ROOT, GID_NVMM, 0640, "nvmm");
388 if (dev == NULL) {
389 os_printf("nvmm: unable to create device\n");
390 error = ENOMEM;
391 }
392 break;
393
394 case MOD_UNLOAD:
395 if (dev == NULL)
396 return 0;
397 error = nvmm_detach();
398 if (error == 0)
399 destroy_dev(dev);
400 break;
401
402 case MOD_SHUTDOWN:
403 error = 0;
404 break;
405
406 default:
407 error = EOPNOTSUPP;
408 break;
409 }
410
411 return error;
412 }
413
414 static moduledata_t nvmm_moddata = {
415 .name = "nvmm",
416 .evhand = nvmm_modevent,
417 .priv = NULL,
418 };
419
420 DECLARE_MODULE(nvmm, nvmm_moddata, SI_SUB_PSEUDO, SI_ORDER_ANY);
421 MODULE_VERSION(nvmm, NVMM_KERN_VERSION);
422