/* $NetBSD: privcmd.c,v 1.66 2022/09/01 15:32:16 bouyer Exp $ */

/*-
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: privcmd.c,v 1.66 2022/09/01 15:32:16 bouyer Exp $");
#include "opt_xen.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/stat.h>
#include <sys/proc.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/kernfs/kernfs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_fault.h>
#include <uvm/uvm_fault_i.h>

#include <xen/kernfs_machdep.h>
#include <xen/hypervisor.h>
#include <xen/xen.h>
#include <xen/xenio.h>
#include <xen/xenmem.h>
#include <xen/xenpmap.h>
#include <xen/granttables.h>
#define PRIVCMD_MODE (S_IRUSR)

/*
 * Magic value used to mark invalid pages.
 * This must be a value within the page offset.
 * Page-aligned values including 0x0 are used by the guest.
 */
#define INVALID_PAGE 0xfff

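/*
 * A privcmd mapping is backed by one of four flavours of uvm object,
 * distinguished below: PTYPE_PRIVCMD maps foreign machine frames
 * directly (PV), PTYPE_PRIVCMD_PHYSMAP goes through the guest physmap
 * (auto-translated guests and acquired resources), PTYPE_GNTDEV_REF
 * maps grant references offered by another domain, and
 * PTYPE_GNTDEV_ALLOC exports locally allocated pages as grants.
 */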
typedef enum _privcmd_type {
	PTYPE_PRIVCMD,
	PTYPE_PRIVCMD_PHYSMAP,
	PTYPE_GNTDEV_REF,
	PTYPE_GNTDEV_ALLOC
} privcmd_type;

struct privcmd_object_privcmd {
	paddr_t base_paddr; /* base address of physical space */
	paddr_t *maddr; /* array of machine addresses to map */
	int domid;
	bool no_translate;
};

struct privcmd_object_gntref {
	paddr_t base_paddr; /* base address of physical space */
	struct ioctl_gntdev_grant_notify notify;
	struct gnttab_map_grant_ref ops[1]; /* variable length */
};

struct privcmd_object_gntalloc {
	vaddr_t gntva;	/* granted area mapped in kernel */
	uint16_t domid;
	uint16_t flags;
	struct ioctl_gntdev_grant_notify notify;
	uint32_t gref_ids[1]; /* variable length */
};

struct privcmd_object {
	struct uvm_object uobj;
	privcmd_type type;
	int npages;
	union {
		struct privcmd_object_privcmd pc;
		struct privcmd_object_gntref gr;
		struct privcmd_object_gntalloc ga;
	} u;
};

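/*
 * Allocation sizes for the two variable-length object flavours: the
 * trailing ops[]/gref_ids[] arrays are declared with one element, so
 * (count - 1) extra elements are appended to the base structure.
 */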
#define PGO_GNTREF_LEN(count) \
    (sizeof(struct privcmd_object) + \
    sizeof(struct gnttab_map_grant_ref) * ((count) - 1))

#define PGO_GNTA_LEN(count) \
    (sizeof(struct privcmd_object) + \
    sizeof(uint32_t) * ((count) - 1))

int privcmd_nobjects = 0;

static void privpgop_reference(struct uvm_object *);
static void privpgop_detach(struct uvm_object *);
static int privpgop_fault(struct uvm_faultinfo *, vaddr_t, struct vm_page **,
    int, int, vm_prot_t, int);
static int privcmd_map_obj(struct vm_map *, vaddr_t,
    struct privcmd_object *, vm_prot_t);


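/*
 * Convert a negated Xen error code to the matching native errno.
 * For example, a hypercall that fails with -22 (EINVAL in Xen's
 * numbering) is returned to userland as the native EINVAL.
 */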
static int
privcmd_xen2bsd_errno(int error)
{
	/*
	 * Xen uses System V error codes.
	 * In order to keep bloat as minimal as possible,
	 * only convert what really impacts us.
	 */

	switch (-error) {
	case 0:
		return 0;
	case 1:
		return EPERM;
	case 2:
		return ENOENT;
	case 3:
		return ESRCH;
	case 4:
		return EINTR;
	case 5:
		return EIO;
	case 6:
		return ENXIO;
	case 7:
		return E2BIG;
	case 8:
		return ENOEXEC;
	case 9:
		return EBADF;
	case 10:
		return ECHILD;
	case 11:
		return EAGAIN;
	case 12:
		return ENOMEM;
	case 13:
		return EACCES;
	case 14:
		return EFAULT;
	case 15:
		return ENOTBLK;
	case 16:
		return EBUSY;
	case 17:
		return EEXIST;
	case 18:
		return EXDEV;
	case 19:
		return ENODEV;
	case 20:
		return ENOTDIR;
	case 21:
		return EISDIR;
	case 22:
		return EINVAL;
	case 23:
		return ENFILE;
	case 24:
		return EMFILE;
	case 25:
		return ENOTTY;
	case 26:
		return ETXTBSY;
	case 27:
		return EFBIG;
	case 28:
		return ENOSPC;
	case 29:
		return ESPIPE;
	case 30:
		return EROFS;
	case 31:
		return EMLINK;
	case 32:
		return EPIPE;
	case 33:
		return EDOM;
	case 34:
		return ERANGE;
	case 35:
		return EDEADLK;
	case 36:
		return ENAMETOOLONG;
	case 37:
		return ENOLCK;
	case 38:
		return ENOSYS;
	case 39:
		return ENOTEMPTY;
	case 40:
		return ELOOP;
	case 42:
		return ENOMSG;
	case 43:
		return EIDRM;
	case 60:
		return ENOSTR;
	case 61:
		return ENODATA;
	case 62:
		return ETIME;
	case 63:
		return ENOSR;
	case 66:
		return EREMOTE;
	case 74:
		return EBADMSG;
	case 75:
		return EOVERFLOW;
	case 84:
		return EILSEQ;
	case 87:
		return EUSERS;
	case 88:
		return ENOTSOCK;
	case 89:
		return EDESTADDRREQ;
	case 90:
		return EMSGSIZE;
	case 91:
		return EPROTOTYPE;
	case 92:
		return ENOPROTOOPT;
	case 93:
		return EPROTONOSUPPORT;
	case 94:
		return ESOCKTNOSUPPORT;
	case 95:
		return EOPNOTSUPP;
	case 96:
		return EPFNOSUPPORT;
	case 97:
		return EAFNOSUPPORT;
	case 98:
		return EADDRINUSE;
	case 99:
		return EADDRNOTAVAIL;
	case 100:
		return ENETDOWN;
	case 101:
		return ENETUNREACH;
	case 102:
		return ENETRESET;
	case 103:
		return ECONNABORTED;
	case 104:
		return ECONNRESET;
	case 105:
		return ENOBUFS;
	case 106:
		return EISCONN;
	case 107:
		return ENOTCONN;
	case 108:
		return ESHUTDOWN;
	case 109:
		return ETOOMANYREFS;
	case 110:
		return ETIMEDOUT;
	case 111:
		return ECONNREFUSED;
	case 112:
		return EHOSTDOWN;
	case 113:
		return EHOSTUNREACH;
	case 114:
		return EALREADY;
	case 115:
		return EINPROGRESS;
	case 116:
		return ESTALE;
	case 122:
		return EDQUOT;
	default:
		printf("unknown xen error code %d\n", -error);
		return -error;
	}
}

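/*
 * Return the protection currently in force over [start, start + size),
 * or UVM_PROT_NONE if the range is not fully mapped; write access wins
 * over read-only.  This bounds what a new privcmd mapping may ask for.
 */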
static vm_prot_t
privcmd_get_map_prot(struct vm_map *map, vaddr_t start, off_t size)
{
	vm_prot_t prot;

	vm_map_lock_read(map);
	/* get protections. This also checks for validity of the mapping */
	if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_WRITE))
		prot = VM_PROT_READ | VM_PROT_WRITE;
	else if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_READ))
		prot = VM_PROT_READ;
	else {
		printf("privcmd_get_map_prot 0x%lx -> 0x%lx "
		    "failed\n",
		    start, (unsigned long)(start + size - 1));
		prot = UVM_PROT_NONE;
	}
	vm_map_unlock_read(map);
	return prot;
}

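/*
 * IOCTL_PRIVCMD_MMAP: map batches of foreign machine frames into the
 * calling process (PV only).  An illustrative userland sketch (not part
 * of this file), assuming an open privcmd file descriptor fd and a
 * target area va that is already mapped (e.g. with mmap(2)):
 *
 *	privcmd_mmap_entry_t ent = {
 *		.va = (unsigned long)va,
 *		.mfn = mfn,		// foreign machine frame number
 *		.npages = 1,
 *	};
 *	privcmd_mmap_t cmd = { .num = 1, .dom = domid, .entry = &ent };
 *	if (ioctl(fd, IOCTL_PRIVCMD_MMAP, &cmd) == -1)
 *		err(1, "IOCTL_PRIVCMD_MMAP");
 */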
static int
privcmd_mmap(struct vop_ioctl_args *ap)
{
#ifndef XENPV
	printf("IOCTL_PRIVCMD_MMAP not supported\n");
	return EINVAL;
#else
	int i, j;
	privcmd_mmap_t *mcmd = ap->a_data;
	privcmd_mmap_entry_t mentry;
	vaddr_t va;
	paddr_t ma;
	struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map;
	paddr_t *maddr;
	struct privcmd_object *obj;
	vm_prot_t prot;
	int error;

	for (i = 0; i < mcmd->num; i++) {
		error = copyin(&mcmd->entry[i], &mentry, sizeof(mentry));
		if (error)
			return EINVAL;
		if (mentry.npages == 0)
			return EINVAL;
		if (mentry.va > VM_MAXUSER_ADDRESS)
			return EINVAL;
		va = mentry.va & ~PAGE_MASK;
		prot = privcmd_get_map_prot(vmm, va, mentry.npages * PAGE_SIZE);
		if (prot == UVM_PROT_NONE)
			return EINVAL;
		maddr = kmem_alloc(sizeof(paddr_t) * mentry.npages,
		    KM_SLEEP);
		ma = ((paddr_t)mentry.mfn) << PGSHIFT;
		for (j = 0; j < mentry.npages; j++) {
			maddr[j] = ma;
			ma += PAGE_SIZE;
		}
		obj = kmem_alloc(sizeof(*obj), KM_SLEEP);
		obj->type = PTYPE_PRIVCMD;
		obj->u.pc.maddr = maddr;
		obj->u.pc.no_translate = false;
		obj->npages = mentry.npages;
		obj->u.pc.domid = mcmd->dom;
		error = privcmd_map_obj(vmm, va, obj, prot);
		if (error)
			return error;
	}
	return 0;
#endif
}

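/*
 * IOCTL_PRIVCMD_MMAPBATCH (v1): like IOCTL_PRIVCMD_MMAP, but with a
 * single flat array of MFNs.  Each frame is test-mapped into a scratch
 * kernel page first; frames that fail are reported back to userland by
 * or'ing 0xF0000000 into the corresponding array slot.
 */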
static int
privcmd_mmapbatch(struct vop_ioctl_args *ap)
{
#ifndef XENPV
	printf("IOCTL_PRIVCMD_MMAPBATCH not supported\n");
	return EINVAL;
#else
	int i;
	privcmd_mmapbatch_t* pmb = ap->a_data;
	vaddr_t va0;
	u_long mfn;
	paddr_t ma;
	struct vm_map *vmm;
	vaddr_t trymap;
	paddr_t *maddr;
	struct privcmd_object *obj;
	vm_prot_t prot;
	int error;

	vmm = &curlwp->l_proc->p_vmspace->vm_map;
	va0 = pmb->addr & ~PAGE_MASK;

	if (pmb->num == 0)
		return EINVAL;
	if (va0 > VM_MAXUSER_ADDRESS)
		return EINVAL;
	if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmb->num)
		return EINVAL;

	prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE);
	if (prot == UVM_PROT_NONE)
		return EINVAL;

	maddr = kmem_alloc(sizeof(paddr_t) * pmb->num, KM_SLEEP);
	/* get a page of KVA to check mappings */
	trymap = uvm_km_alloc(kernel_map, PAGE_SIZE, PAGE_SIZE,
	    UVM_KMF_VAONLY);
	if (trymap == 0) {
		kmem_free(maddr, sizeof(paddr_t) * pmb->num);
		return ENOMEM;
	}

	obj = kmem_alloc(sizeof(*obj), KM_SLEEP);
	obj->type = PTYPE_PRIVCMD;
	obj->u.pc.maddr = maddr;
	obj->u.pc.no_translate = false;
	obj->npages = pmb->num;
	obj->u.pc.domid = pmb->dom;

	for (i = 0; i < pmb->num; ++i) {
		error = copyin(&pmb->arr[i], &mfn, sizeof(mfn));
		if (error != 0) {
			/* XXX: mappings */
			pmap_update(pmap_kernel());
			kmem_free(maddr, sizeof(paddr_t) * pmb->num);
			kmem_free(obj, sizeof(*obj));
			uvm_km_free(kernel_map, trymap, PAGE_SIZE,
			    UVM_KMF_VAONLY);
			return error;
		}
		ma = ((paddr_t)mfn) << PGSHIFT;
		if ((error = pmap_enter_ma(pmap_kernel(), trymap, ma, 0,
		    prot, PMAP_CANFAIL | prot, pmb->dom))) {
			mfn |= 0xF0000000;
			copyout(&mfn, &pmb->arr[i], sizeof(mfn));
			maddr[i] = INVALID_PAGE;
		} else {
			pmap_remove(pmap_kernel(), trymap,
			    trymap + PAGE_SIZE);
			maddr[i] = ma;
		}
	}
	pmap_update(pmap_kernel());
	uvm_km_free(kernel_map, trymap, PAGE_SIZE, UVM_KMF_VAONLY);

	error = privcmd_map_obj(vmm, va0, obj, prot);

	return error;
#endif
}

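/*
 * IOCTL_PRIVCMD_MMAPBATCH_V2: like v1, but per-frame errors are
 * returned through a separate err[] array instead of being encoded in
 * the MFN array.  On non-PV guests the foreign frames are first added
 * to the guest physmap (XENMEM_add_to_physmap_batch), and the resulting
 * local physical pages are what actually get mapped into the process.
 */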
static int
privcmd_mmapbatch_v2(struct vop_ioctl_args *ap)
{
	int i;
	privcmd_mmapbatch_v2_t* pmb = ap->a_data;
	vaddr_t va0;
	u_long mfn;
	struct vm_map *vmm;
	paddr_t *maddr;
	struct privcmd_object *obj;
	vm_prot_t prot;
	int error;
	paddr_t base_paddr = 0;

	vmm = &curlwp->l_proc->p_vmspace->vm_map;
	va0 = pmb->addr & ~PAGE_MASK;

	if (pmb->num == 0)
		return EINVAL;
	if (va0 > VM_MAXUSER_ADDRESS)
		return EINVAL;
	if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmb->num)
		return EINVAL;

	prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE);
	if (prot == UVM_PROT_NONE)
		return EINVAL;

#ifndef XENPV
	KASSERT(xen_feature(XENFEAT_auto_translated_physmap));
	base_paddr = xenmem_alloc_pa(pmb->num * PAGE_SIZE, PAGE_SIZE, true);
	KASSERT(base_paddr != 0);
#endif
	maddr = kmem_alloc(sizeof(paddr_t) * pmb->num, KM_SLEEP);
	obj = kmem_alloc(sizeof(*obj), KM_SLEEP);
	obj->type = PTYPE_PRIVCMD_PHYSMAP;
	obj->u.pc.maddr = maddr;
	obj->u.pc.base_paddr = base_paddr;
	obj->u.pc.no_translate = false;
	obj->npages = pmb->num;
	obj->u.pc.domid = pmb->dom;

	for (i = 0; i < pmb->num; ++i) {
		error = copyin(&pmb->arr[i], &mfn, sizeof(mfn));
		if (error != 0) {
			kmem_free(maddr, sizeof(paddr_t) * pmb->num);
			kmem_free(obj, sizeof(*obj));
#ifndef XENPV
			xenmem_free_pa(base_paddr, pmb->num * PAGE_SIZE);
#endif
			return error;
		}
#ifdef XENPV
		maddr[i] = ((paddr_t)mfn) << PGSHIFT;
#else
		maddr[i] = mfn; /* TMP argument for XENMEM_add_to_physmap */
#endif
	}
	error = privcmd_map_obj(vmm, va0, obj, prot);
	if (error)
		return error;

	/*
	 * Map the range in the user process now.
	 * If Xen returns -ENOENT, retry (paging in progress).
	 */
	for (i = 0; i < pmb->num; i++, va0 += PAGE_SIZE) {
		int err, cerr;
#ifdef XENPV
		for (int j = 0; j < 10; j++) {
			err = pmap_enter_ma(vmm->pmap, va0, maddr[i], 0,
			    prot, PMAP_CANFAIL | prot,
			    pmb->dom);
			if (err != -2) /* Xen ENOENT */
				break;
			if (kpause("xnoent", 1, mstohz(100), NULL))
				break;
		}
		if (err) {
			maddr[i] = INVALID_PAGE;
		}
#else /* XENPV */
		xen_add_to_physmap_batch_t add;
		u_long idx;
		xen_pfn_t gpfn;
		int err2;
		memset(&add, 0, sizeof(add));

		add.domid = DOMID_SELF;
		add.space = XENMAPSPACE_gmfn_foreign;
		add.size = 1;
		add.foreign_domid = pmb->dom;
		idx = maddr[i];
		set_xen_guest_handle(add.idxs, &idx);
		maddr[i] = INVALID_PAGE;
		gpfn = (base_paddr >> PGSHIFT) + i;
		set_xen_guest_handle(add.gpfns, &gpfn);
		err2 = 0;
		set_xen_guest_handle(add.errs, &err2);
		err = HYPERVISOR_memory_op(XENMEM_add_to_physmap_batch, &add);
		if (err < 0) {
			printf("privcmd_mmapbatch_v2: "
			    "XENMEM_add_to_physmap_batch failed %d\n", err);
			privpgop_detach(&obj->uobj);
			return privcmd_xen2bsd_errno(err);
		}
		err = err2;
		if (err == 0)
			maddr[i] = base_paddr + i * PAGE_SIZE;
#endif /* XENPV */

		cerr = copyout(&err, &pmb->err[i], sizeof(pmb->err[i]));
		if (cerr) {
			privpgop_detach(&obj->uobj);
			return cerr;
		}
	}
	return 0;
}

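/*
 * IOCTL_PRIVCMD_MMAP_RESOURCE: map a Xen resource (e.g. a domain's
 * grant table or ioreq server pages) via XENMEM_acquire_resource.
 * On PV the hypervisor fills pfns[] with machine frames; on
 * auto-translated guests it expects preallocated guest frames instead.
 */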
static int
privcmd_mmap_resource(struct vop_ioctl_args *ap)
{
	int i;
	privcmd_mmap_resource_t* pmr = ap->a_data;
	vaddr_t va0;
	struct vm_map *vmm;
	struct privcmd_object *obj;
	vm_prot_t prot;
	int error;
	struct xen_mem_acquire_resource op;
	xen_pfn_t *pfns;
	paddr_t *maddr;
	paddr_t base_paddr = 0;

	vmm = &curlwp->l_proc->p_vmspace->vm_map;
	va0 = pmr->addr & ~PAGE_MASK;

	if (pmr->num == 0)
		return EINVAL;
	if (va0 > VM_MAXUSER_ADDRESS)
		return EINVAL;
	if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmr->num)
		return EINVAL;

	prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE);
	if (prot == UVM_PROT_NONE)
		return EINVAL;

	pfns = kmem_alloc(sizeof(xen_pfn_t) * pmr->num, KM_SLEEP);
#ifndef XENPV
	KASSERT(xen_feature(XENFEAT_auto_translated_physmap));
	base_paddr = xenmem_alloc_pa(pmr->num * PAGE_SIZE, PAGE_SIZE, true);
	KASSERT(base_paddr != 0);
	for (i = 0; i < pmr->num; i++) {
		pfns[i] = (base_paddr >> PGSHIFT) + i;
	}
#else
	KASSERT(!xen_feature(XENFEAT_auto_translated_physmap));
#endif

	memset(&op, 0, sizeof(op));
	op.domid = pmr->dom;
	op.type = pmr->type;
	op.id = pmr->id;
	op.frame = pmr->idx;
	op.nr_frames = pmr->num;
	set_xen_guest_handle(op.frame_list, pfns);

	error = HYPERVISOR_memory_op(XENMEM_acquire_resource, &op);
	if (error) {
		printf("%s: XENMEM_acquire_resource failed: %d\n",
		    __func__, error);
		kmem_free(pfns, sizeof(xen_pfn_t) * pmr->num);
#ifndef XENPV
		xenmem_free_pa(base_paddr, pmr->num * PAGE_SIZE);
#endif
		return privcmd_xen2bsd_errno(error);
	}
	maddr = kmem_alloc(sizeof(paddr_t) * pmr->num, KM_SLEEP);
	for (i = 0; i < pmr->num; i++) {
		maddr[i] = pfns[i] << PGSHIFT;
	}
	kmem_free(pfns, sizeof(xen_pfn_t) * pmr->num);

	obj = kmem_alloc(sizeof(*obj), KM_SLEEP);
	obj->type = PTYPE_PRIVCMD_PHYSMAP;
	obj->u.pc.base_paddr = base_paddr;
	obj->u.pc.maddr = maddr;
	obj->u.pc.no_translate = true;
	obj->npages = pmr->num;
	obj->u.pc.domid = (op.flags & XENMEM_rsrc_acq_caller_owned) ?
	    DOMID_SELF : pmr->dom;

	error = privcmd_map_obj(vmm, va0, obj, prot);
	return error;
}

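/*
 * IOCTL_GNTDEV_MMAP_GRANT_REF: map pages granted by another domain,
 * identified by (domid, grant reference) pairs.  The actual
 * GNTTABOP_map_grant_ref happens lazily in privpgop_fault(), when the
 * process first touches the mapping.
 */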
static int
privcmd_map_gref(struct vop_ioctl_args *ap)
{
	struct ioctl_gntdev_mmap_grant_ref *mgr = ap->a_data;
	struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map;
	struct privcmd_object *obj;
	vaddr_t va0 = (vaddr_t)mgr->va & ~PAGE_MASK;
	vm_prot_t prot;
	int error;

	if (mgr->count == 0)
		return EINVAL;
	if (va0 > VM_MAXUSER_ADDRESS)
		return EINVAL;
	if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < mgr->count)
		return EINVAL;
	if (mgr->notify.offset < 0 || mgr->notify.offset > mgr->count)
		return EINVAL;

	prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE);
	if (prot == UVM_PROT_NONE)
		return EINVAL;

	obj = kmem_alloc(PGO_GNTREF_LEN(mgr->count), KM_SLEEP);

	obj->type = PTYPE_GNTDEV_REF;
	obj->npages = mgr->count;
	memcpy(&obj->u.gr.notify, &mgr->notify,
	    sizeof(obj->u.gr.notify));
#ifndef XENPV
	KASSERT(xen_feature(XENFEAT_auto_translated_physmap));
	obj->u.gr.base_paddr = xenmem_alloc_pa(obj->npages * PAGE_SIZE,
	    PAGE_SIZE, true);
	KASSERT(obj->u.gr.base_paddr != 0);
#else
	obj->u.gr.base_paddr = 0;
#endif /* !XENPV */

	for (int i = 0; i < obj->npages; ++i) {
		struct ioctl_gntdev_grant_ref gref;
		error = copyin(&mgr->refs[i], &gref, sizeof(gref));
		if (error != 0) {
			goto err1;
		}
#ifdef XENPV
		obj->u.gr.ops[i].host_addr = 0;
		obj->u.gr.ops[i].flags = GNTMAP_host_map |
		    GNTMAP_application_map | GNTMAP_contains_pte;
#else /* XENPV */
		obj->u.gr.ops[i].host_addr =
		    obj->u.gr.base_paddr + PAGE_SIZE * i;
		obj->u.gr.ops[i].flags = GNTMAP_host_map;
#endif /* XENPV */
		obj->u.gr.ops[i].dev_bus_addr = 0;
		obj->u.gr.ops[i].ref = gref.ref;
		obj->u.gr.ops[i].dom = gref.domid;
		obj->u.gr.ops[i].handle = -1;
		if (prot == UVM_PROT_READ)
			obj->u.gr.ops[i].flags |= GNTMAP_readonly;
	}
	error = privcmd_map_obj(vmm, va0, obj, prot);
	return error;
err1:
#ifndef XENPV
	xenmem_free_pa(obj->u.gr.base_paddr, obj->npages * PAGE_SIZE);
#endif
	kmem_free(obj, PGO_GNTREF_LEN(obj->npages));
	return error;
}

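/*
 * IOCTL_GNTDEV_ALLOC_GRANT_REF: allocate local wired pages, grant the
 * requesting domain access to them, and hand the resulting grant
 * references back to userland, which can then access the pages through
 * the mapping created here.
 */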
static int
privcmd_alloc_gref(struct vop_ioctl_args *ap)
{
	struct ioctl_gntdev_alloc_grant_ref *mga = ap->a_data;
	struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map;
	struct privcmd_object *obj;
	vaddr_t va0 = (vaddr_t)mga->va & ~PAGE_MASK;
	vm_prot_t prot;
	int error, ret;

	if (mga->count == 0)
		return EINVAL;
	if (va0 > VM_MAXUSER_ADDRESS)
		return EINVAL;
	if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < mga->count)
		return EINVAL;
	if (mga->notify.offset < 0 || mga->notify.offset > mga->count)
		return EINVAL;

	prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE);
	if (prot == UVM_PROT_NONE)
		return EINVAL;

	obj = kmem_alloc(PGO_GNTA_LEN(mga->count), KM_SLEEP);

	obj->type = PTYPE_GNTDEV_ALLOC;
	obj->npages = mga->count;
	obj->u.ga.domid = mga->domid;
	memcpy(&obj->u.ga.notify, &mga->notify,
	    sizeof(obj->u.ga.notify));
	obj->u.ga.gntva = uvm_km_alloc(kernel_map,
	    PAGE_SIZE * obj->npages, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO);
	if (obj->u.ga.gntva == 0) {
		error = ENOMEM;
		goto err1;
	}

	for (int i = 0; i < obj->npages; ++i) {
		paddr_t ma;
		vaddr_t va = obj->u.ga.gntva + i * PAGE_SIZE;
		grant_ref_t id;
		bool ro = ((mga->flags & GNTDEV_ALLOC_FLAG_WRITABLE) == 0);
		(void)pmap_extract_ma(pmap_kernel(), va, &ma);
		if ((ret = xengnt_grant_access(mga->domid, ma, ro, &id)) != 0) {
			printf("%s: xengnt_grant_access failed: %d\n",
			    __func__, ret);
			/* revoke the grants handed out so far */
			for (int j = 0; j < i; j++) {
				xengnt_revoke_access(obj->u.ga.gref_ids[j]);
			}
			error = ret;
			goto err2;
		}
		obj->u.ga.gref_ids[i] = id;
	}

	error = copyout(&obj->u.ga.gref_ids[0], mga->gref_ids,
	    sizeof(uint32_t) * obj->npages);
	if (error) {
		for (int i = 0; i < obj->npages; ++i) {
			xengnt_revoke_access(obj->u.ga.gref_ids[i]);
		}
		goto err2;
	}

	error = privcmd_map_obj(vmm, va0, obj, prot);
	return error;

err2:
	uvm_km_free(kernel_map, obj->u.ga.gntva,
	    PAGE_SIZE * obj->npages, UVM_KMF_WIRED);
err1:
	kmem_free(obj, PGO_GNTA_LEN(obj->npages));
	return error;
}

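/*
 * Single kernfs ioctl entry point; dispatches hypercalls and the
 * mapping ioctls above.  An illustrative userland hypercall (a sketch
 * only, assuming kernfs is mounted at the usual /kern):
 *
 *	int fd = open("/kern/xen/privcmd", O_RDWR);
 *	privcmd_hypercall_t hc = {
 *		.op = __HYPERVISOR_xen_version,
 *		.arg = { XENVER_version },
 *	};
 *	if (ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &hc) == 0)
 *		printf("xen version %lx\n", hc.retval);
 */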
static int
privcmd_ioctl(void *v)
{
	struct vop_ioctl_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		u_long a_command;
		void *a_data;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	int error = 0;

	switch (ap->a_command) {
	case IOCTL_PRIVCMD_HYPERCALL:
	case IOCTL_PRIVCMD_HYPERCALL_OLD:
		/*
		 * oprivcmd_hypercall_t is privcmd_hypercall_t
		 * without the last entry.
		 */
	{
		privcmd_hypercall_t *hc = ap->a_data;
		if (hc->op >= (PAGE_SIZE >> 5))
			return EINVAL;
		error = -EOPNOTSUPP;
#if defined(__i386__)
		__asm volatile (
			"pushl %%ebx; pushl %%ecx; pushl %%edx;"
			"pushl %%esi; pushl %%edi; "
			"movl 4(%%eax),%%ebx ;"
			"movl 8(%%eax),%%ecx ;"
			"movl 12(%%eax),%%edx ;"
			"movl 16(%%eax),%%esi ;"
			"movl 20(%%eax),%%edi ;"
			"movl (%%eax),%%eax ;"
			"shll $5,%%eax ;"
			"addl $hypercall_page,%%eax ;"
			"call *%%eax ;"
			"popl %%edi; popl %%esi; popl %%edx;"
			"popl %%ecx; popl %%ebx"
			: "=a" (error) : "0" (ap->a_data) : "memory" );
#endif /* __i386__ */
#if defined(__x86_64__)
#ifndef XENPV
		/* the hypervisor can't access user memory if SMAP is enabled */
		smap_disable();
#endif
		{
		long i1, i2, i3;
		__asm volatile (
			"movq %8,%%r10; movq %9,%%r8;"
			"shll $5,%%eax ;"
			"addq $hypercall_page,%%rax ;"
			"call *%%rax"
			: "=a" (error), "=D" (i1),
			  "=S" (i2), "=d" (i3)
			: "0" ((unsigned int)hc->op),
			  "1" (hc->arg[0]),
			  "2" (hc->arg[1]),
			  "3" (hc->arg[2]),
			  "g" (hc->arg[3]),
			  "g" (hc->arg[4])
			: "r8", "r10", "memory" );
		}
#ifndef XENPV
		smap_enable();
#endif
#endif /* __x86_64__ */
		if (ap->a_command == IOCTL_PRIVCMD_HYPERCALL) {
			if (error >= 0) {
				hc->retval = error;
				error = 0;
			} else {
				/* error occurred, return the errno */
				error = privcmd_xen2bsd_errno(error);
				hc->retval = 0;
			}
		} else {
			error = privcmd_xen2bsd_errno(error);
		}
		break;
	}
	case IOCTL_PRIVCMD_MMAP:
		return privcmd_mmap(ap);

	case IOCTL_PRIVCMD_MMAPBATCH:
		return privcmd_mmapbatch(ap);

	case IOCTL_PRIVCMD_MMAPBATCH_V2:
		return privcmd_mmapbatch_v2(ap);

	case IOCTL_PRIVCMD_MMAP_RESOURCE:
		return privcmd_mmap_resource(ap);

	case IOCTL_GNTDEV_MMAP_GRANT_REF:
		return privcmd_map_gref(ap);

	case IOCTL_GNTDEV_ALLOC_GRANT_REF:
		return privcmd_alloc_gref(ap);
	default:
		error = EINVAL;
	}

	return error;
}

static const struct uvm_pagerops privpgops = {
	.pgo_reference = privpgop_reference,
	.pgo_detach = privpgop_detach,
	.pgo_fault = privpgop_fault,
};

static void
privpgop_reference(struct uvm_object *uobj)
{
	rw_enter(uobj->vmobjlock, RW_WRITER);
	uobj->uo_refs++;
	rw_exit(uobj->vmobjlock);
}

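/*
 * Run the unmap-time notification requested by userland: optionally
 * signal an event channel and/or clear one byte in the granted area.
 * For PTYPE_GNTDEV_REF objects (gmops != NULL) the page holding that
 * byte must first be mapped into the kernel temporarily.
 */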
static void
privcmd_notify(struct ioctl_gntdev_grant_notify *notify, vaddr_t va,
    struct gnttab_map_grant_ref *gmops)
{
	if (notify->action & UNMAP_NOTIFY_SEND_EVENT) {
		hypervisor_notify_via_evtchn(notify->event_channel_port);
	}
	if ((notify->action & UNMAP_NOTIFY_CLEAR_BYTE) == 0) {
		notify->action = 0;
		return;
	}
	if (va == 0) {
		struct gnttab_map_grant_ref op;
		struct gnttab_unmap_grant_ref uop;
		int i = notify->offset / PAGE_SIZE;
		int o = notify->offset % PAGE_SIZE;
		int err;
#ifndef XENPV
		paddr_t base_paddr;
		base_paddr = xenmem_alloc_pa(PAGE_SIZE, PAGE_SIZE, true);
#endif

		KASSERT(gmops != NULL);
		va = uvm_km_alloc(kernel_map, PAGE_SIZE, PAGE_SIZE,
		    UVM_KMF_VAONLY | UVM_KMF_WAITVA);
#ifndef XENPV
		op.host_addr = base_paddr;
#else
		op.host_addr = va;
#endif
		op.dev_bus_addr = 0;
		op.ref = gmops[i].ref;
		op.dom = gmops[i].dom;
		op.handle = -1;
		op.flags = GNTMAP_host_map;
		err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
		if (err == 0 && op.status == GNTST_okay) {
#ifndef XENPV
			pmap_kenter_pa(va, base_paddr,
			    VM_PROT_READ | VM_PROT_WRITE, 0);
#endif
			char *n = (void *)(va + o);
			*n = 0;
#ifndef XENPV
			pmap_kremove(va, PAGE_SIZE);
			uop.host_addr = base_paddr;
#else
			uop.host_addr = va;
#endif
			uop.handle = op.handle;
			uop.dev_bus_addr = 0;
			(void)HYPERVISOR_grant_table_op(
			    GNTTABOP_unmap_grant_ref, &uop, 1);
		}
		uvm_km_free(kernel_map, va, PAGE_SIZE, UVM_KMF_VAONLY);
#ifndef XENPV
		xenmem_free_pa(base_paddr, PAGE_SIZE);
#endif
	} else {
		KASSERT(gmops == NULL);
		char *n = (void *)(va + notify->offset);
		*n = 0;
	}
	notify->action = 0;
}

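/*
 * Final-reference teardown: run the pending notification, undo the
 * physmap/grant state for each object flavour and free the object.
 */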
static void
privpgop_detach(struct uvm_object *uobj)
{
	struct privcmd_object *pobj = (struct privcmd_object *)uobj;

	rw_enter(uobj->vmobjlock, RW_WRITER);
	KASSERT(uobj->uo_refs > 0);
	if (uobj->uo_refs > 1) {
		uobj->uo_refs--;
		rw_exit(uobj->vmobjlock);
		return;
	}
	rw_exit(uobj->vmobjlock);
	switch (pobj->type) {
	case PTYPE_PRIVCMD_PHYSMAP:
#ifndef XENPV
		for (int i = 0; i < pobj->npages; i++) {
			if (pobj->u.pc.maddr[i] != INVALID_PAGE) {
				struct xen_remove_from_physmap rm;
				rm.domid = DOMID_SELF;
				rm.gpfn = pobj->u.pc.maddr[i] >> PGSHIFT;
				HYPERVISOR_memory_op(
				    XENMEM_remove_from_physmap, &rm);
			}
		}
		xenmem_free_pa(pobj->u.pc.base_paddr, pobj->npages * PAGE_SIZE);
#endif
		/* FALLTHROUGH */
	case PTYPE_PRIVCMD:
		kmem_free(pobj->u.pc.maddr, sizeof(paddr_t) * pobj->npages);
		uvm_obj_destroy(uobj, true);
		kmem_free(pobj, sizeof(struct privcmd_object));
		break;
	case PTYPE_GNTDEV_REF:
	{
		privcmd_notify(&pobj->u.gr.notify, 0, pobj->u.gr.ops);
#ifndef XENPV
		KASSERT(pobj->u.gr.base_paddr != 0);
		for (int i = 0; i < pobj->npages; i++) {
			struct xen_remove_from_physmap rm;
			rm.domid = DOMID_SELF;
			rm.gpfn = (pobj->u.gr.base_paddr >> PGSHIFT) + i;
			HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &rm);
		}
		xenmem_free_pa(pobj->u.gr.base_paddr, pobj->npages * PAGE_SIZE);
#endif
		kmem_free(pobj, PGO_GNTREF_LEN(pobj->npages));
		break;
	}
	case PTYPE_GNTDEV_ALLOC:
		privcmd_notify(&pobj->u.ga.notify, pobj->u.ga.gntva, NULL);
		for (int i = 0; i < pobj->npages; ++i) {
			xengnt_revoke_access(pobj->u.ga.gref_ids[i]);
		}
		uvm_km_free(kernel_map, pobj->u.ga.gntva,
		    PAGE_SIZE * pobj->npages, UVM_KMF_WIRED);
		kmem_free(pobj, PGO_GNTA_LEN(pobj->npages));
	}
	privcmd_nobjects--;
}

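/*
 * Page fault handler: resolve faults on privcmd mappings by entering
 * the machine frame, granted page or wired kernel page that backs the
 * faulting address directly into the process pmap.
 */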
static int
privpgop_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
    int npages, int centeridx, vm_prot_t access_type, int flags)
{
	struct vm_map_entry *entry = ufi->entry;
	struct uvm_object *uobj = entry->object.uvm_obj;
	struct privcmd_object *pobj = (struct privcmd_object *)uobj;
	int maddr_i, i, error = 0;

	/* compute offset from start of map */
	maddr_i = (entry->offset + (vaddr - entry->start)) >> PAGE_SHIFT;
	if (maddr_i + npages > pobj->npages) {
		return EINVAL;
	}
	for (i = 0; i < npages; i++, maddr_i++, vaddr += PAGE_SIZE) {
		if ((flags & PGO_ALLPAGES) == 0 && i != centeridx)
			continue;
		if (pps[i] == PGO_DONTCARE)
			continue;
		switch (pobj->type) {
		case PTYPE_PRIVCMD:
		case PTYPE_PRIVCMD_PHYSMAP:
		{
			u_int pm_flags = PMAP_CANFAIL | ufi->entry->protection;
#ifdef XENPV
			if (pobj->u.pc.no_translate)
				pm_flags |= PMAP_MD_XEN_NOTR;
#endif
			if (pobj->u.pc.maddr[maddr_i] == INVALID_PAGE) {
				/* This has already been flagged as an error. */
				error = EFAULT;
				goto out;
			}
			error = pmap_enter_ma(ufi->orig_map->pmap, vaddr,
			    pobj->u.pc.maddr[maddr_i], 0,
			    ufi->entry->protection, pm_flags,
			    pobj->u.pc.domid);
			if (error == ENOMEM) {
				goto out;
			}
			if (error) {
				pobj->u.pc.maddr[maddr_i] = INVALID_PAGE;
				error = EFAULT;
			}
			break;
		}
		case PTYPE_GNTDEV_REF:
		{
			struct pmap *pmap = ufi->orig_map->pmap;
			if (pmap_enter_gnt(pmap, vaddr, entry->start,
			    pobj->npages, &pobj->u.gr.ops[0]) != GNTST_okay) {
				error = EFAULT;
				goto out;
			}
			break;
		}
		case PTYPE_GNTDEV_ALLOC:
		{
			paddr_t pa;
			if (!pmap_extract(pmap_kernel(),
			    pobj->u.ga.gntva + maddr_i * PAGE_SIZE, &pa)) {
				error = EFAULT;
				goto out;
			}
			error = pmap_enter(ufi->orig_map->pmap, vaddr, pa,
			    ufi->entry->protection,
			    PMAP_CANFAIL | ufi->entry->protection);
			if (error == ENOMEM) {
				goto out;
			}
			break;
		}
		}
		if (error) {
			/* XXX for proper ptp accounting */
			pmap_remove(ufi->orig_map->pmap, vaddr,
			    vaddr + PAGE_SIZE);
		}
	}
out:
	pmap_update(ufi->orig_map->pmap);
	uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj);
	return error;
}

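/*
 * Install a privcmd object into the given map at a fixed address,
 * replacing whatever was mapped there.  On failure the object is
 * detached (and thus freed); the caller must not touch it afterwards.
 */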
static int
privcmd_map_obj(struct vm_map *map, vaddr_t start, struct privcmd_object *obj,
    vm_prot_t prot)
{
	int error;
	uvm_flag_t uvmflag;
	vaddr_t newstart = start;
	off_t size = ((off_t)obj->npages << PGSHIFT);

	privcmd_nobjects++;
	uvm_obj_init(&obj->uobj, &privpgops, true, 1);
	uvmflag = UVM_MAPFLAG(prot, prot, UVM_INH_NONE, UVM_ADV_NORMAL,
	    UVM_FLAG_FIXED | UVM_FLAG_UNMAP | UVM_FLAG_NOMERGE);
	error = uvm_map(map, &newstart, size, &obj->uobj, 0, 0, uvmflag);

	if (error)
		obj->uobj.pgops->pgo_detach(&obj->uobj);
	return error;
}

static const struct kernfs_fileop privcmd_fileops[] = {
	{ .kf_fileop = KERNFS_FILEOP_IOCTL, .kf_vop = privcmd_ioctl },
};

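/*
 * Register the "privcmd" node under the kernfs xen directory (only in
 * privileged domains), wiring its ioctl vnode op to privcmd_ioctl().
 */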
void
xenprivcmd_init(void)
{
	kernfs_entry_t *dkt;
	kfstype kfst;

	if (!xendomain_is_privileged())
		return;

	kfst = KERNFS_ALLOCTYPE(privcmd_fileops);

	KERNFS_ALLOCENTRY(dkt, KM_SLEEP);
	KERNFS_INITENTRY(dkt, DT_REG, "privcmd", NULL, kfst, VREG,
	    PRIVCMD_MODE);
	kernfs_addentry(kernxen_pkt, dkt);
}