xref: /netbsd-src/sys/arch/xen/xen/privcmd.c (revision aaff4d1b29b0c794ce0de202a3cfb613bbc99991)
1 /* $NetBSD: privcmd.c,v 1.66 2022/09/01 15:32:16 bouyer Exp $ */
2 
3 /*-
4  * Copyright (c) 2004 Christian Limpach.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: privcmd.c,v 1.66 2022/09/01 15:32:16 bouyer Exp $");
31 
32 #include "opt_xen.h"
33 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/vnode.h>
39 #include <sys/dirent.h>
40 #include <sys/stat.h>
41 #include <sys/proc.h>
42 
43 #include <miscfs/specfs/specdev.h>
44 #include <miscfs/kernfs/kernfs.h>
45 
46 #include <uvm/uvm.h>
47 #include <uvm/uvm_fault.h>
48 #include <uvm/uvm_fault_i.h>
49 
50 #include <xen/kernfs_machdep.h>
51 #include <xen/hypervisor.h>
52 #include <xen/xen.h>
53 #include <xen/xenio.h>
54 #include <xen/xenmem.h>
55 #include <xen/xenpmap.h>
56 #include <xen/granttables.h>
57 
58 #define	PRIVCMD_MODE	(S_IRUSR)
59 
60 /* Magic value used to mark invalid pages.
61  * It must lie within the page offset (i.e. not be page-aligned),
62  * since page-aligned values, including 0x0, are used by the guest.
63  */
64 #define INVALID_PAGE	0xfff
65 
66 typedef enum _privcmd_type {
67 	PTYPE_PRIVCMD,
68 	PTYPE_PRIVCMD_PHYSMAP,
69 	PTYPE_GNTDEV_REF,
70 	PTYPE_GNTDEV_ALLOC
71 } privcmd_type;
72 
73 struct privcmd_object_privcmd {
74 	paddr_t base_paddr; /* base address of physical space */
75 	paddr_t *maddr;	/* array of machine addresses to map */
76 	int	domid;
77 	bool	no_translate;
78 };
79 
80 struct privcmd_object_gntref {
81 	paddr_t base_paddr; /* base address of physical space */
82 	struct ioctl_gntdev_grant_notify notify;
83 	struct gnttab_map_grant_ref ops[1]; /* variable length */
84 };
85 
86 struct privcmd_object_gntalloc {
87 	vaddr_t	gntva;	/* granted area mapped in kernel */
88 	uint16_t domid;
89 	uint16_t flags;
90 	struct ioctl_gntdev_grant_notify notify;
91 	uint32_t gref_ids[1]; /* variable length */
92 };
93 
94 struct privcmd_object {
95 	struct uvm_object uobj;
96 	privcmd_type type;
97 	int	npages;
98 	union {
99 		struct privcmd_object_privcmd pc;
100 		struct privcmd_object_gntref gr;
101 		struct privcmd_object_gntalloc ga;
102 	} u;
103 };
104 
105 #define PGO_GNTREF_LEN(count) \
106     (sizeof(struct privcmd_object) + \
107 	sizeof(struct gnttab_map_grant_ref) * ((count) - 1))
108 
109 #define PGO_GNTA_LEN(count) \
110     (sizeof(struct privcmd_object) + \
111 	sizeof(uint32_t) * ((count) - 1))
112 
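/* Number of live privcmd uvm objects (see privcmd_map_obj()/privpgop_detach()). */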
113 int privcmd_nobjects = 0;
114 
115 static void privpgop_reference(struct uvm_object *);
116 static void privpgop_detach(struct uvm_object *);
117 static int privpgop_fault(struct uvm_faultinfo *, vaddr_t , struct vm_page **,
118 			  int, int, vm_prot_t, int);
119 static int privcmd_map_obj(struct vm_map *, vaddr_t,
120 			   struct privcmd_object *, vm_prot_t);
121 
122 
123 static int
124 privcmd_xen2bsd_errno(int error)
125 {
126 	/*
127 	 * Xen uses System V error codes.
128 	 * In order to keep bloat to a minimum,
129 	 * only convert what really impacts us.
130 	 */
131 
132 	switch (-error) {
133 	case 0:
134 		return 0;
135 	case 1:
136 		return EPERM;
137 	case 2:
138 		return ENOENT;
139 	case 3:
140 		return ESRCH;
141 	case 4:
142 		return EINTR;
143 	case 5:
144 		return EIO;
145 	case 6:
146 		return ENXIO;
147 	case 7:
148 		return E2BIG;
149 	case 8:
150 		return ENOEXEC;
151 	case 9:
152 		return EBADF;
153 	case 10:
154 		return ECHILD;
155 	case 11:
156 		return EAGAIN;
157 	case 12:
158 		return ENOMEM;
159 	case 13:
160 		return EACCES;
161 	case 14:
162 		return EFAULT;
163 	case 15:
164 		return ENOTBLK;
165 	case 16:
166 		return EBUSY;
167 	case 17:
168 		return EEXIST;
169 	case 18:
170 		return EXDEV;
171 	case 19:
172 		return ENODEV;
173 	case 20:
174 		return ENOTDIR;
175 	case 21:
176 		return EISDIR;
177 	case 22:
178 		return EINVAL;
179 	case 23:
180 		return ENFILE;
181 	case 24:
182 		return EMFILE;
183 	case 25:
184 		return ENOTTY;
185 	case 26:
186 		return ETXTBSY;
187 	case 27:
188 		return EFBIG;
189 	case 28:
190 		return ENOSPC;
191 	case 29:
192 		return ESPIPE;
193 	case 30:
194 		return EROFS;
195 	case 31:
196 		return EMLINK;
197 	case 32:
198 		return EPIPE;
199 	case 33:
200 		return EDOM;
201 	case 34:
202 		return ERANGE;
203 	case 35:
204 		return EDEADLK;
205 	case 36:
206 		return ENAMETOOLONG;
207 	case 37:
208 		return ENOLCK;
209 	case 38:
210 		return ENOSYS;
211 	case 39:
212 		return ENOTEMPTY;
213 	case 40:
214 		return ELOOP;
215 	case 42:
216 		return ENOMSG;
217 	case 43:
218 		return EIDRM;
219 	case 60:
220 		return ENOSTR;
221 	case 61:
222 		return ENODATA;
223 	case 62:
224 		return ETIME;
225 	case 63:
226 		return ENOSR;
227 	case 66:
228 		return EREMOTE;
229 	case 74:
230 		return EBADMSG;
231 	case 75:
232 		return EOVERFLOW;
233 	case 84:
234 		return EILSEQ;
235 	case 87:
236 		return EUSERS;
237 	case 88:
238 		return ENOTSOCK;
239 	case 89:
240 		return EDESTADDRREQ;
241 	case 90:
242 		return EMSGSIZE;
243 	case 91:
244 		return EPROTOTYPE;
245 	case 92:
246 		return ENOPROTOOPT;
247 	case 93:
248 		return EPROTONOSUPPORT;
249 	case 94:
250 		return ESOCKTNOSUPPORT;
251 	case 95:
252 		return EOPNOTSUPP;
253 	case 96:
254 		return EPFNOSUPPORT;
255 	case 97:
256 		return EAFNOSUPPORT;
257 	case 98:
258 		return EADDRINUSE;
259 	case 99:
260 		return EADDRNOTAVAIL;
261 	case 100:
262 		return ENETDOWN;
263 	case 101:
264 		return ENETUNREACH;
265 	case 102:
266 		return ENETRESET;
267 	case 103:
268 		return ECONNABORTED;
269 	case 104:
270 		return ECONNRESET;
271 	case 105:
272 		return ENOBUFS;
273 	case 106:
274 		return EISCONN;
275 	case 107:
276 		return ENOTCONN;
277 	case 108:
278 		return ESHUTDOWN;
279 	case 109:
280 		return ETOOMANYREFS;
281 	case 110:
282 		return ETIMEDOUT;
283 	case 111:
284 		return ECONNREFUSED;
285 	case 112:
286 		return EHOSTDOWN;
287 	case 113:
288 		return EHOSTUNREACH;
289 	case 114:
290 		return EALREADY;
291 	case 115:
292 		return EINPROGRESS;
293 	case 116:
294 		return ESTALE;
295 	case 122:
296 		return EDQUOT;
297 	default:
298 		printf("unknown xen error code %d\n", -error);
299 		return -error;
300 	}
301 }
302 
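/*
 * Return the protection of the existing user mapping covering
 * [start, start + size); UVM_PROT_NONE if the range is not at least readable.
 */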
303 static vm_prot_t
304 privcmd_get_map_prot(struct vm_map *map, vaddr_t start, off_t size)
305 {
306 	vm_prot_t prot;
307 
308 	vm_map_lock_read(map);
309 	/* get protections. This also checks the validity of the mapping */
310 	if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_WRITE))
311 		prot = VM_PROT_READ | VM_PROT_WRITE;
312 	else if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_READ))
313 		prot = VM_PROT_READ;
314 	else {
315 		printf("privcmd_get_map_prot 0x%lx -> 0x%lx "
316 		    "failed\n",
317 		    start, (unsigned long)(start + size - 1));
318 		prot = UVM_PROT_NONE;
319 	}
320 	vm_map_unlock_read(map);
321 	return prot;
322 }
323 
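/*
 * IOCTL_PRIVCMD_MMAP: map ranges of foreign machine frames into the
 * calling process (XENPV only). Each entry supplies a user VA, a
 * starting MFN and a page count.
 */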
324 static int
325 privcmd_mmap(struct vop_ioctl_args *ap)
326 {
327 #ifndef XENPV
328 	printf("IOCTL_PRIVCMD_MMAP not supported\n");
329 	return EINVAL;
330 #else
331 	int i, j;
332 	privcmd_mmap_t *mcmd = ap->a_data;
333 	privcmd_mmap_entry_t mentry;
334 	vaddr_t va;
335 	paddr_t ma;
336 	struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map;
337 	paddr_t *maddr;
338 	struct privcmd_object *obj;
339 	vm_prot_t prot;
340 	int error;
341 
342 	for (i = 0; i < mcmd->num; i++) {
343 		error = copyin(&mcmd->entry[i], &mentry, sizeof(mentry));
344 		if (error)
345 			return EINVAL;
346 		if (mentry.npages == 0)
347 			return EINVAL;
348 		if (mentry.va > VM_MAXUSER_ADDRESS)
349 			return EINVAL;
350 		va = mentry.va & ~PAGE_MASK;
351 		prot = privcmd_get_map_prot(vmm, va, mentry.npages * PAGE_SIZE);
352 		if (prot == UVM_PROT_NONE)
353 			return EINVAL;
354 		maddr = kmem_alloc(sizeof(paddr_t) * mentry.npages,
355 		    KM_SLEEP);
356 		ma = ((paddr_t)mentry.mfn) <<  PGSHIFT;
357 		for (j = 0; j < mentry.npages; j++) {
358 			maddr[j] = ma;
359 			ma += PAGE_SIZE;
360 		}
361 		obj = kmem_alloc(sizeof(*obj), KM_SLEEP);
362 		obj->type = PTYPE_PRIVCMD;
363 		obj->u.pc.maddr = maddr;
364 		obj->u.pc.no_translate = false;
365 		obj->npages = mentry.npages;
366 		obj->u.pc.domid = mcmd->dom;
367 		error  = privcmd_map_obj(vmm, va, obj, prot);
368 		if (error)
369 			return error;
370 	}
371 	return 0;
372 #endif
373 }
374 
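/*
 * IOCTL_PRIVCMD_MMAPBATCH: map an array of foreign MFNs at a user VA
 * (XENPV only). Each MFN is probed via a temporary kernel mapping;
 * frames that fail are reported back to userland and marked invalid.
 */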
375 static int
376 privcmd_mmapbatch(struct vop_ioctl_args *ap)
377 {
378 #ifndef XENPV
379 	printf("IOCTL_PRIVCMD_MMAPBATCH not supported\n");
380 	return EINVAL;
381 #else
382 	int i;
383 	privcmd_mmapbatch_t* pmb = ap->a_data;
384 	vaddr_t va0;
385 	u_long mfn;
386 	paddr_t ma;
387 	struct vm_map *vmm;
388 	vaddr_t trymap;
389 	paddr_t *maddr;
390 	struct privcmd_object *obj;
391 	vm_prot_t prot;
392 	int error;
393 
394 	vmm = &curlwp->l_proc->p_vmspace->vm_map;
395 	va0 = pmb->addr & ~PAGE_MASK;
396 
397 	if (pmb->num == 0)
398 		return EINVAL;
399 	if (va0 > VM_MAXUSER_ADDRESS)
400 		return EINVAL;
401 	if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmb->num)
402 		return EINVAL;
403 
404 	prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE);
405 	if (prot == UVM_PROT_NONE)
406 		return EINVAL;
407 
408 	maddr = kmem_alloc(sizeof(paddr_t) * pmb->num, KM_SLEEP);
409 	/* get a page of KVA to check mappings */
410 	trymap = uvm_km_alloc(kernel_map, PAGE_SIZE, PAGE_SIZE,
411 	    UVM_KMF_VAONLY);
412 	if (trymap == 0) {
413 		kmem_free(maddr, sizeof(paddr_t) * pmb->num);
414 		return ENOMEM;
415 	}
416 
417 	obj = kmem_alloc(sizeof(*obj), KM_SLEEP);
418 	obj->type = PTYPE_PRIVCMD;
419 	obj->u.pc.maddr = maddr;
420 	obj->u.pc.no_translate = false;
421 	obj->npages = pmb->num;
422 	obj->u.pc.domid = pmb->dom;
423 
424 	for (i = 0; i < pmb->num; ++i) {
425 		error = copyin(&pmb->arr[i], &mfn, sizeof(mfn));
426 		if (error != 0) {
427 			/* XXX: mappings */
428 			pmap_update(pmap_kernel());
429 			kmem_free(maddr, sizeof(paddr_t) * pmb->num);
430 			uvm_km_free(kernel_map, trymap, PAGE_SIZE,
431 			    UVM_KMF_VAONLY);
432 			return error;
433 		}
434 		ma = ((paddr_t)mfn) << PGSHIFT;
435 		if ((error = pmap_enter_ma(pmap_kernel(), trymap, ma, 0,
436 		    prot, PMAP_CANFAIL | prot, pmb->dom))) {
437 			mfn |= 0xF0000000;
438 			copyout(&mfn, &pmb->arr[i], sizeof(mfn));
439 			maddr[i] = INVALID_PAGE;
440 		} else {
441 			pmap_remove(pmap_kernel(), trymap,
442 			    trymap + PAGE_SIZE);
443 			maddr[i] = ma;
444 		}
445 	}
446 	pmap_update(pmap_kernel());
447 	uvm_km_free(kernel_map, trymap, PAGE_SIZE, UVM_KMF_VAONLY);
448 
449 	error = privcmd_map_obj(vmm, va0, obj, prot);
450 
451 	return error;
452 #endif
453 }
454 
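/*
 * IOCTL_PRIVCMD_MMAPBATCH_V2: like MMAPBATCH, but per-frame errors are
 * returned in a separate array. On PV the frames are entered directly
 * into the user pmap; on PVH/HVM they are first added to the physmap
 * with XENMEM_add_to_physmap_batch.
 */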
455 static int
456 privcmd_mmapbatch_v2(struct vop_ioctl_args *ap)
457 {
458 	int i;
459 	privcmd_mmapbatch_v2_t* pmb = ap->a_data;
460 	vaddr_t va0;
461 	u_long mfn;
462 	struct vm_map *vmm;
463 	paddr_t *maddr;
464 	struct privcmd_object *obj;
465 	vm_prot_t prot;
466 	int error;
467 	paddr_t base_paddr = 0;
468 
469 	vmm = &curlwp->l_proc->p_vmspace->vm_map;
470 	va0 = pmb->addr & ~PAGE_MASK;
471 
472 	if (pmb->num == 0)
473 		return EINVAL;
474 	if (va0 > VM_MAXUSER_ADDRESS)
475 		return EINVAL;
476 	if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmb->num)
477 		return EINVAL;
478 
479 	prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE);
480 	if (prot == UVM_PROT_NONE)
481 		return EINVAL;
482 
483 #ifndef XENPV
484 	KASSERT(xen_feature(XENFEAT_auto_translated_physmap));
485 	base_paddr = xenmem_alloc_pa(pmb->num * PAGE_SIZE, PAGE_SIZE, true);
486 	KASSERT(base_paddr != 0);
487 #endif
488 	maddr = kmem_alloc(sizeof(paddr_t) * pmb->num, KM_SLEEP);
489 	obj = kmem_alloc(sizeof(*obj), KM_SLEEP);
490 	obj->type = PTYPE_PRIVCMD_PHYSMAP;
491 	obj->u.pc.maddr = maddr;
492 	obj->u.pc.base_paddr = base_paddr;
493 	obj->u.pc.no_translate = false;
494 	obj->npages = pmb->num;
495 	obj->u.pc.domid = pmb->dom;
496 
497 	for (i = 0; i < pmb->num; ++i) {
498 		error = copyin(&pmb->arr[i], &mfn, sizeof(mfn));
499 		if (error != 0) {
500 			kmem_free(maddr, sizeof(paddr_t) * pmb->num);
501 			kmem_free(obj, sizeof(*obj));
502 #ifndef XENPV
503 			xenmem_free_pa(base_paddr, pmb->num * PAGE_SIZE);
504 #endif
505 			return error;
506 		}
507 #ifdef XENPV
508 		maddr[i] = ((paddr_t)mfn) << PGSHIFT;
509 #else
510 		maddr[i] = mfn; /* TMP argument for XENMEM_add_to_physmap */
511 #endif
512 
513 	}
514 	error = privcmd_map_obj(vmm, va0, obj, prot);
515 	if (error)
516 		return error;
517 
518 	/*
519 	 * Map the range in the user process now.
520 	 * If Xen returns -ENOENT, retry (paging in progress).
521 	 */
522 	for (i = 0; i < pmb->num; i++, va0 += PAGE_SIZE) {
523 		int err, cerr;
524 #ifdef XENPV
525 		for (int j = 0 ; j < 10; j++) {
526 			err = pmap_enter_ma(vmm->pmap, va0, maddr[i], 0,
527 			    prot, PMAP_CANFAIL | prot,
528 			    pmb->dom);
529 			if (err != -2) /* Xen ENOENT */
530 				break;
531 			if (kpause("xnoent", 1, mstohz(100), NULL))
532 				break;
533 		}
534 		if (err) {
535 			maddr[i] = INVALID_PAGE;
536 		}
537 #else /* XENPV */
538 		xen_add_to_physmap_batch_t add;
539 		u_long idx;
540 		xen_pfn_t gpfn;
541 		int err2;
542 		memset(&add, 0, sizeof(add));
543 
544 		add.domid = DOMID_SELF;
545 		add.space = XENMAPSPACE_gmfn_foreign;
546 		add.size = 1;
547 		add.foreign_domid = pmb->dom;
548 		idx = maddr[i];
549 		set_xen_guest_handle(add.idxs, &idx);
550 		maddr[i] = INVALID_PAGE;
551 		gpfn = (base_paddr >> PGSHIFT) + i;
552 		set_xen_guest_handle(add.gpfns, &gpfn);
553 		err2 = 0;
554 		set_xen_guest_handle(add.errs, &err2);
555 		err = HYPERVISOR_memory_op(XENMEM_add_to_physmap_batch, &add);
556 		if (err < 0) {
557 			printf("privcmd_mmapbatch_v2: XENMEM_add_to_physmap_batch failed %d\n", err);
558 			privpgop_detach(&obj->uobj);
559 			return privcmd_xen2bsd_errno(err);
560 		}
561 		err = err2;
562 		if (err == 0)
563 			maddr[i] = base_paddr + i * PAGE_SIZE;
564 #endif /* XENPV */
565 
566 		cerr = copyout(&err, &pmb->err[i], sizeof(pmb->err[i]));
567 		if (cerr) {
568 			privpgop_detach(&obj->uobj);
569 			return cerr;
570 		}
571 	}
572 	return 0;
573 }
574 
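/*
 * IOCTL_PRIVCMD_MMAP_RESOURCE: acquire pages of a foreign resource
 * with XENMEM_acquire_resource and map them at the given user VA.
 */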
575 static int
576 privcmd_mmap_resource(struct vop_ioctl_args *ap)
577 {
578 	int i;
579 	privcmd_mmap_resource_t* pmr = ap->a_data;
580 	vaddr_t va0;
581 	struct vm_map *vmm;
582 	struct privcmd_object *obj;
583 	vm_prot_t prot;
584 	int error;
585 	struct xen_mem_acquire_resource op;
586 	xen_pfn_t *pfns;
587 	paddr_t *maddr;
588 	paddr_t base_paddr = 0;
589 
590 	vmm = &curlwp->l_proc->p_vmspace->vm_map;
591 	va0 = pmr->addr & ~PAGE_MASK;
592 
593 	if (pmr->num == 0)
594 		return EINVAL;
595 	if (va0 > VM_MAXUSER_ADDRESS)
596 		return EINVAL;
597 	if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmr->num)
598 		return EINVAL;
599 
600 	prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE);
601 	if (prot == UVM_PROT_NONE)
602 		return EINVAL;
603 
604 	pfns = kmem_alloc(sizeof(xen_pfn_t) * pmr->num, KM_SLEEP);
605 #ifndef XENPV
606 	KASSERT(xen_feature(XENFEAT_auto_translated_physmap));
607 	base_paddr = xenmem_alloc_pa(pmr->num * PAGE_SIZE, PAGE_SIZE, true);
608 	KASSERT(base_paddr != 0);
609 	for (i = 0; i < pmr->num; i++) {
610 		pfns[i] = (base_paddr >> PGSHIFT) + i;
611 	}
612 #else
613 	KASSERT(!xen_feature(XENFEAT_auto_translated_physmap));
614 #endif
615 
616 	memset(&op, 0, sizeof(op));
617 	op.domid = pmr->dom;
618 	op.type = pmr->type;
619 	op.id = pmr->id;
620 	op.frame = pmr->idx;
621 	op.nr_frames = pmr->num;
622 	set_xen_guest_handle(op.frame_list, pfns);
623 
624 	error = HYPERVISOR_memory_op(XENMEM_acquire_resource, &op);
625 	if (error) {
626 		printf("%s: XENMEM_acquire_resource failed: %d\n",
627 		    __func__, error);
628 		return privcmd_xen2bsd_errno(error);
629 	}
630 	maddr = kmem_alloc(sizeof(paddr_t) * pmr->num, KM_SLEEP);
631 	for (i = 0; i < pmr->num; i++) {
632 		maddr[i] = pfns[i] << PGSHIFT;
633 	}
634 	kmem_free(pfns, sizeof(xen_pfn_t) * pmr->num);
635 
636 	obj = kmem_alloc(sizeof(*obj), KM_SLEEP);
637 	obj->type = PTYPE_PRIVCMD_PHYSMAP;
638 	obj->u.pc.base_paddr = base_paddr;
639 	obj->u.pc.maddr = maddr;
640 	obj->u.pc.no_translate = true;
641 	obj->npages = pmr->num;
642 	obj->u.pc.domid = (op.flags & XENMEM_rsrc_acq_caller_owned) ?
643 	    DOMID_SELF : pmr->dom;
644 
645 	error = privcmd_map_obj(vmm, va0, obj, prot);
646 	return error;
647 }
648 
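/*
 * IOCTL_GNTDEV_MMAP_GRANT_REF: map pages granted by another domain.
 * The grant-table map operations are prepared here and performed at
 * fault time by privpgop_fault().
 */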
649 static int
650 privcmd_map_gref(struct vop_ioctl_args *ap)
651 {
652 	struct ioctl_gntdev_mmap_grant_ref *mgr = ap->a_data;
653 	struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map;
654 	struct privcmd_object *obj;
655 	vaddr_t va0 = (vaddr_t)mgr->va & ~PAGE_MASK;
656 	vm_prot_t prot;
657 	int error;
658 
659 	if (mgr->count == 0)
660 		return EINVAL;
661 	if (va0 > VM_MAXUSER_ADDRESS)
662 		return EINVAL;
663 	if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < mgr->count)
664 		return EINVAL;
665 	if (mgr->notify.offset < 0 || mgr->notify.offset > mgr->count)
666 		return EINVAL;
667 
668 	prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE);
669 	if (prot == UVM_PROT_NONE)
670 		return EINVAL;
671 
672 	obj = kmem_alloc(PGO_GNTREF_LEN(mgr->count), KM_SLEEP);
673 
674 	obj->type  = PTYPE_GNTDEV_REF;
675 	obj->npages = mgr->count;
676 	memcpy(&obj->u.gr.notify, &mgr->notify,
677 	    sizeof(obj->u.gr.notify));
678 #ifndef XENPV
679 	KASSERT(xen_feature(XENFEAT_auto_translated_physmap));
680 	obj->u.gr.base_paddr = xenmem_alloc_pa(obj->npages * PAGE_SIZE,
681 	    PAGE_SIZE, true);
682 	KASSERT(obj->u.gr.base_paddr != 0);
683 #else
684 	obj->u.gr.base_paddr = 0;
685 #endif /* !XENPV */
686 
687 	for (int i = 0; i < obj->npages; ++i) {
688 		struct ioctl_gntdev_grant_ref gref;
689 		error = copyin(&mgr->refs[i], &gref, sizeof(gref));
690 		if (error != 0) {
691 			goto err1;
692 		}
693 #ifdef XENPV
694 		obj->u.gr.ops[i].host_addr = 0;
695 		obj->u.gr.ops[i].flags = GNTMAP_host_map |
696 		    GNTMAP_application_map | GNTMAP_contains_pte;
697 #else /* XENPV */
698 		obj->u.gr.ops[i].host_addr =
699 		    obj->u.gr.base_paddr + PAGE_SIZE * i;
700 		obj->u.gr.ops[i].flags = GNTMAP_host_map;
701 #endif /* XENPV */
702 		obj->u.gr.ops[i].dev_bus_addr = 0;
703 		obj->u.gr.ops[i].ref = gref.ref;
704 		obj->u.gr.ops[i].dom = gref.domid;
705 		obj->u.gr.ops[i].handle = -1;
706 		if (prot == UVM_PROT_READ)
707 			obj->u.gr.ops[i].flags |= GNTMAP_readonly;
708 	}
709 	error = privcmd_map_obj(vmm, va0, obj, prot);
710 	return error;
711 err1:
712 #ifndef XENPV
713 	xenmem_free_pa(obj->u.gr.base_paddr, obj->npages * PAGE_SIZE);
714 #endif
715 	kmem_free(obj, PGO_GNTREF_LEN(obj->npages));
716 	return error;
717 }
718 
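/*
 * IOCTL_GNTDEV_ALLOC_GRANT_REF: allocate wired, zeroed kernel pages,
 * grant the remote domain access to them, copy the grant references
 * out to userland and map the pages at the given user VA.
 */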
719 static int
720 privcmd_alloc_gref(struct vop_ioctl_args *ap)
721 {
722 	struct ioctl_gntdev_alloc_grant_ref *mga = ap->a_data;
723 	struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map;
724 	struct privcmd_object *obj;
725 	vaddr_t va0 = (vaddr_t)mga->va & ~PAGE_MASK;
726 	vm_prot_t prot;
727 	int error, ret;
728 
729 	if (mga->count == 0)
730 		return EINVAL;
731 	if (va0 > VM_MAXUSER_ADDRESS)
732 		return EINVAL;
733 	if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < mga->count)
734 		return EINVAL;
735 	if (mga->notify.offset < 0 || mga->notify.offset > mga->count)
736 		return EINVAL;
737 
738 	prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE);
739 	if (prot == UVM_PROT_NONE)
740 		return EINVAL;
741 
742 	obj = kmem_alloc(PGO_GNTA_LEN(mga->count), KM_SLEEP);
743 
744 	obj->type  = PTYPE_GNTDEV_ALLOC;
745 	obj->npages = mga->count;
746 	obj->u.ga.domid = mga->domid;
747 	memcpy(&obj->u.ga.notify, &mga->notify,
748 	    sizeof(obj->u.ga.notify));
749 	obj->u.ga.gntva = uvm_km_alloc(kernel_map,
750 	    PAGE_SIZE * obj->npages, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO);
751 	if (obj->u.ga.gntva == 0) {
752 		error = ENOMEM;
753 		goto err1;
754 	}
755 
756 	for (int i = 0; i < obj->npages; ++i) {
757 		paddr_t ma;
758 		vaddr_t va = obj->u.ga.gntva + i * PAGE_SIZE;
759 		grant_ref_t id;
760 		bool ro = ((mga->flags & GNTDEV_ALLOC_FLAG_WRITABLE) == 0);
761 		(void)pmap_extract_ma(pmap_kernel(), va, &ma);
762 		if ((ret = xengnt_grant_access(mga->domid, ma, ro, &id)) != 0) {
763 			printf("%s: xengnt_grant_access failed: %d\n",
764 			    __func__, ret);
765 			for (int j = 0; j < i; j++) {
766 				xengnt_revoke_access(obj->u.ga.gref_ids[j]);
767 			}
768 			error = ret;
769 			goto err2;
770 		}
771 		obj->u.ga.gref_ids[i] = id;
772 	}
773 
774 	error = copyout(&obj->u.ga.gref_ids[0], mga->gref_ids,
775 	    sizeof(uint32_t) * obj->npages);
776 	if (error) {
777 		for (int i = 0; i < obj->npages; ++i) {
778 			xengnt_revoke_access(obj->u.ga.gref_ids[i]);
779 		}
780 		goto err2;
781 	}
782 
783 	error = privcmd_map_obj(vmm, va0, obj, prot);
784 	return error;
785 
786 err2:
787 	uvm_km_free(kernel_map, obj->u.ga.gntva,
788 	    PAGE_SIZE * obj->npages, UVM_KMF_WIRED);
789 err1:
790 	kmem_free(obj, PGO_GNTA_LEN(obj->npages));
791 	return error;
792 }
793 
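/*
 * kernfs ioctl entry point: hypercalls are issued inline, the mapping
 * ioctls are handled by the helpers above.
 */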
794 static int
795 privcmd_ioctl(void *v)
796 {
797 	struct vop_ioctl_args /* {
798 		const struct vnodeop_desc *a_desc;
799 		struct vnode *a_vp;
800 		u_long a_command;
801 		void *a_data;
802 		int a_fflag;
803 		kauth_cred_t a_cred;
804 	} */ *ap = v;
805 	int error = 0;
806 
807 	switch (ap->a_command) {
808 	case IOCTL_PRIVCMD_HYPERCALL:
809 	case IOCTL_PRIVCMD_HYPERCALL_OLD:
810 	/*
811 	 * oprivcmd_hypercall_t is privcmd_hypercall_t without the last entry
812 	 */
813 	{
814 		privcmd_hypercall_t *hc = ap->a_data;
815 		if (hc->op >= (PAGE_SIZE >> 5))
816 			return EINVAL;
817 		error = -EOPNOTSUPP;
818 #if defined(__i386__)
819 		__asm volatile (
820 			"pushl %%ebx; pushl %%ecx; pushl %%edx;"
821 			"pushl %%esi; pushl %%edi; "
822 			"movl  4(%%eax),%%ebx ;"
823 			"movl  8(%%eax),%%ecx ;"
824 			"movl 12(%%eax),%%edx ;"
825 			"movl 16(%%eax),%%esi ;"
826 			"movl 20(%%eax),%%edi ;"
827 			"movl   (%%eax),%%eax ;"
828 			"shll $5,%%eax ;"
829 			"addl $hypercall_page,%%eax ;"
830 			"call *%%eax ;"
831 			"popl %%edi; popl %%esi; popl %%edx;"
832 			"popl %%ecx; popl %%ebx"
833 			: "=a" (error) : "0" (ap->a_data) : "memory" );
834 #endif /* __i386__ */
835 #if defined(__x86_64__)
836 #ifndef XENPV
837 		/* hypervisor can't access user memory if SMAP is enabled */
838 		smap_disable();
839 #endif
840 		{
841 		long i1, i2, i3;
842 		__asm volatile (
843 			"movq %8,%%r10; movq %9,%%r8;"
844 			"shll $5,%%eax ;"
845 			"addq $hypercall_page,%%rax ;"
846 			"call *%%rax"
847 			: "=a" (error), "=D" (i1),
848 			  "=S" (i2), "=d" (i3)
849 			: "0" ((unsigned int)hc->op),
850 			  "1" (hc->arg[0]),
851 			  "2" (hc->arg[1]),
852 			  "3" (hc->arg[2]),
853 			  "g" (hc->arg[3]),
854 			  "g" (hc->arg[4])
855 			: "r8", "r10", "memory" );
856 		}
857 #ifndef XENPV
858 		smap_enable();
859 #endif
860 #endif /* __x86_64__ */
861 		if (ap->a_command == IOCTL_PRIVCMD_HYPERCALL) {
862 			if (error >= 0) {
863 				hc->retval = error;
864 				error = 0;
865 			} else {
866 				/* error occurred, return the errno */
867 				error = privcmd_xen2bsd_errno(error);
868 				hc->retval = 0;
869 			}
870 		} else {
871 			error = privcmd_xen2bsd_errno(error);
872 		}
873 		break;
874 	}
875 	case IOCTL_PRIVCMD_MMAP:
876 		return privcmd_mmap(ap);
877 
878 	case IOCTL_PRIVCMD_MMAPBATCH:
879 		return privcmd_mmapbatch(ap);
880 
881 	case IOCTL_PRIVCMD_MMAPBATCH_V2:
882 		return privcmd_mmapbatch_v2(ap);
883 
884 	case IOCTL_PRIVCMD_MMAP_RESOURCE:
885 		return privcmd_mmap_resource(ap);
886 
887 	case IOCTL_GNTDEV_MMAP_GRANT_REF:
888 		return privcmd_map_gref(ap);
889 
890 	case IOCTL_GNTDEV_ALLOC_GRANT_REF:
891 		return privcmd_alloc_gref(ap);
892 	default:
893 		error = EINVAL;
894 	}
895 
896 	return error;
897 }
898 
899 static const struct uvm_pagerops privpgops = {
900   .pgo_reference = privpgop_reference,
901   .pgo_detach = privpgop_detach,
902   .pgo_fault = privpgop_fault,
903 };
904 
905 static void
906 privpgop_reference(struct uvm_object *uobj)
907 {
908 	rw_enter(uobj->vmobjlock, RW_WRITER);
909 	uobj->uo_refs++;
910 	rw_exit(uobj->vmobjlock);
911 }
912 
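/*
 * Perform the "unmap notification" requested for a gntdev object:
 * optionally signal an event channel and/or clear one byte in the
 * granted area, mapping the page temporarily if it is not already
 * mapped in the kernel.
 */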
913 static void
914 privcmd_notify(struct ioctl_gntdev_grant_notify *notify, vaddr_t va,
915     struct gnttab_map_grant_ref *gmops)
916 {
917 	if (notify->action & UNMAP_NOTIFY_SEND_EVENT) {
918 		hypervisor_notify_via_evtchn(notify->event_channel_port);
919 	}
920 	if ((notify->action & UNMAP_NOTIFY_CLEAR_BYTE) == 0) {
921 		notify->action = 0;
922 		return;
923 	}
924 	if (va == 0) {
925 		struct gnttab_map_grant_ref op;
926 		struct gnttab_unmap_grant_ref uop;
927 		int i = notify->offset / PAGE_SIZE;
928 		int o = notify->offset % PAGE_SIZE;
929 		int err;
930 #ifndef XENPV
931 		paddr_t base_paddr;
932 		base_paddr = xenmem_alloc_pa(PAGE_SIZE, PAGE_SIZE, true);
933 #endif
934 
935 		KASSERT(gmops != NULL);
936 		va = uvm_km_alloc(kernel_map, PAGE_SIZE, PAGE_SIZE,
937 		    UVM_KMF_VAONLY | UVM_KMF_WAITVA);
938 #ifndef XENPV
939 		op.host_addr = base_paddr;
940 #else
941 		op.host_addr = va;
942 #endif
943 		op.dev_bus_addr = 0;
944 		op.ref = gmops[i].ref;
945 		op.dom = gmops[i].dom;
946 		op.handle = -1;
947 		op.flags = GNTMAP_host_map;
948 		err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
949 		if (err == 0 && op.status == GNTST_okay) {
950 #ifndef XENPV
951 			pmap_kenter_pa(va, base_paddr,
952 			    VM_PROT_READ | VM_PROT_WRITE, 0);
953 #endif
954 			char *n = (void *)(va + o);
955 			*n = 0;
956 #ifndef XENPV
957 			pmap_kremove(va, PAGE_SIZE);
958 			uop.host_addr = base_paddr;
959 #else
960 			uop.host_addr = va;
961 #endif
962 			uop.handle = op.handle;
963 			uop.dev_bus_addr = 0;
964 			(void)HYPERVISOR_grant_table_op(
965 			    GNTTABOP_unmap_grant_ref, &uop, 1);
966 		}
967 		uvm_km_free(kernel_map, va, PAGE_SIZE, UVM_KMF_VAONLY);
968 #ifndef XENPV
969 		xenmem_free_pa(base_paddr, PAGE_SIZE);
970 #endif
971 	} else {
972 		KASSERT(gmops == NULL);
973 		char *n = (void *)(va + notify->offset);
974 		*n = 0;
975 	}
976 	notify->action = 0;
977 }
978 
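/*
 * Drop a reference; on the last one undo the Xen-side state (physmap
 * entries, grant maps or grant allocations) and free the object.
 */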
979 static void
980 privpgop_detach(struct uvm_object *uobj)
981 {
982 	struct privcmd_object *pobj = (struct privcmd_object *)uobj;
983 
984 	rw_enter(uobj->vmobjlock, RW_WRITER);
985 	KASSERT(uobj->uo_refs > 0);
986 	if (uobj->uo_refs > 1) {
987 		uobj->uo_refs--;
988 		rw_exit(uobj->vmobjlock);
989 		return;
990 	}
991 	rw_exit(uobj->vmobjlock);
992 	switch (pobj->type) {
993 	case PTYPE_PRIVCMD_PHYSMAP:
994 #ifndef XENPV
995 		for (int i = 0; i < pobj->npages; i++) {
996 			if (pobj->u.pc.maddr[i] != INVALID_PAGE) {
997 				struct xen_remove_from_physmap rm;
998 				rm.domid = DOMID_SELF;
999 				rm.gpfn = pobj->u.pc.maddr[i] >> PGSHIFT;
1000 				HYPERVISOR_memory_op(
1001 				    XENMEM_remove_from_physmap, &rm);
1002 			}
1003 		}
1004 		xenmem_free_pa(pobj->u.pc.base_paddr, pobj->npages * PAGE_SIZE);
1005 #endif
1006 		/* FALLTHROUGH */
1007 	case PTYPE_PRIVCMD:
1008 		kmem_free(pobj->u.pc.maddr, sizeof(paddr_t) * pobj->npages);
1009 		uvm_obj_destroy(uobj, true);
1010 		kmem_free(pobj, sizeof(struct privcmd_object));
1011 		break;
1012 	case PTYPE_GNTDEV_REF:
1013 	{
1014 		privcmd_notify(&pobj->u.gr.notify, 0, pobj->u.gr.ops);
1015 #ifndef XENPV
1016 		KASSERT(pobj->u.gr.base_paddr != 0);
1017 		for (int i = 0; i < pobj->npages; i++) {
1018 			struct xen_remove_from_physmap rm;
1019 			rm.domid = DOMID_SELF;
1020 			rm.gpfn = (pobj->u.gr.base_paddr >> PGSHIFT) + i;
1021 			HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &rm);
1022 		}
1023 		xenmem_free_pa(pobj->u.gr.base_paddr, pobj->npages * PAGE_SIZE);
1024 #endif
1025 		kmem_free(pobj, PGO_GNTREF_LEN(pobj->npages));
1026 		break;
1027 	}
1028 	case PTYPE_GNTDEV_ALLOC:
1029 		privcmd_notify(&pobj->u.ga.notify, pobj->u.ga.gntva, NULL);
1030 		for (int i = 0; i < pobj->npages; ++i) {
1031 			xengnt_revoke_access(pobj->u.ga.gref_ids[i]);
1032 		}
1033 		uvm_km_free(kernel_map, pobj->u.ga.gntva,
1034 		    PAGE_SIZE * pobj->npages, UVM_KMF_WIRED);
1035 		kmem_free(pobj, PGO_GNTA_LEN(pobj->npages));
1036 	}
1037 	privcmd_nobjects--;
1038 }
1039 
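/*
 * uvm fault handler: enter the machine/physical addresses (or grant
 * mappings) backing the faulting pages into the faulting process' pmap.
 */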
1040 static int
1041 privpgop_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
1042     int npages, int centeridx, vm_prot_t access_type, int flags)
1043 {
1044 	struct vm_map_entry *entry = ufi->entry;
1045 	struct uvm_object *uobj = entry->object.uvm_obj;
1046 	struct privcmd_object *pobj = (struct privcmd_object*)uobj;
1047 	int maddr_i, i, error = 0;
1048 
1049 	/* compute offset from start of map */
1050 	maddr_i = (entry->offset + (vaddr - entry->start)) >> PAGE_SHIFT;
1051 	if (maddr_i + npages > pobj->npages) {
1052 		return EINVAL;
1053 	}
1054 	for (i = 0; i < npages; i++, maddr_i++, vaddr += PAGE_SIZE) {
1055 		if ((flags & PGO_ALLPAGES) == 0 && i != centeridx)
1056 			continue;
1057 		if (pps[i] == PGO_DONTCARE)
1058 			continue;
1059 		switch (pobj->type) {
1060 		case PTYPE_PRIVCMD:
1061 		case PTYPE_PRIVCMD_PHYSMAP:
1062 		{
1063 			u_int pm_flags = PMAP_CANFAIL | ufi->entry->protection;
1064 #ifdef XENPV
1065 			if (pobj->u.pc.no_translate)
1066 				pm_flags |= PMAP_MD_XEN_NOTR;
1067 #endif
1068 			if (pobj->u.pc.maddr[maddr_i] == INVALID_PAGE) {
1069 				/* This has already been flagged as error. */
1070 				error = EFAULT;
1071 				goto out;
1072 			}
1073 			error = pmap_enter_ma(ufi->orig_map->pmap, vaddr,
1074 			    pobj->u.pc.maddr[maddr_i], 0,
1075 			    ufi->entry->protection, pm_flags,
1076 			    pobj->u.pc.domid);
1077 			if (error == ENOMEM) {
1078 				goto out;
1079 			}
1080 			if (error) {
1081 				pobj->u.pc.maddr[maddr_i] = INVALID_PAGE;
1082 				error = EFAULT;
1083 			}
1084 			break;
1085 		}
1086 		case PTYPE_GNTDEV_REF:
1087 		{
1088 			struct pmap *pmap = ufi->orig_map->pmap;
1089 			if (pmap_enter_gnt(pmap, vaddr, entry->start, pobj->npages, &pobj->u.gr.ops[0]) != GNTST_okay) {
1090 				error = EFAULT;
1091 				goto out;
1092 			}
1093 			break;
1094 		}
1095 		case PTYPE_GNTDEV_ALLOC:
1096 		{
1097 			paddr_t pa;
1098 			if (!pmap_extract(pmap_kernel(),
1099 			    pobj->u.ga.gntva + maddr_i * PAGE_SIZE, &pa)) {
1100 				error = EFAULT;
1101 				goto out;
1102 			}
1103 			error = pmap_enter(ufi->orig_map->pmap, vaddr, pa,
1104 			    ufi->entry->protection,
1105 			    PMAP_CANFAIL | ufi->entry->protection);
1106 			if (error == ENOMEM) {
1107 				goto out;
1108 			}
1109 			break;
1110 		}
1111 		}
1112 		if (error) {
1113 			/* XXX for proper ptp accounting */
1114 			pmap_remove(ufi->orig_map->pmap, vaddr,
1115 			    vaddr + PAGE_SIZE);
1116 		}
1117 	}
1118 out:
1119 	pmap_update(ufi->orig_map->pmap);
1120 	uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj);
1121 	return error;
1122 }
1123 
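/*
 * Wrap the privcmd object in a uvm object and install it at the fixed
 * user address, replacing any previous mapping of that range.
 */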
1124 static int
1125 privcmd_map_obj(struct vm_map *map, vaddr_t start, struct privcmd_object *obj,
1126     vm_prot_t prot)
1127 {
1128 	int error;
1129 	uvm_flag_t uvmflag;
1130 	vaddr_t newstart = start;
1131 	off_t size = ((off_t)obj->npages << PGSHIFT);
1132 
1133 	privcmd_nobjects++;
1134 	uvm_obj_init(&obj->uobj, &privpgops, true, 1);
1135 	uvmflag = UVM_MAPFLAG(prot, prot, UVM_INH_NONE, UVM_ADV_NORMAL,
1136 	    UVM_FLAG_FIXED | UVM_FLAG_UNMAP | UVM_FLAG_NOMERGE);
1137 	error = uvm_map(map, &newstart, size, &obj->uobj, 0, 0, uvmflag);
1138 
1139 	if (error)
1140 		obj->uobj.pgops->pgo_detach(&obj->uobj);
1141 	return error;
1142 }
1143 
1144 static const struct kernfs_fileop privcmd_fileops[] = {
1145   { .kf_fileop = KERNFS_FILEOP_IOCTL, .kf_vop = privcmd_ioctl },
1146 };
1147 
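/*
 * Register the "privcmd" node in the xen kernfs directory; only done
 * when running as a privileged (dom0) domain.
 */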
1148 void
1149 xenprivcmd_init(void)
1150 {
1151 	kernfs_entry_t *dkt;
1152 	kfstype kfst;
1153 
1154 	if (!xendomain_is_privileged())
1155 		return;
1156 
1157 	kfst = KERNFS_ALLOCTYPE(privcmd_fileops);
1158 
1159 	KERNFS_ALLOCENTRY(dkt, KM_SLEEP);
1160 	KERNFS_INITENTRY(dkt, DT_REG, "privcmd", NULL, kfst, VREG,
1161 	    PRIVCMD_MODE);
1162 	kernfs_addentry(kernxen_pkt, dkt);
1163 }
1164