xref: /netbsd-src/sys/arch/xen/xen/xengnt.c (revision bad5336a8ae570a32c43ca88459b6d245ed99b73)
1 /*      $NetBSD: xengnt.c,v 1.41 2023/02/25 00:35:52 riastradh Exp $      */
2 
3 /*
4  * Copyright (c) 2006 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  */
27 
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: xengnt.c,v 1.41 2023/02/25 00:35:52 riastradh Exp $");
30 
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kmem.h>
35 #include <sys/queue.h>
36 #include <sys/extent.h>
37 #include <sys/kernel.h>
38 #include <sys/mutex.h>
39 #include <uvm/uvm.h>
40 
41 #include <xen/hypervisor.h>
42 #include <xen/xen.h>
43 #include <xen/granttables.h>
44 
45 #include "opt_xen.h"
46 
47 /* #define XENDEBUG */
48 #ifdef XENDEBUG
49 #define DPRINTF(x) printf x
50 #else
51 #define DPRINTF(x)
52 #endif
53 
54 /* External tools reserve first few grant table entries. */
55 #define NR_RESERVED_ENTRIES 8
56 
57 /* current supported version */
58 int gnt_v = 0;
59 #define GNT_ISV1 (gnt_v == 1)
60 #define GNT_ISV2 (gnt_v == 2)
61 /* Current number of frames making up the grant table */
62 int gnt_nr_grant_frames;
63 /* Maximum number of frames that can make up the grant table */
64 int gnt_max_grant_frames;
65 
66 /* table of free grant entries */
67 grant_ref_t *gnt_entries;
68 /* last free entry */
69 int last_gnt_entry;
70 /* empty entry in the list */
71 #define XENGNT_NO_ENTRY 0xffffffff
72 
73 /* VM address of the grant table */
74 #define NR_GRANT_ENTRIES_PER_PAGE_V1 (PAGE_SIZE / sizeof(grant_entry_v1_t))
75 #define NR_GRANT_ENTRIES_PER_PAGE_V2 (PAGE_SIZE / sizeof(grant_entry_v2_t))
76 #define NR_GRANT_ENTRIES_PER_PAGE \
77     ((gnt_v == 1) ? NR_GRANT_ENTRIES_PER_PAGE_V1 : NR_GRANT_ENTRIES_PER_PAGE_V2)
78 #define NR_GRANT_STATUS_PER_PAGE (PAGE_SIZE / sizeof(grant_status_t))
79 
80 union {
81 	grant_entry_v1_t *gntt_v1;
82 	grant_entry_v2_t *gntt_v2;
83 	void *gntt;
84 } grant_table;
85 
86 /* Number of grant status frames (v2 only)*/
87 int gnt_status_frames;
88 
89 grant_status_t *grant_status;
90 kmutex_t grant_lock;
91 
92 static grant_ref_t xengnt_get_entry(void);
93 static void xengnt_free_entry(grant_ref_t);
94 static int xengnt_more_entries(void);
95 static int xengnt_map_status(void);
96 static bool xengnt_finish_init(void);
97 
/*
 * One-time initialization of the grant table subsystem.
 *
 * Negotiates the grant table version with the hypervisor (v2 preferred,
 * with fallback to v1), determines the maximum number of table frames,
 * reserves kernel VA for the table (and, for v2, for the status array),
 * builds the list of free grant references, and finally maps the frames
 * via xengnt_finish_init().
 */
98 void
xengnt_init(void)99 xengnt_init(void)
100 {
101 	struct gnttab_query_size query;
102 	int rc;
103 	int nr_grant_entries;
104 	int i;
105 
106 	/* first try to see which version we support */
107 	struct gnttab_set_version gntversion;
	/* Assume v2 until the hypervisor says otherwise. */
108 	gnt_v = gntversion.version = 2;
109 	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gntversion, 1);
110 	if (rc < 0 || gntversion.version != 2) {
111 		aprint_debug("GNTTABOP_set_version 2 failed (%d), "
112 		    "fall back to version 1\n", rc);
113 		gnt_v = 1;
114 	}
115 
	/* Ask the hypervisor how many frames the table may grow to. */
116 	query.dom = DOMID_SELF;
117 	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
118 	if ((rc < 0) || (query.status != GNTST_okay))
119 		gnt_max_grant_frames = 4; /* Legacy max number of frames */
120 	else
121 		gnt_max_grant_frames = query.max_nr_frames;
122 
123 	/*
124 	 * Always allocate max number of grant frames, never expand in runtime
125 	 */
126 	gnt_nr_grant_frames = gnt_max_grant_frames;
127 
128 	nr_grant_entries =
129 	    gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE;
130 
	/*
	 * VA only: the machine frames are entered into this range later,
	 * by xengnt_more_entries() (called through xengnt_finish_init()).
	 */
131 	grant_table.gntt = (void *)uvm_km_alloc(kernel_map,
132 	    gnt_max_grant_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY);
133 	if (grant_table.gntt == NULL)
134 		panic("xengnt_init() table no VM space");
135 
	/*
	 * One extra slot: the free list is used as a stack, and the
	 * KASSERTs elsewhere read gnt_entries[last_gnt_entry], which can
	 * index one past the number of usable entries when the list is
	 * full.  Every slot starts out as the XENGNT_NO_ENTRY sentinel.
	 */
136 	gnt_entries = kmem_alloc((nr_grant_entries + 1) * sizeof(grant_ref_t),
137 	    KM_SLEEP);
138 	for (i = 0; i <= nr_grant_entries; i++)
139 		gnt_entries[i] = XENGNT_NO_ENTRY;
140 
141 	if (GNT_ISV2) {
		/* v2 keeps GTF_reading/GTF_writing in a separate status array. */
142 		gnt_status_frames =
143 		    round_page(nr_grant_entries * sizeof(grant_status_t)) / PAGE_SIZE;
144 		grant_status = (void *)uvm_km_alloc(kernel_map,
145 		    gnt_status_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY);
146 		if (grant_status == NULL)
147 			panic("xengnt_init() status no VM space");
148 	}
149 
150 	mutex_init(&grant_lock, MUTEX_DEFAULT, IPL_VM);
151 
152 	xengnt_finish_init();
153 }
154 
155 /*
 * Resume grant table state after a suspend/migration: re-assert the
 * grant table version chosen at boot with the hypervisor, then re-map
 * all frames and rebuild the free list via xengnt_finish_init().
157  */
158 bool
xengnt_resume(void)159 xengnt_resume(void)
160 {
161 	int rc;
162 
163 	struct gnttab_set_version gntversion;
164 	KASSERT(gnt_v == 1 || gnt_v == 2);
165 	gntversion.version = gnt_v;
166 	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gntversion, 1);
167 
	/*
	 * For v2 any failure is fatal.  For v1 we only panic when the call
	 * succeeded yet reports a different version; NOTE(review): rc != 0
	 * presumably means the hypervisor does not implement set_version at
	 * all and hence only offers v1 -- confirm against the Xen ABI.
	 */
168 	if (GNT_ISV2) {
169 		if (rc < 0 || gntversion.version != 2) {
170 			panic("GNTTABOP_set_version 2 failed %d", rc);
171 		}
172 	} else {
173 		if (rc == 0 && gntversion.version != 1) {
174 			panic("GNTTABOP_set_version 1 failed");
175 		}
176 	}
177 
178 	return xengnt_finish_init();
179 }
180 
/*
 * Common tail of xengnt_init() and xengnt_resume(): map every grant
 * table frame (one xengnt_more_entries() call per frame, which also
 * pushes that frame's entries onto the free list) and, for v2, map the
 * status frames.  Always returns true.
 */
181 static bool
xengnt_finish_init(void)182 xengnt_finish_init(void)
183 {
184 	int previous_nr_grant_frames = gnt_nr_grant_frames;
185 
	/* Start from scratch; the loop below re-adds frames one by one. */
186 	last_gnt_entry = 0;
187 	gnt_nr_grant_frames = 0;
188 
189 	mutex_enter(&grant_lock);
190 	while (gnt_nr_grant_frames < previous_nr_grant_frames) {
191 		if (xengnt_more_entries() != 0)
192 			panic("xengnt_resume: can't restore grant frames");
193 	}
	/* NOTE(review): xengnt_map_status() failure is ignored here. */
194 	if (GNT_ISV2)
195 		xengnt_map_status();
196 	mutex_exit(&grant_lock);
197 	return true;
198 }
199 
200 /*
 * Suspend grant table state: invalidate the whole free list and tear
 * down the virtual => machine mappings of the table (and, for v2, the
 * status array) so that xengnt_resume() can rebuild them.  Always
 * returns true.
 *
 * NOTE(review): grant_lock is dropped before return; callers presumably
 * guarantee no grant activity between suspend and resume -- confirm.
202  */
203 bool
xengnt_suspend(void)204 xengnt_suspend(void) {
205 
206 	int i;
207 
208 	mutex_enter(&grant_lock);
209 	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
210 
211 	for (i = 0; i < last_gnt_entry; i++) {
212 		/* invalidate all grant entries (necessary for resume) */
213 		gnt_entries[i] = XENGNT_NO_ENTRY;
214 	}
215 
216 	/* Remove virtual => machine mapping for grant table */
217 	pmap_kremove((vaddr_t)grant_table.gntt, gnt_nr_grant_frames * PAGE_SIZE);
218 
219 	if (GNT_ISV2) {
220 		/* Remove virtual => machine mapping for status table */
221 		pmap_kremove((vaddr_t)grant_status, gnt_status_frames * PAGE_SIZE);
222 	}
223 
224 	pmap_update(pmap_kernel());
225 	mutex_exit(&grant_lock);
226 	return true;
227 }
228 
229 /*
230  * Get status frames and enter them into the VA space.
231  */
232 static int
xengnt_map_status(void)233 xengnt_map_status(void)
234 {
235 	uint64_t *pages;
236 	size_t sz;
237 	KASSERT(mutex_owned(&grant_lock));
238 	KASSERT(GNT_ISV2);
239 
240 	sz = gnt_status_frames * sizeof(*pages);
241 	pages = kmem_alloc(sz, KM_NOSLEEP);
242 	if (pages == NULL)
243 		return ENOMEM;
244 
245 #ifdef XENPV
246 	gnttab_get_status_frames_t getstatus;
247 	int err;
248 
249 	getstatus.dom = DOMID_SELF;
250 	getstatus.nr_frames = gnt_status_frames;
251 	set_xen_guest_handle(getstatus.frame_list, pages);
252 
253 	/*
254 	 * get the status frames, and return the list of their virtual
255 	 * addresses in 'pages'
256 	 */
257 	if ((err = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
258 	    &getstatus, 1)) != 0)
259 		panic("%s: get_status_frames failed: %d", __func__, err);
260 	if (getstatus.status != GNTST_okay) {
261 		aprint_error("%s: get_status_frames returned %d\n",
262 		    __func__, getstatus.status);
263 		kmem_free(pages, sz);
264 		return ENOMEM;
265 	}
266 #else /* XENPV */
267 	for (int i = 0; i < gnt_status_frames; i++) {
268 		struct vm_page *pg;
269 		struct xen_add_to_physmap xmap;
270 
271 		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO);
272 		pages[i] = atop(uvm_vm_page_to_phys(pg));
273 
274 		xmap.domid = DOMID_SELF;
275 		xmap.space = XENMAPSPACE_grant_table;
276 		xmap.idx = i | XENMAPIDX_grant_table_status;
277 		xmap.gpfn = pages[i];
278 
279 		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap) < 0)
280 			panic("%s: Unable to add grant tables\n", __func__);
281 	}
282 #endif /* XENPV */
283 	/*
284 	 * map between status_table addresses and the machine addresses of
285 	 * the status table frames
286 	 */
287 	for (int i = 0; i < gnt_status_frames; i++) {
288 		pmap_kenter_ma(((vaddr_t)grant_status) + i * PAGE_SIZE,
289 		    ((paddr_t)pages[i]) << PAGE_SHIFT,
290 		    VM_PROT_WRITE, 0);
291 	}
292 	pmap_update(pmap_kernel());
293 
294 	kmem_free(pages, sz);
295 	return 0;
296 }
297 
298 /*
299  * Add another page to the grant table
300  * Returns 0 on success, ENOMEM on failure
301  */
302 static int
xengnt_more_entries(void)303 xengnt_more_entries(void)
304 {
305 	gnttab_setup_table_t setup;
306 	u_long *pages;
307 	int nframes_new = gnt_nr_grant_frames + 1;
308 	int i, start_gnt;
309 	size_t sz;
310 	KASSERT(mutex_owned(&grant_lock));
311 
312 	if (gnt_nr_grant_frames == gnt_max_grant_frames)
313 		return ENOMEM;
314 
315 	sz = nframes_new * sizeof(*pages);
316 	pages = kmem_alloc(sz, KM_NOSLEEP);
317 	if (pages == NULL)
318 		return ENOMEM;
319 
320 	if (xen_feature(XENFEAT_auto_translated_physmap)) {
321 		/*
322 		 * Note: Although we allocate space for the entire
323 		 * table, in this mode we only update one entry at a
324 		 * time.
325 		 */
326 		struct vm_page *pg;
327 		struct xen_add_to_physmap xmap;
328 
329 		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO);
330 		pages[gnt_nr_grant_frames] = atop(uvm_vm_page_to_phys(pg));
331 
332 		xmap.domid = DOMID_SELF;
333 		xmap.space = XENMAPSPACE_grant_table;
334 		xmap.idx = gnt_nr_grant_frames;
335 		xmap.gpfn = pages[gnt_nr_grant_frames];
336 
337 		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap) < 0)
338 			panic("%s: Unable to add grant frames\n", __func__);
339 
340 	} else {
341 		setup.dom = DOMID_SELF;
342 		setup.nr_frames = nframes_new;
343 		set_xen_guest_handle(setup.frame_list, pages);
344 
345 		/*
346 		 * setup the grant table, made of nframes_new frames
347 		 * and return the list of their virtual addresses
348 		 * in 'pages'
349 		 */
350 		if (HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0)
351 			panic("%s: setup table failed", __func__);
352 		if (setup.status != GNTST_okay) {
353 			aprint_error("%s: setup table returned %d\n",
354 			    __func__, setup.status);
355 			kmem_free(pages, sz);
356 			return ENOMEM;
357 		}
358 	}
359 
360 	DPRINTF(("xengnt_more_entries: map 0x%lx -> %p\n",
361 	    pages[gnt_nr_grant_frames],
362 	    (char *)grant_table + gnt_nr_grant_frames * PAGE_SIZE));
363 
364 	/*
365 	 * map between grant_table addresses and the machine addresses of
366 	 * the grant table frames
367 	 */
368 	pmap_kenter_ma(((vaddr_t)grant_table.gntt) + gnt_nr_grant_frames * PAGE_SIZE,
369 	    ((paddr_t)pages[gnt_nr_grant_frames]) << PAGE_SHIFT,
370 	    VM_PROT_WRITE, 0);
371 	pmap_update(pmap_kernel());
372 
373 	/*
374 	 * add the grant entries associated to the last grant table frame
375 	 * and mark them as free. Prevent using the first grants (from 0 to 8)
376 	 * since they are used by the tools.
377 	 */
378 	start_gnt = (gnt_nr_grant_frames * NR_GRANT_ENTRIES_PER_PAGE) <
379 	            (NR_RESERVED_ENTRIES + 1) ?
380 	            (NR_RESERVED_ENTRIES + 1) :
381 	            (gnt_nr_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
382 	for (i = start_gnt;
383 	    i < nframes_new * NR_GRANT_ENTRIES_PER_PAGE;
384 	    i++) {
385 		KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
386 		gnt_entries[last_gnt_entry] = i;
387 		last_gnt_entry++;
388 	}
389 	gnt_nr_grant_frames = nframes_new;
390 	kmem_free(pages, sz);
391 	return 0;
392 }
393 
394 /*
 * Pop and return a free grant reference from the free-entry stack.
 * Returns XENGNT_NO_ENTRY when the stack is empty (with a warning
 * rate-limited to once every 5 seconds).  grant_lock must be held.
396  */
397 static grant_ref_t
xengnt_get_entry(void)398 xengnt_get_entry(void)
399 {
400 	grant_ref_t entry;
401 	static struct timeval xengnt_nonmemtime;
402 	static const struct timeval xengnt_nonmemintvl = {5,0};
403 
404 	KASSERT(mutex_owned(&grant_lock));
405 
406 	if (__predict_false(last_gnt_entry == 0)) {
407 		if (ratecheck(&xengnt_nonmemtime, &xengnt_nonmemintvl))
408 			printf("xengnt_get_entry: out of grant "
409 			    "table entries\n");
410 		return XENGNT_NO_ENTRY;
411 	}
	/* The slot just above the stack top always holds the sentinel. */
412 	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
413 	last_gnt_entry--;
414 	entry = gnt_entries[last_gnt_entry];
	/* Re-poison the popped slot so the invariant above keeps holding. */
415 	gnt_entries[last_gnt_entry] = XENGNT_NO_ENTRY;
416 	KASSERT(entry != XENGNT_NO_ENTRY && entry > NR_RESERVED_ENTRIES);
417 	KASSERT(last_gnt_entry >= 0);
418 	KASSERT(last_gnt_entry <= gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
419 	return entry;
420 }
421 
422 /*
423  * Mark the grant table entry as free
424  */
425 static void
xengnt_free_entry(grant_ref_t entry)426 xengnt_free_entry(grant_ref_t entry)
427 {
428 	mutex_enter(&grant_lock);
429 	KASSERT(entry > NR_RESERVED_ENTRIES);
430 	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
431 	KASSERT(last_gnt_entry >= 0);
432 	KASSERT(last_gnt_entry <= gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
433 	gnt_entries[last_gnt_entry] = entry;
434 	last_gnt_entry++;
435 	mutex_exit(&grant_lock);
436 }
437 
/*
 * Allocate a grant table entry granting domain 'dom' access to the page
 * at machine address 'ma'; read-only when 'ro' is non-zero.  On success
 * the grant reference is stored in *entryp and 0 is returned; ENOMEM is
 * returned when no free entry is available.
 */
438 int
xengnt_grant_access(domid_t dom,paddr_t ma,int ro,grant_ref_t * entryp)439 xengnt_grant_access(domid_t dom, paddr_t ma, int ro, grant_ref_t *entryp)
440 {
441 	mutex_enter(&grant_lock);
442 
443 	*entryp = xengnt_get_entry();
444 	if (__predict_false(*entryp == XENGNT_NO_ENTRY)) {
445 		mutex_exit(&grant_lock);
446 		return ENOMEM;
447 	}
448 
449 	if (GNT_ISV2) {
450 		grant_table.gntt_v2[*entryp].full_page.frame = ma >> PAGE_SHIFT;
451 		grant_table.gntt_v2[*entryp].hdr.domid = dom;
452 		/*
453 		 * ensure that the above values reach global visibility
454 		 * before permitting frame's access (done when we set flags)
455 		 */
456 		xen_wmb();
457 		grant_table.gntt_v2[*entryp].hdr.flags =
458 		    GTF_permit_access | (ro ? GTF_readonly : 0);
459 	} else {
460 		grant_table.gntt_v1[*entryp].frame = ma >> PAGE_SHIFT;
461 		grant_table.gntt_v1[*entryp].domid = dom;
462 		/*
463 		* ensure that the above values reach global visibility
464 		* before permitting frame's access (done when we set flags)
465 		*/
466 		xen_wmb();
467 		grant_table.gntt_v1[*entryp].flags =
468 		   GTF_permit_access | (ro ? GTF_readonly : 0);
469 	}
470 	mutex_exit(&grant_lock);
471 	return 0;
472 }
473 
/*
 * Atomic 16-bit compare-and-exchange: if *ptr == val, store newval into
 * *ptr.  Returns the value observed at *ptr (cmpxchg leaves the old
 * value in %ax, bound to 'result' via the "=a"/"0" constraints); the
 * exchange succeeded iff the return value equals 'val'.
 */
474 static inline uint16_t
xen_atomic_cmpxchg16(volatile uint16_t * ptr,uint16_t val,uint16_t newval)475 xen_atomic_cmpxchg16(volatile uint16_t *ptr, uint16_t  val, uint16_t newval)
476 {
477 	unsigned long result;
478 
479 	__asm volatile(__LOCK_PREFIX
480 	   "cmpxchgw %w1,%2"
481 	   :"=a" (result)
482 	   :"q"(newval), "m" (*ptr), "0" (val)
483 	   :"memory");
484 
485 	return result;
486 }
487 
/*
 * Revoke a grant previously established with xengnt_grant_access() and
 * return the entry to the free list.  The v2 path reports (but does not
 * panic) if the remote end still has the frame mapped; the v1 path
 * panics in that case.
 */
488 void
xengnt_revoke_access(grant_ref_t entry)489 xengnt_revoke_access(grant_ref_t entry)
490 {
491 	if (GNT_ISV2) {
		/* Clear the permit flags, then check the separate status word. */
492 		grant_table.gntt_v2[entry].hdr.flags = 0;
493 		xen_mb();	/* Concurrent access by hypervisor */
494 
495 		if (__predict_false(
496 		    (grant_status[entry] & (GTF_reading|GTF_writing)) != 0)) {
497 			printf("xengnt_revoke_access(%u): still in use\n",
498 			    entry);
499 		} else {
500 
501 			/*
502 			 * The read of grant_status needs to have acquire
503 			 * semantics.
504 			 * Reads already have that on x86, so need only protect
505 			 * against compiler reordering. May need full barrier
506 			 * on other architectures.
507 			 */
508 			__insn_barrier();
509 		}
510 	} else {
511 		uint16_t flags, nflags;
512 
513 		nflags = grant_table.gntt_v1[entry].flags;
514 
		/*
		 * Retry the cmpxchg until the flags are observed unchanged
		 * and atomically cleared; panic if GTF_reading/GTF_writing
		 * is ever seen set (the remote end still uses the frame).
		 */
515 		do {
516 		       if ((flags = nflags) & (GTF_reading|GTF_writing))
517 			       panic("xengnt_revoke_access: still in use");
518 		       nflags = xen_atomic_cmpxchg16(
519 			    &grant_table.gntt_v1[entry].flags, flags, 0);
520 		} while (nflags != flags);
521 
522 	}
523 	xengnt_free_entry(entry);
524 }
525 
526 int
xengnt_status(grant_ref_t entry)527 xengnt_status(grant_ref_t entry)
528 {
529 	if (GNT_ISV2)
530 		return grant_status[entry] & (GTF_reading|GTF_writing);
531 	else
532 		return (grant_table.gntt_v1[entry].flags & (GTF_reading|GTF_writing));
533 }
534