1 /* $NetBSD: xengnt.c,v 1.41 2023/02/25 00:35:52 riastradh Exp $ */
2
3 /*
4 * Copyright (c) 2006 Manuel Bouyer.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: xengnt.c,v 1.41 2023/02/25 00:35:52 riastradh Exp $");
30
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kmem.h>
35 #include <sys/queue.h>
36 #include <sys/extent.h>
37 #include <sys/kernel.h>
38 #include <sys/mutex.h>
39 #include <uvm/uvm.h>
40
41 #include <xen/hypervisor.h>
42 #include <xen/xen.h>
43 #include <xen/granttables.h>
44
45 #include "opt_xen.h"
46
47 /* #define XENDEBUG */
48 #ifdef XENDEBUG
49 #define DPRINTF(x) printf x
50 #else
51 #define DPRINTF(x)
52 #endif
53
54 /* External tools reserve first few grant table entries. */
55 #define NR_RESERVED_ENTRIES 8
56
57 /* current supported version */
58 int gnt_v = 0;
59 #define GNT_ISV1 (gnt_v == 1)
60 #define GNT_ISV2 (gnt_v == 2)
61 /* Current number of frames making up the grant table */
62 int gnt_nr_grant_frames;
63 /* Maximum number of frames that can make up the grant table */
64 int gnt_max_grant_frames;
65
66 /* table of free grant entries */
67 grant_ref_t *gnt_entries;
68 /* last free entry */
69 int last_gnt_entry;
70 /* empty entry in the list */
71 #define XENGNT_NO_ENTRY 0xffffffff
72
73 /* VM address of the grant table */
74 #define NR_GRANT_ENTRIES_PER_PAGE_V1 (PAGE_SIZE / sizeof(grant_entry_v1_t))
75 #define NR_GRANT_ENTRIES_PER_PAGE_V2 (PAGE_SIZE / sizeof(grant_entry_v2_t))
76 #define NR_GRANT_ENTRIES_PER_PAGE \
77 ((gnt_v == 1) ? NR_GRANT_ENTRIES_PER_PAGE_V1 : NR_GRANT_ENTRIES_PER_PAGE_V2)
78 #define NR_GRANT_STATUS_PER_PAGE (PAGE_SIZE / sizeof(grant_status_t))
79
80 union {
81 grant_entry_v1_t *gntt_v1;
82 grant_entry_v2_t *gntt_v2;
83 void *gntt;
84 } grant_table;
85
86 /* Number of grant status frames (v2 only)*/
87 int gnt_status_frames;
88
89 grant_status_t *grant_status;
90 kmutex_t grant_lock;
91
92 static grant_ref_t xengnt_get_entry(void);
93 static void xengnt_free_entry(grant_ref_t);
94 static int xengnt_more_entries(void);
95 static int xengnt_map_status(void);
96 static bool xengnt_finish_init(void);
97
/*
 * One-time initialization of the grant-table subsystem: probe the
 * supported grant-table version (try v2, fall back to v1), size the
 * table from the hypervisor's answer, allocate VA space for the table
 * frames and the free-entry stack, then map everything in via
 * xengnt_finish_init().
 */
void
xengnt_init(void)
{
	struct gnttab_query_size query;
	int rc;
	int nr_grant_entries;
	int i;

	/* first try to see which version we support */
	struct gnttab_set_version gntversion;
	gnt_v = gntversion.version = 2;
	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gntversion, 1);
	if (rc < 0 || gntversion.version != 2) {
		aprint_debug("GNTTABOP_set_version 2 failed (%d), "
		    "fall back to version 1\n", rc);
		gnt_v = 1;
	}

	/* ask the hypervisor for the maximum number of table frames */
	query.dom = DOMID_SELF;
	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
	if ((rc < 0) || (query.status != GNTST_okay))
		gnt_max_grant_frames = 4; /* Legacy max number of frames */
	else
		gnt_max_grant_frames = query.max_nr_frames;

	/*
	 * Always allocate max number of grant frames, never expand in runtime
	 */
	gnt_nr_grant_frames = gnt_max_grant_frames;

	nr_grant_entries =
	    gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE;

	/* VA only: machine frames are entered later by xengnt_more_entries() */
	grant_table.gntt = (void *)uvm_km_alloc(kernel_map,
	    gnt_max_grant_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY);
	if (grant_table.gntt == NULL)
		panic("xengnt_init() table no VM space");

	/*
	 * Free-entry stack.  One extra slot so that
	 * gnt_entries[last_gnt_entry] is a valid (sentinel) location even
	 * when all entries are free — the KASSERTs rely on this.
	 */
	gnt_entries = kmem_alloc((nr_grant_entries + 1) * sizeof(grant_ref_t),
	    KM_SLEEP);
	for (i = 0; i <= nr_grant_entries; i++)
		gnt_entries[i] = XENGNT_NO_ENTRY;

	if (GNT_ISV2) {
		/* v2 keeps GTF_reading/GTF_writing in a separate status table */
		gnt_status_frames =
		    round_page(nr_grant_entries * sizeof(grant_status_t)) / PAGE_SIZE;
		grant_status = (void *)uvm_km_alloc(kernel_map,
		    gnt_status_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY);
		if (grant_status == NULL)
			panic("xengnt_init() status no VM space");
	}

	mutex_init(&grant_lock, MUTEX_DEFAULT, IPL_VM);

	xengnt_finish_init();
}
154
155 /*
156 * Resume grant table state
157 */
bool
xengnt_resume(void)
{
	int rc;

	/* re-select the grant-table version chosen at xengnt_init() time */
	struct gnttab_set_version gntversion;
	KASSERT(gnt_v == 1 || gnt_v == 2);
	gntversion.version = gnt_v;
	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gntversion, 1);

	if (GNT_ISV2) {
		/* we were running v2 before suspend; not getting it back is fatal */
		if (rc < 0 || gntversion.version != 2) {
			panic("GNTTABOP_set_version 2 failed %d", rc);
		}
	} else {
		/*
		 * v1: a failing hypercall is tolerated (presumably for
		 * hypervisors without set_version — unverified); only a
		 * successful call that selected a different version is fatal.
		 */
		if (rc == 0 && gntversion.version != 1) {
			panic("GNTTABOP_set_version 1 failed");
		}
	}

	/* remap all table frames and rebuild the free-entry stack */
	return xengnt_finish_init();
}
180
/*
 * Common tail of xengnt_init() and xengnt_resume(): (re)map every grant
 * table frame — and, for v2, the status frames — and rebuild the
 * free-entry stack.  Always returns true; failure to restore a frame
 * panics.
 */
static bool
xengnt_finish_init(void)
{
	int previous_nr_grant_frames = gnt_nr_grant_frames;

	/* reset and grow back to the previous size, one frame at a time */
	last_gnt_entry = 0;
	gnt_nr_grant_frames = 0;

	mutex_enter(&grant_lock);
	while (gnt_nr_grant_frames < previous_nr_grant_frames) {
		if (xengnt_more_entries() != 0)
			panic("xengnt_resume: can't restore grant frames");
	}
	if (GNT_ISV2)
		xengnt_map_status();
	mutex_exit(&grant_lock);
	return true;
}
199
200 /*
201 * Suspend grant table state
202 */
/*
 * Tear down the VA => machine mappings of the grant table (and the v2
 * status table) before suspend, and invalidate the free-entry stack so
 * xengnt_resume() can rebuild it.  Always returns true.
 */
bool
xengnt_suspend(void) {

	int i;

	mutex_enter(&grant_lock);
	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);

	for (i = 0; i < last_gnt_entry; i++) {
		/* invalidate all grant entries (necessary for resume) */
		gnt_entries[i] = XENGNT_NO_ENTRY;
	}

	/* Remove virtual => machine mapping for grant table */
	pmap_kremove((vaddr_t)grant_table.gntt, gnt_nr_grant_frames * PAGE_SIZE);

	if (GNT_ISV2) {
		/* Remove virtual => machine mapping for status table */
		pmap_kremove((vaddr_t)grant_status, gnt_status_frames * PAGE_SIZE);
	}

	pmap_update(pmap_kernel());
	mutex_exit(&grant_lock);
	return true;
}
228
229 /*
230 * Get status frames and enter them into the VA space.
231 */
/*
 * Get status frames and enter them into the VA space.
 * v2 only; called with grant_lock held.
 * Returns 0 on success, ENOMEM on allocation or hypercall failure.
 */
static int
xengnt_map_status(void)
{
	uint64_t *pages;	/* frame numbers of the status frames */
	size_t sz;
	KASSERT(mutex_owned(&grant_lock));
	KASSERT(GNT_ISV2);

	sz = gnt_status_frames * sizeof(*pages);
	/* KM_NOSLEEP: grant_lock is an IPL_VM mutex, sleeping is not allowed */
	pages = kmem_alloc(sz, KM_NOSLEEP);
	if (pages == NULL)
		return ENOMEM;

#ifdef XENPV
	gnttab_get_status_frames_t getstatus;
	int err;

	getstatus.dom = DOMID_SELF;
	getstatus.nr_frames = gnt_status_frames;
	set_xen_guest_handle(getstatus.frame_list, pages);

	/*
	 * get the status frames, and return the list of their frame
	 * numbers in 'pages' (used as machine frame numbers below)
	 */
	if ((err = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
	    &getstatus, 1)) != 0)
		panic("%s: get_status_frames failed: %d", __func__, err);
	if (getstatus.status != GNTST_okay) {
		aprint_error("%s: get_status_frames returned %d\n",
		    __func__, getstatus.status);
		kmem_free(pages, sz);
		return ENOMEM;
	}
#else /* XENPV */
	/*
	 * Non-PV: back each status frame with a local page and ask the
	 * hypervisor to place the status table there.
	 */
	for (int i = 0; i < gnt_status_frames; i++) {
		struct vm_page *pg;
		struct xen_add_to_physmap xmap;

		/* NOTE(review): uvm_pagealloc() can fail; pg is not NULL-checked */
		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO);
		pages[i] = atop(uvm_vm_page_to_phys(pg));

		xmap.domid = DOMID_SELF;
		xmap.space = XENMAPSPACE_grant_table;
		/* XENMAPIDX_grant_table_status selects the status table */
		xmap.idx = i | XENMAPIDX_grant_table_status;
		xmap.gpfn = pages[i];

		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap) < 0)
			panic("%s: Unable to add grant tables\n", __func__);
	}
#endif /* XENPV */
	/*
	 * map between status_table addresses and the machine addresses of
	 * the status table frames
	 */
	for (int i = 0; i < gnt_status_frames; i++) {
		pmap_kenter_ma(((vaddr_t)grant_status) + i * PAGE_SIZE,
		    ((paddr_t)pages[i]) << PAGE_SHIFT,
		    VM_PROT_WRITE, 0);
	}
	pmap_update(pmap_kernel());

	kmem_free(pages, sz);
	return 0;
}
297
298 /*
299 * Add another page to the grant table
300 * Returns 0 on success, ENOMEM on failure
301 */
302 static int
xengnt_more_entries(void)303 xengnt_more_entries(void)
304 {
305 gnttab_setup_table_t setup;
306 u_long *pages;
307 int nframes_new = gnt_nr_grant_frames + 1;
308 int i, start_gnt;
309 size_t sz;
310 KASSERT(mutex_owned(&grant_lock));
311
312 if (gnt_nr_grant_frames == gnt_max_grant_frames)
313 return ENOMEM;
314
315 sz = nframes_new * sizeof(*pages);
316 pages = kmem_alloc(sz, KM_NOSLEEP);
317 if (pages == NULL)
318 return ENOMEM;
319
320 if (xen_feature(XENFEAT_auto_translated_physmap)) {
321 /*
322 * Note: Although we allocate space for the entire
323 * table, in this mode we only update one entry at a
324 * time.
325 */
326 struct vm_page *pg;
327 struct xen_add_to_physmap xmap;
328
329 pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO);
330 pages[gnt_nr_grant_frames] = atop(uvm_vm_page_to_phys(pg));
331
332 xmap.domid = DOMID_SELF;
333 xmap.space = XENMAPSPACE_grant_table;
334 xmap.idx = gnt_nr_grant_frames;
335 xmap.gpfn = pages[gnt_nr_grant_frames];
336
337 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap) < 0)
338 panic("%s: Unable to add grant frames\n", __func__);
339
340 } else {
341 setup.dom = DOMID_SELF;
342 setup.nr_frames = nframes_new;
343 set_xen_guest_handle(setup.frame_list, pages);
344
345 /*
346 * setup the grant table, made of nframes_new frames
347 * and return the list of their virtual addresses
348 * in 'pages'
349 */
350 if (HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0)
351 panic("%s: setup table failed", __func__);
352 if (setup.status != GNTST_okay) {
353 aprint_error("%s: setup table returned %d\n",
354 __func__, setup.status);
355 kmem_free(pages, sz);
356 return ENOMEM;
357 }
358 }
359
360 DPRINTF(("xengnt_more_entries: map 0x%lx -> %p\n",
361 pages[gnt_nr_grant_frames],
362 (char *)grant_table + gnt_nr_grant_frames * PAGE_SIZE));
363
364 /*
365 * map between grant_table addresses and the machine addresses of
366 * the grant table frames
367 */
368 pmap_kenter_ma(((vaddr_t)grant_table.gntt) + gnt_nr_grant_frames * PAGE_SIZE,
369 ((paddr_t)pages[gnt_nr_grant_frames]) << PAGE_SHIFT,
370 VM_PROT_WRITE, 0);
371 pmap_update(pmap_kernel());
372
373 /*
374 * add the grant entries associated to the last grant table frame
375 * and mark them as free. Prevent using the first grants (from 0 to 8)
376 * since they are used by the tools.
377 */
378 start_gnt = (gnt_nr_grant_frames * NR_GRANT_ENTRIES_PER_PAGE) <
379 (NR_RESERVED_ENTRIES + 1) ?
380 (NR_RESERVED_ENTRIES + 1) :
381 (gnt_nr_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
382 for (i = start_gnt;
383 i < nframes_new * NR_GRANT_ENTRIES_PER_PAGE;
384 i++) {
385 KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
386 gnt_entries[last_gnt_entry] = i;
387 last_gnt_entry++;
388 }
389 gnt_nr_grant_frames = nframes_new;
390 kmem_free(pages, sz);
391 return 0;
392 }
393
394 /*
395 * Returns a reference to the first free entry in grant table
396 */
397 static grant_ref_t
xengnt_get_entry(void)398 xengnt_get_entry(void)
399 {
400 grant_ref_t entry;
401 static struct timeval xengnt_nonmemtime;
402 static const struct timeval xengnt_nonmemintvl = {5,0};
403
404 KASSERT(mutex_owned(&grant_lock));
405
406 if (__predict_false(last_gnt_entry == 0)) {
407 if (ratecheck(&xengnt_nonmemtime, &xengnt_nonmemintvl))
408 printf("xengnt_get_entry: out of grant "
409 "table entries\n");
410 return XENGNT_NO_ENTRY;
411 }
412 KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
413 last_gnt_entry--;
414 entry = gnt_entries[last_gnt_entry];
415 gnt_entries[last_gnt_entry] = XENGNT_NO_ENTRY;
416 KASSERT(entry != XENGNT_NO_ENTRY && entry > NR_RESERVED_ENTRIES);
417 KASSERT(last_gnt_entry >= 0);
418 KASSERT(last_gnt_entry <= gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
419 return entry;
420 }
421
422 /*
423 * Mark the grant table entry as free
424 */
425 static void
xengnt_free_entry(grant_ref_t entry)426 xengnt_free_entry(grant_ref_t entry)
427 {
428 mutex_enter(&grant_lock);
429 KASSERT(entry > NR_RESERVED_ENTRIES);
430 KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
431 KASSERT(last_gnt_entry >= 0);
432 KASSERT(last_gnt_entry <= gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
433 gnt_entries[last_gnt_entry] = entry;
434 last_gnt_entry++;
435 mutex_exit(&grant_lock);
436 }
437
/*
 * Grant domain 'dom' access to the page at machine address 'ma'.
 * A non-zero 'ro' makes the grant read-only.  On success the grant
 * reference is stored in *entryp and 0 is returned; ENOMEM if no free
 * grant entries remain.
 */
int
xengnt_grant_access(domid_t dom, paddr_t ma, int ro, grant_ref_t *entryp)
{
	mutex_enter(&grant_lock);

	*entryp = xengnt_get_entry();
	if (__predict_false(*entryp == XENGNT_NO_ENTRY)) {
		mutex_exit(&grant_lock);
		return ENOMEM;
	}

	if (GNT_ISV2) {
		grant_table.gntt_v2[*entryp].full_page.frame = ma >> PAGE_SHIFT;
		grant_table.gntt_v2[*entryp].hdr.domid = dom;
		/*
		 * ensure that the above values reach global visibility
		 * before permitting frame's access (done when we set flags)
		 */
		xen_wmb();
		grant_table.gntt_v2[*entryp].hdr.flags =
		    GTF_permit_access | (ro ? GTF_readonly : 0);
	} else {
		grant_table.gntt_v1[*entryp].frame = ma >> PAGE_SHIFT;
		grant_table.gntt_v1[*entryp].domid = dom;
		/*
		 * ensure that the above values reach global visibility
		 * before permitting frame's access (done when we set flags)
		 */
		xen_wmb();
		grant_table.gntt_v1[*entryp].flags =
		    GTF_permit_access | (ro ? GTF_readonly : 0);
	}
	mutex_exit(&grant_lock);
	return 0;
}
473
/*
 * Atomic 16-bit compare-and-exchange (x86 LOCK CMPXCHGW):
 * if *ptr == val, store newval into *ptr.  Returns the value that was
 * in *ptr before the operation (equal to val iff the exchange
 * happened).  The "memory" clobber keeps the compiler from reordering
 * surrounding memory accesses across the exchange.
 */
static inline uint16_t
xen_atomic_cmpxchg16(volatile uint16_t *ptr, uint16_t val, uint16_t newval)
{
	unsigned long result;

	__asm volatile(__LOCK_PREFIX
	    "cmpxchgw %w1,%2"
	    :"=a" (result)
	    :"q"(newval), "m" (*ptr), "0" (val)
	    :"memory");

	return result;
}
487
/*
 * Revoke a grant previously set up by xengnt_grant_access() and return
 * the entry to the free list.
 */
void
xengnt_revoke_access(grant_ref_t entry)
{
	if (GNT_ISV2) {
		/* clear flags, then check whether a peer still maps the frame */
		grant_table.gntt_v2[entry].hdr.flags = 0;
		xen_mb(); /* Concurrent access by hypervisor */

		if (__predict_false(
		    (grant_status[entry] & (GTF_reading|GTF_writing)) != 0)) {
			/* NOTE: only warns; the entry is freed regardless */
			printf("xengnt_revoke_access(%u): still in use\n",
			    entry);
		} else {

			/*
			 * The read of grant_status needs to have acquire
			 * semantics.
			 * Reads already have that on x86, so need only protect
			 * against compiler reordering. May need full barrier
			 * on other architectures.
			 */
			__insn_barrier();
		}
	} else {
		uint16_t flags, nflags;

		/*
		 * v1: atomically clear the flags, retrying while the
		 * flags word changes under us; panic if the peer still
		 * has the frame mapped.
		 */
		nflags = grant_table.gntt_v1[entry].flags;

		do {
			if ((flags = nflags) & (GTF_reading|GTF_writing))
				panic("xengnt_revoke_access: still in use");
			nflags = xen_atomic_cmpxchg16(
			    &grant_table.gntt_v1[entry].flags, flags, 0);
		} while (nflags != flags);

	}
	xengnt_free_entry(entry);
}
525
526 int
xengnt_status(grant_ref_t entry)527 xengnt_status(grant_ref_t entry)
528 {
529 if (GNT_ISV2)
530 return grant_status[entry] & (GTF_reading|GTF_writing);
531 else
532 return (grant_table.gntt_v1[entry].flags & (GTF_reading|GTF_writing));
533 }
534