/*	$NetBSD: xengnt.c,v 1.38 2020/05/13 16:13:14 jdolecek Exp $	*/

/*
 * Copyright (c) 2006 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xengnt.c,v 1.38 2020/05/13 16:13:14 jdolecek Exp $");

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/queue.h>
#include <sys/extent.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <uvm/uvm.h>

#include <xen/hypervisor.h>
#include <xen/xen.h>
#include <xen/granttables.h>

#include "opt_xen.h"

/* #define XENDEBUG */
#ifdef XENDEBUG
#define DPRINTF(x) printf x
#else
#define DPRINTF(x)
#endif

/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8

/* Current number of frames making up the grant table */
int gnt_nr_grant_frames;
/* Maximum number of frames that can make up the grant table */
int gnt_max_grant_frames;

/* table (stack) of free grant entries */
grant_ref_t *gnt_entries;
/* index just past the last free entry in gnt_entries */
int last_gnt_entry;
/* marker for an empty slot in the list */
#define XENGNT_NO_ENTRY 0xffffffff
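
/*
 * Free-list discipline (as enforced by the KASSERTs below): gnt_entries[]
 * is used as a stack, with gnt_entries[0 .. last_gnt_entry - 1] holding
 * free grant references and gnt_entries[last_gnt_entry] always equal to
 * XENGNT_NO_ENTRY.  xengnt_get_entry() pops from the top and
 * xengnt_free_entry() pushes back, both under grant_lock.
 */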

/* Number of grant entries and status words that fit in one page */
#define NR_GRANT_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(grant_entry_v2_t))
#define NR_GRANT_STATUS_PER_PAGE (PAGE_SIZE / sizeof(grant_status_t))
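/*
 * For illustration only (actual values depend on the architecture): with
 * 4 KiB pages, a 16-byte grant_entry_v2_t and a 2-byte grant_status_t,
 * this gives 256 grant entries and 2048 status words per page.
 */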

/* VM address of the grant table */
grant_entry_v2_t *grant_table;
/* Number of grant status frames */
int gnt_status_frames;
/* VM address of the grant status array */
grant_status_t *grant_status;
/* Protects the free list and the tables above */
kmutex_t grant_lock;

static grant_ref_t xengnt_get_entry(void);
static void xengnt_free_entry(grant_ref_t);
static int xengnt_more_entries(void);
static int xengnt_map_status(void);

void
xengnt_init(void)
{
	struct gnttab_query_size query;
	int rc;
	int nr_grant_entries;
	int i;

	query.dom = DOMID_SELF;
	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
	if ((rc < 0) || (query.status != GNTST_okay))
		gnt_max_grant_frames = 4; /* Legacy max number of frames */
	else
		gnt_max_grant_frames = query.max_nr_frames;

	/*
	 * Always allocate the maximum number of grant frames; never expand
	 * at run time.
	 */
	gnt_nr_grant_frames = gnt_max_grant_frames;

	nr_grant_entries =
	    gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE;

	grant_table = (void *)uvm_km_alloc(kernel_map,
	    gnt_max_grant_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY);
	if (grant_table == NULL)
		panic("xengnt_init() table no VM space");

	gnt_entries = kmem_alloc((nr_grant_entries + 1) * sizeof(grant_ref_t),
	    KM_SLEEP);
	for (i = 0; i <= nr_grant_entries; i++)
		gnt_entries[i] = XENGNT_NO_ENTRY;

	gnt_status_frames =
	    round_page(nr_grant_entries * sizeof(grant_status_t)) / PAGE_SIZE;
	grant_status = (void *)uvm_km_alloc(kernel_map,
	    gnt_status_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY);
	if (grant_status == NULL)
		panic("xengnt_init() status no VM space");

	mutex_init(&grant_lock, MUTEX_DEFAULT, IPL_VM);

	xengnt_resume();
}
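
/*
 * Worked example (illustrative only): if the hypervisor reports
 * query.max_nr_frames == 32 and a frame holds 256 v2 entries (4 KiB
 * pages), the table provides 32 * 256 = 8192 grant references, and
 * gnt_status_frames is round_page(8192 * 2) / 4096 = 4 status frames.
 */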

/*
 * Resume grant table state
 */
bool
xengnt_resume(void)
{
	int rc;
	int previous_nr_grant_frames = gnt_nr_grant_frames;

	struct gnttab_set_version gntversion;
	gntversion.version = 2;
	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gntversion, 1);
	if (rc < 0 || gntversion.version != 2)
		panic("GNTTABOP_set_version 2 failed %d", rc);

	last_gnt_entry = 0;
	gnt_nr_grant_frames = 0;

	mutex_enter(&grant_lock);
	while (gnt_nr_grant_frames < previous_nr_grant_frames) {
		if (xengnt_more_entries() != 0)
			panic("xengnt_resume: can't restore grant frames");
	}
	xengnt_map_status();
	mutex_exit(&grant_lock);
	return true;
}

/*
 * Suspend grant table state
 */
bool
xengnt_suspend(void)
{
	int i;

	mutex_enter(&grant_lock);
	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);

	for (i = 0; i < last_gnt_entry; i++) {
		/* invalidate all grant entries (necessary for resume) */
		gnt_entries[i] = XENGNT_NO_ENTRY;
	}

	/* Remove virtual => machine mapping for grant table */
	pmap_kremove((vaddr_t)grant_table, gnt_nr_grant_frames * PAGE_SIZE);

	/* Remove virtual => machine mapping for status table */
	pmap_kremove((vaddr_t)grant_status, gnt_status_frames * PAGE_SIZE);

	pmap_update(pmap_kernel());
	mutex_exit(&grant_lock);
	return true;
}

/*
 * Get status frames and enter them into the VA space.
 */
static int
xengnt_map_status(void)
{
	uint64_t *pages;
	size_t sz;
	KASSERT(mutex_owned(&grant_lock));

	sz = gnt_status_frames * sizeof(*pages);
	pages = kmem_alloc(sz, KM_NOSLEEP);
	if (pages == NULL)
		return ENOMEM;

#ifdef XENPV
	gnttab_get_status_frames_t getstatus;
	int err;

	getstatus.dom = DOMID_SELF;
	getstatus.nr_frames = gnt_status_frames;
	set_xen_guest_handle(getstatus.frame_list, pages);

	/*
	 * get the status frames, and return the list of their machine
	 * frame numbers in 'pages'
	 */
	if ((err = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
	    &getstatus, 1)) != 0)
		panic("%s: get_status_frames failed: %d", __func__, err);
	if (getstatus.status != GNTST_okay) {
		aprint_error("%s: get_status_frames returned %d\n",
		    __func__, getstatus.status);
		kmem_free(pages, sz);
		return ENOMEM;
	}
#else /* XENPV */
	for (int i = 0; i < gnt_status_frames; i++) {
		struct vm_page *pg;
		struct xen_add_to_physmap xmap;

		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO);
		pages[i] = atop(uvm_vm_page_to_phys(pg));

		xmap.domid = DOMID_SELF;
		xmap.space = XENMAPSPACE_grant_table;
		xmap.idx = i | XENMAPIDX_grant_table_status;
		xmap.gpfn = pages[i];

		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap) < 0)
			panic("%s: Unable to add status frames\n", __func__);
	}
#endif /* XENPV */
	/*
	 * map between status_table addresses and the machine addresses of
	 * the status table frames
	 */
	for (int i = 0; i < gnt_status_frames; i++) {
		pmap_kenter_ma(((vaddr_t)grant_status) + i * PAGE_SIZE,
		    ((paddr_t)pages[i]) << PAGE_SHIFT,
		    VM_PROT_WRITE, 0);
	}
	pmap_update(pmap_kernel());

	kmem_free(pages, sz);
	return 0;
}

/*
 * Add another page to the grant table
 * Returns 0 on success, ENOMEM on failure
 */
static int
xengnt_more_entries(void)
{
	gnttab_setup_table_t setup;
	u_long *pages;
	int nframes_new = gnt_nr_grant_frames + 1;
	int i, start_gnt;
	size_t sz;
	KASSERT(mutex_owned(&grant_lock));

	if (gnt_nr_grant_frames == gnt_max_grant_frames)
		return ENOMEM;

	sz = nframes_new * sizeof(*pages);
	pages = kmem_alloc(sz, KM_NOSLEEP);
	if (pages == NULL)
		return ENOMEM;

	if (xen_feature(XENFEAT_auto_translated_physmap)) {
		/*
		 * Note: Although we allocate space for the entire
		 * table, in this mode we only update one entry at a
		 * time.
		 */
		struct vm_page *pg;
		struct xen_add_to_physmap xmap;

		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO);
		pages[gnt_nr_grant_frames] = atop(uvm_vm_page_to_phys(pg));

		xmap.domid = DOMID_SELF;
		xmap.space = XENMAPSPACE_grant_table;
		xmap.idx = gnt_nr_grant_frames;
		xmap.gpfn = pages[gnt_nr_grant_frames];

		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap) < 0)
			panic("%s: Unable to add grant frames\n", __func__);

	} else {
		setup.dom = DOMID_SELF;
		setup.nr_frames = nframes_new;
		set_xen_guest_handle(setup.frame_list, pages);

		/*
		 * setup the grant table, made of nframes_new frames,
		 * and return the list of the frames' machine frame
		 * numbers in 'pages'
		 */
		if (HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0)
			panic("%s: setup table failed", __func__);
		if (setup.status != GNTST_okay) {
			aprint_error("%s: setup table returned %d\n",
			    __func__, setup.status);
			kmem_free(pages, sz);
			return ENOMEM;
		}
	}

	DPRINTF(("xengnt_more_entries: map 0x%lx -> %p\n",
	    pages[gnt_nr_grant_frames],
	    (char *)grant_table + gnt_nr_grant_frames * PAGE_SIZE));

	/*
	 * map between grant_table addresses and the machine addresses of
	 * the grant table frames
	 */
	pmap_kenter_ma(((vaddr_t)grant_table) + gnt_nr_grant_frames * PAGE_SIZE,
	    ((paddr_t)pages[gnt_nr_grant_frames]) << PAGE_SHIFT,
	    VM_PROT_WRITE, 0);
	pmap_update(pmap_kernel());

	/*
	 * add the grant entries associated with the new grant table frame
	 * and mark them as free. Skip the first grant entries (0 to 8)
	 * since they are reserved for use by the tools.
	 */
	start_gnt = (gnt_nr_grant_frames * NR_GRANT_ENTRIES_PER_PAGE) <
	            (NR_RESERVED_ENTRIES + 1) ?
	            (NR_RESERVED_ENTRIES + 1) :
	            (gnt_nr_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
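	/*
	 * Illustrative example (assuming 4 KiB pages, i.e. 256 v2 entries
	 * per frame): for the first frame start_gnt is 9, skipping the
	 * reserved entries 0-8; for any later frame it is simply
	 * gnt_nr_grant_frames * 256, so nothing is skipped.
	 */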
	for (i = start_gnt;
	    i < nframes_new * NR_GRANT_ENTRIES_PER_PAGE;
	    i++) {
		KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
		gnt_entries[last_gnt_entry] = i;
		last_gnt_entry++;
	}
	gnt_nr_grant_frames = nframes_new;
	kmem_free(pages, sz);
	return 0;
}

/*
 * Returns a reference to the first free entry in the grant table
 */
static grant_ref_t
xengnt_get_entry(void)
{
	grant_ref_t entry;
	static struct timeval xengnt_nonmemtime;
	static const struct timeval xengnt_nonmemintvl = {5,0};

	KASSERT(mutex_owned(&grant_lock));

	if (__predict_false(last_gnt_entry == 0)) {
		if (ratecheck(&xengnt_nonmemtime, &xengnt_nonmemintvl))
			printf("xengnt_get_entry: out of grant "
			    "table entries\n");
		return XENGNT_NO_ENTRY;
	}
	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
	last_gnt_entry--;
	entry = gnt_entries[last_gnt_entry];
	gnt_entries[last_gnt_entry] = XENGNT_NO_ENTRY;
	KASSERT(entry != XENGNT_NO_ENTRY && entry > NR_RESERVED_ENTRIES);
	KASSERT(last_gnt_entry >= 0);
	KASSERT(last_gnt_entry <= gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
	return entry;
}

/*
 * Mark the grant table entry as free
 */
static void
xengnt_free_entry(grant_ref_t entry)
{
	mutex_enter(&grant_lock);
	KASSERT(entry > NR_RESERVED_ENTRIES);
	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
	KASSERT(last_gnt_entry >= 0);
	KASSERT(last_gnt_entry <= gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
	gnt_entries[last_gnt_entry] = entry;
	last_gnt_entry++;
	mutex_exit(&grant_lock);
}

int
xengnt_grant_access(domid_t dom, paddr_t ma, int ro, grant_ref_t *entryp)
{
	mutex_enter(&grant_lock);

	*entryp = xengnt_get_entry();
	if (__predict_false(*entryp == XENGNT_NO_ENTRY)) {
		mutex_exit(&grant_lock);
		return ENOMEM;
	}

	grant_table[*entryp].full_page.frame = ma >> PAGE_SHIFT;
	grant_table[*entryp].hdr.domid = dom;
	/*
	 * ensure that the above values are globally visible before
	 * access to the frame is permitted (which happens when the
	 * flags are set below)
	 */
	xen_rmb();
	grant_table[*entryp].hdr.flags =
	    GTF_permit_access | (ro ? GTF_readonly : 0);
	mutex_exit(&grant_lock);
	return 0;
}
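
/*
 * Illustrative usage sketch (not part of this file): a hypothetical
 * frontend driver granting one page of a shared ring to its backend
 * domain and revoking the grant on detach.  'backend_domid' and
 * 'ring_ma' are assumed to come from xenstore and bus_dma respectively.
 *
 *	grant_ref_t ref;
 *	if (xengnt_grant_access(backend_domid, ring_ma, 0, &ref) != 0)
 *		return ENOMEM;
 *	... advertise 'ref' to the backend, e.g. via xenstore ...
 *	... on detach, once xengnt_status(ref) reports no access ...
 *	xengnt_revoke_access(ref);
 */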

void
xengnt_revoke_access(grant_ref_t entry)
{
	grant_table[entry].hdr.flags = 0;
	xen_mb();	/* Concurrent access by hypervisor */

	if (__predict_false((grant_status[entry] & (GTF_reading|GTF_writing))
	    != 0)) {
		printf("xengnt_revoke_access(%u): still in use\n",
		    entry);
	} else {
		/*
		 * The read of grant_status needs to have acquire semantics.
		 * Reads already have that on x86, so we need only protect
		 * against compiler reordering. A full barrier may be needed
		 * on other architectures.
		 */
		__insn_barrier();
	}
	xengnt_free_entry(entry);
}

int
xengnt_status(grant_ref_t entry)
{
	return grant_status[entry] & (GTF_reading|GTF_writing);
}
445