/*	$NetBSD: xen_pmap.c,v 1.40 2022/08/20 23:48:51 riastradh Exp $	*/

/*
 * Copyright (c) 2007 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright 2001 (c) Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.40 2022/08/20 23:48:51 riastradh Exp $");

#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_xen.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pool.h>
#include <sys/kernel.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/intr.h>

#include <uvm/uvm.h>

#include <dev/isa/isareg.h>

#include <machine/specialreg.h>
#include <machine/gdt.h>
#include <machine/isa_machdep.h>
#include <machine/cpuvar.h>
#include <machine/pmap_private.h>

#include <x86/pmap_pv.h>

#include <x86/i82489reg.h>
#include <x86/i82489var.h>

#include <xen/include/public/xen.h>
#include <xen/hypervisor.h>
#include <xen/xenpmap.h>

#define COUNT(x)	/* nothing */

extern pd_entry_t * const normal_pdes[];

extern paddr_t pmap_pa_start; /* PA of first physical page for this domain */
extern paddr_t pmap_pa_end;   /* PA of last physical page for this domain */

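/*
 * pmap_enter: wrapper around pmap_enter_ma.  The physical address is
 * translated to a machine address when it falls inside this domain's
 * physical range; anything outside [pmap_pa_start, pmap_pa_end) is
 * assumed to already be a machine address (e.g. a device frame) and is
 * passed through unchanged (see the XXX below).
 */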
int
pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
{
	paddr_t ma;

	if (__predict_false(pa < pmap_pa_start || pmap_pa_end <= pa)) {
		ma = pa; /* XXX hack */
	} else {
		ma = xpmap_ptom(pa);
	}

	return pmap_enter_ma(pmap, va, ma, pa, prot, flags, DOMID_SELF);
}

/*
 * pmap_kenter_ma: enter a kernel mapping without R/M (pv_entry) tracking
 *
 * => no need to lock anything, assume va is already allocated
 * => should be faster than normal pmap enter function
 * => we expect a MACHINE address
 */

void
pmap_kenter_ma(vaddr_t va, paddr_t ma, vm_prot_t prot, u_int flags)
{
	pt_entry_t *pte, opte, npte;

	if (va < VM_MIN_KERNEL_ADDRESS)
		pte = vtopte(va);
	else
		pte = kvtopte(va);

	npte = ma | ((prot & VM_PROT_WRITE) ? PTE_W : 0) | PTE_P;
	if (flags & PMAP_NOCACHE)
		npte |= PTE_PCD;

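	/*
	 * Mark the mapping non-executable when the CPU supports the NX
	 * bit and execute permission was not requested.
	 */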
	if ((cpu_feature[2] & CPUID_NOX) && !(prot & VM_PROT_EXECUTE))
		npte |= PTE_NX;

	opte = pmap_pte_testset(pte, npte); /* zap! */

	if (pmap_valid_entry(opte)) {
#if defined(MULTIPROCESSOR)
		if (__predict_false(x86_mp_online == false)) {
			pmap_update_pg(va);
		} else {
			kpreempt_disable();
			pmap_tlb_shootdown(pmap_kernel(), va, opte,
			    TLBSHOOT_KENTER);
			kpreempt_enable();
		}
#else
		/* Don't bother deferring in the single CPU case. */
		pmap_update_pg(va);
#endif
	}
}
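
/*
 * Illustrative usage sketch (not from the original source): map a
 * hypervisor-provided machine frame at a pre-allocated kernel VA, then
 * tear the mapping down again with pmap_kremove():
 *
 *	pmap_kenter_ma(va, ma, VM_PROT_READ | VM_PROT_WRITE, 0);
 *	... use the mapping ...
 *	pmap_kremove(va, PAGE_SIZE);
 *	pmap_update(pmap_kernel());
 */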

/*
 * pmap_extract_ma: extract a MA for the given VA
 */

bool
pmap_extract_ma(struct pmap *pmap, vaddr_t va, paddr_t *pap)
{
	pt_entry_t *ptes, pte;
	pd_entry_t pde;
	pd_entry_t * const *pdes;
	struct pmap *pmap2;
	int lvl;

	if (pmap != pmap_kernel()) {
		mutex_enter(&pmap->pm_lock);
	}
	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
	if (!pmap_pdes_valid(va, pdes, &pde, &lvl)) {
		pmap_unmap_ptes(pmap, pmap2);
		if (pmap != pmap_kernel()) {
			mutex_exit(&pmap->pm_lock);
		}
		return false;
	}

	KASSERT(lvl == 1);
	pte = ptes[pl1_i(va)];
	pmap_unmap_ptes(pmap, pmap2);
	if (pmap != pmap_kernel()) {
		mutex_exit(&pmap->pm_lock);
	}

	if (__predict_true((pte & PTE_P) != 0)) {
		if (pap != NULL)
			*pap = (pte & PTE_4KFRAME) | (va & (NBPD_L1 - 1));
		return true;
	}

	return false;
}
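
/*
 * Illustrative usage sketch (not from the original source): obtain the
 * machine address backing a kernel VA, e.g. before handing it to a
 * hypercall that expects machine addresses.  A false return means the
 * VA is not mapped:
 *
 *	paddr_t ma;
 *
 *	if (!pmap_extract_ma(pmap_kernel(), va, &ma))
 *		return EFAULT;
 */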

/*
 * Xen pmap's handlers for save/restore
 */
void
pmap_xen_suspend(void)
{
	pmap_unmap_recursive_entries();

	xpq_flush_queue();
}

void
pmap_xen_resume(void)
{
	pmap_map_recursive_entries();

	xpq_flush_queue();
}

/*
 * NetBSD uses L2 shadow pages to support PAE with Xen. However, Xen does not
 * handle them correctly during save/restore, leading to incorrect page
 * tracking and pinning during restore.
 * For save/restore to succeed, two functions are introduced:
 * - pmap_map_recursive_entries(), used by resume code to set the recursive
 *   mapping entries to their correct value
 * - pmap_unmap_recursive_entries(), used by suspend code to clear all
 *   PDIR_SLOT_PTE entries
 */
void
pmap_map_recursive_entries(void)
{
	int i;
	struct pmap *pm;

	mutex_enter(&pmaps_lock);
	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_PTE + i)),
			    xpmap_ptom(pm->pm_pdirpa[i]) | PTE_P);
		}
	}
	mutex_exit(&pmaps_lock);

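	/* pmap_kernel() is not on the pmaps list, handle it separately. */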
	for (i = 0; i < PDP_SIZE; i++) {
		xpq_queue_pte_update(
		    xpmap_ptom(pmap_pdirpa(pmap_kernel(), PDIR_SLOT_PTE + i)),
		    xpmap_ptom(pmap_kernel()->pm_pdirpa[i]) | PTE_P);
	}
}

/*
 * Unmap recursive entries found in pmaps. Required during Xen
 * save/restore operations, as Xen does not handle recursive mappings
 * properly.
 */
void
pmap_unmap_recursive_entries(void)
{
	int i;
	struct pmap *pm;

	/*
	 * Invalidate pmap_cache, as it contains L2-pinned objects with
	 * recursive entries.
	 * XXX jym@ : find a way to drain per-CPU caches too;
	 * pool_cache_invalidate() does not do that.
	 */
	pool_cache_invalidate(&pmap_cache);

	mutex_enter(&pmaps_lock);
	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_PTE + i)), 0);
		}
	}
	mutex_exit(&pmaps_lock);

	/* do it for pmap_kernel() too! */
	for (i = 0; i < PDP_SIZE; i++) {
		xpq_queue_pte_update(
		    xpmap_ptom(pmap_pdirpa(pmap_kernel(), PDIR_SLOT_PTE + i)),
		    0);
	}
}

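/*
 * pmap_kpm_setpte: push one kernel page-directory entry from the given
 * pmap into this CPU's shadow page directory (ci_kpm_pdir), via the
 * hypervisor update queue.  The caller must hold pmap->pm_lock and
 * ci->ci_kpm_mtx, as asserted below.
 */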
static __inline void
pmap_kpm_setpte(struct cpu_info *ci, struct pmap *pmap, int index)
{
	KASSERT(mutex_owned(&pmap->pm_lock));
	KASSERT(mutex_owned(&ci->ci_kpm_mtx));
	if (pmap == pmap_kernel()) {
		KASSERT(index >= PDIR_SLOT_KERN);
	}

#ifdef __x86_64__
	xpq_queue_pte_update(
	    xpmap_ptetomach(&ci->ci_kpm_pdir[index]),
	    pmap->pm_pdir[index]);
#else
	xpq_queue_pte_update(
	    xpmap_ptetomach(&ci->ci_kpm_pdir[l2tol2(index)]),
	    pmap->pm_pdir[index]);
#endif

	xpq_flush_queue();
}

/*
 * Synchronise shadow pdir with the pmap on all cpus on which it is
 * loaded.
 */
void
xen_kpm_sync(struct pmap *pmap, int index)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(pmap != NULL);
	KASSERT(kpreempt_disabled());

	pmap_pte_flush();

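	/*
	 * Unlocked scan first; each per-CPU result is re-checked under
	 * ci_kpm_mtx before the shadow entry is written.
	 */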
	for (CPU_INFO_FOREACH(cii, ci)) {
		if (ci == NULL) {
			continue;
		}
		cpuid_t cid = cpu_index(ci);
		if (pmap != pmap_kernel() &&
		    !kcpuset_isset(pmap->pm_xen_ptp_cpus, cid))
			continue;

		/* take the lock and check again */
		mutex_enter(&ci->ci_kpm_mtx);
		if (pmap == pmap_kernel() ||
		    kcpuset_isset(pmap->pm_xen_ptp_cpus, cid)) {
			pmap_kpm_setpte(ci, pmap, index);
		}
		mutex_exit(&ci->ci_kpm_mtx);
	}
}