/*	$NetBSD: xen_pmap.c,v 1.41 2023/08/25 08:05:19 riastradh Exp $	*/

/*
 * Copyright (c) 2007 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright 2001 (c) Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.41 2023/08/25 08:05:19 riastradh Exp $");

#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_xen.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pool.h>
#include <sys/kernel.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/intr.h>

#include <uvm/uvm.h>

#include <dev/isa/isareg.h>

#include <machine/specialreg.h>
#include <machine/gdt.h>
#include <machine/isa_machdep.h>
#include <machine/cpuvar.h>
#include <machine/pmap_private.h>

#include <x86/machdep.h>
#include <x86/pmap_pv.h>

#include <x86/i82489reg.h>
#include <x86/i82489var.h>

#include <xen/include/public/xen.h>
#include <xen/hypervisor.h>
#include <xen/xenpmap.h>

#define COUNT(x)	/* nothing */

extern pd_entry_t * const normal_pdes[];

extern paddr_t pmap_pa_start;	/* PA of first physical page for this domain */
extern paddr_t pmap_pa_end;	/* PA of last physical page for this domain */
int
pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
{
	paddr_t ma;

	if (__predict_false(pa < pmap_pa_start || pmap_pa_end <= pa)) {
		ma = pa;	/* XXX hack */
	} else {
		ma = xpmap_ptom(pa);
	}

	return pmap_enter_ma(pmap, va, ma, pa, prot, flags, DOMID_SELF);
}

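/*
 * Illustrative use of the interface above (a sketch, not code from
 * this file): map a page through the normal MI pmap_enter() and let
 * the PA -> MA translation happen transparently.  "va" and "pa" are
 * assumptions here: a page-aligned kernel VA and a pseudo-physical
 * address owned by this domain.
 *
 *	int error;
 *
 *	error = pmap_enter(pmap_kernel(), va, pa,
 *	    VM_PROT_READ | VM_PROT_WRITE, PMAP_CANFAIL);
 *	if (error == 0)
 *		pmap_update(pmap_kernel());
 */
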
/*
 * pmap_kenter_ma: enter a kernel mapping without R/M (pv_entry) tracking
 *
 * => no need to lock anything, assume va is already allocated
 * => should be faster than normal pmap enter function
 * => we expect a MACHINE address
 */

void
pmap_kenter_ma(vaddr_t va, paddr_t ma, vm_prot_t prot, u_int flags)
{
	pt_entry_t *pte, opte, npte;

	if (va < VM_MIN_KERNEL_ADDRESS)
		pte = vtopte(va);
	else
		pte = kvtopte(va);

	npte = ma | ((prot & VM_PROT_WRITE) ? PTE_W : 0) | PTE_P;
	if (flags & PMAP_NOCACHE)
		npte |= PTE_PCD;

	if ((cpu_feature[2] & CPUID_NOX) && !(prot & VM_PROT_EXECUTE))
		npte |= PTE_NX;

	opte = pmap_pte_testset(pte, npte);	/* zap! */

	if (pmap_valid_entry(opte)) {
#if defined(MULTIPROCESSOR)
		if (__predict_false(x86_mp_online == false)) {
			pmap_update_pg(va);
		} else {
			kpreempt_disable();
			pmap_tlb_shootdown(pmap_kernel(), va, opte,
			    TLBSHOOT_KENTER);
			kpreempt_enable();
		}
#else
		/* Don't bother deferring in the single CPU case. */
		pmap_update_pg(va);
#endif
	}
}

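/*
 * Illustrative sketch (assumptions marked): enter an uncached,
 * writable mapping of a machine page, e.g. one shared with the
 * hypervisor.  "va" is assumed to be an already-allocated kernel VA
 * and "ma" a machine address; neither name exists in this file.
 * Unlike pmap_enter(), the mapping takes effect immediately and no
 * pv_entry is tracked.
 *
 *	pmap_kenter_ma(va, ma, VM_PROT_READ | VM_PROT_WRITE,
 *	    PMAP_NOCACHE);
 */
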
/*
 * pmap_extract_ma: extract a MA for the given VA
 */

bool
pmap_extract_ma(struct pmap *pmap, vaddr_t va, paddr_t *pap)
{
	pt_entry_t *ptes, pte;
	pd_entry_t pde;
	pd_entry_t * const *pdes;
	struct pmap *pmap2;
	int lvl;

	if (pmap != pmap_kernel()) {
		mutex_enter(&pmap->pm_lock);
	}
	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
	if (!pmap_pdes_valid(va, pdes, &pde, &lvl)) {
		pmap_unmap_ptes(pmap, pmap2);
		if (pmap != pmap_kernel()) {
			mutex_exit(&pmap->pm_lock);
		}
		return false;
	}

	KASSERT(lvl == 1);
	pte = ptes[pl1_i(va)];
	pmap_unmap_ptes(pmap, pmap2);
	if (pmap != pmap_kernel()) {
		mutex_exit(&pmap->pm_lock);
	}

	if (__predict_true((pte & PTE_P) != 0)) {
		if (pap != NULL)
			*pap = (pte & PTE_4KFRAME) | (va & (NBPD_L1 - 1));
		return true;
	}

	return false;
}

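/*
 * Illustrative sketch (not code from this file): recover the machine
 * address backing an already-mapped kernel VA, e.g. before handing
 * the frame to the hypervisor.  "va" is an assumption.
 *
 *	paddr_t ma;
 *
 *	if (!pmap_extract_ma(pmap_kernel(), va, &ma))
 *		panic("va %" PRIxVADDR " not mapped", va);
 */
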
/*
 * Xen pmap's handlers for save/restore
 */
void
pmap_xen_suspend(void)
{
	pmap_unmap_recursive_entries();

	xpq_flush_queue();
}

void
pmap_xen_resume(void)
{
	pmap_map_recursive_entries();

	xpq_flush_queue();
}

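/*
 * A sketch of the expected ordering, not code from this file: the
 * machine-dependent suspend path (which lives elsewhere) is assumed
 * to bracket the hypercall that actually suspends the domain with
 * the two handlers above:
 *
 *	pmap_xen_suspend();
 *	HYPERVISOR_suspend(...);	(argument elided; see the caller)
 *	pmap_xen_resume();
 */
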
/*
 * NetBSD uses L2 shadow pages to support PAE with Xen. However, Xen does not
 * handle them correctly during save/restore, leading to incorrect page
 * tracking and pinning during restore.
 * For save/restore to succeed, two functions are introduced:
 * - pmap_map_recursive_entries(), used by resume code to set the recursive
 *   mapping entries to their correct value
 * - pmap_unmap_recursive_entries(), used by suspend code to clear all
 *   PDIR_SLOT_PTE entries
 */
void
pmap_map_recursive_entries(void)
{
	int i;
	struct pmap *pm;

	mutex_enter(&pmaps_lock);
	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_PTE + i)),
			    xpmap_ptom((pm)->pm_pdirpa[i]) | PTE_P);
		}
	}
	mutex_exit(&pmaps_lock);

	for (i = 0; i < PDP_SIZE; i++) {
		xpq_queue_pte_update(
		    xpmap_ptom(pmap_pdirpa(pmap_kernel(), PDIR_SLOT_PTE + i)),
		    xpmap_ptom(pmap_kernel()->pm_pdirpa[i]) | PTE_P);
	}
}

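/*
 * A sketch of the invariant the loops above restore (illustrative
 * only, not an assertion this file makes): once the queued updates
 * are flushed, recursive slot i of every pmap again maps that pmap's
 * own page directory page i:
 *
 *	for (i = 0; i < PDP_SIZE; i++)
 *		KASSERT((pm->pm_pdir[PDIR_SLOT_PTE + i] & PTE_4KFRAME)
 *		    == xpmap_ptom(pm->pm_pdirpa[i]));
 */
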
/*
 * Unmap recursive entries found in pmaps. Required during Xen
 * save/restore operations, as Xen does not handle recursive mappings
 * properly.
 */
void
pmap_unmap_recursive_entries(void)
{
	int i;
	struct pmap *pm;

	/*
	 * Invalidate pmap_pdp_cache as it contains L2-pinned objects with
	 * recursive entries.
	 * XXX jym@ : find a way to drain per-CPU caches too;
	 * pool_cache_invalidate() does not do that.
	 */
	pool_cache_invalidate(&pmap_cache);

	mutex_enter(&pmaps_lock);
	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_PTE + i)), 0);
		}
	}
	mutex_exit(&pmaps_lock);

	/* do it for pmap_kernel() too! */
	for (i = 0; i < PDP_SIZE; i++) {
		xpq_queue_pte_update(
		    xpmap_ptom(pmap_pdirpa(pmap_kernel(), PDIR_SLOT_PTE + i)),
		    0);
	}
}

static __inline void
pmap_kpm_setpte(struct cpu_info *ci, struct pmap *pmap, int index)
{
	KASSERT(mutex_owned(&pmap->pm_lock));
	KASSERT(mutex_owned(&ci->ci_kpm_mtx));
	if (pmap == pmap_kernel()) {
		KASSERT(index >= PDIR_SLOT_KERN);
	}

#ifdef __x86_64__
	xpq_queue_pte_update(
	    xpmap_ptetomach(&ci->ci_kpm_pdir[index]),
	    pmap->pm_pdir[index]);
#else
	xpq_queue_pte_update(
	    xpmap_ptetomach(&ci->ci_kpm_pdir[l2tol2(index)]),
	    pmap->pm_pdir[index]);
#endif

	xpq_flush_queue();
}

/*
 * Synchronise shadow pdir with the pmap on all cpus on which it is
 * loaded.
 */
void
xen_kpm_sync(struct pmap *pmap, int index)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(pmap != NULL);
	KASSERT(kpreempt_disabled());

	pmap_pte_flush();

	for (CPU_INFO_FOREACH(cii, ci)) {
		if (ci == NULL) {
			continue;
		}
		cpuid_t cid = cpu_index(ci);
		if (pmap != pmap_kernel() &&
		    !kcpuset_isset(pmap->pm_xen_ptp_cpus, cid))
			continue;

		/* take the lock and check again */
		mutex_enter(&ci->ci_kpm_mtx);
		if (pmap == pmap_kernel() ||
		    kcpuset_isset(pmap->pm_xen_ptp_cpus, cid)) {
			pmap_kpm_setpte(ci, pmap, index);
		}
		mutex_exit(&ci->ci_kpm_mtx);
	}
}

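/*
 * Illustrative caller sketch (assumptions marked): after the kernel
 * PDE at slot "index" has been updated through the xpq machinery,
 * propagate it to every CPU's shadow directory.  Preemption must be
 * disabled across the call, as the KASSERT above demands; "index" is
 * a stand-in for the updated slot.
 *
 *	kpreempt_disable();
 *	xen_kpm_sync(pmap_kernel(), index);
 *	kpreempt_enable();
 */
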
/*
 * Under Xen, guest "physical" address ordering bears no fixed relation
 * to machine address ordering, so selecting a freelist by maximum
 * physical address is not meaningful here; always use the default
 * freelist.
 */
int
x86_select_freelist(uint64_t maxaddr)
{
	return VM_FREELIST_DEFAULT;
}