/*	$NetBSD: xen_pmap.c,v 1.22 2012/06/24 18:31:53 jym Exp $	*/

/*
 * Copyright (c) 2007 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright 2001 (c) Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.22 2012/06/24 18:31:53 jym Exp $");

#include "opt_user_ldt.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_xen.h"
#if !defined(__x86_64__)
#include "opt_kstack_dr0.h"
#endif /* !defined(__x86_64__) */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pool.h>
#include <sys/kernel.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <sys/xcall.h>

#include <uvm/uvm.h>

#include <dev/isa/isareg.h>

#include <machine/specialreg.h>
#include <machine/gdt.h>
#include <machine/isa_machdep.h>
#include <machine/cpuvar.h>

#include <x86/pmap.h>
#include <x86/pmap_pv.h>

#include <x86/i82489reg.h>
#include <x86/i82489var.h>

#include <xen/xen-public/xen.h>
#include <xen/hypervisor.h>
#include <xen/xenpmap.h>

#define COUNT(x)	/* nothing */

extern pd_entry_t * const normal_pdes[];

extern paddr_t pmap_pa_start; /* PA of first physical page for this domain */
extern paddr_t pmap_pa_end;   /* PA of last physical page for this domain */

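/*
 * pmap_enter: enter a mapping for the given physical address
 *
 * => a PA outside this domain's [pmap_pa_start, pmap_pa_end) range is
 *    assumed to already be a machine address and is used as-is (the
 *    XXX hack below); anything else is translated with xpmap_ptom()
 *    before being handed down to pmap_enter_ma()
 */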
int
pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
{
	paddr_t ma;

	if (__predict_false(pa < pmap_pa_start || pmap_pa_end <= pa)) {
		ma = pa; /* XXX hack */
	} else {
		ma = xpmap_ptom(pa);
	}

	return pmap_enter_ma(pmap, va, ma, pa, prot, flags, DOMID_SELF);
}

/*
 * pmap_kenter_ma: enter a kernel mapping without R/M (pv_entry) tracking
 *
 * => no need to lock anything, assume va is already allocated
 * => should be faster than normal pmap enter function
 * => we expect a MACHINE address
 */

void
pmap_kenter_ma(vaddr_t va, paddr_t ma, vm_prot_t prot, u_int flags)
{
	pt_entry_t *pte, opte, npte;

	if (va < VM_MIN_KERNEL_ADDRESS)
		pte = vtopte(va);
	else
		pte = kvtopte(va);

	npte = ma | ((prot & VM_PROT_WRITE) ? PG_RW : PG_RO) |
	    PG_V | PG_k;
	if (flags & PMAP_NOCACHE)
		npte |= PG_N;

	if ((cpu_feature[2] & CPUID_NOX) && !(prot & VM_PROT_EXECUTE))
		npte |= PG_NX;

	opte = pmap_pte_testset(pte, npte); /* zap! */

	if (pmap_valid_entry(opte)) {
#if defined(MULTIPROCESSOR)
		kpreempt_disable();
		pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER);
		kpreempt_enable();
#else
		/* Don't bother deferring in the single CPU case. */
		pmap_update_pg(va);
#endif
	}
}
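
/*
 * Illustrative usage (a sketch of an assumed caller, not code from this
 * file): a machine frame handed out by the hypervisor, such as the
 * shared info page advertised in xen_start_info, could be entered at a
 * pre-allocated kernel VA like this; note that shared_info is already a
 * machine address, so no xpmap_ptom() translation is involved:
 *
 *	pmap_kenter_ma((vaddr_t)HYPERVISOR_shared_info,
 *	    xen_start_info.shared_info,
 *	    VM_PROT_READ | VM_PROT_WRITE, 0);
 */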

/*
 * pmap_extract_ma: extract a MA for the given VA
 */

bool
pmap_extract_ma(struct pmap *pmap, vaddr_t va, paddr_t *pap)
{
	pt_entry_t *ptes, pte;
	pd_entry_t pde;
	pd_entry_t * const *pdes;
	struct pmap *pmap2;

	kpreempt_disable();
	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
	if (!pmap_pdes_valid(va, pdes, &pde)) {
		pmap_unmap_ptes(pmap, pmap2);
		kpreempt_enable();
		return false;
	}

	pte = ptes[pl1_i(va)];
	pmap_unmap_ptes(pmap, pmap2);
	kpreempt_enable();

	if (__predict_true((pte & PG_V) != 0)) {
		if (pap != NULL)
			*pap = (pte & PG_FRAME) | (va & (NBPD_L1 - 1));
		return true;
	}

	return false;
}
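
/*
 * Illustrative usage (hypothetical caller, not code from this file):
 * a driver that must hand a machine address to the hypervisor or to a
 * backend can translate an already-mapped kernel VA as follows:
 *
 *	paddr_t ma;
 *
 *	if (!pmap_extract_ma(pmap_kernel(), (vaddr_t)buf, &ma))
 *		return EFAULT;
 *	(ma now holds the machine address backing buf)
 */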

/*
 * Xen pmap's handlers for save/restore
 */
void
pmap_xen_suspend(void)
{
	pmap_unmap_recursive_entries();

	xpq_flush_queue();
}

void
pmap_xen_resume(void)
{
	pmap_map_recursive_entries();

	xpq_flush_queue();
}
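
/*
 * Illustrative call order (assumed shape of the save/restore path, not
 * code from this file): the suspend logic is expected to bracket the
 * hypercall roughly as follows, so that no recursive entries are live
 * while Xen walks and repins the page tables:
 *
 *	pmap_xen_suspend();		(clear recursive PD entries)
 *	... HYPERVISOR_suspend() / domain save happens here ...
 *	pmap_xen_resume();		(re-establish recursive entries)
 */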

/*
 * NetBSD uses L2 shadow pages to support PAE with Xen. However, Xen does not
 * handle them correctly during save/restore, leading to incorrect page
 * tracking and pinning during restore.
 * For save/restore to succeed, two functions are introduced:
 * - pmap_map_recursive_entries(), used by resume code to set the recursive
 *   mapping entries to their correct value
 * - pmap_unmap_recursive_entries(), used by suspend code to clear all
 *   PDIR_SLOT_PTE entries
 */
void
pmap_map_recursive_entries(void)
{
	int i;
	struct pmap *pm;

	mutex_enter(&pmaps_lock);
	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_PTE + i)),
			    xpmap_ptom(pm->pm_pdirpa[i]) | PG_V);
		}
	}
	mutex_exit(&pmaps_lock);

	for (i = 0; i < PDP_SIZE; i++) {
		xpq_queue_pte_update(
		    xpmap_ptom(pmap_pdirpa(pmap_kernel(), PDIR_SLOT_PTE + i)),
		    xpmap_ptom(pmap_kernel()->pm_pdirpa[i]) | PG_V);
	}
}
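
/*
 * Background note on the entries handled above: pointing PDIR_SLOT_PTE
 * back at the page directory itself is the classic x86 recursive
 * mapping, which makes every page table appear in a fixed VA window so
 * a PTE can be reached with a plain array access:
 *
 *	pt_entry_t *pte = PTE_BASE + pl1_i(va);	(what vtopte(va) does)
 */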

/*
 * Unmap recursive entries found in pmaps. Required during Xen
 * save/restore operations, as Xen does not handle recursive mappings
 * properly.
 */
void
pmap_unmap_recursive_entries(void)
{
	int i;
	struct pmap *pm;

	/*
	 * Invalidate pmap_pdp_cache as it contains L2-pinned objects with
	 * recursive entries.
	 * XXX jym@ : find a way to drain per-CPU caches too;
	 * pool_cache_invalidate() does not do that.
	 */
	pool_cache_invalidate(&pmap_pdp_cache);

	mutex_enter(&pmaps_lock);
	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_PTE + i)), 0);
		}
	}
	mutex_exit(&pmaps_lock);

	/* do it for pmap_kernel() too! */
	for (i = 0; i < PDP_SIZE; i++) {
		xpq_queue_pte_update(
		    xpmap_ptom(pmap_pdirpa(pmap_kernel(), PDIR_SLOT_PTE + i)),
		    0);
	}
}

#if defined(PAE) || defined(__x86_64__)

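/*
 * pmap_kpm_setpte: propagate one kernel PD entry of pmap into the
 * per-CPU shadow page directory of ci (ci_kpm_pdir), then flush the
 * xpq queue so the hypervisor sees the update before the caller
 * releases ci_kpm_mtx
 */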
static __inline void
pmap_kpm_setpte(struct cpu_info *ci, struct pmap *pmap, int index)
{
	KASSERT(mutex_owned(pmap->pm_lock));
	KASSERT(mutex_owned(&ci->ci_kpm_mtx));
	if (pmap == pmap_kernel()) {
		KASSERT(index >= PDIR_SLOT_KERN);
	}
#ifdef PAE
	xpq_queue_pte_update(
		xpmap_ptetomach(&ci->ci_kpm_pdir[l2tol2(index)]),
		pmap->pm_pdir[index]);
#elif defined(__x86_64__)
	xpq_queue_pte_update(
		xpmap_ptetomach(&ci->ci_kpm_pdir[index]),
		pmap->pm_pdir[index]);
#endif /* PAE */
	xpq_flush_queue();
}

/*
 * Synchronise shadow pdir with the pmap on all cpus on which it is
 * loaded.
 */
void
xen_kpm_sync(struct pmap *pmap, int index)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(pmap != NULL);
	KASSERT(kpreempt_disabled());

	pmap_pte_flush();

	for (CPU_INFO_FOREACH(cii, ci)) {
		if (ci == NULL) {
			continue;
		}
		cpuid_t cid = cpu_index(ci);
		if (pmap != pmap_kernel() &&
		    !kcpuset_isset(pmap->pm_xen_ptp_cpus, cid))
			continue;

		/* take the lock and check again */
		mutex_enter(&ci->ci_kpm_mtx);
		if (pmap == pmap_kernel() ||
		    kcpuset_isset(pmap->pm_xen_ptp_cpus, cid)) {
			pmap_kpm_setpte(ci, pmap, index);
		}
		mutex_exit(&ci->ci_kpm_mtx);
	}
}
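
/*
 * Illustrative caller sketch (assumed, not code from this file): code
 * that installs a new kernel PD entry would typically do so with
 * preemption disabled and then propagate it to the per-CPU shadows:
 *
 *	kpreempt_disable();
 *	pmap_pte_set(&pmap_kernel()->pm_pdir[idx], npde);
 *	xen_kpm_sync(pmap_kernel(), idx);
 *	kpreempt_enable();
 */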

#endif /* PAE || __x86_64__ */