/*	$NetBSD: xen_pmap.c,v 1.8 2011/11/08 17:16:52 cherry Exp $	*/

/*
 * Copyright (c) 2007 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright 2001 (c) Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.8 2011/11/08 17:16:52 cherry Exp $");

#include "opt_user_ldt.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_xen.h"
#if !defined(__x86_64__)
#include "opt_kstack_dr0.h"
#endif /* !defined(__x86_64__) */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pool.h>
#include <sys/kernel.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <sys/xcall.h>

#include <uvm/uvm.h>

#include <dev/isa/isareg.h>

#include <machine/specialreg.h>
#include <machine/gdt.h>
#include <machine/isa_machdep.h>
#include <machine/cpuvar.h>

#include <x86/pmap.h>
#include <x86/pmap_pv.h>

#include <x86/i82489reg.h>
#include <x86/i82489var.h>

#ifdef XEN
#include <xen/xen3-public/xen.h>
#include <xen/hypervisor.h>
#endif

#define COUNT(x)	/* nothing */

static pd_entry_t * const alternate_pdes[] = APDES_INITIALIZER;
extern pd_entry_t * const normal_pdes[];

extern paddr_t pmap_pa_start;	/* PA of first physical page for this domain */
extern paddr_t pmap_pa_end;	/* PA of last physical page for this domain */

void
pmap_apte_flush(struct pmap *pmap)
{

	KASSERT(kpreempt_disabled());

	/*
	 * Flush the APTE mapping from all other CPUs that
	 * are using the pmap we are using (whose APTE space
	 * is the one we've just modified).
	 *
	 * XXXthorpej -- find a way to defer the IPI.
	 */
	pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, TLBSHOOT_APTE);
	pmap_tlb_shootnow();
}

/*
 * Unmap the content of APDP PDEs
 */
void
pmap_unmap_apdp(void)
{
	int i;

	for (i = 0; i < PDP_SIZE; i++) {
		pmap_pte_set(APDP_PDE+i, 0);
#if defined (PAE)
		/*
		 * For PAE, there are two places where alternative recursive
		 * mappings could be found with Xen:
		 * - in the L2 shadow pages
		 * - the "real" L2 kernel page (pmap_kl2pd), which is unique
		 *   and static.
		 * We first clear the APDP for the current pmap. As the L2
		 * kernel page is unique, we only need to do it once for
		 * all pmaps.
		 */
		pmap_pte_set(APDP_PDE_SHADOW+i, 0);
#endif
	}
}

/*
 * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
 *
 * => we lock enough pmaps to keep things locked in
 * => must be undone with pmap_unmap_ptes before returning
 */

void
pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2,
	      pd_entry_t **ptepp, pd_entry_t * const **pdeppp)
{
	pd_entry_t opde, npde;
	struct pmap *ourpmap;
	struct cpu_info *ci;
	struct lwp *l;
	bool iscurrent;
	uint64_t ncsw;
	int s;

	/* the kernel's pmap is always accessible */
	if (pmap == pmap_kernel()) {
		*pmap2 = NULL;
		*ptepp = PTE_BASE;
		*pdeppp = normal_pdes;
		return;
	}
	KASSERT(kpreempt_disabled());

 retry:
	l = curlwp;
	ncsw = l->l_ncsw;
	ourpmap = NULL;
	ci = curcpu();
#if defined(__x86_64__)
	/*
	 * curmap can only be pmap_kernel so at this point
	 * pmap_is_curpmap is always false
	 */
	iscurrent = 0;
	ourpmap = pmap_kernel();
#else /* __x86_64__*/
	if (ci->ci_want_pmapload &&
	    vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) {
		pmap_load();
		if (l->l_ncsw != ncsw)
			goto retry;
	}
	iscurrent = pmap_is_curpmap(pmap);
	/* if curpmap then we are always mapped */
	if (iscurrent) {
		mutex_enter(pmap->pm_lock);
		*pmap2 = NULL;
		*ptepp = PTE_BASE;
		*pdeppp = normal_pdes;
		goto out;
	}
	ourpmap = ci->ci_pmap;
#endif /* __x86_64__ */

	/* need to lock both curpmap and pmap: use ordered locking */
	pmap_reference(ourpmap);
	if ((uintptr_t) pmap < (uintptr_t) ourpmap) {
		mutex_enter(pmap->pm_lock);
		mutex_enter(ourpmap->pm_lock);
	} else {
		mutex_enter(ourpmap->pm_lock);
		mutex_enter(pmap->pm_lock);
	}

	if (l->l_ncsw != ncsw)
		goto unlock_and_retry;

	/* need to load a new alternate pt space into curpmap? */
	COUNT(apdp_pde_map);
	opde = *APDP_PDE;
	if (!pmap_valid_entry(opde) ||
	    pmap_pte2pa(opde) != pmap_pdirpa(pmap, 0)) {
		int i;
		s = splvm();
		/* Make recursive entry usable in user PGD */
		for (i = 0; i < PDP_SIZE; i++) {
			npde = pmap_pa2pte(
			    pmap_pdirpa(pmap, i * NPDPG)) | PG_k | PG_V;
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pmap, PDIR_SLOT_PTE + i)),
			    npde);
			xpq_queue_pte_update(xpmap_ptetomach(&APDP_PDE[i]),
			    npde);
#ifdef PAE
			/* update shadow entry too */
			xpq_queue_pte_update(
			    xpmap_ptetomach(&APDP_PDE_SHADOW[i]), npde);
#endif /* PAE */
			xpq_queue_invlpg(
			    (vaddr_t)&pmap->pm_pdir[PDIR_SLOT_PTE + i]);
		}
		if (pmap_valid_entry(opde))
			pmap_apte_flush(ourpmap);
		splx(s);
	}
	*pmap2 = ourpmap;
	*ptepp = APTE_BASE;
	*pdeppp = alternate_pdes;
	KASSERT(l->l_ncsw == ncsw);
#if !defined(__x86_64__)
 out:
#endif
	/*
	 * might have blocked, need to retry?
	 */
	if (l->l_ncsw != ncsw) {
 unlock_and_retry:
		if (ourpmap != NULL) {
			mutex_exit(ourpmap->pm_lock);
			pmap_destroy(ourpmap);
		}
		mutex_exit(pmap->pm_lock);
		goto retry;
	}
}

/*
 * pmap_unmap_ptes: unlock the PTE mapping of "pmap"
 */

void
pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2)
{

	if (pmap == pmap_kernel()) {
		return;
	}
	KASSERT(kpreempt_disabled());
	if (pmap2 == NULL) {
		mutex_exit(pmap->pm_lock);
	} else {
#if defined(__x86_64__)
		KASSERT(pmap2 == pmap_kernel());
#else
		KASSERT(curcpu()->ci_pmap == pmap2);
#endif
#if defined(MULTIPROCESSOR)
		pmap_unmap_apdp();
		pmap_pte_flush();
		pmap_apte_flush(pmap2);
#endif /* MULTIPROCESSOR */
		COUNT(apdp_pde_unmap);
		mutex_exit(pmap->pm_lock);
		mutex_exit(pmap2->pm_lock);
		pmap_destroy(pmap2);
	}
}

int
pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
{
	paddr_t ma;

	if (__predict_false(pa < pmap_pa_start || pmap_pa_end <= pa)) {
		ma = pa; /* XXX hack */
	} else {
		ma = xpmap_ptom(pa);
	}

	return pmap_enter_ma(pmap, va, ma, pa, prot, flags, DOMID_SELF);
}

/*
 * pmap_kenter_ma: enter a kernel mapping without R/M (pv_entry) tracking
 *
 * => no need to lock anything, assume va is already allocated
 * => should be faster than normal pmap enter function
 * => we expect a MACHINE address
 */

void
pmap_kenter_ma(vaddr_t va, paddr_t ma, vm_prot_t prot, u_int flags)
{
	pt_entry_t *pte, opte, npte;

	if (va < VM_MIN_KERNEL_ADDRESS)
		pte = vtopte(va);
	else
		pte = kvtopte(va);

	npte = ma | ((prot & VM_PROT_WRITE) ? PG_RW : PG_RO) |
	     PG_V | PG_k;
	if (flags & PMAP_NOCACHE)
		npte |= PG_N;

	if ((cpu_feature[2] & CPUID_NOX) && !(prot & VM_PROT_EXECUTE))
		npte |= PG_NX;

	opte = pmap_pte_testset(pte, npte); /* zap! */

	if (pmap_valid_entry(opte)) {
#if defined(MULTIPROCESSOR)
		kpreempt_disable();
		pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER);
		kpreempt_enable();
#else
		/* Don't bother deferring in the single CPU case. */
		pmap_update_pg(va);
#endif
	}
}

/*
 * pmap_extract_ma: extract a MA for the given VA
 */

bool
pmap_extract_ma(struct pmap *pmap, vaddr_t va, paddr_t *pap)
{
	pt_entry_t *ptes, pte;
	pd_entry_t pde;
	pd_entry_t * const *pdes;
	struct pmap *pmap2;

	kpreempt_disable();
	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
	if (!pmap_pdes_valid(va, pdes, &pde)) {
		pmap_unmap_ptes(pmap, pmap2);
		kpreempt_enable();
		return false;
	}

	pte = ptes[pl1_i(va)];
	pmap_unmap_ptes(pmap, pmap2);
	kpreempt_enable();

	if (__predict_true((pte & PG_V) != 0)) {
		if (pap != NULL)
			*pap = (pte & PG_FRAME) | (va & (NBPD_L1 - 1));
		return true;
	}

	return false;
}
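
/*
 * Illustrative sketch (not part of the original source): the functions
 * above distinguish domain-physical addresses (PA) from machine addresses
 * (MA).  pmap_enter() takes a PA and converts it with xpmap_ptom(), while
 * pmap_kenter_ma() and pmap_extract_ma() operate on MAs directly, which is
 * what a caller would use for a frame this domain does not own; the call
 * site and the "mfn" variable below are hypothetical:
 *
 *	paddr_t ma = (paddr_t)mfn << PAGE_SHIFT;	// foreign/device MFN
 *	pmap_kenter_ma(va, ma, VM_PROT_READ | VM_PROT_WRITE, 0);
 *	...
 *	if (pmap_extract_ma(pmap_kernel(), va, &ma))
 *		// ma now holds the machine address backing va
 */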

/*
 * Flush all APDP entries found in pmaps.
 * Required during Xen save/restore operations, as Xen does not
 * handle alternative recursive mappings properly.
 */
void
pmap_unmap_all_apdp_pdes(void)
{

	int i;
	int s;
	struct pmap *pm;

	s = splvm();

	pmap_unmap_apdp();

	mutex_enter(&pmaps_lock);
	/*
	 * Set APDP entries to 0 in all pmaps.
	 * Note that for PAE kernels, this only clears the APDP entries
	 * found in the L2 shadow pages, as pmap_pdirpa() is used to obtain
	 * the PA of the pmap->pm_pdir[] pages (forming the 4 contiguous
	 * pages of the PAE PD: 3 for user space, 1 for the L2 kernel
	 * shadow page).
	 */
	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_APTE + i)),
			    0);
		}
	}
	mutex_exit(&pmaps_lock);

	xpq_flush_queue();

	splx(s);

}

#ifdef PAE
/*
 * NetBSD uses L2 shadow pages to support PAE with Xen. However, Xen does not
 * handle them correctly during save/restore, leading to incorrect page
 * tracking and pinning during restore.
 * For save/restore to succeed, two functions are introduced:
 * - pmap_map_recursive_entries(), used by resume code to set the recursive
 *   mapping entries to their correct value
 * - pmap_unmap_recursive_entries(), used by suspend code to clear all
 *   PDIR_SLOT_PTE entries
 */
void
pmap_map_recursive_entries(void)
{

	int i;
	struct pmap *pm;

	mutex_enter(&pmaps_lock);

	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_PTE + i)),
			    xpmap_ptom((pm)->pm_pdirpa[i]) | PG_V);
		}
	}

	mutex_exit(&pmaps_lock);

	for (i = 0; i < PDP_SIZE; i++) {
		xpq_queue_pte_update(
		    xpmap_ptom(pmap_pdirpa(pmap_kernel(), PDIR_SLOT_PTE + i)),
		    xpmap_ptom(pmap_kernel()->pm_pdirpa[i]) | PG_V);
	}

	xpq_flush_queue();
}

void
pmap_unmap_recursive_entries(void)
{

	int i;
	struct pmap *pm;

	pmap_invalidate_pool_caches();

	mutex_enter(&pmaps_lock);

	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_PTE + i)), 0);
		}
	}

	mutex_exit(&pmaps_lock);

	/* do it for pmap_kernel() too! */
	for (i = 0; i < PDP_SIZE; i++)
		xpq_queue_pte_update(
		    xpmap_ptom(pmap_pdirpa(pmap_kernel(), PDIR_SLOT_PTE + i)),
		    0);

	xpq_flush_queue();

}
#endif /* PAE */
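
/*
 * Illustrative sketch (not part of the original source): the PAE helpers
 * above are meant to bracket a Xen suspend/resume cycle, roughly in this
 * order; the surrounding suspend path shown here is an assumption, not a
 * quote of the actual machine-dependent code:
 *
 *	pmap_unmap_recursive_entries();	// before handing control to Xen
 *	... suspend hypercall; domain is saved and later restored ...
 *	pmap_map_recursive_entries();	// rebuild PDIR_SLOT_PTE entries
 *
 * so that no recursive L2 entry is live while the hypervisor validates and
 * re-pins the page tables of the restored domain.
 */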