1*3446Smrj /* 2*3446Smrj * CDDL HEADER START 3*3446Smrj * 4*3446Smrj * The contents of this file are subject to the terms of the 5*3446Smrj * Common Development and Distribution License (the "License"). 6*3446Smrj * You may not use this file except in compliance with the License. 7*3446Smrj * 8*3446Smrj * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*3446Smrj * or http://www.opensolaris.org/os/licensing. 10*3446Smrj * See the License for the specific language governing permissions 11*3446Smrj * and limitations under the License. 12*3446Smrj * 13*3446Smrj * When distributing Covered Code, include this CDDL HEADER in each 14*3446Smrj * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*3446Smrj * If applicable, add the following below this CDDL HEADER, with the 16*3446Smrj * fields enclosed by brackets "[]" replaced with your own identifying 17*3446Smrj * information: Portions Copyright [yyyy] [name of copyright owner] 18*3446Smrj * 19*3446Smrj * CDDL HEADER END 20*3446Smrj */ 21*3446Smrj 22*3446Smrj /* 23*3446Smrj * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24*3446Smrj * Use is subject to license terms. 25*3446Smrj */ 26*3446Smrj 27*3446Smrj #pragma ident "%Z%%M% %I% %E% SMI" 28*3446Smrj 29*3446Smrj #include <sys/types.h> 30*3446Smrj #include <sys/systm.h> 31*3446Smrj #include <sys/archsystm.h> 32*3446Smrj #include <sys/debug.h> 33*3446Smrj #include <sys/bootconf.h> 34*3446Smrj #include <sys/bootsvcs.h> 35*3446Smrj #include <sys/bootinfo.h> 36*3446Smrj #include <sys/mman.h> 37*3446Smrj #include <sys/cmn_err.h> 38*3446Smrj #include <sys/param.h> 39*3446Smrj #include <sys/machparam.h> 40*3446Smrj #include <sys/machsystm.h> 41*3446Smrj #include <sys/promif.h> 42*3446Smrj #include <sys/kobj.h> 43*3446Smrj #include <vm/kboot_mmu.h> 44*3446Smrj #include <vm/hat_pte.h> 45*3446Smrj #include <vm/hat_i86.h> 46*3446Smrj #include <vm/seg_kmem.h> 47*3446Smrj 48*3446Smrj #if 0 49*3446Smrj /* 50*3446Smrj * Joe's debug printing 51*3446Smrj */ 52*3446Smrj #define DBG(x) \ 53*3446Smrj bop_printf(NULL, "boot_mmu.c: %s is %" PRIx64 "\n", #x, (uint64_t)(x)); 54*3446Smrj #else 55*3446Smrj #define DBG(x) /* naught */ 56*3446Smrj #endif 57*3446Smrj 58*3446Smrj /* 59*3446Smrj * Page table and memory stuff. 60*3446Smrj */ 61*3446Smrj static caddr_t window; 62*3446Smrj static caddr_t pte_to_window; 63*3446Smrj 64*3446Smrj /* 65*3446Smrj * this are needed by mmu_init() 66*3446Smrj */ 67*3446Smrj int kbm_nx_support = 0; /* NX bit in PTEs is in use */ 68*3446Smrj int kbm_pae_support = 0; /* PAE is 64 bit Page table entries */ 69*3446Smrj int kbm_pge_support = 0; /* PGE is Page table global bit enabled */ 70*3446Smrj int kbm_largepage_support = 0; 71*3446Smrj uint_t kbm_nucleus_size = 0; 72*3446Smrj 73*3446Smrj #define BOOT_SHIFT(l) (shift_amt[l]) 74*3446Smrj #define BOOT_SZ(l) ((size_t)1 << BOOT_SHIFT(l)) 75*3446Smrj #define BOOT_OFFSET(l) (BOOT_SZ(l) - 1) 76*3446Smrj #define BOOT_MASK(l) (~BOOT_OFFSET(l)) 77*3446Smrj 78*3446Smrj /* 79*3446Smrj * Initialize memory management parameters for boot time page table management 80*3446Smrj */ 81*3446Smrj void 82*3446Smrj kbm_init(struct xboot_info *bi) 83*3446Smrj { 84*3446Smrj /* 85*3446Smrj * configure mmu information 86*3446Smrj */ 87*3446Smrj kbm_nucleus_size = (uintptr_t)bi->bi_kseg_size; 88*3446Smrj kbm_largepage_support = bi->bi_use_largepage; 89*3446Smrj kbm_nx_support = bi->bi_use_nx; 90*3446Smrj kbm_pae_support = bi->bi_use_pae; 91*3446Smrj kbm_pge_support = bi->bi_use_pge; 92*3446Smrj window = bi->bi_pt_window; 93*3446Smrj DBG(window); 94*3446Smrj pte_to_window = bi->bi_pte_to_pt_window; 95*3446Smrj DBG(pte_to_window); 96*3446Smrj if (kbm_pae_support) { 97*3446Smrj shift_amt = shift_amt_pae; 98*3446Smrj ptes_per_table = 512; 99*3446Smrj pte_size = 8; 100*3446Smrj lpagesize = TWO_MEG; 101*3446Smrj #ifdef __amd64 102*3446Smrj top_level = 3; 103*3446Smrj #else 104*3446Smrj top_level = 2; 105*3446Smrj #endif 106*3446Smrj } else { 107*3446Smrj shift_amt = shift_amt_nopae; 108*3446Smrj ptes_per_table = 1024; 109*3446Smrj pte_size = 4; 110*3446Smrj lpagesize = FOUR_MEG; 111*3446Smrj top_level = 1; 112*3446Smrj } 113*3446Smrj 114*3446Smrj top_page_table = bi->bi_top_page_table; 115*3446Smrj DBG(top_page_table); 116*3446Smrj } 117*3446Smrj 118*3446Smrj /* 119*3446Smrj * Change the addressible page table window to point at a given page 120*3446Smrj */ 121*3446Smrj /*ARGSUSED*/ 122*3446Smrj void * 123*3446Smrj kbm_remap_window(paddr_t physaddr, int writeable) 124*3446Smrj { 125*3446Smrj uint_t pt_bits = PT_NOCONSIST | PT_VALID | PT_WRITABLE; 126*3446Smrj 127*3446Smrj DBG(physaddr); 128*3446Smrj 129*3446Smrj if (kbm_pae_support) 130*3446Smrj *((x86pte_t *)pte_to_window) = physaddr | pt_bits; 131*3446Smrj else 132*3446Smrj *((x86pte32_t *)pte_to_window) = physaddr | pt_bits; 133*3446Smrj mmu_tlbflush_entry(window); 134*3446Smrj DBG(window); 135*3446Smrj return (window); 136*3446Smrj } 137*3446Smrj 138*3446Smrj /* 139*3446Smrj * Add a mapping for the physical page at the given virtual address. 140*3446Smrj */ 141*3446Smrj void 142*3446Smrj kbm_map(uintptr_t va, paddr_t pa, uint_t level, uint_t is_kernel) 143*3446Smrj { 144*3446Smrj x86pte_t *ptep; 145*3446Smrj paddr_t pte_physaddr; 146*3446Smrj x86pte_t pteval; 147*3446Smrj 148*3446Smrj if (khat_running) 149*3446Smrj panic("kbm_map() called too late"); 150*3446Smrj 151*3446Smrj pteval = pa_to_ma(pa) | PT_NOCONSIST | PT_VALID | PT_WRITABLE; 152*3446Smrj if (level == 1) 153*3446Smrj pteval |= PT_PAGESIZE; 154*3446Smrj if (kbm_pge_support && is_kernel) 155*3446Smrj pteval |= PT_GLOBAL; 156*3446Smrj 157*3446Smrj /* 158*3446Smrj * Find the pte that will map this address. This creates any 159*3446Smrj * missing intermediate level page tables. 160*3446Smrj */ 161*3446Smrj ptep = find_pte(va, &pte_physaddr, level, 0); 162*3446Smrj if (ptep == NULL) 163*3446Smrj bop_panic("kbm_map: find_pte returned NULL"); 164*3446Smrj 165*3446Smrj if (kbm_pae_support) 166*3446Smrj *ptep = pteval; 167*3446Smrj else 168*3446Smrj *((x86pte32_t *)ptep) = pteval; 169*3446Smrj mmu_tlbflush_entry((caddr_t)va); 170*3446Smrj } 171*3446Smrj 172*3446Smrj /* 173*3446Smrj * Probe the boot time page tables to find the first mapping 174*3446Smrj * including va (or higher) and return non-zero if one is found. 175*3446Smrj * va is updated to the starting address and len to the pagesize. 176*3446Smrj * pp will be set to point to the 1st page_t of the mapped page(s). 177*3446Smrj * 178*3446Smrj * Note that if va is in the middle of a large page, the returned va 179*3446Smrj * will be less than what was asked for. 180*3446Smrj */ 181*3446Smrj int 182*3446Smrj kbm_probe(uintptr_t *va, size_t *len, pfn_t *pfn, uint_t *prot) 183*3446Smrj { 184*3446Smrj uintptr_t probe_va; 185*3446Smrj x86pte_t *ptep; 186*3446Smrj paddr_t pte_physaddr; 187*3446Smrj x86pte_t pte_val; 188*3446Smrj level_t l; 189*3446Smrj 190*3446Smrj if (khat_running) 191*3446Smrj panic("kbm_probe() called too late"); 192*3446Smrj *len = 0; 193*3446Smrj *pfn = PFN_INVALID; 194*3446Smrj *prot = 0; 195*3446Smrj probe_va = *va; 196*3446Smrj restart_new_va: 197*3446Smrj l = top_level; 198*3446Smrj for (;;) { 199*3446Smrj if (IN_VA_HOLE(probe_va)) 200*3446Smrj probe_va = mmu.hole_end; 201*3446Smrj 202*3446Smrj if (IN_HYPERVISOR_VA(probe_va)) 203*3446Smrj return (0); 204*3446Smrj 205*3446Smrj /* 206*3446Smrj * If we don't have a valid PTP/PTE at this level 207*3446Smrj * then we can bump VA by this level's pagesize and try again. 208*3446Smrj * When the probe_va wraps around, we are done. 209*3446Smrj */ 210*3446Smrj ptep = find_pte(probe_va, &pte_physaddr, l, 1); 211*3446Smrj if (ptep == NULL) 212*3446Smrj bop_panic("kbm_probe: find_pte returned NULL"); 213*3446Smrj if (kbm_pae_support) 214*3446Smrj pte_val = *ptep; 215*3446Smrj else 216*3446Smrj pte_val = *((x86pte32_t *)ptep); 217*3446Smrj if (!PTE_ISVALID(pte_val)) { 218*3446Smrj probe_va = (probe_va & BOOT_MASK(l)) + BOOT_SZ(l); 219*3446Smrj if (probe_va <= *va) 220*3446Smrj return (0); 221*3446Smrj goto restart_new_va; 222*3446Smrj } 223*3446Smrj 224*3446Smrj /* 225*3446Smrj * If this entry is a pointer to a lower level page table 226*3446Smrj * go down to it. 227*3446Smrj */ 228*3446Smrj if (!PTE_ISPAGE(pte_val, l)) { 229*3446Smrj ASSERT(l > 0); 230*3446Smrj --l; 231*3446Smrj continue; 232*3446Smrj } 233*3446Smrj 234*3446Smrj /* 235*3446Smrj * We found a boot level page table entry 236*3446Smrj */ 237*3446Smrj *len = BOOT_SZ(l); 238*3446Smrj *va = probe_va & ~(*len - 1); 239*3446Smrj *pfn = PTE2PFN(pte_val, l); 240*3446Smrj 241*3446Smrj 242*3446Smrj *prot = PROT_READ | PROT_EXEC; 243*3446Smrj if (PTE_GET(pte_val, PT_WRITABLE)) 244*3446Smrj *prot |= PROT_WRITE; 245*3446Smrj 246*3446Smrj /* 247*3446Smrj * pt_nx is cleared if processor doesn't support NX bit 248*3446Smrj */ 249*3446Smrj if (PTE_GET(pte_val, mmu.pt_nx)) 250*3446Smrj *prot &= ~PROT_EXEC; 251*3446Smrj 252*3446Smrj return (1); 253*3446Smrj } 254*3446Smrj } 255*3446Smrj 256*3446Smrj 257*3446Smrj /* 258*3446Smrj * Destroy a boot loader page table 4K mapping. 259*3446Smrj */ 260*3446Smrj void 261*3446Smrj kbm_unmap(uintptr_t va) 262*3446Smrj { 263*3446Smrj if (khat_running) 264*3446Smrj panic("kbm_unmap() called too late"); 265*3446Smrj else { 266*3446Smrj x86pte_t *ptep; 267*3446Smrj level_t level = 0; 268*3446Smrj uint_t probe_only = 1; 269*3446Smrj 270*3446Smrj ptep = find_pte(va, NULL, level, probe_only); 271*3446Smrj if (ptep == NULL) 272*3446Smrj return; 273*3446Smrj 274*3446Smrj if (kbm_pae_support) 275*3446Smrj *ptep = 0; 276*3446Smrj else 277*3446Smrj *((x86pte32_t *)ptep) = 0; 278*3446Smrj mmu_tlbflush_entry((caddr_t)va); 279*3446Smrj } 280*3446Smrj } 281*3446Smrj 282*3446Smrj 283*3446Smrj /* 284*3446Smrj * Change a boot loader page table 4K mapping. 285*3446Smrj * Returns the pfn of the old mapping. 286*3446Smrj */ 287*3446Smrj pfn_t 288*3446Smrj kbm_remap(uintptr_t va, pfn_t pfn) 289*3446Smrj { 290*3446Smrj x86pte_t *ptep; 291*3446Smrj level_t level = 0; 292*3446Smrj uint_t probe_only = 1; 293*3446Smrj x86pte_t pte_val = pa_to_ma(pfn_to_pa(pfn)) | PT_WRITABLE | 294*3446Smrj PT_NOCONSIST | PT_VALID; 295*3446Smrj x86pte_t old_pte; 296*3446Smrj 297*3446Smrj if (khat_running) 298*3446Smrj panic("kbm_remap() called too late"); 299*3446Smrj ptep = find_pte(va, NULL, level, probe_only); 300*3446Smrj if (ptep == NULL) 301*3446Smrj bop_panic("kbm_remap: find_pte returned NULL"); 302*3446Smrj 303*3446Smrj if (kbm_pae_support) 304*3446Smrj old_pte = *ptep; 305*3446Smrj else 306*3446Smrj old_pte = *((x86pte32_t *)ptep); 307*3446Smrj 308*3446Smrj if (kbm_pae_support) 309*3446Smrj *((x86pte_t *)ptep) = pte_val; 310*3446Smrj else 311*3446Smrj *((x86pte32_t *)ptep) = pte_val; 312*3446Smrj mmu_tlbflush_entry((caddr_t)va); 313*3446Smrj 314*3446Smrj if (!(old_pte & PT_VALID) || ma_to_pa(old_pte) == -1) 315*3446Smrj return (PFN_INVALID); 316*3446Smrj return (mmu_btop(ma_to_pa(old_pte))); 317*3446Smrj } 318*3446Smrj 319*3446Smrj 320*3446Smrj /* 321*3446Smrj * Change a boot loader page table 4K mapping to read only. 322*3446Smrj */ 323*3446Smrj void 324*3446Smrj kbm_read_only(uintptr_t va, paddr_t pa) 325*3446Smrj { 326*3446Smrj x86pte_t pte_val = pa_to_ma(pa) | 327*3446Smrj PT_NOCONSIST | PT_REF | PT_MOD | PT_VALID; 328*3446Smrj x86pte_t *ptep; 329*3446Smrj level_t level = 0; 330*3446Smrj 331*3446Smrj ptep = find_pte(va, NULL, level, 0); 332*3446Smrj if (ptep == NULL) 333*3446Smrj bop_panic("kbm_read_only: find_pte returned NULL"); 334*3446Smrj 335*3446Smrj if (kbm_pae_support) 336*3446Smrj *ptep = pte_val; 337*3446Smrj else 338*3446Smrj *((x86pte32_t *)ptep) = pte_val; 339*3446Smrj mmu_tlbflush_entry((caddr_t)va); 340*3446Smrj } 341*3446Smrj 342*3446Smrj /* 343*3446Smrj * interfaces for kernel debugger to access physical memory 344*3446Smrj */ 345*3446Smrj static x86pte_t save_pte; 346*3446Smrj 347*3446Smrj void * 348*3446Smrj kbm_push(paddr_t pa) 349*3446Smrj { 350*3446Smrj static int first_time = 1; 351*3446Smrj 352*3446Smrj if (first_time) { 353*3446Smrj first_time = 0; 354*3446Smrj return (window); 355*3446Smrj } 356*3446Smrj 357*3446Smrj if (kbm_pae_support) 358*3446Smrj save_pte = *((x86pte_t *)pte_to_window); 359*3446Smrj else 360*3446Smrj save_pte = *((x86pte32_t *)pte_to_window); 361*3446Smrj return (kbm_remap_window(pa, 0)); 362*3446Smrj } 363*3446Smrj 364*3446Smrj void 365*3446Smrj kbm_pop(void) 366*3446Smrj { 367*3446Smrj if (kbm_pae_support) 368*3446Smrj *((x86pte_t *)pte_to_window) = save_pte; 369*3446Smrj else 370*3446Smrj *((x86pte32_t *)pte_to_window) = save_pte; 371*3446Smrj mmu_tlbflush_entry(window); 372*3446Smrj } 373*3446Smrj 374*3446Smrj x86pte_t 375*3446Smrj get_pteval(paddr_t table, uint_t index) 376*3446Smrj { 377*3446Smrj void *table_ptr = kbm_remap_window(table, 0); 378*3446Smrj 379*3446Smrj if (kbm_pae_support) 380*3446Smrj return (((x86pte_t *)table_ptr)[index]); 381*3446Smrj return (((x86pte32_t *)table_ptr)[index]); 382*3446Smrj } 383*3446Smrj 384*3446Smrj void 385*3446Smrj set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval) 386*3446Smrj { 387*3446Smrj void *table_ptr = kbm_remap_window(table, 0); 388*3446Smrj if (kbm_pae_support) 389*3446Smrj ((x86pte_t *)table_ptr)[index] = pteval; 390*3446Smrj else 391*3446Smrj ((x86pte32_t *)table_ptr)[index] = pteval; 392*3446Smrj if (level == top_level && level == 2) 393*3446Smrj reload_cr3(); 394*3446Smrj } 395*3446Smrj 396*3446Smrj paddr_t 397*3446Smrj make_ptable(x86pte_t *pteval, uint_t level) 398*3446Smrj { 399*3446Smrj paddr_t new_table; 400*3446Smrj void *table_ptr; 401*3446Smrj 402*3446Smrj new_table = do_bop_phys_alloc(MMU_PAGESIZE, MMU_PAGESIZE); 403*3446Smrj table_ptr = kbm_remap_window(new_table, 1); 404*3446Smrj bzero(table_ptr, MMU_PAGESIZE); 405*3446Smrj 406*3446Smrj if (level == top_level && level == 2) 407*3446Smrj *pteval = pa_to_ma(new_table) | PT_VALID; 408*3446Smrj else 409*3446Smrj *pteval = pa_to_ma(new_table) | 410*3446Smrj PT_VALID | PT_REF | PT_USER | PT_WRITABLE; 411*3446Smrj 412*3446Smrj return (new_table); 413*3446Smrj } 414*3446Smrj 415*3446Smrj x86pte_t * 416*3446Smrj map_pte(paddr_t table, uint_t index) 417*3446Smrj { 418*3446Smrj void *table_ptr = kbm_remap_window(table, 0); 419*3446Smrj return ((x86pte_t *)((caddr_t)table_ptr + index * pte_size)); 420*3446Smrj } 421