/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


#include <sys/types.h>
#include <sys/machparam.h>
#include <sys/x86_archext.h>
#include <sys/systm.h>
#include <sys/mach_mmu.h>
#include <sys/multiboot.h>

#if defined(__xpv)

#include <sys/hypervisor.h>
uintptr_t xen_virt_start;
pfn_t *mfn_to_pfn_mapping;

#else /* !__xpv */

extern multiboot_header_t mb_header;
extern int have_cpuid(void);

#endif /* !__xpv */

#include <sys/inttypes.h>
#include <sys/bootinfo.h>
#include <sys/mach_mmu.h>
#include <sys/boot_console.h>

#include "dboot_asm.h"
#include "dboot_printf.h"
#include "dboot_xboot.h"
#include "dboot_elfload.h"

/*
 * This file contains code that runs to transition us from either a multiboot
 * compliant loader (32 bit non-paging) or an XPV domain loader to
 * regular kernel execution. Its task is to set up the kernel memory image
 * and page tables.
 *
 * The code executes as:
 *	- 32 bits under GRUB (for 32 or 64 bit Solaris)
 *	- a 32 bit program for the 32-bit PV hypervisor
 *	- a 64 bit program for the 64-bit PV hypervisor (at least for now)
 *
 * Under the PV hypervisor, we must create mappings for any memory beyond the
 * initial start of day allocation (such as the kernel itself).
 *
 * When on the metal, the mapping between maddr_t and paddr_t is 1:1.
 * Since we are running in real mode, all such memory is accessible.
 */

/*
 * Standard bits used in PTE (page level) and PTP (internal levels)
 */
x86pte_t ptp_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_USER;
x86pte_t pte_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_MOD | PT_NOCONSIST;

/*
 * This is the target address (physical) where the kernel text and data
 * nucleus pages will be unpacked.
 * On the hypervisor this is actually a virtual address.
 */
paddr_t ktext_phys;
uint32_t ksize = 2 * FOUR_MEG;	/* kernel nucleus is 8Meg */

static uint64_t target_kernel_text;	/* value to use for KERNEL_TEXT */

/*
 * The stack is set up in assembler before entering startup_kernel()
 */
char stack_space[STACK_SIZE];

/*
 * Used to track physical memory allocation
 */
static paddr_t next_avail_addr = 0;

#if defined(__xpv)
/*
 * Additional information needed for hypervisor memory allocation.
 * Only memory up to scratch_end is mapped by page tables.
 * mfn_base is the start of the hypervisor virtual image. It's ONE_GIG, so
 * to derive a pfn from a pointer, you subtract mfn_base.
 */

static paddr_t scratch_end = 0;	/* we can't write all of mem here */
static paddr_t mfn_base;	/* addr corresponding to mfn_list[0] */
start_info_t *xen_info;

#else	/* __xpv */

/*
 * If on the metal, then we have a multiboot loader.
 */
multiboot_info_t *mb_info;

#endif /* __xpv */

/*
 * This contains information passed to the kernel
 */
struct xboot_info boot_info[2];	/* extra space to fix alignment for amd64 */
struct xboot_info *bi;

/*
 * Page table and memory stuff.
 */
static paddr_t max_mem;			/* maximum memory address */

/*
 * Information about processor MMU
 */
int amd64_support = 0;
int largepage_support = 0;
int pae_support = 0;
int pge_support = 0;
int NX_support = 0;

/*
 * Low 32 bits of kernel entry address passed back to assembler.
 * When running a 64 bit kernel, the high 32 bits are 0xffffffff.
 */
uint32_t entry_addr_low;

/*
 * Memlists for the kernel. We shouldn't need a lot of these.
 */
#define	MAX_MEMLIST (50)
struct boot_memlist memlists[MAX_MEMLIST];
uint_t memlists_used = 0;
struct boot_memlist pcimemlists[MAX_MEMLIST];
uint_t pcimemlists_used = 0;
struct boot_memlist rsvdmemlists[MAX_MEMLIST];
uint_t rsvdmemlists_used = 0;

#define	MAX_MODULES (10)
struct boot_modules modules[MAX_MODULES];
uint_t modules_used = 0;

/*
 * Debugging macros
 */
uint_t prom_debug = 0;
uint_t map_debug = 0;

/*
 * Either hypervisor-specific or grub-specific code builds the initial
 * memlists. This code does the sort/merge/link for final use.
 */
static void
sort_physinstall(void)
{
	int i;
#if !defined(__xpv)
	int j;
	struct boot_memlist tmp;

	/*
	 * Now sort the memlists, in case they weren't in order.
	 * Yeah, this is a bubble sort; small, simple and easy to get right.
	 */
	DBG_MSG("Sorting phys-installed list\n");
	for (j = memlists_used - 1; j > 0; --j) {
		for (i = 0; i < j; ++i) {
			if (memlists[i].addr < memlists[i + 1].addr)
				continue;
			tmp = memlists[i];
			memlists[i] = memlists[i + 1];
			memlists[i + 1] = tmp;
		}
	}

	/*
	 * Merge any memlists that don't have holes between them.
	 */
	for (i = 0; i <= memlists_used - 1; ++i) {
		if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr)
			continue;

		if (prom_debug)
			dboot_printf(
			    "merging mem segs %" PRIx64 "...%" PRIx64
			    " w/ %" PRIx64 "...%" PRIx64 "\n",
			    memlists[i].addr,
			    memlists[i].addr + memlists[i].size,
			    memlists[i + 1].addr,
			    memlists[i + 1].addr + memlists[i + 1].size);

		memlists[i].size += memlists[i + 1].size;
		for (j = i + 1; j < memlists_used - 1; ++j)
			memlists[j] = memlists[j + 1];
		--memlists_used;
		DBG(memlists_used);
		--i;	/* after merging we need to reexamine, so do this */
	}
#endif	/* __xpv */

	if (prom_debug) {
		dboot_printf("\nFinal memlists:\n");
		for (i = 0; i < memlists_used; ++i) {
			dboot_printf("\t%d: addr=%" PRIx64 " size=%"
			    PRIx64 "\n", i, memlists[i].addr, memlists[i].size);
		}
	}

	/*
	 * link together the memlists with native size pointers
	 */
	memlists[0].next = 0;
	memlists[0].prev = 0;
	for (i = 1; i < memlists_used; ++i) {
		memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1);
		memlists[i].next = 0;
		memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i);
	}
	bi->bi_phys_install = (native_ptr_t)memlists;
	DBG(bi->bi_phys_install);
}

/*
 * build bios reserved memlists
 */
static void
build_rsvdmemlists(void)
{
	int i;

	rsvdmemlists[0].next = 0;
	rsvdmemlists[0].prev = 0;
	for (i = 1; i < rsvdmemlists_used; ++i) {
		rsvdmemlists[i].prev =
		    (native_ptr_t)(uintptr_t)(rsvdmemlists + i - 1);
		rsvdmemlists[i].next = 0;
		rsvdmemlists[i - 1].next =
		    (native_ptr_t)(uintptr_t)(rsvdmemlists + i);
	}
	bi->bi_rsvdmem = (native_ptr_t)rsvdmemlists;
	DBG(bi->bi_rsvdmem);
}

#if defined(__xpv)

/*
 * halt on the hypervisor after a delay to drain console output
 */
void
dboot_halt(void)
{
	uint_t i = 10000;

	while (--i)
		HYPERVISOR_yield();
	HYPERVISOR_shutdown(SHUTDOWN_poweroff);
}

/*
 * From a machine address, find the corresponding pseudo-physical address.
 * Pseudo-physical addresses are contiguous and run from mfn_base in each VM.
 * Machine addresses are the real underlying hardware addresses.
 * These are needed for page table entries. Note that this routine is
 * poorly protected. A bad value of "ma" will cause a page fault.
 */
paddr_t
ma_to_pa(maddr_t ma)
{
	ulong_t pgoff = ma & MMU_PAGEOFFSET;
	ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)];
	paddr_t pa;

	if (pfn >= xen_info->nr_pages)
		return (-(paddr_t)1);
	pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff;
#ifdef DEBUG
	if (ma != pa_to_ma(pa))
		dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", "
		    "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa));
#endif
	return (pa);
}

/*
 * From a pseudo-physical address, find the corresponding machine address.
 */
maddr_t
pa_to_ma(paddr_t pa)
{
	pfn_t pfn;
	ulong_t mfn;

	pfn = mmu_btop(pa - mfn_base);
	if (pa < mfn_base || pfn >= xen_info->nr_pages)
		dboot_panic("pa_to_ma(): illegal address 0x%lx", (ulong_t)pa);
	mfn = ((ulong_t *)xen_info->mfn_list)[pfn];
#ifdef DEBUG
	if (mfn_to_pfn_mapping[mfn] != pfn)
		dboot_printf("pa_to_ma(pfn=%lx) got %lx ma_to_pa() says %lx\n",
		    pfn, mfn, mfn_to_pfn_mapping[mfn]);
#endif
	return (mfn_to_ma(mfn) | (pa & MMU_PAGEOFFSET));
}

#endif /* __xpv */

x86pte_t
get_pteval(paddr_t table, uint_t index)
{
	if (pae_support)
		return (((x86pte_t *)(uintptr_t)table)[index]);
	return (((x86pte32_t *)(uintptr_t)table)[index]);
}

/*ARGSUSED*/
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
#ifdef __xpv
	mmu_update_t t;
	maddr_t mtable = pa_to_ma(table);
	int retcnt;

	t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
	t.val = pteval;
	if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
		dboot_panic("HYPERVISOR_mmu_update() failed");
#else /* __xpv */
	uintptr_t tab_addr = (uintptr_t)table;

	if (pae_support)
		((x86pte_t *)tab_addr)[index] = pteval;
	else
		((x86pte32_t *)tab_addr)[index] = (x86pte32_t)pteval;
	if (level == top_level && level == 2)
		reload_cr3();
#endif /* __xpv */
}

paddr_t
make_ptable(x86pte_t *pteval, uint_t level)
{
	paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);

	if (level == top_level && level == 2)
		*pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID;
	else
		*pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits;

#ifdef __xpv
	/* Remove write permission to the new page table. */
	if (HYPERVISOR_update_va_mapping(new_table,
	    *pteval & ~(x86pte_t)PT_WRITABLE, UVMF_INVLPG | UVMF_LOCAL))
		dboot_panic("HYP_update_va_mapping error");
#endif

	if (map_debug)
		dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%"
		    PRIx64 "\n", level, (ulong_t)new_table, *pteval);
	return (new_table);
}

x86pte_t *
map_pte(paddr_t table, uint_t index)
{
	return ((x86pte_t *)(uintptr_t)(table + index * pte_size));
}

/*
 * dump out the contents of page tables...
 */
static void
dump_tables(void)
{
	uint_t save_index[4];	/* for recursion */
	char *save_table[4];	/* for recursion */
	uint_t l;
	uint64_t va;
	uint64_t pgsize;
	int index;
	int i;
	x86pte_t pteval;
	char *table;
	static char *tablist = "\t\t\t";
	char *tabs = tablist + 3 - top_level;
	uint_t pa, pa1;
#if !defined(__xpv)
#define	maddr_t paddr_t
#endif /* !__xpv */

	dboot_printf("Finished pagetables:\n");
	table = (char *)(uintptr_t)top_page_table;
	l = top_level;
	va = 0;
	for (index = 0; index < ptes_per_table; ++index) {
		pgsize = 1ull << shift_amt[l];
		if (pae_support)
			pteval = ((x86pte_t *)table)[index];
		else
			pteval = ((x86pte32_t *)table)[index];
		if (pteval == 0)
			goto next_entry;

		dboot_printf("%s %p[0x%x] = %" PRIx64 ", va=%" PRIx64,
		    tabs + l, table, index, (uint64_t)pteval, va);
		pa = ma_to_pa(pteval & MMU_PAGEMASK);
		dboot_printf(" physaddr=%x\n", pa);

		/*
		 * Don't try to walk hypervisor private pagetables
		 */
		if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) {
			save_table[l] = table;
			save_index[l] = index;
			--l;
			index = -1;
			table = (char *)(uintptr_t)
			    ma_to_pa(pteval & MMU_PAGEMASK);
			goto recursion;
		}

		/*
		 * shorten dump for consecutive mappings
		 */
		for (i = 1; index + i < ptes_per_table; ++i) {
			if (pae_support)
				pteval = ((x86pte_t *)table)[index + i];
			else
				pteval = ((x86pte32_t *)table)[index + i];
			if (pteval == 0)
				break;
			pa1 = ma_to_pa(pteval & MMU_PAGEMASK);
			if (pa1 != pa + i * pgsize)
				break;
		}
		if (i > 2) {
			dboot_printf("%s...\n", tabs + l);
			va += pgsize * (i - 2);
			index += i - 2;
		}
next_entry:
		va += pgsize;
		if (l == 3 && index == 256)	/* VA hole */
			va = 0xffff800000000000ull;
recursion:
		;
	}
	if (l < top_level) {
		++l;
		index = save_index[l];
		table = save_table[l];
		goto recursion;
	}
}

/*
 * Add a mapping for the machine page at the given virtual address.
 */
static void
map_ma_at_va(maddr_t ma, native_ptr_t va, uint_t level)
{
	x86pte_t *ptep;
	x86pte_t pteval;

	pteval = ma | pte_bits;
	if (level > 0)
		pteval |= PT_PAGESIZE;
	if (va >= target_kernel_text && pge_support)
		pteval |= PT_GLOBAL;

	if (map_debug && ma != va)
		dboot_printf("mapping ma=0x%" PRIx64 " va=0x%" PRIx64
		    " pte=0x%" PRIx64 " l=%d\n",
		    (uint64_t)ma, (uint64_t)va, pteval, level);

#if defined(__xpv)
	/*
	 * see if we can avoid find_pte() on the hypervisor
	 */
	if (HYPERVISOR_update_va_mapping(va, pteval,
	    UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;
#endif

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables
	 */
	ptep = find_pte(va, NULL, level, 0);

	/*
	 * When paravirtualized, we must use hypervisor calls to modify the
	 * PTE, since paging is active. On real hardware we just write to
	 * the pagetables which aren't in use yet.
	 */
#if defined(__xpv)
	ptep = ptep;	/* shut lint up */
	if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
		dboot_panic("mmu_update failed-map_pa_at_va va=0x%" PRIx64
		    " l=%d ma=0x%" PRIx64 ", pte=0x%" PRIx64 "",
		    (uint64_t)va, level, (uint64_t)ma, pteval);
#else
	if (va < 1024 * 1024)
		pteval |= PT_NOCACHE;		/* for video RAM */
	if (pae_support)
		*ptep = pteval;
	else
		*((x86pte32_t *)ptep) = (x86pte32_t)pteval;
#endif
}

/*
 * Add a mapping for the physical page at the given virtual address.
 */
static void
map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level)
{
	map_ma_at_va(pa_to_ma(pa), va, level);
}

/*
 * This is called to remove start..end from the
 * possible range of PCI addresses.
 */
const uint64_t pci_lo_limit = 0x00100000ul;
const uint64_t pci_hi_limit = 0xfff00000ul;
static void
exclude_from_pci(uint64_t start, uint64_t end)
{
	int i;
	int j;
	struct boot_memlist *ml;

	for (i = 0; i < pcimemlists_used; ++i) {
		ml = &pcimemlists[i];

		/* delete the entire range? */
		if (start <= ml->addr && ml->addr + ml->size <= end) {
			--pcimemlists_used;
			for (j = i; j < pcimemlists_used; ++j)
				pcimemlists[j] = pcimemlists[j + 1];
			--i;	/* to revisit the new one at this index */
		}

		/* split a range? */
		else if (ml->addr < start && end < ml->addr + ml->size) {

			++pcimemlists_used;
			if (pcimemlists_used > MAX_MEMLIST)
				dboot_panic("too many pcimemlists");

			for (j = pcimemlists_used - 1; j > i; --j)
				pcimemlists[j] = pcimemlists[j - 1];
			ml->size = start - ml->addr;

			++ml;
			ml->size = (ml->addr + ml->size) - end;
			ml->addr = end;
			++i;	/* skip on to next one */
		}

		/* cut memory off the start? */
		else if (ml->addr < end && end < ml->addr + ml->size) {
			ml->size -= end - ml->addr;
			ml->addr = end;
		}

		/* cut memory off the end? */
		else if (ml->addr <= start && start < ml->addr + ml->size) {
			ml->size = start - ml->addr;
		}
	}
}

/*
 * Xen strips the size field out of the mb_memory_map_t, see struct e820entry
 * definition in Xen source.
 */
#ifdef __xpv
typedef struct {
	uint32_t base_addr_low;
	uint32_t base_addr_high;
	uint32_t length_low;
	uint32_t length_high;
	uint32_t type;
} mmap_t;
#else
typedef mb_memory_map_t mmap_t;
#endif

static void
build_pcimemlists(mmap_t *mem, int num)
{
	mmap_t *mmap;
	uint64_t page_offset = MMU_PAGEOFFSET;	/* needs to be 64 bits */
	uint64_t start;
	uint64_t end;
	int i;

	/*
	 * initialize
	 */
	pcimemlists[0].addr = pci_lo_limit;
	pcimemlists[0].size = pci_hi_limit - pci_lo_limit;
	pcimemlists_used = 1;

	/*
	 * Fill in PCI memlists.
	 */
	for (mmap = mem, i = 0; i < num; ++i, ++mmap) {
		start = ((uint64_t)mmap->base_addr_high << 32) +
		    mmap->base_addr_low;
		end = start + ((uint64_t)mmap->length_high << 32) +
		    mmap->length_low;

		if (prom_debug)
			dboot_printf("\ttype: %d %" PRIx64 "..%"
			    PRIx64 "\n", mmap->type, start, end);

		/*
		 * page align start and end
		 */
		start = (start + page_offset) & ~page_offset;
		end &= ~page_offset;
		if (end <= start)
			continue;

		exclude_from_pci(start, end);
	}

	/*
	 * Finish off the pcimemlist
	 */
	if (prom_debug) {
		for (i = 0; i < pcimemlists_used; ++i) {
			dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%"
			    PRIx64 "\n", pcimemlists[i].addr,
			    pcimemlists[i].addr + pcimemlists[i].size);
		}
	}
	pcimemlists[0].next = 0;
	pcimemlists[0].prev = 0;
	for (i = 1; i < pcimemlists_used; ++i) {
		pcimemlists[i].prev =
		    (native_ptr_t)(uintptr_t)(pcimemlists + i - 1);
		pcimemlists[i].next = 0;
		pcimemlists[i - 1].next =
		    (native_ptr_t)(uintptr_t)(pcimemlists + i);
	}
	bi->bi_pcimem = (native_ptr_t)pcimemlists;
	DBG(bi->bi_pcimem);
}

#if defined(__xpv)
/*
 * Initialize memory allocator stuff from hypervisor-supplied start info.
 *
 * There is 512KB of scratch area after the boot stack page.
 * We'll use that for everything except the kernel nucleus pages which are too
 * big to fit there and are allocated last anyway.
 */
#define	MAXMAPS	100
static mmap_t map_buffer[MAXMAPS];
static void
init_mem_alloc(void)
{
	int local;	/* variables needed to find start region */
	paddr_t scratch_start;
	xen_memory_map_t map;

	DBG_MSG("Entered init_mem_alloc()\n");

	/*
	 * Free memory follows the stack. There's at least 512KB of scratch
	 * space, rounded up to at least 2Mb alignment.  That should be enough
	 * for the page tables we'll need to build.  The nucleus memory is
	 * allocated last and will be outside the addressable range.  We'll
	 * switch to new page tables before we unpack the kernel
	 */
	scratch_start = RNDUP((paddr_t)(uintptr_t)&local, MMU_PAGESIZE);
	DBG(scratch_start);
	scratch_end = RNDUP((paddr_t)scratch_start + 512 * 1024, TWO_MEG);
	DBG(scratch_end);

	/*
	 * For paranoia, leave some space between hypervisor data and ours.
	 * Use 500 instead of 512.
	 */
	next_avail_addr = scratch_end - 500 * 1024;
	DBG(next_avail_addr);

	/*
	 * The domain builder gives us at most 1 module
	 */
	DBG(xen_info->mod_len);
	if (xen_info->mod_len > 0) {
		DBG(xen_info->mod_start);
		modules[0].bm_addr = xen_info->mod_start;
		modules[0].bm_size = xen_info->mod_len;
		bi->bi_module_cnt = 1;
		bi->bi_modules = (native_ptr_t)modules;
	} else {
		bi->bi_module_cnt = 0;
		bi->bi_modules = NULL;
	}
	DBG(bi->bi_module_cnt);
	DBG(bi->bi_modules);

	DBG(xen_info->mfn_list);
	DBG(xen_info->nr_pages);
	max_mem = (paddr_t)xen_info->nr_pages << MMU_PAGESHIFT;
	DBG(max_mem);

	/*
	 * Using pseudo-physical addresses, so only 1 memlist element
	 */
	memlists[0].addr = 0;
	DBG(memlists[0].addr);
	memlists[0].size = max_mem;
	DBG(memlists[0].size);
	memlists_used = 1;
	DBG(memlists_used);

	/*
	 * finish building physinstall list
	 */
	sort_physinstall();

	/*
	 * build bios reserved memlists
	 */
	build_rsvdmemlists();

	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		/*
		 * build PCI Memory list
		 */
		map.nr_entries = MAXMAPS;
		/*LINTED: constant in conditional context*/
		set_xen_guest_handle(map.buffer, map_buffer);
		if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &map) != 0)
			dboot_panic("getting XENMEM_machine_memory_map failed");
		build_pcimemlists(map_buffer, map.nr_entries);
	}
}

#else	/* !__xpv */

/*
 * During memory allocation, find the highest address not used yet.
 */
static void
check_higher(paddr_t a)
{
	if (a < next_avail_addr)
		return;
	next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE);
	DBG(next_avail_addr);
}

/*
 * Walk through the module information finding the last used address.
 * The first available address will become the top level page table.
 *
 * We then build the phys_install memlist from the multiboot information.
 */
static void
init_mem_alloc(void)
{
	mb_memory_map_t *mmap;
	mb_module_t *mod;
	uint64_t start;
	uint64_t end;
	uint64_t page_offset = MMU_PAGEOFFSET;	/* needs to be 64 bits */
	extern char _end[];
	int i;

	DBG_MSG("Entered init_mem_alloc()\n");
	DBG((uintptr_t)mb_info);

	if (mb_info->mods_count > MAX_MODULES) {
		dboot_panic("Too many modules (%d) -- the maximum is %d.",
		    mb_info->mods_count, MAX_MODULES);
	}
	/*
	 * search the modules to find the last used address
	 * we'll build the module list while we're walking through here
	 */
	DBG_MSG("\nFinding Modules\n");
	check_higher((paddr_t)&_end);
	for (mod = (mb_module_t *)(mb_info->mods_addr), i = 0;
	    i < mb_info->mods_count;
	    ++mod, ++i) {
		if (prom_debug) {
			dboot_printf("\tmodule #%d: %s at: 0x%lx, len 0x%lx\n",
			    i, (char *)(mod->mod_name),
			    (ulong_t)mod->mod_start, (ulong_t)mod->mod_end);
		}
		modules[i].bm_addr = mod->mod_start;
		if (mod->mod_start > mod->mod_end) {
			dboot_panic("module[%d]: Invalid module start address "
			    "(0x%llx)", i, (uint64_t)mod->mod_start);
		}
		modules[i].bm_size = mod->mod_end - mod->mod_start;

		check_higher(mod->mod_end);
	}
	bi->bi_modules = (native_ptr_t)modules;
	DBG(bi->bi_modules);
	bi->bi_module_cnt = mb_info->mods_count;
	DBG(bi->bi_module_cnt);

	/*
	 * Walk through the memory map from multiboot and build our memlist
	 * structures. Note these will have native format pointers.
	 */
	DBG_MSG("\nFinding Memory Map\n");
	DBG(mb_info->flags);
	max_mem = 0;
	if (mb_info->flags & 0x40) {
		int cnt = 0;

		DBG(mb_info->mmap_addr);
		DBG(mb_info->mmap_length);
		check_higher(mb_info->mmap_addr + mb_info->mmap_length);

		for (mmap = (mb_memory_map_t *)mb_info->mmap_addr;
		    (uint32_t)mmap < mb_info->mmap_addr + mb_info->mmap_length;
		    mmap = (mb_memory_map_t *)((uint32_t)mmap + mmap->size
		    + sizeof (mmap->size))) {
			++cnt;
			start = ((uint64_t)mmap->base_addr_high << 32) +
			    mmap->base_addr_low;
			end = start + ((uint64_t)mmap->length_high << 32) +
			    mmap->length_low;

			if (prom_debug)
				dboot_printf("\ttype: %d %" PRIx64 "..%"
				    PRIx64 "\n", mmap->type, start, end);

			/*
			 * page align start and end
			 */
			start = (start + page_offset) & ~page_offset;
			end &= ~page_offset;
			if (end <= start)
				continue;

			/*
			 * only type 1 is usable RAM
			 */
			switch (mmap->type) {
			case 1:
				if (end > max_mem)
					max_mem = end;
				memlists[memlists_used].addr = start;
				memlists[memlists_used].size = end - start;
				++memlists_used;
				if (memlists_used > MAX_MEMLIST)
					dboot_panic("too many memlists");
				break;
			case 2:
				rsvdmemlists[rsvdmemlists_used].addr = start;
				rsvdmemlists[rsvdmemlists_used].size =
				    end - start;
				++rsvdmemlists_used;
				if (rsvdmemlists_used > MAX_MEMLIST)
					dboot_panic("too many rsvdmemlists");
				break;
			default:
				continue;
			}
		}
		build_pcimemlists((mb_memory_map_t *)mb_info->mmap_addr, cnt);
	} else if (mb_info->flags & 0x01) {
		DBG(mb_info->mem_lower);
		memlists[memlists_used].addr = 0;
		memlists[memlists_used].size = mb_info->mem_lower * 1024;
		++memlists_used;
		DBG(mb_info->mem_upper);
		memlists[memlists_used].addr = 1024 * 1024;
		memlists[memlists_used].size = mb_info->mem_upper * 1024;
		++memlists_used;

		/*
		 * Old platform - assume I/O space at the end of memory.
		 */
		pcimemlists[0].addr =
		    (mb_info->mem_upper * 1024) + (1024 * 1024);
		pcimemlists[0].size = pci_hi_limit - pcimemlists[0].addr;
		pcimemlists[0].next = 0;
		pcimemlists[0].prev = 0;
		bi->bi_pcimem = (native_ptr_t)pcimemlists;
		DBG(bi->bi_pcimem);
	} else {
		dboot_panic("No memory info from boot loader!!!");
	}

	check_higher(bi->bi_cmdline);

	/*
	 * finish processing the physinstall list
	 */
	sort_physinstall();

	/*
	 * build bios reserved mem lists
	 */
	build_rsvdmemlists();
}
#endif /* !__xpv */

/*
 * Simple memory allocator, allocates aligned physical memory.
 * Note that startup_kernel() only allocates memory, never frees.
 * Memory usage just grows in an upward direction.
 */
static void *
do_mem_alloc(uint32_t size, uint32_t align)
{
	uint_t i;
	uint64_t best;
	uint64_t start;
	uint64_t end;

	/*
	 * make sure size is a multiple of pagesize
	 */
	size = RNDUP(size, MMU_PAGESIZE);
	next_avail_addr = RNDUP(next_avail_addr, align);

	/*
	 * XXPV fixme joe
	 *
	 * a really large bootarchive that causes you to run out of memory
	 * may cause this to blow up
	 */
	/* LINTED E_UNEXPECTED_UINT_PROMOTION */
	best = (uint64_t)-size;
	for (i = 0; i < memlists_used; ++i) {
		start = memlists[i].addr;
#if defined(__xpv)
		start += mfn_base;
#endif
		end = start + memlists[i].size;

		/*
		 * did we find the desired address?
		 */
		if (start <= next_avail_addr && next_avail_addr + size <= end) {
			best = next_avail_addr;
			goto done;
		}

		/*
		 * if not, is this address the best so far?
		 */
		if (start > next_avail_addr && start < best &&
		    RNDUP(start, align) + size <= end)
			best = RNDUP(start, align);
	}

	/*
	 * We didn't find exactly the address we wanted, due to going off the
	 * end of a memory region. Return the best found memory address.
	 */
done:
	next_avail_addr = best + size;
#if defined(__xpv)
	if (next_avail_addr > scratch_end)
		dboot_panic("Out of mem next_avail: 0x%lx, scratch_end: "
		    "0x%lx", (ulong_t)next_avail_addr,
		    (ulong_t)scratch_end);
#endif
	(void) memset((void *)(uintptr_t)best, 0, size);
	return ((void *)(uintptr_t)best);
}

void *
mem_alloc(uint32_t size)
{
	return (do_mem_alloc(size, MMU_PAGESIZE));
}


/*
 * Build page tables to map all of memory used so far as well as the kernel.
 */
static void
build_page_tables(void)
{
	uint32_t psize;
	uint32_t level;
	uint32_t off;
	uint64_t start;
#if !defined(__xpv)
	uint32_t i;
	uint64_t end;
#endif	/* __xpv */

	/*
	 * If we're on metal, we need to create the top level pagetable.
	 */
#if defined(__xpv)
	top_page_table = (paddr_t)(uintptr_t)xen_info->pt_base;
#else /* __xpv */
	top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
#endif /* __xpv */
	DBG((uintptr_t)top_page_table);

	/*
	 * Determine if we'll use large mappings for kernel, then map it.
	 */
	if (largepage_support) {
		psize = lpagesize;
		level = 1;
	} else {
		psize = MMU_PAGESIZE;
		level = 0;
	}

	DBG_MSG("Mapping kernel\n");
	DBG(ktext_phys);
	DBG(target_kernel_text);
	DBG(ksize);
	DBG(psize);
	for (off = 0; off < ksize; off += psize)
		map_pa_at_va(ktext_phys + off, target_kernel_text + off, level);

	/*
	 * The kernel will need a 1 page window to work with page tables
	 */
	bi->bi_pt_window = (uintptr_t)mem_alloc(MMU_PAGESIZE);
	DBG(bi->bi_pt_window);
	bi->bi_pte_to_pt_window =
	    (uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0);
	DBG(bi->bi_pte_to_pt_window);

#if defined(__xpv)
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		/* If this is a domU we're done. */
		DBG_MSG("\nPage tables constructed\n");
		return;
	}
#endif /* __xpv */

	/*
	 * We need 1:1 mappings for the lower 1M of memory to access
	 * BIOS tables used by a couple of drivers during boot.
	 *
	 * The following code works because our simple memory allocator
	 * only grows usage in an upwards direction.
	 *
	 * Note that by this point in boot some mappings for low memory
	 * may already exist because we've already accessed devices in low
	 * memory.  (Specifically the video frame buffer and keyboard
	 * status ports.)  If we're booting on raw hardware then GRUB
	 * created these mappings for us.  If we're booting under a
	 * hypervisor then we went ahead and remapped these devices into
	 * memory allocated within dboot itself.
	 */
	if (map_debug)
		dboot_printf("1:1 map pa=0..1Meg\n");
	for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE) {
#if defined(__xpv)
		map_ma_at_va(start, start, 0);
#else /* __xpv */
		map_pa_at_va(start, start, 0);
#endif /* __xpv */
	}

#if !defined(__xpv)
	for (i = 0; i < memlists_used; ++i) {
		start = memlists[i].addr;

		end = start + memlists[i].size;

		if (map_debug)
			dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
			    start, end);
		while (start < end && start < next_avail_addr) {
			map_pa_at_va(start, start, 0);
			start += MMU_PAGESIZE;
		}
	}
#endif /* !__xpv */

	DBG_MSG("\nPage tables constructed\n");
}

#define	NO_MULTIBOOT	\
"multiboot is no longer used to boot the Solaris Operating System.\n\
The grub entry should be changed to:\n\
kernel$ /platform/i86pc/kernel/$ISADIR/unix\n\
module$ /platform/i86pc/$ISADIR/boot_archive\n\
See http://www.sun.com/msg/SUNOS-8000-AK for details.\n"

/*
 * startup_kernel has a pretty simple job. It builds pagetables which reflect
 * 1:1 mappings for all memory in use. It then also adds mappings for
 * the kernel nucleus at virtual address of target_kernel_text using large page
 * mappings. The page table pages are also accessible at 1:1 mapped
 * virtual addresses.
 */
/*ARGSUSED*/
void
startup_kernel(void)
{
	char *cmdline;
	uintptr_t addr;
#if defined(__xpv)
	physdev_set_iopl_t set_iopl;
#endif /* __xpv */

	/*
	 * At this point we are executing in 32 bit real mode.
	 */
#if defined(__xpv)
	cmdline = (char *)xen_info->cmd_line;
#else /* __xpv */
	cmdline = (char *)mb_info->cmdline;
#endif /* __xpv */

	prom_debug = (strstr(cmdline, "prom_debug") != NULL);
	map_debug = (strstr(cmdline, "map_debug") != NULL);

#if defined(__xpv)
	/*
	 * For dom0, before we initialize the console subsystem we'll
	 * need to enable io operations, so set I/O privilege level to 1.
	 */
	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		set_iopl.iopl = 1;
		(void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
	}
#endif /* __xpv */

	bcons_init(cmdline);
	DBG_MSG("\n\nSolaris prekernel set: ");
	DBG_MSG(cmdline);
	DBG_MSG("\n");

	if (strstr(cmdline, "multiboot") != NULL) {
		dboot_panic(NO_MULTIBOOT);
	}

	/*
	 * boot info must be 16 byte aligned for 64 bit kernel ABI
	 */
	addr = (uintptr_t)boot_info;
	addr = (addr + 0xf) & ~0xf;
	bi = (struct xboot_info *)addr;
	DBG((uintptr_t)bi);
	bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline;

	/*
	 * Need correct target_kernel_text value
	 */
#if defined(_BOOT_TARGET_amd64)
	target_kernel_text = KERNEL_TEXT_amd64;
#elif defined(__xpv)
	target_kernel_text = KERNEL_TEXT_i386_xpv;
#else
	target_kernel_text = KERNEL_TEXT_i386;
#endif
	DBG(target_kernel_text);

#if defined(__xpv)

	/*
	 * XXPV	Derive this stuff from CPUID / what the hypervisor has enabled
	 */

#if defined(_BOOT_TARGET_amd64)
	/*
	 * 64-bit hypervisor.
	 */
	amd64_support = 1;
	pae_support = 1;

#else	/* _BOOT_TARGET_amd64 */

	/*
	 * See if we are running on a PAE Hypervisor
	 */
	{
		xen_capabilities_info_t caps;

		if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) != 0)
			dboot_panic("HYPERVISOR_xen_version(caps) failed");
		caps[sizeof (caps) - 1] = 0;
		if (prom_debug)
			dboot_printf("xen capabilities %s\n", caps);
		if (strstr(caps, "x86_32p") != NULL)
			pae_support = 1;
	}

#endif	/* _BOOT_TARGET_amd64 */
	{
		xen_platform_parameters_t p;

		if (HYPERVISOR_xen_version(XENVER_platform_parameters, &p) != 0)
			dboot_panic("HYPERVISOR_xen_version(parms) failed");
		DBG(p.virt_start);
		mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start);
	}

	/*
	 * The hypervisor loads stuff starting at 1Gig
	 */
	mfn_base = ONE_GIG;
	DBG(mfn_base);

	/*
	 * enable writable page table mode for the hypervisor
	 */
	if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
	    VMASST_TYPE_writable_pagetables) < 0)
		dboot_panic("HYPERVISOR_vm_assist(writable_pagetables) failed");

	/*
	 * check for NX support
	 */
	if (pae_support) {
		uint32_t eax = 0x80000000;
		uint32_t edx = get_cpuid_edx(&eax);

		if (eax >= 0x80000001) {
			eax = 0x80000001;
			edx = get_cpuid_edx(&eax);
			if (edx & CPUID_AMD_EDX_NX)
				NX_support = 1;
		}
	}

#if !defined(_BOOT_TARGET_amd64)

	/*
	 * The 32-bit hypervisor uses segmentation to protect itself from
	 * guests. This means when a guest attempts to install a flat 4GB
	 * code or data descriptor the 32-bit hypervisor will protect itself
	 * by silently shrinking the segment such that if the guest attempts
	 * any access where the hypervisor lives a #gp fault is generated.
	 * The problem is that some applications expect a full 4GB flat
	 * segment for their current thread pointer and will use negative
	 * offset segment wrap around to access data. TLS support in linux
	 * brand is one example of this.
	 *
	 * The 32-bit hypervisor can catch the #gp fault in these cases
	 * and emulate the access without passing the #gp fault to the guest
	 * but only if VMASST_TYPE_4gb_segments is explicitly turned on.
	 * Seems like this should have been the default.
	 * Either way, we want the hypervisor -- and not Solaris -- to deal
	 * with emulating these accesses.
	 */
	if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
	    VMASST_TYPE_4gb_segments) < 0)
		dboot_panic("HYPERVISOR_vm_assist(4gb_segments) failed");
#endif	/* !_BOOT_TARGET_amd64 */

#else	/* __xpv */

	/*
	 * use cpuid to enable MMU features
	 */
	if (have_cpuid()) {
		uint32_t eax, edx;

		eax = 1;
		edx = get_cpuid_edx(&eax);
		if (edx & CPUID_INTC_EDX_PSE)
			largepage_support = 1;
		if (edx & CPUID_INTC_EDX_PGE)
			pge_support = 1;
		if (edx & CPUID_INTC_EDX_PAE)
			pae_support = 1;

		eax = 0x80000000;
		edx = get_cpuid_edx(&eax);
		if (eax >= 0x80000001) {
			eax = 0x80000001;
			edx = get_cpuid_edx(&eax);
			if (edx & CPUID_AMD_EDX_LM)
				amd64_support = 1;
			if (edx & CPUID_AMD_EDX_NX)
				NX_support = 1;
		}
	} else {
		dboot_printf("cpuid not supported\n");
	}
#endif /* __xpv */


#if defined(_BOOT_TARGET_amd64)
	if (amd64_support == 0)
		dboot_panic("long mode not supported, rebooting");
	else if (pae_support == 0)
		dboot_panic("long mode, but no PAE; rebooting");
#else
	/*
	 * Allow the command line to over-ride use of PAE for 32 bit.
	 */
	if (strstr(cmdline, "disablePAE=true") != NULL) {
		pae_support = 0;
		NX_support = 0;
		amd64_support = 0;
	}
#endif

	/*
	 * initialize the simple memory allocator
	 */
	init_mem_alloc();

#if !defined(__xpv) && !defined(_BOOT_TARGET_amd64)
	/*
	 * disable PAE on 32 bit h/w w/o NX and < 4Gig of memory
	 */
	if (max_mem < FOUR_GIG && NX_support == 0)
		pae_support = 0;
#endif

	/*
	 * configure mmu information
	 */
	if (pae_support) {
		shift_amt = shift_amt_pae;
		ptes_per_table = 512;
		pte_size = 8;
		lpagesize = TWO_MEG;
#if defined(_BOOT_TARGET_amd64)
		top_level = 3;
#else
		top_level = 2;
#endif
	} else {
		pae_support = 0;
		NX_support = 0;
		shift_amt = shift_amt_nopae;
		ptes_per_table = 1024;
		pte_size = 4;
		lpagesize = FOUR_MEG;
		top_level = 1;
	}

	DBG(pge_support);
	DBG(NX_support);
	DBG(largepage_support);
	DBG(amd64_support);
	DBG(top_level);
	DBG(pte_size);
	DBG(ptes_per_table);
	DBG(lpagesize);

#if defined(__xpv)
	ktext_phys = ONE_GIG;		/* from UNIX Mapfile */
#else
	ktext_phys = FOUR_MEG;		/* from UNIX Mapfile */
#endif

#if !defined(__xpv) && defined(_BOOT_TARGET_amd64)
	/*
	 * For grub, copy kernel bits from the ELF64 file to final place.
	 */
	DBG_MSG("\nAllocating nucleus pages.\n");
	ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG);
	if (ktext_phys == 0)
		dboot_panic("failed to allocate aligned kernel memory");
	if (dboot_elfload64(mb_header.load_addr) != 0)
		dboot_panic("failed to parse kernel ELF image, rebooting");
#endif

	DBG(ktext_phys);

	/*
	 * Allocate page tables.
	 */
	build_page_tables();

	/*
	 * return to assembly code to switch to running kernel
	 */
	entry_addr_low = (uint32_t)target_kernel_text;
	DBG(entry_addr_low);
	bi->bi_use_largepage = largepage_support;
	bi->bi_use_pae = pae_support;
	bi->bi_use_pge = pge_support;
	bi->bi_use_nx = NX_support;

#if defined(__xpv)

	bi->bi_next_paddr = next_avail_addr - mfn_base;
	DBG(bi->bi_next_paddr);
	bi->bi_next_vaddr = (native_ptr_t)next_avail_addr;
	DBG(bi->bi_next_vaddr);

	/*
	 * unmap unused pages in start area to make them available for DMA
	 */
	while (next_avail_addr < scratch_end) {
		(void) HYPERVISOR_update_va_mapping(next_avail_addr,
		    0, UVMF_INVLPG | UVMF_LOCAL);
		next_avail_addr += MMU_PAGESIZE;
	}

	bi->bi_xen_start_info = (uintptr_t)xen_info;
	DBG((uintptr_t)HYPERVISOR_shared_info);
	bi->bi_shared_info = (native_ptr_t)HYPERVISOR_shared_info;
	bi->bi_top_page_table = (uintptr_t)top_page_table - mfn_base;

#else /* __xpv */

	bi->bi_next_paddr = next_avail_addr;
	DBG(bi->bi_next_paddr);
	bi->bi_next_vaddr = (uintptr_t)next_avail_addr;
	DBG(bi->bi_next_vaddr);
	bi->bi_mb_info = (uintptr_t)mb_info;
	bi->bi_top_page_table = (uintptr_t)top_page_table;

#endif /* __xpv */

	bi->bi_kseg_size = FOUR_MEG;
	DBG(bi->bi_kseg_size);

#ifndef __xpv
	if (map_debug)
		dump_tables();
#endif

	DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n");
}