/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/machparam.h>
#include <sys/x86_archext.h>
#include <sys/systm.h>
#include <sys/mach_mmu.h>
#include <sys/multiboot.h>

#if defined(__xpv)

#include <sys/hypervisor.h>
uintptr_t xen_virt_start;
pfn_t *mfn_to_pfn_mapping;

#else /* !__xpv */

extern multiboot_header_t mb_header;
extern int have_cpuid(void);

#endif /* !__xpv */

#include <sys/inttypes.h>
#include <sys/bootinfo.h>
#include <sys/mach_mmu.h>
#include <sys/boot_console.h>

#include "dboot_asm.h"
#include "dboot_printf.h"
#include "dboot_xboot.h"
#include "dboot_elfload.h"

/*
 * This file contains code that runs to transition us from either a multiboot
 * compliant loader (32 bit non-paging) or an XPV domain loader to
 * regular kernel execution. Its task is to set up the kernel memory image
 * and page tables.
 *
 * The code executes as:
 *	- 32 bits under GRUB (for 32 or 64 bit Solaris)
 *	- a 32 bit program for the 32-bit PV hypervisor
 *	- a 64 bit program for the 64-bit PV hypervisor (at least for now)
 *
 * Under the PV hypervisor, we must create mappings for any memory beyond the
 * initial start of day allocation (such as the kernel itself).
 *
 * When on the metal, the mapping between maddr_t and paddr_t is 1:1.
 * Since we are running with paging disabled, all such memory is accessible.
 */

/*
 * Standard bits used in PTE (page level) and PTP (internal levels)
 */
x86pte_t ptp_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_USER;
x86pte_t pte_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_MOD | PT_NOCONSIST;

/*
 * This is the target address (physical) where the kernel text and data
 * nucleus pages will be unpacked. On the hypervisor this is actually a
 * virtual address.
 */
paddr_t ktext_phys;
uint32_t ksize = 2 * FOUR_MEG;	/* kernel nucleus is 8Meg */

static uint64_t target_kernel_text;	/* value to use for KERNEL_TEXT */

/*
 * The stack is set up in assembler before entering startup_kernel()
 */
char stack_space[STACK_SIZE];

/*
 * Used to track physical memory allocation
 */
static paddr_t next_avail_addr = 0;

#if defined(__xpv)
/*
 * Additional information needed for hypervisor memory allocation.
 * Only memory up to scratch_end is mapped by page tables.
 * mfn_base is the start of the hypervisor virtual image. It's ONE_GIG, so
 * to derive a pfn from a pointer, you subtract mfn_base.
 */

static paddr_t scratch_end = 0;	/* we can't write all of mem here */
static paddr_t mfn_base;	/* addr corresponding to mfn_list[0] */
start_info_t *xen_info;

#else	/* __xpv */

/*
 * If on the metal, then we have a multiboot loader.
 */
multiboot_info_t *mb_info;

#endif	/* __xpv */

/*
 * This contains information passed to the kernel
 */
struct xboot_info boot_info[2];	/* extra space to fix alignment for amd64 */
struct xboot_info *bi;

/*
 * Page table and memory stuff.
 */
static paddr_t max_mem;			/* maximum memory address */

/*
 * Information about processor MMU
 */
int amd64_support = 0;
int largepage_support = 0;
int pae_support = 0;
int pge_support = 0;
int NX_support = 0;

/*
 * Low 32 bits of kernel entry address passed back to assembler.
 * When running a 64 bit kernel, the high 32 bits are 0xffffffff.
 */
uint32_t entry_addr_low;

/*
 * Memlists for the kernel. We shouldn't need a lot of these.
 */
#define	MAX_MEMLIST (50)
struct boot_memlist memlists[MAX_MEMLIST];
uint_t memlists_used = 0;
struct boot_memlist pcimemlists[MAX_MEMLIST];
uint_t pcimemlists_used = 0;
struct boot_memlist rsvdmemlists[MAX_MEMLIST];
uint_t rsvdmemlists_used = 0;

#define	MAX_MODULES (10)
struct boot_modules modules[MAX_MODULES];
uint_t modules_used = 0;

/*
 * Debugging macros
 */
uint_t prom_debug = 0;
uint_t map_debug = 0;

/*
 * Either hypervisor-specific or grub-specific code builds the initial
 * memlists. This code does the sort/merge/link for final use.
 */
static void
sort_physinstall(void)
{
	int i;
#if !defined(__xpv)
	int j;
	struct boot_memlist tmp;

	/*
	 * Now sort the memlists, in case they weren't in order.
	 * Yeah, this is a bubble sort; small, simple and easy to get right.
	 */
	DBG_MSG("Sorting phys-installed list\n");
	for (j = memlists_used - 1; j > 0; --j) {
		for (i = 0; i < j; ++i) {
			if (memlists[i].addr < memlists[i + 1].addr)
				continue;
			tmp = memlists[i];
			memlists[i] = memlists[i + 1];
			memlists[i + 1] = tmp;
		}
	}

	/*
	 * Merge any memlists that don't have holes between them.
	 */
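	/*
	 * Illustrative example (hypothetical values): entries
	 * { addr=0x0, size=0x9f000 } and { addr=0x9f000, size=0x61000 }
	 * abut exactly, so the loop below collapses them into a single
	 * { addr=0x0, size=0x100000 } entry.
	 */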
	for (i = 0; i < memlists_used - 1; ++i) {
		if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr)
			continue;

		if (prom_debug)
			dboot_printf(
			    "merging mem segs %" PRIx64 "...%" PRIx64
			    " w/ %" PRIx64 "...%" PRIx64 "\n",
			    memlists[i].addr,
			    memlists[i].addr + memlists[i].size,
			    memlists[i + 1].addr,
			    memlists[i + 1].addr + memlists[i + 1].size);

		memlists[i].size += memlists[i + 1].size;
		for (j = i + 1; j < memlists_used - 1; ++j)
			memlists[j] = memlists[j + 1];
		--memlists_used;
		DBG(memlists_used);
		--i;	/* after merging we need to reexamine, so do this */
	}
#endif	/* __xpv */

	if (prom_debug) {
		dboot_printf("\nFinal memlists:\n");
		for (i = 0; i < memlists_used; ++i) {
			dboot_printf("\t%d: addr=%" PRIx64 " size=%"
			    PRIx64 "\n", i, memlists[i].addr, memlists[i].size);
		}
	}

	/*
	 * link together the memlists with native size pointers
	 */
	memlists[0].next = 0;
	memlists[0].prev = 0;
	for (i = 1; i < memlists_used; ++i) {
		memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1);
		memlists[i].next = 0;
		memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i);
	}
	bi->bi_phys_install = (native_ptr_t)memlists;
	DBG(bi->bi_phys_install);
}

/*
 * build bios reserved memlists
 */
static void
build_rsvdmemlists(void)
{
	int i;

	rsvdmemlists[0].next = 0;
	rsvdmemlists[0].prev = 0;
	for (i = 1; i < rsvdmemlists_used; ++i) {
		rsvdmemlists[i].prev =
		    (native_ptr_t)(uintptr_t)(rsvdmemlists + i - 1);
		rsvdmemlists[i].next = 0;
		rsvdmemlists[i - 1].next =
		    (native_ptr_t)(uintptr_t)(rsvdmemlists + i);
	}
	bi->bi_rsvdmem = (native_ptr_t)rsvdmemlists;
	DBG(bi->bi_rsvdmem);
}

#if defined(__xpv)

/*
 * halt on the hypervisor after a delay to drain console output
 */
void
dboot_halt(void)
{
	uint_t i = 10000;

	while (--i)
		(void) HYPERVISOR_yield();
	(void) HYPERVISOR_shutdown(SHUTDOWN_poweroff);
}

/*
 * From a machine address, find the corresponding pseudo-physical address.
 * Pseudo-physical addresses are contiguous and run from mfn_base in each VM.
 * Machine addresses are the real underlying hardware addresses.
 * These are needed for page table entries. Note that this routine is
 * poorly protected. A bad value of "ma" will cause a page fault.
 */
paddr_t
ma_to_pa(maddr_t ma)
{
	ulong_t pgoff = ma & MMU_PAGEOFFSET;
	ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)];
	paddr_t pa;

	if (pfn >= xen_info->nr_pages)
		return (-(paddr_t)1);
	pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff;
#ifdef DEBUG
	if (ma != pa_to_ma(pa))
		dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", "
		    "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa));
#endif
	return (pa);
}

/*
 * From a pseudo-physical address, find the corresponding machine address.
 */
maddr_t
pa_to_ma(paddr_t pa)
{
	pfn_t pfn;
	ulong_t mfn;

	pfn = mmu_btop(pa - mfn_base);
	if (pa < mfn_base || pfn >= xen_info->nr_pages)
		dboot_panic("pa_to_ma(): illegal address 0x%lx", (ulong_t)pa);
	mfn = ((ulong_t *)xen_info->mfn_list)[pfn];
#ifdef DEBUG
	if (mfn_to_pfn_mapping[mfn] != pfn)
		dboot_printf("pa_to_ma(pfn=%lx) got %lx ma_to_pa() says %lx\n",
		    pfn, mfn, mfn_to_pfn_mapping[mfn]);
#endif
	return (mfn_to_ma(mfn) | (pa & MMU_PAGEOFFSET));
}
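
/*
 * A sketch of the round trip above, using made-up numbers: if pfn 2 of this
 * domain is backed by machine frame 0x1234, then for
 * pa = mfn_base + 2 * MMU_PAGESIZE + off, pa_to_ma(pa) yields
 * mfn_to_ma(0x1234) | off, and ma_to_pa() of that machine address looks up
 * mfn_to_pfn_mapping[0x1234] == 2 and reconstructs the same pa.
 */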

#endif /* __xpv */

x86pte_t
get_pteval(paddr_t table, uint_t index)
{
	if (pae_support)
		return (((x86pte_t *)(uintptr_t)table)[index]);
	return (((x86pte32_t *)(uintptr_t)table)[index]);
}

/*ARGSUSED*/
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
#ifdef __xpv
	mmu_update_t t;
	maddr_t mtable = pa_to_ma(table);
	int retcnt;

	t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
	t.val = pteval;
	if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
		dboot_panic("HYPERVISOR_mmu_update() failed");
#else /* __xpv */
	uintptr_t tab_addr = (uintptr_t)table;

	if (pae_support)
		((x86pte_t *)tab_addr)[index] = pteval;
	else
		((x86pte32_t *)tab_addr)[index] = (x86pte32_t)pteval;
	if (level == top_level && level == 2)
		reload_cr3();
#endif /* __xpv */
}

paddr_t
make_ptable(x86pte_t *pteval, uint_t level)
{
	paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);

	if (level == top_level && level == 2)
		*pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID;
	else
		*pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits;

#ifdef __xpv
	/* Remove write permission to the new page table. */
	if (HYPERVISOR_update_va_mapping(new_table,
	    *pteval & ~(x86pte_t)PT_WRITABLE, UVMF_INVLPG | UVMF_LOCAL))
		dboot_panic("HYP_update_va_mapping error");
#endif

	if (map_debug)
		dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%"
		    PRIx64 "\n", level, (ulong_t)new_table, *pteval);
	return (new_table);
}

x86pte_t *
map_pte(paddr_t table, uint_t index)
{
	return ((x86pte_t *)(uintptr_t)(table + index * pte_size));
}

/*
 * Dump out the contents of page tables. The walk is iterative: the
 * save_table[]/save_index[] arrays act as an explicit stack in place of
 * C recursion.
 */
static void
dump_tables(void)
{
	uint_t save_index[4];	/* for recursion */
	char *save_table[4];	/* for recursion */
	uint_t l;
	uint64_t va;
	uint64_t pgsize;
	int index;
	int i;
	x86pte_t pteval;
	char *table;
	static char *tablist = "\t\t\t";
	char *tabs = tablist + 3 - top_level;
	uint_t pa, pa1;
#if !defined(__xpv)
#define	maddr_t	paddr_t
#endif /* !__xpv */

	dboot_printf("Finished pagetables:\n");
	table = (char *)(uintptr_t)top_page_table;
	l = top_level;
	va = 0;
	for (index = 0; index < ptes_per_table; ++index) {
		pgsize = 1ull << shift_amt[l];
		if (pae_support)
			pteval = ((x86pte_t *)table)[index];
		else
			pteval = ((x86pte32_t *)table)[index];
		if (pteval == 0)
			goto next_entry;

		dboot_printf("%s %p[0x%x] = %" PRIx64 ", va=%" PRIx64,
		    tabs + l, (void *)table, index, (uint64_t)pteval, va);
		pa = ma_to_pa(pteval & MMU_PAGEMASK);
		dboot_printf(" physaddr=%x\n", pa);

		/*
		 * Don't try to walk hypervisor private pagetables
		 */
		if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) {
			save_table[l] = table;
			save_index[l] = index;
			--l;
			index = -1;
			table = (char *)(uintptr_t)
			    ma_to_pa(pteval & MMU_PAGEMASK);
			goto recursion;
		}

		/*
		 * shorten dump for consecutive mappings
		 */
		for (i = 1; index + i < ptes_per_table; ++i) {
			if (pae_support)
				pteval = ((x86pte_t *)table)[index + i];
			else
				pteval = ((x86pte32_t *)table)[index + i];
			if (pteval == 0)
				break;
			pa1 = ma_to_pa(pteval & MMU_PAGEMASK);
			if (pa1 != pa + i * pgsize)
				break;
		}
		if (i > 2) {
			dboot_printf("%s...\n", tabs + l);
			va += pgsize * (i - 2);
			index += i - 2;
		}
next_entry:
		va += pgsize;
		if (l == 3 && index == 256)	/* VA hole */
			va = 0xffff800000000000ull;
recursion:
		;
	}
	if (l < top_level) {
		++l;
		index = save_index[l];
		table = save_table[l];
		goto recursion;
	}
}

/*
 * Add a mapping for the machine page at the given virtual address.
 */
static void
map_ma_at_va(maddr_t ma, native_ptr_t va, uint_t level)
{
	x86pte_t *ptep;
	x86pte_t pteval;

	pteval = ma | pte_bits;
	if (level > 0)
		pteval |= PT_PAGESIZE;
	if (va >= target_kernel_text && pge_support)
		pteval |= PT_GLOBAL;

	if (map_debug && ma != va)
		dboot_printf("mapping ma=0x%" PRIx64 " va=0x%" PRIx64
		    " pte=0x%" PRIx64 " l=%d\n",
		    (uint64_t)ma, (uint64_t)va, pteval, level);

#if defined(__xpv)
	/*
	 * see if we can avoid find_pte() on the hypervisor
	 */
	if (HYPERVISOR_update_va_mapping(va, pteval,
	    UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;
#endif

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables
	 */
	ptep = find_pte(va, NULL, level, 0);

	/*
	 * When paravirtualized, we must use hypervisor calls to modify the
	 * PTE, since paging is active. On real hardware we just write to
	 * the pagetables which aren't in use yet.
	 */
#if defined(__xpv)
	ptep = ptep;	/* shut lint up */
	if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
		dboot_panic("mmu_update failed-map_pa_at_va va=0x%" PRIx64
		    " l=%d ma=0x%" PRIx64 ", pte=0x%" PRIx64 "",
		    (uint64_t)va, level, (uint64_t)ma, pteval);
#else
	if (va < 1024 * 1024)
		pteval |= PT_NOCACHE;		/* for video RAM */
	if (pae_support)
		*ptep = pteval;
	else
		*((x86pte32_t *)ptep) = (x86pte32_t)pteval;
#endif
}

/*
 * Add a mapping for the physical page at the given virtual address.
 */
static void
map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level)
{
	map_ma_at_va(pa_to_ma(pa), va, level);
}

/*
 * This is called to remove start..end from the
 * possible range of PCI addresses.
 */
const uint64_t pci_lo_limit = 0x00100000ul;
const uint64_t pci_hi_limit = 0xfff00000ul;
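
/*
 * The limits above bracket the window considered for PCI: everything from
 * 1MB (skipping legacy/real-mode space) up to 0xfff00000. Leaving the top
 * 1MB below 4GB out of the window is presumably for firmware use; that
 * rationale is an assumption, not something stated by this code.
 */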
static void
exclude_from_pci(uint64_t start, uint64_t end)
{
	int i;
	int j;
	struct boot_memlist *ml;

	for (i = 0; i < pcimemlists_used; ++i) {
		ml = &pcimemlists[i];

		/* delete the entire range? */
		if (start <= ml->addr && ml->addr + ml->size <= end) {
			--pcimemlists_used;
			for (j = i; j < pcimemlists_used; ++j)
				pcimemlists[j] = pcimemlists[j + 1];
			--i;	/* to revisit the new one at this index */
		}

		/* split a range? */
		else if (ml->addr < start && end < ml->addr + ml->size) {

			++pcimemlists_used;
			if (pcimemlists_used > MAX_MEMLIST)
				dboot_panic("too many pcimemlists");

			for (j = pcimemlists_used - 1; j > i; --j)
				pcimemlists[j] = pcimemlists[j - 1];
			ml->size = start - ml->addr;

			++ml;
			ml->size = (ml->addr + ml->size) - end;
			ml->addr = end;
			++i;	/* skip on to next one */
		}

		/* cut memory off the start? */
		else if (ml->addr < end && end < ml->addr + ml->size) {
			ml->size -= end - ml->addr;
			ml->addr = end;
		}

		/* cut memory off the end? */
		else if (ml->addr <= start && start < ml->addr + ml->size) {
			ml->size = start - ml->addr;
		}
	}
}
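
/*
 * Worked example (hypothetical values): with a single pcimemlist entry
 * [0x100000, 0xfff00000), excluding [0x100000, 0x80000000) takes the
 * "cut memory off the start" case and leaves [0x80000000, 0xfff00000),
 * while excluding a range strictly inside an entry takes the "split"
 * case and produces two entries around the hole.
 */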

/*
 * Xen strips the size field out of the mb_memory_map_t, see struct e820entry
 * definition in Xen source.
 */
#ifdef __xpv
typedef struct {
	uint32_t base_addr_low;
	uint32_t base_addr_high;
	uint32_t length_low;
	uint32_t length_high;
	uint32_t type;
} mmap_t;
#else
typedef mb_memory_map_t mmap_t;
#endif

static void
build_pcimemlists(mmap_t *mem, int num)
{
	mmap_t *mmap;
	uint64_t page_offset = MMU_PAGEOFFSET;	/* needs to be 64 bits */
	uint64_t start;
	uint64_t end;
	int i;

	/*
	 * initialize
	 */
	pcimemlists[0].addr = pci_lo_limit;
	pcimemlists[0].size = pci_hi_limit - pci_lo_limit;
	pcimemlists_used = 1;

	/*
	 * Fill in PCI memlists.
	 */
	for (mmap = mem, i = 0; i < num; ++i, ++mmap) {
		start = ((uint64_t)mmap->base_addr_high << 32) +
		    mmap->base_addr_low;
		end = start + ((uint64_t)mmap->length_high << 32) +
		    mmap->length_low;

		if (prom_debug)
			dboot_printf("\ttype: %d %" PRIx64 "..%"
			    PRIx64 "\n", mmap->type, start, end);

		/*
		 * page align start and end
		 */
		start = (start + page_offset) & ~page_offset;
		end &= ~page_offset;
		if (end <= start)
			continue;

		exclude_from_pci(start, end);
	}

	/*
	 * Finish off the pcimemlist
	 */
	if (prom_debug) {
		for (i = 0; i < pcimemlists_used; ++i) {
			dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%"
			    PRIx64 "\n", pcimemlists[i].addr,
			    pcimemlists[i].addr + pcimemlists[i].size);
		}
	}
	pcimemlists[0].next = 0;
	pcimemlists[0].prev = 0;
	for (i = 1; i < pcimemlists_used; ++i) {
		pcimemlists[i].prev =
		    (native_ptr_t)(uintptr_t)(pcimemlists + i - 1);
		pcimemlists[i].next = 0;
		pcimemlists[i - 1].next =
		    (native_ptr_t)(uintptr_t)(pcimemlists + i);
	}
	bi->bi_pcimem = (native_ptr_t)pcimemlists;
	DBG(bi->bi_pcimem);
}
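
/*
 * Note the approach above: the PCI list starts as the full
 * [pci_lo_limit, pci_hi_limit) window and every range reported in the
 * memory map is carved out, leaving only address space with no reported
 * memory behind it available for PCI.
 */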

#if defined(__xpv)
/*
 * Initialize memory allocator stuff from hypervisor-supplied start info.
 *
 * There is 512KB of scratch area after the boot stack page.
 * We'll use that for everything except the kernel nucleus pages which are too
 * big to fit there and are allocated last anyway.
 */
#define	MAXMAPS	100
static mmap_t map_buffer[MAXMAPS];
static void
init_mem_alloc(void)
{
	int local;	/* variables needed to find start region */
	paddr_t scratch_start;
	xen_memory_map_t map;

	DBG_MSG("Entered init_mem_alloc()\n");

	/*
	 * Free memory follows the stack. There's at least 512KB of scratch
	 * space, rounded up to at least 2MB alignment. That should be enough
	 * for the page tables we'll need to build. The nucleus memory is
	 * allocated last and will be outside the addressable range. We'll
	 * switch to new page tables before we unpack the kernel.
	 */
	scratch_start = RNDUP((paddr_t)(uintptr_t)&local, MMU_PAGESIZE);
	DBG(scratch_start);
	scratch_end = RNDUP((paddr_t)scratch_start + 512 * 1024, TWO_MEG);
	DBG(scratch_end);

	/*
	 * For paranoia, leave some space between hypervisor data and ours.
	 * Use 500 instead of 512.
	 */
	next_avail_addr = scratch_end - 500 * 1024;
	DBG(next_avail_addr);

	/*
	 * The domain builder gives us at most 1 module
	 */
	DBG(xen_info->mod_len);
	if (xen_info->mod_len > 0) {
		DBG(xen_info->mod_start);
		modules[0].bm_addr = xen_info->mod_start;
		modules[0].bm_size = xen_info->mod_len;
		bi->bi_module_cnt = 1;
		bi->bi_modules = (native_ptr_t)modules;
	} else {
		bi->bi_module_cnt = 0;
		bi->bi_modules = NULL;
	}
	DBG(bi->bi_module_cnt);
	DBG(bi->bi_modules);

	DBG(xen_info->mfn_list);
	DBG(xen_info->nr_pages);
	max_mem = (paddr_t)xen_info->nr_pages << MMU_PAGESHIFT;
	DBG(max_mem);

	/*
	 * Using pseudo-physical addresses, so only 1 memlist element
	 */
	memlists[0].addr = 0;
	DBG(memlists[0].addr);
	memlists[0].size = max_mem;
	DBG(memlists[0].size);
	memlists_used = 1;
	DBG(memlists_used);

	/*
	 * finish building physinstall list
	 */
	sort_physinstall();

	/*
	 * build bios reserved memlists
	 */
	build_rsvdmemlists();

	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		/*
		 * build PCI Memory list
		 */
		map.nr_entries = MAXMAPS;
		/*LINTED: constant in conditional context*/
		set_xen_guest_handle(map.buffer, map_buffer);
		if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &map) != 0)
			dboot_panic("getting XENMEM_machine_memory_map failed");
		build_pcimemlists(map_buffer, map.nr_entries);
	}
}

#else /* !__xpv */

/*
 * During memory allocation, find the highest address not used yet.
 */
static void
check_higher(paddr_t a)
{
	if (a < next_avail_addr)
		return;
	next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE);
	DBG(next_avail_addr);
}

/*
 * Walk through the module information finding the last used address.
 * The first available address will become the top level page table.
 *
 * We then build the phys_install memlist from the multiboot information.
 */
static void
init_mem_alloc(void)
{
	mb_memory_map_t *mmap;
	mb_module_t *mod;
	uint64_t start;
	uint64_t end;
	uint64_t page_offset = MMU_PAGEOFFSET;	/* needs to be 64 bits */
	extern char _end[];
	int i;

	DBG_MSG("Entered init_mem_alloc()\n");
	DBG((uintptr_t)mb_info);

	if (mb_info->mods_count > MAX_MODULES) {
		dboot_panic("Too many modules (%d) -- the maximum is %d.",
		    mb_info->mods_count, MAX_MODULES);
	}
	/*
	 * Search the modules to find the last used address;
	 * we'll build the module list while we're walking through here.
	 */
	DBG_MSG("\nFinding Modules\n");
	check_higher((paddr_t)&_end);
	for (mod = (mb_module_t *)(mb_info->mods_addr), i = 0;
	    i < mb_info->mods_count;
	    ++mod, ++i) {
		if (prom_debug) {
			dboot_printf("\tmodule #%d: %s at: 0x%lx, len 0x%lx\n",
			    i, (char *)(mod->mod_name),
			    (ulong_t)mod->mod_start, (ulong_t)mod->mod_end);
		}
		modules[i].bm_addr = mod->mod_start;
		if (mod->mod_start > mod->mod_end) {
			dboot_panic("module[%d]: Invalid module start address "
			    "(0x%llx)", i, (uint64_t)mod->mod_start);
		}
		modules[i].bm_size = mod->mod_end - mod->mod_start;

		check_higher(mod->mod_end);
	}
	bi->bi_modules = (native_ptr_t)modules;
	DBG(bi->bi_modules);
	bi->bi_module_cnt = mb_info->mods_count;
	DBG(bi->bi_module_cnt);

	/*
	 * Walk through the memory map from multiboot and build our memlist
	 * structures. Note these will have native format pointers.
	 */
	DBG_MSG("\nFinding Memory Map\n");
	DBG(mb_info->flags);
	max_mem = 0;
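	/*
	 * Flag bit 0x40 (bit 6) means the loader supplied a full
	 * mmap_addr/mmap_length memory map; bit 0x01 means only the basic
	 * mem_lower/mem_upper values are valid (per the multiboot spec).
	 */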
	if (mb_info->flags & 0x40) {
		int cnt = 0;

		DBG(mb_info->mmap_addr);
		DBG(mb_info->mmap_length);
		check_higher(mb_info->mmap_addr + mb_info->mmap_length);

		for (mmap = (mb_memory_map_t *)mb_info->mmap_addr;
		    (uint32_t)mmap < mb_info->mmap_addr + mb_info->mmap_length;
		    mmap = (mb_memory_map_t *)((uint32_t)mmap + mmap->size
		    + sizeof (mmap->size))) {
			++cnt;
			start = ((uint64_t)mmap->base_addr_high << 32) +
			    mmap->base_addr_low;
			end = start + ((uint64_t)mmap->length_high << 32) +
			    mmap->length_low;

			if (prom_debug)
				dboot_printf("\ttype: %d %" PRIx64 "..%"
				    PRIx64 "\n", mmap->type, start, end);

			/*
			 * page align start and end
			 */
			start = (start + page_offset) & ~page_offset;
			end &= ~page_offset;
			if (end <= start)
				continue;

			/*
			 * only type 1 is usable RAM
			 */
			switch (mmap->type) {
			case 1:
				if (end > max_mem)
					max_mem = end;
				memlists[memlists_used].addr = start;
				memlists[memlists_used].size = end - start;
				++memlists_used;
				if (memlists_used > MAX_MEMLIST)
					dboot_panic("too many memlists");
				break;
			case 2:
				rsvdmemlists[rsvdmemlists_used].addr = start;
				rsvdmemlists[rsvdmemlists_used].size =
				    end - start;
				++rsvdmemlists_used;
				if (rsvdmemlists_used > MAX_MEMLIST)
					dboot_panic("too many rsvdmemlists");
				break;
			default:
				continue;
			}
		}
		build_pcimemlists((mb_memory_map_t *)mb_info->mmap_addr, cnt);
	} else if (mb_info->flags & 0x01) {
		DBG(mb_info->mem_lower);
		memlists[memlists_used].addr = 0;
		memlists[memlists_used].size = mb_info->mem_lower * 1024;
		++memlists_used;
		DBG(mb_info->mem_upper);
		memlists[memlists_used].addr = 1024 * 1024;
		memlists[memlists_used].size = mb_info->mem_upper * 1024;
		++memlists_used;

		/*
		 * Old platform - assume I/O space at the end of memory.
		 */
		pcimemlists[0].addr =
		    (mb_info->mem_upper * 1024) + (1024 * 1024);
		pcimemlists[0].size = pci_hi_limit - pcimemlists[0].addr;
		pcimemlists[0].next = 0;
		pcimemlists[0].prev = 0;
		bi->bi_pcimem = (native_ptr_t)pcimemlists;
		DBG(bi->bi_pcimem);
	} else {
		dboot_panic("No memory info from boot loader!!!");
	}

	check_higher(bi->bi_cmdline);

	/*
	 * finish processing the physinstall list
	 */
	sort_physinstall();

	/*
	 * build bios reserved mem lists
	 */
	build_rsvdmemlists();
}
#endif /* !__xpv */

/*
 * Simple memory allocator, allocates aligned physical memory.
 * Note that startup_kernel() only allocates memory, never frees.
 * Memory usage just grows in an upward direction.
 */
static void *
do_mem_alloc(uint32_t size, uint32_t align)
{
	uint_t i;
	uint64_t best;
	uint64_t start;
	uint64_t end;

	/*
	 * make sure size is a multiple of pagesize
	 */
	size = RNDUP(size, MMU_PAGESIZE);
	next_avail_addr = RNDUP(next_avail_addr, align);

	/*
	 * XXPV fixme joe
	 *
	 * a really large bootarchive that causes you to run out of memory
	 * may cause this to blow up
	 */
	/* LINTED E_UNEXPECTED_UINT_PROMOTION */
	best = (uint64_t)-size;
	for (i = 0; i < memlists_used; ++i) {
		start = memlists[i].addr;
#if defined(__xpv)
		start += mfn_base;
#endif
		end = start + memlists[i].size;

		/*
		 * did we find the desired address?
		 */
		if (start <= next_avail_addr && next_avail_addr + size <= end) {
			best = next_avail_addr;
			goto done;
		}

		/*
		 * if not, is this address the best so far?
		 */
		if (start > next_avail_addr && start < best &&
		    RNDUP(start, align) + size <= end)
			best = RNDUP(start, align);
	}

	/*
	 * We didn't find exactly the address we wanted, due to going off the
	 * end of a memory region. Return the best found memory address.
	 */
done:
	next_avail_addr = best + size;
#if defined(__xpv)
	if (next_avail_addr > scratch_end)
		dboot_panic("Out of mem next_avail: 0x%lx, scratch_end: "
		    "0x%lx", (ulong_t)next_avail_addr,
		    (ulong_t)scratch_end);
#endif
	(void) memset((void *)(uintptr_t)best, 0, size);
	return ((void *)(uintptr_t)best);
}
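
/*
 * Example of the allocator's behavior (hypothetical state): with
 * next_avail_addr at 0x153000, do_mem_alloc(0x2000, MMU_PAGESIZE) returns
 * 0x153000 zeroed and advances next_avail_addr to 0x155000, provided the
 * current memlist segment still has room; otherwise it falls back to the
 * lowest later segment where an aligned allocation fits.
 */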

void *
mem_alloc(uint32_t size)
{
	return (do_mem_alloc(size, MMU_PAGESIZE));
}


/*
 * Build page tables to map all of memory used so far as well as the kernel.
 */
static void
build_page_tables(void)
{
	uint32_t psize;
	uint32_t level;
	uint32_t off;
	uint64_t start;
#if !defined(__xpv)
	uint32_t i;
	uint64_t end;
#endif /* __xpv */

	/*
	 * If we're on metal, we need to create the top level pagetable.
	 */
#if defined(__xpv)
	top_page_table = (paddr_t)(uintptr_t)xen_info->pt_base;
#else /* __xpv */
	top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
#endif /* __xpv */
	DBG((uintptr_t)top_page_table);

	/*
	 * Determine if we'll use large mappings for kernel, then map it.
	 */
	if (largepage_support) {
		psize = lpagesize;
		level = 1;
	} else {
		psize = MMU_PAGESIZE;
		level = 0;
	}

	DBG_MSG("Mapping kernel\n");
	DBG(ktext_phys);
	DBG(target_kernel_text);
	DBG(ksize);
	DBG(psize);
	for (off = 0; off < ksize; off += psize)
		map_pa_at_va(ktext_phys + off, target_kernel_text + off, level);

	/*
	 * The kernel will need a 1 page window to work with page tables
	 */
	bi->bi_pt_window = (uintptr_t)mem_alloc(MMU_PAGESIZE);
	DBG(bi->bi_pt_window);
	bi->bi_pte_to_pt_window =
	    (uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0);
	DBG(bi->bi_pte_to_pt_window);

#if defined(__xpv)
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		/* If this is a domU we're done. */
		DBG_MSG("\nPage tables constructed\n");
		return;
	}
#endif /* __xpv */

	/*
	 * We need 1:1 mappings for the lower 1M of memory to access
	 * BIOS tables used by a couple of drivers during boot.
	 *
	 * The following code works because our simple memory allocator
	 * only grows usage in an upwards direction.
	 *
	 * Note that by this point in boot some mappings for low memory
	 * may already exist because we've already accessed devices in low
	 * memory. (Specifically the video frame buffer and keyboard
	 * status ports.) If we're booting on raw hardware then GRUB
	 * created these mappings for us. If we're booting under a
	 * hypervisor then we went ahead and remapped these devices into
	 * memory allocated within dboot itself.
	 */
	if (map_debug)
		dboot_printf("1:1 map pa=0..1Meg\n");
	for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE) {
#if defined(__xpv)
		map_ma_at_va(start, start, 0);
#else /* __xpv */
		map_pa_at_va(start, start, 0);
#endif /* __xpv */
	}

#if !defined(__xpv)
	for (i = 0; i < memlists_used; ++i) {
		start = memlists[i].addr;

		end = start + memlists[i].size;

		if (map_debug)
			dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
			    start, end);
		while (start < end && start < next_avail_addr) {
			map_pa_at_va(start, start, 0);
			start += MMU_PAGESIZE;
		}
	}
#endif /* !__xpv */

	DBG_MSG("\nPage tables constructed\n");
}

#define	NO_MULTIBOOT	\
"multiboot is no longer used to boot the Solaris Operating System.\n\
The grub entry should be changed to:\n\
kernel$ /platform/i86pc/kernel/$ISADIR/unix\n\
module$ /platform/i86pc/$ISADIR/boot_archive\n\
See http://www.sun.com/msg/SUNOS-8000-AK for details.\n"

/*
 * startup_kernel has a pretty simple job. It builds pagetables which reflect
 * 1:1 mappings for all memory in use. It then also adds mappings for
 * the kernel nucleus at virtual address of target_kernel_text using large page
 * mappings. The page table pages are also accessible at 1:1 mapped
 * virtual addresses.
 */
/*ARGSUSED*/
void
startup_kernel(void)
{
	char *cmdline;
	uintptr_t addr;
#if defined(__xpv)
	physdev_set_iopl_t set_iopl;
#endif /* __xpv */

	/*
	 * At this point we are executing in 32 bit protected mode.
	 */
#if defined(__xpv)
	cmdline = (char *)xen_info->cmd_line;
#else /* __xpv */
	cmdline = (char *)mb_info->cmdline;
#endif /* __xpv */

	prom_debug = (strstr(cmdline, "prom_debug") != NULL);
	map_debug = (strstr(cmdline, "map_debug") != NULL);

#if defined(__xpv)
	/*
	 * For dom0, before we initialize the console subsystem we'll
	 * need to enable I/O operations, so set the I/O privilege level to 1.
	 */
	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		set_iopl.iopl = 1;
		(void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
	}
#endif /* __xpv */

	bcons_init(cmdline);
	DBG_MSG("\n\nSolaris prekernel set: ");
	DBG_MSG(cmdline);
	DBG_MSG("\n");

	if (strstr(cmdline, "multiboot") != NULL) {
		dboot_panic(NO_MULTIBOOT);
	}

	/*
	 * boot info must be 16 byte aligned for 64 bit kernel ABI
	 */
	addr = (uintptr_t)boot_info;
	addr = (addr + 0xf) & ~0xf;
	bi = (struct xboot_info *)addr;
	DBG((uintptr_t)bi);
	bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline;

	/*
	 * Need correct target_kernel_text value
	 */
#if defined(_BOOT_TARGET_amd64)
	target_kernel_text = KERNEL_TEXT_amd64;
#elif defined(__xpv)
	target_kernel_text = KERNEL_TEXT_i386_xpv;
#else
	target_kernel_text = KERNEL_TEXT_i386;
#endif
	DBG(target_kernel_text);

#if defined(__xpv)

	/*
	 * XXPV	Derive this stuff from CPUID / what the hypervisor has enabled
	 */

#if defined(_BOOT_TARGET_amd64)
	/*
	 * 64-bit hypervisor.
	 */
	amd64_support = 1;
	pae_support = 1;

#else /* _BOOT_TARGET_amd64 */

	/*
	 * See if we are running on a PAE Hypervisor
	 */
	{
		xen_capabilities_info_t caps;

		if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) != 0)
			dboot_panic("HYPERVISOR_xen_version(caps) failed");
		caps[sizeof (caps) - 1] = 0;
		if (prom_debug)
			dboot_printf("xen capabilities %s\n", caps);
		if (strstr(caps, "x86_32p") != NULL)
			pae_support = 1;
	}

#endif /* _BOOT_TARGET_amd64 */
	{
		xen_platform_parameters_t p;

		if (HYPERVISOR_xen_version(XENVER_platform_parameters, &p) != 0)
			dboot_panic("HYPERVISOR_xen_version(parms) failed");
		DBG(p.virt_start);
		mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start);
	}

	/*
	 * The hypervisor loads stuff starting at 1Gig
	 */
	mfn_base = ONE_GIG;
	DBG(mfn_base);

	/*
	 * enable writable page table mode for the hypervisor
	 */
	if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
	    VMASST_TYPE_writable_pagetables) < 0)
		dboot_panic("HYPERVISOR_vm_assist(writable_pagetables) failed");

	/*
	 * check for NX support
	 */
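	/*
	 * CPUID leaf 0x80000000 returns the highest extended leaf in %eax;
	 * only if that is at least 0x80000001 can the extended feature
	 * flags (including the NX bit) be read from leaf 0x80000001.
	 */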
	if (pae_support) {
		uint32_t eax = 0x80000000;
		uint32_t edx = get_cpuid_edx(&eax);

		if (eax >= 0x80000001) {
			eax = 0x80000001;
			edx = get_cpuid_edx(&eax);
			if (edx & CPUID_AMD_EDX_NX)
				NX_support = 1;
		}
	}

#if !defined(_BOOT_TARGET_amd64)

	/*
	 * The 32-bit hypervisor uses segmentation to protect itself from
	 * guests. This means when a guest attempts to install a flat 4GB
	 * code or data descriptor the 32-bit hypervisor will protect itself
	 * by silently shrinking the segment such that if the guest attempts
	 * any access where the hypervisor lives a #gp fault is generated.
	 * The problem is that some applications expect a full 4GB flat
	 * segment for their current thread pointer and will use negative
	 * offset segment wrap around to access data. TLS support in linux
	 * brand is one example of this.
	 *
	 * The 32-bit hypervisor can catch the #gp fault in these cases
	 * and emulate the access without passing the #gp fault to the guest
	 * but only if VMASST_TYPE_4gb_segments is explicitly turned on.
	 * Seems like this should have been the default.
	 * Either way, we want the hypervisor -- and not Solaris -- to deal
	 * with emulating these accesses.
	 */
	if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
	    VMASST_TYPE_4gb_segments) < 0)
		dboot_panic("HYPERVISOR_vm_assist(4gb_segments) failed");
#endif /* !_BOOT_TARGET_amd64 */

#else /* __xpv */

	/*
	 * use cpuid to enable MMU features
	 */
	if (have_cpuid()) {
		uint32_t eax, edx;

		eax = 1;
		edx = get_cpuid_edx(&eax);
		if (edx & CPUID_INTC_EDX_PSE)
			largepage_support = 1;
		if (edx & CPUID_INTC_EDX_PGE)
			pge_support = 1;
		if (edx & CPUID_INTC_EDX_PAE)
			pae_support = 1;

		eax = 0x80000000;
		edx = get_cpuid_edx(&eax);
		if (eax >= 0x80000001) {
			eax = 0x80000001;
			edx = get_cpuid_edx(&eax);
			if (edx & CPUID_AMD_EDX_LM)
				amd64_support = 1;
			if (edx & CPUID_AMD_EDX_NX)
				NX_support = 1;
		}
	} else {
		dboot_printf("cpuid not supported\n");
	}
#endif /* __xpv */


#if defined(_BOOT_TARGET_amd64)
	if (amd64_support == 0)
		dboot_panic("long mode not supported, rebooting");
	else if (pae_support == 0)
		dboot_panic("long mode, but no PAE; rebooting");
#else
	/*
	 * Allow the command line to over-ride use of PAE for 32 bit.
	 */
	if (strstr(cmdline, "disablePAE=true") != NULL) {
		pae_support = 0;
		NX_support = 0;
		amd64_support = 0;
	}
#endif

	/*
	 * initialize the simple memory allocator
	 */
	init_mem_alloc();

#if !defined(__xpv) && !defined(_BOOT_TARGET_amd64)
	/*
	 * disable PAE on 32 bit h/w w/o NX and < 4Gig of memory
	 */
	if (max_mem < FOUR_GIG && NX_support == 0)
		pae_support = 0;
#endif

	/*
	 * configure mmu information
	 */
	if (pae_support) {
		shift_amt = shift_amt_pae;
		ptes_per_table = 512;
		pte_size = 8;
		lpagesize = TWO_MEG;
#if defined(_BOOT_TARGET_amd64)
		top_level = 3;
#else
		top_level = 2;
#endif
	} else {
		pae_support = 0;
		NX_support = 0;
		shift_amt = shift_amt_nopae;
		ptes_per_table = 1024;
		pte_size = 4;
		lpagesize = FOUR_MEG;
		top_level = 1;
	}

	DBG(pge_support);
	DBG(NX_support);
	DBG(largepage_support);
	DBG(amd64_support);
	DBG(top_level);
	DBG(pte_size);
	DBG(ptes_per_table);
	DBG(lpagesize);

#if defined(__xpv)
	ktext_phys = ONE_GIG;		/* from UNIX Mapfile */
#else
	ktext_phys = FOUR_MEG;		/* from UNIX Mapfile */
#endif

#if !defined(__xpv) && defined(_BOOT_TARGET_amd64)
	/*
	 * For grub, copy kernel bits from the ELF64 file to final place.
	 */
	DBG_MSG("\nAllocating nucleus pages.\n");
	ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG);
	if (ktext_phys == 0)
		dboot_panic("failed to allocate aligned kernel memory");
	if (dboot_elfload64(mb_header.load_addr) != 0)
		dboot_panic("failed to parse kernel ELF image, rebooting");
#endif

	DBG(ktext_phys);

	/*
	 * Allocate page tables.
	 */
	build_page_tables();

	/*
	 * return to assembly code to switch to running kernel
	 */
	entry_addr_low = (uint32_t)target_kernel_text;
	DBG(entry_addr_low);
	bi->bi_use_largepage = largepage_support;
	bi->bi_use_pae = pae_support;
	bi->bi_use_pge = pge_support;
	bi->bi_use_nx = NX_support;

#if defined(__xpv)

	bi->bi_next_paddr = next_avail_addr - mfn_base;
	DBG(bi->bi_next_paddr);
	bi->bi_next_vaddr = (native_ptr_t)next_avail_addr;
	DBG(bi->bi_next_vaddr);

	/*
	 * unmap unused pages in start area to make them available for DMA
	 */
	while (next_avail_addr < scratch_end) {
		(void) HYPERVISOR_update_va_mapping(next_avail_addr,
		    0, UVMF_INVLPG | UVMF_LOCAL);
		next_avail_addr += MMU_PAGESIZE;
	}

	bi->bi_xen_start_info = (uintptr_t)xen_info;
	DBG((uintptr_t)HYPERVISOR_shared_info);
	bi->bi_shared_info = (native_ptr_t)HYPERVISOR_shared_info;
	bi->bi_top_page_table = (uintptr_t)top_page_table - mfn_base;

#else /* __xpv */

	bi->bi_next_paddr = next_avail_addr;
	DBG(bi->bi_next_paddr);
	bi->bi_next_vaddr = (uintptr_t)next_avail_addr;
	DBG(bi->bi_next_vaddr);
	bi->bi_mb_info = (uintptr_t)mb_info;
	bi->bi_top_page_table = (uintptr_t)top_page_table;

#endif /* __xpv */

	bi->bi_kseg_size = FOUR_MEG;
	DBG(bi->bi_kseg_size);

#ifndef __xpv
	if (map_debug)
		dump_tables();
#endif

	DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n");
}