1 /*- 2 * Copyright (c) 2014 Andrew Turner 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
 *
 */

#include "opt_acpi.h"
#include "opt_kstack_pages.h"
#include "opt_platform.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/asan.h>
#include <sys/buf.h>
#include <sys/bus.h>
#include <sys/cons.h>
#include <sys/cpu.h>
#include <sys/csan.h>
#include <sys/devmap.h>
#include <sys/efi.h>
#include <sys/exec.h>
#include <sys/imgact.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/msan.h>
#include <sys/msgbuf.h>
#include <sys/pcpu.h>
#include <sys/physmem.h>
#include <sys/proc.h>
#include <sys/ptrace.h>
#include <sys/reboot.h>
#include <sys/reg.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/ucontext.h>
#include <sys/vdso.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>

#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/cpu_feat.h>
#include <machine/debug_monitor.h>
#include <machine/hypervisor.h>
#include <machine/kdb.h>
#include <machine/machdep.h>
#include <machine/metadata.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/undefined.h>
#include <machine/vmparam.h>

#ifdef VFP
#include <machine/vfp.h>
#endif

#ifdef DEV_ACPI
#include <contrib/dev/acpica/include/acpi.h>
#include <machine/acpica_machdep.h>
#endif

#ifdef FDT
#include <dev/fdt/fdt_common.h>
#include <dev/ofw/openfirm.h>
#endif

#include <dev/smbios/smbios.h>

/* Catch any unexpected change to the pcb layout at compile time. */
_Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
_Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
    "pcb_fpusaved changed offset");
_Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
    "pcb_fpustate changed offset");

/* How the kernel attaches buses: ACPI or FDT, chosen in bus_probe(). */
enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;

/*
 * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
 * could relocate this, but will need to keep the same virtual address as
 * it's referenced by the EARLY_COUNTER macro.
 */
struct pcpu pcpu0;

#if defined(PERTHREAD_SSP)
/*
 * The boot SSP canary. Will be replaced with a per-thread canary when
 * scheduling has started.
 */
uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
#endif

/* Boot trapframe for thread0, installed by init_proc0(). */
static struct trapframe proc0_tf;

/* Cleared near the end of initarm() once early bootstrap is complete. */
int early_boot = 1;
int cold = 1;
/* Exception level the kernel was entered at, saved from bootparams. */
static int boot_el;

struct kva_md_info kmi;

int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
int has_pan;			/* Set by pan_enable() when FEAT_PAN is enabled */

#if defined(SOCDEV_PA)
/*
 * This is the virtual address used to access SOCDEV_PA. As it's set before
 * .bss is cleared we need to ensure it's preserved. To do this use
 * __read_mostly as it's only ever set once but read in the putc functions.
 */
uintptr_t socdev_va __read_mostly;
#endif

/*
 * Physical address of the EFI System Table. Stashed from the metadata hints
 * passed into the kernel and used by the EFI code to call runtime services.
 */
vm_paddr_t efi_systbl_phys;
/* UEFI memory map passed in via boot metadata; NULL when booted without one. */
static struct efi_map_header *efihdr;

/* pagezero_* implementations are provided in support.S */
void pagezero_simple(void *);
void pagezero_cache(void *);

/* pagezero_simple is default pagezero */
void (*pagezero)(void *p) = pagezero_simple;

/* Hook for an APEI NMI handler; not assigned in this file. */
int (*apei_nmi)(void);

#if defined(PERTHREAD_SSP_WARNING)
/* Warn (early and late in boot) that per-thread SSP could not be enabled. */
static void
print_ssp_warning(void *data __unused)
{
	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif

/* FEAT_PAN is present when ID_AA64MMFR1_EL1.PAN is non-zero. */
static bool
pan_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
	uint64_t id_aa64mfr1;

	id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
	return (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE);
}

/* Per-CPU enable of Privileged Access Never; also records it in has_pan. */
static void
pan_enable(const struct cpu_feat *feat __unused,
    cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
    u_int errata_count __unused)
{
	has_pan = 1;

	/*
	 * This sets the PAN bit, stopping the kernel from accessing
	 * memory when userspace can also access it unless the kernel
	 * uses the userspace load/store instructions.
	 */
	/* Clearing SPAN makes the hardware set PSTATE.PAN on exception entry. */
	WRITE_SPECIALREG(sctlr_el1,
	    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
	__asm __volatile(
	    ".arch_extension pan \n"
	    "msr pan, #1 \n"
	    ".arch_extension nopan \n");
}

static struct cpu_feat feat_pan = {
	.feat_name	= "FEAT_PAN",
	.feat_check	= pan_check,
	.feat_enable	= pan_enable,
	.feat_flags	= CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU,
};
DATA_SET(cpu_feat_set, feat_pan);

/* True when the kernel was entered at EL2 (hypervisor mode) at boot. */
bool
has_hyp(void)
{
	return (boot_el == CURRENTEL_EL_EL2);
}

bool
in_vhe(void)
{
	/* If we are currently in EL2 then must be in VHE */
	return ((READ_SPECIALREG(CurrentEL) & CURRENTEL_EL_MASK) ==
	    CURRENTEL_EL_EL2);
}

/*
 * Late CPU setup: report memory totals, install the undefined-instruction
 * handlers and CPU errata workarounds, then initialise the buffer cache.
 */
static void
cpu_startup(void *dummy)
{
	vm_paddr_t size;
	int i;

	printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
	    ptoa((uintmax_t)realmem) / 1024 / 1024);

	if (bootverbose) {
		printf("Physical memory chunk(s):\n");
		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
			size = phys_avail[i + 1] - phys_avail[i];
			printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
			    (uintmax_t)phys_avail[i],
			    (uintmax_t)phys_avail[i + 1] - 1,
			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
		}
	}

	printf("avail memory = %ju (%ju MB)\n",
	    ptoa((uintmax_t)vm_free_count()),
	    ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);

	undef_init();
	install_cpu_errata();

	vm_ksubmap_init(&kmi);
	bufinit();
	vm_pager_bufferinit();
}

SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);

/* Perform outstanding ifunc relocations late in SI_SUB_CPU. */
static void
late_ifunc_resolve(void *dummy __unused)
{
	link_elf_late_ireloc();
}
SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);

/* No explicit action is taken to wake an idle CPU; always reports 0. */
int
cpu_idle_wakeup(int cpu)
{

	return (0);
}

/* Idle loop body: wait for an interrupt unless a thread is runnable. */
void
cpu_idle(int busy)
{

	spinlock_enter();
	if (!busy)
		cpu_idleclock();
	if (!sched_runnable())
		/* Complete prior memory accesses before waiting for an interrupt. */
		__asm __volatile(
		    "dsb sy \n"
		    "wfi \n");
	if (!busy)
		cpu_activeclock();
	spinlock_exit();
}

void
cpu_halt(void)
{

	/* We should have shutdown by now, if not enter a low power sleep */
	intr_disable();
	while (1) {
		__asm __volatile("wfi");
	}
}

/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{

	/* ARM64TODO TBD */
}

/*
 * Get current clock frequency for the given CPU ID.
 * Returns EINVAL for an unknown CPU or NULL rate pointer, and EOPNOTSUPP
 * when the per-CPU clock frequency has not been recorded.
 */
int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
{
	struct pcpu *pc;

	pc = pcpu_find(cpu_id);
	if (pc == NULL || rate == NULL)
		return (EINVAL);

	if (pc->pc_clock == 0)
		return (EOPNOTSUPP);

	*rate = pc->pc_clock;
	return (0);
}

/* Initialise arch-specific pcpu fields to invalid sentinel values. */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
	pcpu->pc_mpidr = UINT64_MAX;
}

/*
 * Enter a spinlock section. On the outermost entry interrupts are disabled
 * and the previous DAIF state is saved in the thread; nested calls only
 * increment the per-thread count.
 */
void
spinlock_enter(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		daif = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_daif = daif;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}

/*
 * Leave a spinlock section; the saved DAIF state is restored only when the
 * outermost section is exited.
 */
void
spinlock_exit(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	daif = td->td_md.md_saved_daif;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(daif);
	}
}

/*
 * Construct a PCB from a trapframe. This is called from kdb_trap() where
 * we want to start a backtrace from the function that caused us to enter
 * the debugger. We have the context in the trapframe, but base the trace
 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 * enough for a backtrace.
 */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{
	int i;

	/* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
	for (i = 0; i < nitems(pcb->pcb_x); i++) {
		if (i == PCB_LR)
			pcb->pcb_x[i] = tf->tf_elr;
		else
			pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
	}

	pcb->pcb_sp = tf->tf_sp;
}

/*
 * Finish constructing proc0/thread0: link them together, carve the pcb off
 * the top of the kernel stack, point the FP-state pointer at the in-pcb
 * save area, and install the boot trapframe.
 */
static void
init_proc0(vm_offset_t kstack)
{
	struct pcpu *pcpup;

	pcpup = cpuid_to_pcpu[0];
	MPASS(pcpup != NULL);

	proc_linkup0(&proc0, &thread0);
	thread0.td_kstack = kstack;
	thread0.td_kstack_pages = KSTACK_PAGES;
#if defined(PERTHREAD_SSP)
	thread0.td_md.md_canary = boot_canary;
#endif
	/* The pcb lives at the top of thread0's kernel stack. */
	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_pcb->pcb_fpflags = 0;
	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
	thread0.td_frame = &proc0_tf;
	ptrauth_thread0(&thread0);
	pcpup->pc_curpcb = thread0.td_pcb;

	/*
	 * Unmask SError exceptions. They are used to signal a RAS failure,
	 * or other hardware error.
	 */
	serror_enable();
}

/*
 * Get an address to be used to write to kernel data that may be mapped
 * read-only, e.g. to patch kernel code.
 * Returns true with *out set on success, false if no writable alias exists.
 */
bool
arm64_get_writable_addr(void *addr, void **out)
{
	vm_paddr_t pa;

	/* Check if the page is writable */
	if (PAR_SUCCESS(arm64_address_translate_s1e1w((vm_offset_t)addr))) {
		*out = addr;
		return (true);
	}

	/*
	 * Find the physical address of the given page.
	 */
	if (!pmap_klookup((vm_offset_t)addr, &pa)) {
		return (false);
	}

	/*
	 * If it is within the DMAP region and is writable use that.
	 */
	if (PHYS_IN_DMAP_RANGE(pa)) {
		addr = (void *)PHYS_TO_DMAP(pa);
		if (PAR_SUCCESS(arm64_address_translate_s1e1w(
		    (vm_offset_t)addr))) {
			*out = addr;
			return (true);
		}
	}

	return (false);
}

typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);

/*
 * Walk the UEFI memory map, invoking cb with argp on each descriptor.
 * Does nothing if the map reports a zero descriptor size.
 */
static void
foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	/* Descriptors start after the header, rounded up to 16 bytes. */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return;
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		cb(p, argp);
	}
}

/*
 * Handle the EFI memory map list.
 *
 * We will make two passes at this, the first (exclude == false) to populate
 * physmem with valid physical memory ranges from recognized map entry types.
 * In the second pass we will exclude memory ranges from physmem which must not
 * be used for general allocations, either because they are used by runtime
 * firmware or otherwise reserved.
 *
 * Adding the runtime-reserved memory ranges to physmem and excluding them
 * later ensures that they are included in the DMAP, but excluded from
 * phys_avail[].
 *
 * Entry types not explicitly listed here are ignored and not mapped.
 */
static void
handle_efi_map_entry(struct efi_md *p, void *argp)
{
	bool exclude = *(bool *)argp;

	switch (p->md_type) {
	case EFI_MD_TYPE_RECLAIM:
		/*
		 * The recommended location for ACPI tables. Map into the
		 * DMAP so we can access them from userspace via /dev/mem.
		 */
	case EFI_MD_TYPE_RT_CODE:
		/*
		 * Some UEFI implementations put the system table in the
		 * runtime code section. Include it in the DMAP, but will
		 * be excluded from phys_avail.
		 */
	case EFI_MD_TYPE_RT_DATA:
		/*
		 * Runtime data will be excluded after the DMAP
		 * region is created to stop it from being added
		 * to phys_avail.
		 */
		if (exclude) {
			physmem_exclude_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
			break;
		}
		/* FALLTHROUGH */
	case EFI_MD_TYPE_CODE:
	case EFI_MD_TYPE_DATA:
	case EFI_MD_TYPE_BS_CODE:
	case EFI_MD_TYPE_BS_DATA:
	case EFI_MD_TYPE_FREE:
		/*
		 * We're allowed to use any entry with these types.
		 */
		if (!exclude)
			physmem_hardware_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE);
		break;
	default:
		/* Other types shall not be handled by physmem. */
		break;
	}
}

/* First pass: add usable and runtime-reserved ranges to physmem. */
static void
add_efi_map_entries(struct efi_map_header *efihdr)
{
	bool exclude = false;
	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
}

/* Second pass: exclude runtime-firmware ranges from general allocation. */
static void
exclude_efi_map_entries(struct efi_map_header *efihdr)
{
	bool exclude = true;
	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
}

/* foreach_efi_map_entry() callback: print one descriptor (bootverbose). */
static void
print_efi_map_entry(struct efi_md *p, void *argp __unused)
{
	const char *type;
	static const char *types[] = {
		"Reserved",
		"LoaderCode",
		"LoaderData",
		"BootServicesCode",
		"BootServicesData",
		"RuntimeServicesCode",
		"RuntimeServicesData",
		"ConventionalMemory",
		"UnusableMemory",
		"ACPIReclaimMemory",
		"ACPIMemoryNVS",
		"MemoryMappedIO",
		"MemoryMappedIOPortSpace",
		"PalCode",
		"PersistentMemory"
	};

	if (p->md_type < nitems(types))
		type = types[p->md_type];
	else
		type = "<INVALID>";
	printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
	    p->md_virt, p->md_pages);
	if (p->md_attr & EFI_MD_ATTR_UC)
		printf("UC ");
	if (p->md_attr & EFI_MD_ATTR_WC)
		printf("WC ");
	if (p->md_attr & EFI_MD_ATTR_WT)
		printf("WT ");
	if (p->md_attr & EFI_MD_ATTR_WB)
		printf("WB ");
	if (p->md_attr & EFI_MD_ATTR_UCE)
		printf("UCE ");
	if (p->md_attr & EFI_MD_ATTR_WP)
		printf("WP ");
	if (p->md_attr & EFI_MD_ATTR_RP)
		printf("RP ");
	if (p->md_attr & EFI_MD_ATTR_XP)
		printf("XP ");
	if (p->md_attr & EFI_MD_ATTR_NV)
		printf("NV ");
	if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
		printf("MORE_RELIABLE ");
	if (p->md_attr & EFI_MD_ATTR_RO)
		printf("RO ");
	if (p->md_attr & EFI_MD_ATTR_RT)
		printf("RUNTIME");
	printf("\n");
}

/* Print a header line then every descriptor in the UEFI memory map. */
static void
print_efi_map_entries(struct efi_map_header *efihdr)
{

	printf("%23s %12s %12s %8s %4s\n",
	    "Type", "Physical", "Virtual", "#Pages", "Attr");
	foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
}

/*
 * Map the passed in VA in EFI space to a void * using the efi memory table to
 * find the PA and return it in the DMAP, if it exists. We're used between the
 * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
 * tables. We assume that either the entry you are mapping fits within its
 * page, or if it spills to the next page, that's contiguous in PA and in the
 * DMAP. All observed tables obey the first part of this precondition.
633 */ 634 struct early_map_data 635 { 636 vm_offset_t va; 637 vm_offset_t pa; 638 }; 639 640 static void 641 efi_early_map_entry(struct efi_md *p, void *argp) 642 { 643 struct early_map_data *emdp = argp; 644 vm_offset_t s, e; 645 646 if (emdp->pa != 0) 647 return; 648 if ((p->md_attr & EFI_MD_ATTR_RT) == 0) 649 return; 650 s = p->md_virt; 651 e = p->md_virt + p->md_pages * EFI_PAGE_SIZE; 652 if (emdp->va < s || emdp->va >= e) 653 return; 654 emdp->pa = p->md_phys + (emdp->va - p->md_virt); 655 } 656 657 static void * 658 efi_early_map(vm_offset_t va) 659 { 660 struct early_map_data emd = { .va = va }; 661 662 foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd); 663 if (emd.pa == 0) 664 return NULL; 665 return (void *)PHYS_TO_DMAP(emd.pa); 666 } 667 668 669 /* 670 * When booted via kboot, the prior kernel will pass in reserved memory areas in 671 * a EFI config table. We need to find that table and walk through it excluding 672 * the memory ranges in it. btw, this is called too early for the printf to do 673 * anything since msgbufp isn't initialized, let alone a console... 674 */ 675 static void 676 exclude_efi_memreserve(vm_offset_t efi_systbl_phys) 677 { 678 struct efi_systbl *systbl; 679 struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE; 680 681 systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys); 682 if (systbl == NULL) { 683 printf("can't map systbl\n"); 684 return; 685 } 686 if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) { 687 printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig); 688 return; 689 } 690 691 /* 692 * We don't yet have the pmap system booted enough to create a pmap for 693 * the efi firmware's preferred address space from the GetMemoryMap() 694 * table. The st_cfgtbl is a VA in this space, so we need to do the 695 * mapping ourselves to a kernel VA with efi_early_map. We assume that 696 * the cfgtbl entries don't span a page. Other pointers are PAs, as 697 * noted below. 
698 */ 699 if (systbl->st_cfgtbl == 0) /* Failsafe st_entries should == 0 in this case */ 700 return; 701 for (int i = 0; i < systbl->st_entries; i++) { 702 struct efi_cfgtbl *cfgtbl; 703 struct linux_efi_memreserve *mr; 704 705 cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl)); 706 if (cfgtbl == NULL) 707 panic("Can't map the config table entry %d\n", i); 708 if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0) 709 continue; 710 711 /* 712 * cfgtbl points are either VA or PA, depending on the GUID of 713 * the table. memreserve GUID pointers are PA and not converted 714 * after a SetVirtualAddressMap(). The list's mr_next pointer 715 * is also a PA. 716 */ 717 mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP( 718 (vm_offset_t)cfgtbl->ct_data); 719 while (true) { 720 for (int j = 0; j < mr->mr_count; j++) { 721 struct linux_efi_memreserve_entry *mre; 722 723 mre = &mr->mr_entry[j]; 724 physmem_exclude_region(mre->mre_base, mre->mre_size, 725 EXFLAG_NODUMP | EXFLAG_NOALLOC); 726 } 727 if (mr->mr_next == 0) 728 break; 729 mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next); 730 }; 731 } 732 733 } 734 735 #ifdef FDT 736 static void 737 try_load_dtb(void) 738 { 739 vm_offset_t dtbp; 740 741 dtbp = MD_FETCH(preload_kmdp, MODINFOMD_DTBP, vm_offset_t); 742 #if defined(FDT_DTB_STATIC) 743 /* 744 * In case the device tree blob was not retrieved (from metadata) try 745 * to use the statically embedded one. 
	 */
	if (dtbp == 0)
		dtbp = (vm_offset_t)&fdt_static_dtb;
#endif

	if (dtbp == (vm_offset_t)NULL) {
#ifndef TSLOG
		printf("ERROR loading DTB\n");
#endif
		return;
	}

	if (!OF_install(OFW_FDT, 0))
		panic("Cannot install FDT");

	if (OF_init((void *)dtbp) != 0)
		panic("OF_init failed with the found device tree");

	parse_fdt_bootargs();
}
#endif

/*
 * Decide how the kernel will attach buses (ACPI or FDT), honouring the
 * comma-separated kern.cfg.order tunable when it names an available method.
 * Returns false when kern.cfg.order was set but named no usable method, so
 * the caller can panic after the console is up.
 */
static bool
bus_probe(void)
{
	bool has_acpi, has_fdt;
	char *order, *env;

	has_acpi = has_fdt = false;

#ifdef FDT
	has_fdt = (OF_peer(0) != 0);
#endif
#ifdef DEV_ACPI
	has_acpi = (AcpiOsGetRootPointer() != 0);
#endif

	env = kern_getenv("kern.cfg.order");
	if (env != NULL) {
		order = env;
		while (order != NULL) {
			if (has_acpi &&
			    strncmp(order, "acpi", 4) == 0 &&
			    (order[4] == ',' || order[4] == '\0')) {
				arm64_bus_method = ARM64_BUS_ACPI;
				break;
			}
			if (has_fdt &&
			    strncmp(order, "fdt", 3) == 0 &&
			    (order[3] == ',' || order[3] == '\0')) {
				arm64_bus_method = ARM64_BUS_FDT;
				break;
			}
			order = strchr(order, ',');
			if (order != NULL)
				order++;	/* Skip comma */
		}
		freeenv(env);

		/* If we set the bus method it is valid */
		if (arm64_bus_method != ARM64_BUS_NONE)
			return (true);
	}
	/* If no order or an invalid order was set use the default */
	if (arm64_bus_method == ARM64_BUS_NONE) {
		if (has_acpi)
			arm64_bus_method = ARM64_BUS_ACPI;
		else if (has_fdt)
			arm64_bus_method = ARM64_BUS_FDT;
	}

	/*
	 * If no option was set the default is valid, otherwise we are
	 * setting one to get cninit() working, then calling panic to tell
	 * the user about the invalid bus setup.
	 */
	return (env == NULL);
}

/*
 * Probe the cache geometry: record the "dc zva" zeroing line size and, when
 * zva is permitted, switch the pagezero implementation to the cache-assisted
 * version.
 */
static void
cache_setup(void)
{
	int dczva_line_shift;
	uint32_t dczid_el0;

	identify_cache(READ_SPECIALREG(ctr_el0));

	dczid_el0 = READ_SPECIALREG(dczid_el0);

	/* Check if dc zva is not prohibited */
	if (dczid_el0 & DCZID_DZP)
		dczva_line_size = 0;
	else {
		/* Same as with above calculations */
		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
		dczva_line_size = sizeof(int) << dczva_line_shift;

		/* Change pagezero function */
		pagezero = pagezero_cache;
	}
}

/*
 * Choose a memory attribute for the physical address pa based on the UEFI
 * memory map entry covering it. Falls back to write-back when there is no
 * map (or no usable descriptor size), and to device memory when pa is not
 * described by any entry.
 */
int
memory_mapping_mode(vm_paddr_t pa)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	if (efihdr == NULL)
		return (VM_MEMATTR_WRITE_BACK);

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return (VM_MEMATTR_WRITE_BACK);
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (pa < p->md_phys ||
		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
			continue;
		if (p->md_type == EFI_MD_TYPE_IOMEM ||
		    p->md_type == EFI_MD_TYPE_IOPORT)
			return (VM_MEMATTR_DEVICE);
		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
		    p->md_type == EFI_MD_TYPE_RECLAIM)
			return (VM_MEMATTR_WRITE_BACK);
		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
			return (VM_MEMATTR_WRITE_THROUGH);
		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
			return (VM_MEMATTR_WRITE_COMBINING);
		break;
	}

	return (VM_MEMATTR_DEVICE);
}

/*
 * Machine-dependent early boot: parse boot parameters, set up pcpu0,
 * discover physical memory (EFI map or FDT), bootstrap the pmap, pick the
 * bus method, bring up the console and finish constructing proc0.
 */
void
initarm(struct arm64_bootparams *abp)
{
	struct efi_fb *efifb;
	struct pcpu *pcpup;
	char *env;
#ifdef FDT
	struct mem_region mem_regions[FDT_MEM_REGIONS];
	int mem_regions_sz;
	phandle_t root;
	char dts_version[255];
#endif
	vm_offset_t lastaddr;
	bool valid;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	boot_el = abp->boot_el;

	/* Parse loader or FDT boot parameters. Determine last used address. */
	lastaddr = parse_boot_param(abp);

	identify_cpu(0);
	identify_hypervisor_smbios();

	update_special_regs(0);

	/* Set the pcpu data, this is needed by pmap_bootstrap */
	pcpup = &pcpu0;
	pcpu_init(pcpup, 0, sizeof(struct pcpu));

	/*
	 * Set the pcpu pointer with a backup in tpidr_el1 to be
	 * loaded when entering the kernel from userland.
	 */
	__asm __volatile(
	    "mov x18, %0 \n"
	    "msr tpidr_el1, %0" :: "r"(pcpup));

	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
	PCPU_SET(curthread, &thread0);
	PCPU_SET(midr, get_midr());

	link_elf_ireloc();
#ifdef FDT
	try_load_dtb();
#endif

	efi_systbl_phys = MD_FETCH(preload_kmdp, MODINFOMD_FW_HANDLE,
	    vm_paddr_t);

	/* Load the physical memory ranges */
	efihdr = (struct efi_map_header *)preload_search_info(preload_kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr != NULL)
		add_efi_map_entries(efihdr);
#ifdef FDT
	else {
		/* Grab physical memory regions information from device tree. */
		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
		    NULL) != 0)
			panic("Cannot get physical memory regions");
		physmem_hardware_regions(mem_regions, mem_regions_sz);
	}
	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
		physmem_exclude_regions(mem_regions, mem_regions_sz,
		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
#endif

	/* Exclude the EFI framebuffer from our view of physical memory. */
	efifb = (struct efi_fb *)preload_search_info(preload_kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_FB);
	if (efifb != NULL)
		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
		    EXFLAG_NOALLOC);

	/* Do basic tuning, hz etc */
	init_param1();

	cache_setup();

	/* Bootstrap enough of pmap to enter the kernel proper */
	pmap_bootstrap(lastaddr - KERNBASE);
	/* Exclude entries needed in the DMAP region, but not phys_avail */
	if (efihdr != NULL)
		exclude_efi_map_entries(efihdr);
	/* Do the same for reserve entries in the EFI MEMRESERVE table */
	if (efi_systbl_phys != 0)
		exclude_efi_memreserve(efi_systbl_phys);

	/*
	 * We carefully bootstrap the sanitizer map after we've excluded
	 * absolutely everything else that could impact phys_avail. There's not
	 * always enough room for the initial shadow map after the kernel, so
	 * we'll end up searching for segments that we can safely use. Those
	 * segments also get excluded from phys_avail.
	 */
#if defined(KASAN) || defined(KMSAN)
	pmap_bootstrap_san();
#endif

	physmem_init_kernel_globals();

	devmap_bootstrap();

	valid = bus_probe();

	cninit();
	set_ttbr0(abp->kern_ttbr0);
	cpu_tlb_flushID();

	if (!valid)
		panic("Invalid bus configuration: %s",
		    kern_getenv("kern.cfg.order"));

	/* Detect early CPU feature support */
	enable_cpu_feat(CPU_FEAT_EARLY_BOOT);

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

	init_proc0(abp->kern_stack);
	msgbufinit(msgbufp, msgbufsize);
	mutex_init();
	init_param2(physmem);

	dbg_init();
	kdb_init();
#ifdef KDB
	if ((boothowto & RB_KDB) != 0)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif

	kcsan_cpu_init(0);
	kasan_init();
	kmsan_init();

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

#ifdef FDT
	/* Warn when the DTB does not match the version this kernel expects. */
	if (arm64_bus_method == ARM64_BUS_FDT) {
		root = OF_finddevice("/");
		if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
				printf("WARNING: DTB version is %s while kernel expects %s, "
				    "please update the DTB in the ESP\n",
				    dts_version,
				    LINUX_DTS_VERSION);
		} else {
			printf("WARNING: Cannot find freebsd,dts-version property, "
			    "cannot check DTB compliance\n");
		}
	}
#endif

	if (boothowto & RB_VERBOSE) {
		if (efihdr != NULL)
			print_efi_map_entries(efihdr);
		physmem_print_tables();
	}

	early_boot = 0;

	if (bootverbose && kstack_pages != KSTACK_PAGES)
		printf("kern.kstack_pages = %d ignored for thread0\n",
		    kstack_pages);

	TSEXIT();
}

/* Unlock and initialise the self-hosted debug hardware for this CPU. */
void
dbg_init(void)
{

	/* Clear OS lock */
	WRITE_SPECIALREG(oslar_el1, 0);

	/* This permits DDB to use debug registers for watchpoints. */
	dbg_monitor_init();

	/* TODO: Eventually will need to initialize debug registers here.
	 */
}

#ifdef DDB
#include <ddb/ddb.h>

/* DDB "show specialregs": dump the EL0/EL1 system registers. */
DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
#define	PRINT_REG(reg)	\
    db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))

	PRINT_REG(actlr_el1);
	PRINT_REG(afsr0_el1);
	PRINT_REG(afsr1_el1);
	PRINT_REG(aidr_el1);
	PRINT_REG(amair_el1);
	PRINT_REG(ccsidr_el1);
	PRINT_REG(clidr_el1);
	PRINT_REG(contextidr_el1);
	PRINT_REG(cpacr_el1);
	PRINT_REG(csselr_el1);
	PRINT_REG(ctr_el0);
	PRINT_REG(currentel);
	PRINT_REG(daif);
	PRINT_REG(dczid_el0);
	PRINT_REG(elr_el1);
	PRINT_REG(esr_el1);
	PRINT_REG(far_el1);
#if 0
	/* ARM64TODO: Enable VFP before reading floating-point registers */
	PRINT_REG(fpcr);
	PRINT_REG(fpsr);
#endif
	PRINT_REG(id_aa64afr0_el1);
	PRINT_REG(id_aa64afr1_el1);
	PRINT_REG(id_aa64dfr0_el1);
	PRINT_REG(id_aa64dfr1_el1);
	PRINT_REG(id_aa64isar0_el1);
	PRINT_REG(id_aa64isar1_el1);
	PRINT_REG(id_aa64pfr0_el1);
	PRINT_REG(id_aa64pfr1_el1);
	PRINT_REG(id_afr0_el1);
	PRINT_REG(id_dfr0_el1);
	PRINT_REG(id_isar0_el1);
	PRINT_REG(id_isar1_el1);
	PRINT_REG(id_isar2_el1);
	PRINT_REG(id_isar3_el1);
	PRINT_REG(id_isar4_el1);
	PRINT_REG(id_isar5_el1);
	PRINT_REG(id_mmfr0_el1);
	PRINT_REG(id_mmfr1_el1);
	PRINT_REG(id_mmfr2_el1);
	PRINT_REG(id_mmfr3_el1);
#if 0
	/* Missing from llvm */
	PRINT_REG(id_mmfr4_el1);
#endif
	PRINT_REG(id_pfr0_el1);
	PRINT_REG(id_pfr1_el1);
	PRINT_REG(isr_el1);
	PRINT_REG(mair_el1);
	PRINT_REG(midr_el1);
	PRINT_REG(mpidr_el1);
	PRINT_REG(mvfr0_el1);
	PRINT_REG(mvfr1_el1);
	PRINT_REG(mvfr2_el1);
	PRINT_REG(revidr_el1);
	PRINT_REG(sctlr_el1);
	PRINT_REG(sp_el0);
	PRINT_REG(spsel);
	PRINT_REG(spsr_el1);
	PRINT_REG(tcr_el1);
	PRINT_REG(tpidr_el0);
	PRINT_REG(tpidr_el1);
	PRINT_REG(tpidrro_el0);
	PRINT_REG(ttbr0_el1);
	PRINT_REG(ttbr1_el1);
	PRINT_REG(vbar_el1);
#undef PRINT_REG
}

/*
 * DDB "show vtop <virt_addr>": translate a virtual address with the AT
 * instructions and print the resulting PAR_EL1 values for EL1/EL0
 * read and write accesses.
 */
DB_SHOW_COMMAND(vtop, db_show_vtop)
{
	uint64_t phys;

	if (have_addr) {
		phys = arm64_address_translate_s1e1r(addr);
		db_printf("EL1 physical address reg (read): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e1w(addr);
		db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0r(addr);
		db_printf("EL0 physical address reg (read): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0w(addr);
		db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
	} else
		db_printf("show vtop <virt_addr>\n");
}
#endif