/*	$NetBSD: x86_machdep.c,v 1.156 2024/12/06 10:53:41 bouyer Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007 YAMAMOTO Takashi,
 * Copyright (c) 2005, 2008, 2009, 2019, 2023 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: x86_machdep.c,v 1.156 2024/12/06 10:53:41 bouyer Exp $");

#include "opt_modular.h"
#include "opt_physmem.h"
#include "opt_splash.h"
#include "opt_kaslr.h"
#include "opt_svs.h"
#include "opt_xen.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kcore.h>
#include <sys/errno.h>
#include <sys/kauth.h>
#include <sys/mutex.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <sys/atomic.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/extent.h>
#include <sys/rnd.h>

#include <x86/bootspace.h>
#include <x86/cpuvar.h>
#include <x86/cputypes.h>
#include <x86/efi.h>
#include <x86/machdep.h>
#include <x86/nmi.h>
#include <x86/pio.h>

#include <dev/splash/splash.h>
#include <dev/isa/isareg.h>
#include <dev/ic/i8042reg.h>
#include <dev/mm.h>

#include <machine/bootinfo.h>
#include <machine/pmap_private.h>
#include <machine/vmparam.h>

#include <uvm/uvm_extern.h>

#include "tsc.h"

#include "acpica.h"
#include "ioapic.h"
#include "lapic.h"

#if NACPICA > 0
#include <dev/acpi/acpivar.h>
#endif

#if NIOAPIC > 0 || NACPICA > 0
#include <machine/i82093var.h>
#endif

#include "opt_md.h"
#if defined(MEMORY_DISK_HOOKS) && defined(MEMORY_DISK_DYNAMIC)
#include <dev/md.h>
#endif

void (*x86_cpu_idle)(void);
static bool x86_cpu_idle_ipi;
static char x86_cpu_idle_text[16];

static bool x86_user_ldt_enabled __read_mostly = false;

#ifdef XEN

#include <xen/xen.h>
#include <xen/hypervisor.h>
#endif

#ifndef XENPV
void (*delay_func)(unsigned int) = i8254_delay;
void (*x86_initclock_func)(void) = i8254_initclocks;
#else /* XENPV */
void (*delay_func)(unsigned int) = xen_delay;
void (*x86_initclock_func)(void) = xen_initclocks;
#endif


/* --------------------------------------------------------------------- */

/*
 * Main bootinfo structure.  This is filled in by the bootstrap process
 * done in locore.S based on the information passed by the boot loader.
 */
struct bootinfo bootinfo;

/* --------------------------------------------------------------------- */

bool bootmethod_efi;

static kauth_listener_t x86_listener;

extern paddr_t lowmem_rsvd, avail_start, avail_end;

vaddr_t msgbuf_vaddr;

struct msgbuf_p_seg msgbuf_p_seg[VM_PHYSSEG_MAX];

unsigned int msgbuf_p_cnt = 0;

void init_x86_msgbuf(void);

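/*
 * The bootinfo entries are packed back to back in bootinfo.bi_data.
 * Each entry begins with a struct btinfo_common header whose 'len'
 * field gives the total size of that entry, so the list can be walked
 * by advancing 'len' bytes at a time.  Roughly:
 *
 *	bi_data: [ common | payload ][ common | payload ] ...
 *	                             ^ previous entry + bic->len
 */
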
/*
 * Given the type of a bootinfo entry, looks for a matching item inside
 * the bootinfo structure.  If found, returns a pointer to it (which must
 * then be cast to the appropriate bootinfo_* type); otherwise, returns
 * NULL.
 */
void *
lookup_bootinfo(int type)
{
	bool found;
	int i;
	struct btinfo_common *bic;

	bic = (struct btinfo_common *)(bootinfo.bi_data);
	found = FALSE;
	for (i = 0; i < bootinfo.bi_nentries && !found; i++) {
		if (bic->type == type)
			found = TRUE;
		else
			bic = (struct btinfo_common *)
			    ((uint8_t *)bic + bic->len);
	}

	return found ? bic : NULL;
}

#ifdef notyet
/*
 * List the available bootinfo entries.
 */
static const char *btinfo_str[] = {
	BTINFO_STR
};

void
aprint_bootinfo(void)
{
	int i;
	struct btinfo_common *bic;

	aprint_normal("bootinfo:");
	bic = (struct btinfo_common *)(bootinfo.bi_data);
	for (i = 0; i < bootinfo.bi_nentries; i++) {
		if (bic->type >= 0 && bic->type < __arraycount(btinfo_str))
			aprint_normal(" %s", btinfo_str[bic->type]);
		else
			aprint_normal(" %d", bic->type);
		bic = (struct btinfo_common *)
		    ((uint8_t *)bic + bic->len);
	}
	aprint_normal("\n");
}
#endif

/*
 * mm_md_physacc: check if given pa is accessible.
 */
int
mm_md_physacc(paddr_t pa, vm_prot_t prot)
{
	extern phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];
	extern int mem_cluster_cnt;
	int i;

	for (i = 0; i < mem_cluster_cnt; i++) {
		const phys_ram_seg_t *seg = &mem_clusters[i];
		paddr_t lstart = seg->start;

		if (lstart <= pa && pa - lstart <= seg->size) {
			return 0;
		}
	}
	return kauth_authorize_machdep(kauth_cred_get(),
	    KAUTH_MACHDEP_UNMANAGEDMEM, NULL, NULL, NULL, NULL);
}

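/*
 * The boot loader hands the kernel physical addresses for any modules,
 * splash image, file-system image and random seed it loaded (bi->base
 * below).  Before use they are converted to kernel virtual addresses:
 * KASLR kernels go through the direct map (PMAP_DIRECT_MAP), while
 * non-KASLR kernels add KERNBASE, matching the fixed virtual offset at
 * which the boot-time mappings were established.
 */
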
#ifdef MODULAR
/*
 * Push any modules loaded by the boot loader.
 */
void
module_init_md(void)
{
	struct btinfo_modulelist *biml;
	struct bi_modulelist_entry *bi, *bimax;

	biml = lookup_bootinfo(BTINFO_MODULELIST);
	if (biml == NULL) {
		aprint_debug("No module info at boot\n");
		return;
	}

	bi = (struct bi_modulelist_entry *)((uint8_t *)biml + sizeof(*biml));
	bimax = bi + biml->num;
	for (; bi < bimax; bi++) {
		switch (bi->type) {
		case BI_MODULE_ELF:
			aprint_debug("Prep module path=%s len=%d pa=%x\n",
			    bi->path, bi->len, bi->base);
			KASSERT(trunc_page(bi->base) == bi->base);
			module_prime(bi->path,
#ifdef KASLR
			    (void *)PMAP_DIRECT_MAP((uintptr_t)bi->base),
#else
			    (void *)((uintptr_t)bi->base + KERNBASE),
#endif
			    bi->len);
			break;
		case BI_MODULE_IMAGE:
#ifdef SPLASHSCREEN
			aprint_debug("Splash image path=%s len=%d pa=%x\n",
			    bi->path, bi->len, bi->base);
			KASSERT(trunc_page(bi->base) == bi->base);
			splash_setimage(
#ifdef KASLR
			    (void *)PMAP_DIRECT_MAP((uintptr_t)bi->base),
#else
			    (void *)((uintptr_t)bi->base + KERNBASE),
#endif
			    bi->len);
#endif
			break;
		case BI_MODULE_RND:
			/* handled in x86_rndseed */
			break;
		case BI_MODULE_FS:
			aprint_debug("File-system image path=%s len=%d pa=%x\n",
			    bi->path, bi->len, bi->base);
			KASSERT(trunc_page(bi->base) == bi->base);
#if defined(MEMORY_DISK_HOOKS) && defined(MEMORY_DISK_DYNAMIC)
			md_root_setconf(
#ifdef KASLR
			    (void *)PMAP_DIRECT_MAP((uintptr_t)bi->base),
#else
			    (void *)((uintptr_t)bi->base + KERNBASE),
#endif
			    bi->len);
#endif
			break;
		default:
			aprint_debug("Skipping non-ELF module\n");
			break;
		}
	}
}
#endif /* MODULAR */

void
x86_rndseed(void)
{
	struct btinfo_modulelist *biml;
	struct bi_modulelist_entry *bi, *bimax;

	biml = lookup_bootinfo(BTINFO_MODULELIST);
	if (biml == NULL) {
		aprint_debug("No module info at boot\n");
		return;
	}

	bi = (struct bi_modulelist_entry *)((uint8_t *)biml + sizeof(*biml));
	bimax = bi + biml->num;
	for (; bi < bimax; bi++) {
		switch (bi->type) {
		case BI_MODULE_RND:
			aprint_debug("Random seed data path=%s len=%d pa=%x\n",
			    bi->path, bi->len, bi->base);
			KASSERT(trunc_page(bi->base) == bi->base);
			rnd_seed(
#ifdef KASLR
			    (void *)PMAP_DIRECT_MAP((uintptr_t)bi->base),
#else
			    (void *)((uintptr_t)bi->base + KERNBASE),
#endif
			    bi->len);
		}
	}
}

void
cpu_need_resched(struct cpu_info *ci, struct lwp *l, int flags)
{

	KASSERT(kpreempt_disabled());

	if ((flags & RESCHED_IDLE) != 0) {
		if ((flags & RESCHED_REMOTE) != 0 &&
		    x86_cpu_idle_ipi != false) {
			cpu_kick(ci);
		}
		return;
	}

#ifdef __HAVE_PREEMPTION
	if ((flags & RESCHED_KPREEMPT) != 0) {
		if ((flags & RESCHED_REMOTE) != 0) {
#ifdef XENPV
			xen_send_ipi(ci, XEN_IPI_KPREEMPT);
#else
			x86_send_ipi(ci, X86_IPI_KPREEMPT);
#endif
		} else {
			softint_trigger(1 << SIR_PREEMPT);
		}
		return;
	}
#endif

	KASSERT((flags & RESCHED_UPREEMPT) != 0);
	if ((flags & RESCHED_REMOTE) != 0) {
		cpu_kick(ci);
	} else {
		aston(l);
	}
}

void
cpu_signotify(struct lwp *l)
{

	KASSERT(kpreempt_disabled());

	if (l->l_cpu != curcpu()) {
		cpu_kick(l->l_cpu);
	} else {
		aston(l);
	}
}

void
cpu_need_proftick(struct lwp *l)
{

	KASSERT(kpreempt_disabled());
	KASSERT(l->l_cpu == curcpu());

	l->l_pflag |= LP_OWEUPC;
	aston(l);
}

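/*
 * cpu_intr_p() below samples ci_idepth of the CPU the LWP is running on.
 * lwp_pctr() changes whenever the LWP is context-switched (and so possibly
 * migrated to another CPU), so the read is retried until both samples of
 * the counter agree, guaranteeing that ci_idepth was taken from the CPU we
 * were actually running on.
 */
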
bool
cpu_intr_p(void)
{
	int idepth;
	long pctr;
	lwp_t *l;

	l = curlwp;
	if (__predict_false(l->l_cpu == NULL)) {
		KASSERT(l == &lwp0);
		return false;
	}
	do {
		pctr = lwp_pctr();
		idepth = l->l_cpu->ci_idepth;
	} while (__predict_false(pctr != lwp_pctr()));

	return idepth >= 0;
}

#ifdef __HAVE_PREEMPTION
/*
 * Called to check MD conditions that would prevent preemption, and to
 * arrange for those conditions to be rechecked later.
 */
bool
cpu_kpreempt_enter(uintptr_t where, int s)
{
	struct pcb *pcb;
	lwp_t *l;

	KASSERT(kpreempt_disabled());
	l = curlwp;

	/*
	 * If SPL raised, can't go.  Note this implies that spin
	 * mutexes at IPL_NONE are _not_ valid to use.
	 */
	if (s > IPL_PREEMPT) {
		softint_trigger(1 << SIR_PREEMPT);
		return false;
	}

	/* Must save cr2 or it could be clobbered. */
	pcb = lwp_getpcb(l);
	pcb->pcb_cr2 = rcr2();

	return true;
}

/*
 * Called after returning from a kernel preemption, and called with
 * preemption disabled.
 */
void
cpu_kpreempt_exit(uintptr_t where)
{
	extern char x86_copyfunc_start, x86_copyfunc_end;
	struct pcb *pcb;

	KASSERT(kpreempt_disabled());

	/*
	 * If we interrupted any of the copy functions we must reload
	 * the pmap when resuming, as they cannot tolerate it being
	 * swapped out.
	 */
	if (where >= (uintptr_t)&x86_copyfunc_start &&
	    where < (uintptr_t)&x86_copyfunc_end) {
		pmap_load();
	}

	/* Restore cr2 only after the pmap, as pmap_load can block. */
	pcb = lwp_getpcb(curlwp);
	lcr2(pcb->pcb_cr2);
}

/*
 * Return true if preemption is disabled for MD reasons.  Must be called
 * with preemption disabled, and thus is only for diagnostic checks.
 */
bool
cpu_kpreempt_disabled(void)
{

	return curcpu()->ci_ilevel > IPL_NONE;
}
#endif /* __HAVE_PREEMPTION */

SYSCTL_SETUP(sysctl_machdep_cpu_idle, "sysctl machdep cpu_idle")
{
	const struct sysctlnode *mnode, *node;

	sysctl_createv(NULL, 0, NULL, &mnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);

	sysctl_createv(NULL, 0, &mnode, &node,
	    CTLFLAG_PERMANENT, CTLTYPE_STRING, "idle-mechanism",
	    SYSCTL_DESCR("Mechanism used for the idle loop."),
	    NULL, 0, x86_cpu_idle_text, 0,
	    CTL_CREATE, CTL_EOL);
}

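/*
 * The node created above exposes the string kept in x86_cpu_idle_text, so
 * the selected idle method can be inspected from userland, e.g.:
 *
 *	$ sysctl machdep.idle-mechanism
 *	machdep.idle-mechanism = mwait
 *
 * ("halt" when MONITOR/MWAIT is unavailable, "xen" on Xen PV kernels.)
 */
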
void
x86_cpu_idle_init(void)
{

#ifndef XENPV
	if ((cpu_feature[1] & CPUID2_MONITOR) == 0)
		x86_cpu_idle_set(x86_cpu_idle_halt, "halt", true);
	else
		x86_cpu_idle_set(x86_cpu_idle_mwait, "mwait", false);
#else
	x86_cpu_idle_set(x86_cpu_idle_xen, "xen", true);
#endif
}

void
x86_cpu_idle_get(void (**func)(void), char *text, size_t len)
{

	*func = x86_cpu_idle;

	(void)strlcpy(text, x86_cpu_idle_text, len);
}

void
x86_cpu_idle_set(void (*func)(void), const char *text, bool ipi)
{

	x86_cpu_idle = func;
	x86_cpu_idle_ipi = ipi;

	(void)strlcpy(x86_cpu_idle_text, text, sizeof(x86_cpu_idle_text));
}

#ifndef XENPV

#define KBTOB(x)	((size_t)(x) * 1024UL)
#define MBTOB(x)	((size_t)(x) * 1024UL * 1024UL)

static struct {
	int freelist;
	uint64_t limit;
} x86_freelists[VM_NFREELIST] = {
	{ VM_FREELIST_DEFAULT, 0 },
#ifdef VM_FREELIST_FIRST1T
	/* 40-bit addresses needed for modern graphics. */
	{ VM_FREELIST_FIRST1T, 1ULL * 1024 * 1024 * 1024 * 1024 },
#endif
#ifdef VM_FREELIST_FIRST64G
	/* 36-bit addresses needed for oldish graphics. */
	{ VM_FREELIST_FIRST64G, 64ULL * 1024 * 1024 * 1024 },
#endif
#ifdef VM_FREELIST_FIRST4G
	/* 32-bit addresses needed for PCI 32-bit DMA and old graphics. */
	{ VM_FREELIST_FIRST4G, 4ULL * 1024 * 1024 * 1024 },
#endif
	/* 30-bit addresses needed for ancient graphics. */
	{ VM_FREELIST_FIRST1G, 1ULL * 1024 * 1024 * 1024 },
	/* 24-bit addresses needed for ISA DMA. */
	{ VM_FREELIST_FIRST16, 16 * 1024 * 1024 },
};

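/*
 * Worked example for x86_select_freelist() below, on platforms that define
 * VM_FREELIST_FIRST4G: with RAM installed above 4 GB, a caller limited to
 * 32-bit addresses (maxaddr 0xffffffff) matches the FIRST4G entry, since
 * that list's limit minus one equals maxaddr.  If every managed page
 * already lies below maxaddr (avail_end <= maxaddr), VM_NFREELIST is
 * returned to say that no particular freelist is required.
 */
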
int
x86_select_freelist(uint64_t maxaddr)
{
	unsigned int i;

	if (avail_end <= maxaddr)
		return VM_NFREELIST;

	for (i = 0; i < __arraycount(x86_freelists); i++) {
		if ((x86_freelists[i].limit - 1) <= maxaddr)
			return x86_freelists[i].freelist;
	}

	panic("no freelist for maximum address %"PRIx64, maxaddr);
}

static int
x86_add_cluster(uint64_t seg_start, uint64_t seg_end, uint32_t type)
{
	extern struct extent *iomem_ex;
	const uint64_t endext = MAXIOMEM + 1;
	uint64_t new_physmem = 0;
	phys_ram_seg_t *cluster;
	int i;

	if (seg_end > MAXPHYSMEM) {
		aprint_verbose("WARNING: skipping large memory map entry: "
		    "0x%"PRIx64"/0x%"PRIx64"/0x%x\n",
		    seg_start, (seg_end - seg_start), type);
		return 0;
	}

	/*
	 * XXX: Chop the last page off the size so that it can fit in
	 * avail_end.
	 */
	if (seg_end == MAXPHYSMEM)
		seg_end -= PAGE_SIZE;

	if (seg_end <= seg_start)
		return 0;

	for (i = 0; i < mem_cluster_cnt; i++) {
		cluster = &mem_clusters[i];
		if ((cluster->start == round_page(seg_start)) &&
		    (cluster->size == trunc_page(seg_end) - cluster->start)) {
#ifdef DEBUG_MEMLOAD
			printf("WARNING: skipping duplicate segment entry\n");
#endif
			return 0;
		}
	}

	/*
	 * This cluster is used by RAM.  If it is included in the iomem
	 * extent, allocate it from there, so that we won't unintentionally
	 * reuse it later with extent_alloc_region.  This avoids collisions
	 * with UVM, among others.
	 *
	 * This is done before the addresses are page rounded just to make
	 * sure we get them all.
	 */
	if (seg_start < endext) {
		uint64_t io_end;

		if (seg_end > endext)
			io_end = endext;
		else
			io_end = seg_end;

		if (iomem_ex != NULL && extent_alloc_region(iomem_ex, seg_start,
		    io_end - seg_start, EX_NOWAIT)) {
			/* XXX What should we do? */
			printf("WARNING: CAN'T ALLOCATE MEMORY SEGMENT "
			    "(0x%"PRIx64"/0x%"PRIx64"/0x%x) FROM "
			    "IOMEM EXTENT MAP!\n",
			    seg_start, seg_end - seg_start, type);
			return 0;
		}
	}

	/* If it's not free memory, skip it. */
	if (type != BIM_Memory)
		return 0;

	if (mem_cluster_cnt >= VM_PHYSSEG_MAX) {
		printf("WARNING: too many memory segments "
		    "(increase VM_PHYSSEG_MAX)");
		return -1;
	}

#ifdef PHYSMEM_MAX_ADDR
	if (seg_start >= MBTOB(PHYSMEM_MAX_ADDR))
		return 0;
	if (seg_end > MBTOB(PHYSMEM_MAX_ADDR))
		seg_end = MBTOB(PHYSMEM_MAX_ADDR);
#endif

	seg_start = round_page(seg_start);
	seg_end = trunc_page(seg_end);

	if (seg_start == seg_end)
		return 0;

	cluster = &mem_clusters[mem_cluster_cnt];
	cluster->start = seg_start;
	if (iomem_ex != NULL)
		new_physmem = physmem + atop(seg_end - seg_start);

#ifdef PHYSMEM_MAX_SIZE
	if (iomem_ex != NULL) {
		if (physmem >= atop(MBTOB(PHYSMEM_MAX_SIZE)))
			return 0;
		if (new_physmem > atop(MBTOB(PHYSMEM_MAX_SIZE))) {
			seg_end = seg_start + MBTOB(PHYSMEM_MAX_SIZE) - ptoa(physmem);
			new_physmem = atop(MBTOB(PHYSMEM_MAX_SIZE));
		}
	}
#endif

	cluster->size = seg_end - seg_start;

	if (iomem_ex != NULL) {
		if (avail_end < seg_end)
			avail_end = seg_end;
		physmem = new_physmem;
	}
	mem_cluster_cnt++;

	return 0;
}

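/*
 * x86_parse_clusters() walks the BIOS/UEFI memory map.  Note that BIM_ACPI
 * and BIM_NVS ranges are passed to x86_add_cluster() only so that they get
 * reserved in the iomem extent; x86_add_cluster() creates a RAM cluster
 * solely for BIM_Memory ranges.
 */
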
static int
x86_parse_clusters(struct btinfo_memmap *bim)
{
	uint64_t seg_start, seg_end;
	uint64_t addr, size;
	uint32_t type;
	int x;

	KASSERT(bim != NULL);
	KASSERT(bim->num > 0);

#ifdef DEBUG_MEMLOAD
	printf("MEMMAP: %s MEMORY MAP (%d ENTRIES):\n",
	    lookup_bootinfo(BTINFO_EFIMEMMAP) != NULL ? "UEFI" : "BIOS",
	    bim->num);
#endif

	for (x = 0; x < bim->num; x++) {
		addr = bim->entry[x].addr;
		size = bim->entry[x].size;
		type = bim->entry[x].type;
#ifdef DEBUG_MEMLOAD
		printf("MEMMAP: 0x%016" PRIx64 "-0x%016" PRIx64
		    "\n\tsize=0x%016" PRIx64 ", type=%d(%s)\n",
		    addr, addr + size - 1, size, type,
		    (type == BIM_Memory) ? "Memory" :
		    (type == BIM_Reserved) ? "Reserved" :
		    (type == BIM_ACPI) ? "ACPI" :
		    (type == BIM_NVS) ? "NVS" :
		    (type == BIM_PMEM) ? "Persistent" :
		    (type == BIM_PRAM) ? "Persistent (Legacy)" :
		    "unknown");
#endif

		/* If the segment is not memory, skip it. */
		switch (type) {
		case BIM_Memory:
		case BIM_ACPI:
		case BIM_NVS:
			break;
		default:
			continue;
		}

		/* If the segment is smaller than a page, skip it. */
		if (size < PAGE_SIZE)
			continue;

		seg_start = addr;
		seg_end = addr + size;

		/*
		 * XXX XXX: Avoid the ISA I/O MEM.
		 *
		 * Some laptops (for example, Toshiba Satellite2550X) report
		 * this area as valid.
		 */
		if (seg_start < IOM_END && seg_end > IOM_BEGIN) {
			printf("WARNING: memory map entry overlaps "
			    "with ``Compatibility Holes'': "
			    "0x%"PRIx64"/0x%"PRIx64"/0x%x\n", seg_start,
			    seg_end - seg_start, type);

			if (x86_add_cluster(seg_start, IOM_BEGIN, type) == -1)
				break;
			if (x86_add_cluster(IOM_END, seg_end, type) == -1)
				break;
		} else {
			if (x86_add_cluster(seg_start, seg_end, type) == -1)
				break;
		}
	}

	return 0;
}

static int
x86_fake_clusters(void)
{
	extern struct extent *iomem_ex;
	phys_ram_seg_t *cluster;

	KASSERT(mem_cluster_cnt == 0);

	/*
	 * Allocate the physical addresses used by RAM from the iomem extent
	 * map.  This is done before the addresses are page rounded just to
	 * make sure we get them all.
	 */
	if (extent_alloc_region(iomem_ex, 0, KBTOB(biosbasemem), EX_NOWAIT)) {
		/* XXX What should we do? */
		printf("WARNING: CAN'T ALLOCATE BASE MEMORY FROM "
		    "IOMEM EXTENT MAP!\n");
	}

	cluster = &mem_clusters[0];
	cluster->start = 0;
	cluster->size = trunc_page(KBTOB(biosbasemem));
	physmem += atop(cluster->size);

	if (extent_alloc_region(iomem_ex, IOM_END, KBTOB(biosextmem),
	    EX_NOWAIT)) {
		/* XXX What should we do? */
		printf("WARNING: CAN'T ALLOCATE EXTENDED MEMORY FROM "
		    "IOMEM EXTENT MAP!\n");
	}

#if NISADMA > 0
	/*
	 * Some motherboards/BIOSes remap the 384K of RAM that would
	 * normally be covered by the ISA hole to the end of memory
	 * so that it can be used.  However, on a 16M system, this
	 * would cause bounce buffers to be allocated and used.
	 * This is not desirable behaviour, as more than 384K of
	 * bounce buffers might be allocated.  As a work-around,
	 * we round memory down to the nearest 1M boundary if
	 * we're using any isadma devices and the remapped memory
	 * is what puts us over 16M.
	 */
	if (biosextmem > (15*1024) && biosextmem < (16*1024)) {
		char pbuf[9];

		format_bytes(pbuf, sizeof(pbuf), biosextmem - (15*1024));
		printf("Warning: ignoring %s of remapped memory\n", pbuf);
		biosextmem = (15*1024);
	}
#endif

	cluster = &mem_clusters[1];
	cluster->start = IOM_END;
	cluster->size = trunc_page(KBTOB(biosextmem));
	physmem += atop(cluster->size);

	mem_cluster_cnt = 2;

	avail_end = IOM_END + trunc_page(KBTOB(biosextmem));

	return 0;
}

/*
 * x86_load_region: load the physical memory region from seg_start to seg_end
 * into the VM system.
 */
static void
x86_load_region(uint64_t seg_start, uint64_t seg_end)
{
	unsigned int i;
	uint64_t tmp;

	i = __arraycount(x86_freelists);
	while (i--) {
		if (x86_freelists[i].limit <= seg_start)
			continue;
		if (x86_freelists[i].freelist == VM_FREELIST_DEFAULT)
			continue;
		tmp = MIN(x86_freelists[i].limit, seg_end);
		if (tmp == seg_start)
			continue;

#ifdef DEBUG_MEMLOAD
		printf("loading freelist %d 0x%"PRIx64"-0x%"PRIx64
		    " (0x%"PRIx64"-0x%"PRIx64")\n", x86_freelists[i].freelist,
		    seg_start, tmp, (uint64_t)atop(seg_start),
		    (uint64_t)atop(tmp));
#endif

		uvm_page_physload(atop(seg_start), atop(tmp), atop(seg_start),
		    atop(tmp), x86_freelists[i].freelist);
		seg_start = tmp;
	}

	if (seg_start != seg_end) {
#ifdef DEBUG_MEMLOAD
		printf("loading default 0x%"PRIx64"-0x%"PRIx64
		    " (0x%"PRIx64"-0x%"PRIx64")\n", seg_start, seg_end,
		    (uint64_t)atop(seg_start), (uint64_t)atop(seg_end));
#endif
		uvm_page_physload(atop(seg_start), atop(seg_end),
		    atop(seg_start), atop(seg_end), VM_FREELIST_DEFAULT);
	}
}

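/*
 * For instance, on a kernel where the FIRST16/FIRST1G/FIRST4G/FIRST64G
 * lists are all configured, a region spanning 1 MB-8 GB is handed to
 * uvm_page_physload() in pieces: [1 MB, 16 MB) on VM_FREELIST_FIRST16,
 * [16 MB, 1 GB) on FIRST1G, [1 GB, 4 GB) on FIRST4G and [4 GB, 8 GB) on
 * FIRST64G, working from the most restrictive list upwards; anything
 * beyond the largest configured limit falls to VM_FREELIST_DEFAULT.
 */
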
#ifdef XEN
static void
x86_add_xen_clusters(void)
{
	if (hvm_start_info->memmap_entries > 0) {
		struct hvm_memmap_table_entry *map_entry;
		map_entry = (void *)((uintptr_t)hvm_start_info->memmap_paddr + KERNBASE);
		for (int i = 0; i < hvm_start_info->memmap_entries; i++) {
			if (map_entry[i].size < PAGE_SIZE)
				continue;
			switch (map_entry[i].type) {
			case XEN_HVM_MEMMAP_TYPE_RAM:
				x86_add_cluster(map_entry[i].addr,
				    map_entry[i].addr + map_entry[i].size,
				    BIM_Memory);
				break;
			case XEN_HVM_MEMMAP_TYPE_ACPI:
				x86_add_cluster(map_entry[i].addr,
				    map_entry[i].addr + map_entry[i].size,
				    BIM_ACPI);
				break;
			}
		}
	} else {
		struct xen_memory_map memmap;
		static struct _xen_mmap {
			struct btinfo_memmap bim;
			struct bi_memmap_entry map[128]; /* same as FreeBSD */
		} __packed xen_mmap;
		int err;

		memmap.nr_entries = 128;
		set_xen_guest_handle(memmap.buffer, &xen_mmap.bim.entry[0]);
		if ((err = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap))
		    < 0)
			panic("XENMEM_memory_map %d", err);
		xen_mmap.bim.num = memmap.nr_entries;
		x86_parse_clusters(&xen_mmap.bim);
	}
}
#endif /* XEN */

/*
 * init_x86_clusters: retrieve the memory clusters provided by the BIOS, and
 * initialize mem_clusters.
 */
void
init_x86_clusters(void)
{
	struct btinfo_memmap *bim;
	struct btinfo_efimemmap *biem;

	/*
	 * Check to see if we have a memory map from the BIOS (passed to us
	 * by the boot program).
	 */
#ifdef XEN
	if (vm_guest_is_pvh()) {
		x86_add_xen_clusters();
	}
#endif /* XEN */

#ifdef i386
	extern int biosmem_implicit;
	biem = lookup_bootinfo(BTINFO_EFIMEMMAP);
	if (biem != NULL)
		bim = efi_get_e820memmap();
	else
		bim = lookup_bootinfo(BTINFO_MEMMAP);
	if ((biosmem_implicit || (biosbasemem == 0 && biosextmem == 0)) &&
	    bim != NULL && bim->num > 0)
		x86_parse_clusters(bim);
#else
#if !defined(REALBASEMEM) && !defined(REALEXTMEM)
	biem = lookup_bootinfo(BTINFO_EFIMEMMAP);
	if (biem != NULL)
		bim = efi_get_e820memmap();
	else
		bim = lookup_bootinfo(BTINFO_MEMMAP);
	if (bim != NULL && bim->num > 0)
		x86_parse_clusters(bim);
#else
	(void)bim, (void)biem;
#endif
#endif

	if (mem_cluster_cnt == 0) {
		/*
		 * If x86_parse_clusters didn't find any valid segment, create
		 * fake clusters.
		 */
		x86_fake_clusters();
	}
}

/*
 * init_x86_vm: initialize the VM system on x86.  We basically internalize as
 * many physical pages as we can, starting at lowmem_rsvd, but we don't
 * internalize the kernel physical pages (from pa_kstart to pa_kend).
 */
int
init_x86_vm(paddr_t pa_kend)
{
	extern struct bootspace bootspace;
	paddr_t pa_kstart = bootspace.head.pa;
	uint64_t seg_start, seg_end;
	uint64_t seg_start1, seg_end1;
	int x;
	unsigned i;

	for (i = 0; i < __arraycount(x86_freelists); i++) {
		if (avail_end < x86_freelists[i].limit)
			x86_freelists[i].freelist = VM_FREELIST_DEFAULT;
	}

	/*
	 * Now, load the memory clusters (which have already been rounded and
	 * truncated) into the VM system.
	 *
	 * NOTE: we assume that memory starts at 0.
	 */
	for (x = 0; x < mem_cluster_cnt; x++) {
		const phys_ram_seg_t *cluster = &mem_clusters[x];

		seg_start = cluster->start;
		seg_end = cluster->start + cluster->size;
		seg_start1 = 0;
		seg_end1 = 0;

#ifdef DEBUG_MEMLOAD
		printf("segment %" PRIx64 " - %" PRIx64 "\n",
		    seg_start, seg_end);
#endif

		/* Skip memory before our available starting point. */
		if (seg_end <= lowmem_rsvd) {
#ifdef DEBUG_MEMLOAD
			printf("discard segment below starting point "
			    "%" PRIx64 " - %" PRIx64 "\n", seg_start, seg_end);
#endif
			continue;
		}

		if (seg_start <= lowmem_rsvd && lowmem_rsvd < seg_end) {
			seg_start = lowmem_rsvd;
			if (seg_start == seg_end) {
#ifdef DEBUG_MEMLOAD
				printf("discard segment below starting point "
				    "%" PRIx64 " - %" PRIx64 "\n",
				    seg_start, seg_end);
#endif
				continue;
			}
		}

		/*
		 * If this segment contains the kernel, split it in two, around
		 * the kernel.
		 *  [seg_start                       seg_end]
		 *             [pa_kstart  pa_kend]
		 */
		if (seg_start <= pa_kstart && pa_kend <= seg_end) {
#ifdef DEBUG_MEMLOAD
			printf("split kernel overlapping to "
			    "%" PRIx64 " - %" PRIxPADDR " and "
			    "%" PRIxPADDR " - %" PRIx64 "\n",
			    seg_start, pa_kstart, pa_kend, seg_end);
#endif
			seg_start1 = pa_kend;
			seg_end1 = seg_end;
			seg_end = pa_kstart;
			KASSERT(seg_end < seg_end1);
		}

		/*
		 * Discard a segment inside the kernel
		 *  [pa_kstart                       pa_kend]
		 *             [seg_start  seg_end]
		 */
		if (pa_kstart < seg_start && seg_end < pa_kend) {
#ifdef DEBUG_MEMLOAD
			printf("discard complete kernel overlap "
			    "%" PRIx64 " - %" PRIx64 "\n", seg_start, seg_end);
#endif
			continue;
		}

		/*
		 * Discard leading hunk that overlaps the kernel
		 *  [pa_kstart             pa_kend]
		 *            [seg_start            seg_end]
		 */
		if (pa_kstart < seg_start &&
		    seg_start < pa_kend &&
		    pa_kend < seg_end) {
#ifdef DEBUG_MEMLOAD
			printf("discard leading kernel overlap "
			    "%" PRIx64 " - %" PRIxPADDR "\n",
			    seg_start, pa_kend);
#endif
			seg_start = pa_kend;
		}

		/*
		 * Discard trailing hunk that overlaps the kernel
		 *            [pa_kstart            pa_kend]
		 *  [seg_start             seg_end]
		 */
		if (seg_start < pa_kstart &&
		    pa_kstart < seg_end &&
		    seg_end < pa_kend) {
#ifdef DEBUG_MEMLOAD
			printf("discard trailing kernel overlap "
			    "%" PRIxPADDR " - %" PRIx64 "\n",
			    pa_kstart, seg_end);
#endif
			seg_end = pa_kstart;
		}

		/* First hunk */
		if (seg_start != seg_end) {
			x86_load_region(seg_start, seg_end);
		}

		/* Second hunk */
		if (seg_start1 != seg_end1) {
			x86_load_region(seg_start1, seg_end1);
		}
	}

	return 0;
}

#endif /* !XENPV */

void
init_x86_msgbuf(void)
{
	/* Message buffer is located at end of core. */
	psize_t sz = round_page(MSGBUFSIZE);
	psize_t reqsz = sz;
	uvm_physseg_t x;

search_again:
	for (x = uvm_physseg_get_first();
	     uvm_physseg_valid_p(x);
	     x = uvm_physseg_get_next(x)) {

		if (ctob(uvm_physseg_get_avail_end(x)) == avail_end)
			break;
	}

	if (uvm_physseg_valid_p(x) == false)
		panic("init_x86_msgbuf: can't find end of memory");

	/* Shrink so it'll fit in the last segment. */
	if (uvm_physseg_get_avail_end(x) - uvm_physseg_get_avail_start(x) < atop(sz))
		sz = ctob(uvm_physseg_get_avail_end(x) - uvm_physseg_get_avail_start(x));

	msgbuf_p_seg[msgbuf_p_cnt].sz = sz;
	msgbuf_p_seg[msgbuf_p_cnt++].paddr = ctob(uvm_physseg_get_avail_end(x)) - sz;
	uvm_physseg_unplug(uvm_physseg_get_end(x) - atop(sz), atop(sz));

	/* Now find where the new avail_end is. */
	avail_end = ctob(uvm_physseg_get_highest_frame());

	if (sz == reqsz)
		return;

	reqsz -= sz;
	if (msgbuf_p_cnt == VM_PHYSSEG_MAX) {
		/* No more segments available, bail out. */
		printf("WARNING: MSGBUFSIZE (%zu) too large, using %zu.\n",
		    (size_t)MSGBUFSIZE, (size_t)(MSGBUFSIZE - reqsz));
		return;
	}

	sz = reqsz;
	goto search_again;
}

void
x86_reset(void)
{
	uint8_t b;

#if NACPICA > 0
	/*
	 * If ACPI is active, try to reset using the reset register
	 * defined in the FADT.
	 */
	if (acpi_active) {
		if (acpi_reset() == 0) {
			delay(500000); /* wait 0.5 sec to see if that did it */
		}
	}
#endif

	/*
	 * The keyboard controller has 4 random output pins, one of which is
	 * connected to the RESET pin on the CPU in many PCs.  We tell the
	 * keyboard controller to pulse this line a couple of times.
	 */
	outb(IO_KBD + KBCMDP, KBC_PULSE0);
	delay(100000);
	outb(IO_KBD + KBCMDP, KBC_PULSE0);
	delay(100000);

	/*
	 * Attempt to force a reset via the Reset Control register at
	 * I/O port 0xcf9.  Bit 2 forces a system reset when it
	 * transitions from 0 to 1.  Bit 1 selects the type of reset
	 * to attempt: 0 selects a "soft" reset, and 1 selects a
	 * "hard" reset.  We try a "hard" reset.  The first write sets
	 * bit 1 to select a "hard" reset and clears bit 2.  The
	 * second write forces a 0 -> 1 transition in bit 2 to trigger
	 * a reset.
	 */
	outb(0xcf9, 0x2);
	outb(0xcf9, 0x6);
	DELAY(500000);	/* wait 0.5 sec to see if that did it */

	/*
	 * Attempt to force a reset via the Fast A20 and Init register
	 * at I/O port 0x92.  Bit 1 serves as an alternate A20 gate.
	 * Bit 0 asserts INIT# when set to 1.  We are careful to only
	 * preserve bit 1 while setting bit 0.  We also must clear bit
	 * 0 before setting it if it isn't already clear.
	 */
	b = inb(0x92);
	if (b != 0xff) {
		if ((b & 0x1) != 0)
			outb(0x92, b & 0xfe);
		outb(0x92, b | 0x1);
		DELAY(500000);	/* wait 0.5 sec to see if that did it */
	}
}

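/*
 * Machdep kauth policy: reading I/O permission state is always allowed,
 * while USER_LDT get/set requests are only allowed once the
 * x86_user_ldt_enabled switch has been turned on (via the machdep.user_ldt
 * sysctl created below); everything else is deferred to other listeners.
 */
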
static int
x86_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	int result;

	result = KAUTH_RESULT_DEFER;

	switch (action) {
	case KAUTH_MACHDEP_IOPERM_GET:
		result = KAUTH_RESULT_ALLOW;
		break;

	case KAUTH_MACHDEP_LDT_GET:
	case KAUTH_MACHDEP_LDT_SET:
		if (x86_user_ldt_enabled) {
			result = KAUTH_RESULT_ALLOW;
		}
		break;

	default:
		break;
	}

	return result;
}

void
machdep_init(void)
{

	x86_listener = kauth_listen_scope(KAUTH_SCOPE_MACHDEP,
	    x86_listener_cb, NULL);
}

/*
 * x86_startup: x86 common startup routine
 *
 * called by cpu_startup.
 */

void
x86_startup(void)
{
#if !defined(XENPV)
	nmi_init();
#endif
}

const char *
get_booted_kernel(void)
{
	const struct btinfo_bootpath *bibp = lookup_bootinfo(BTINFO_BOOTPATH);
	return bibp ? bibp->bootpath : NULL;
}

/*
 * machine dependent system variables.
 */
static int
sysctl_machdep_booted_kernel(SYSCTLFN_ARGS)
{
	struct btinfo_bootpath *bibp;
	struct sysctlnode node;

	bibp = lookup_bootinfo(BTINFO_BOOTPATH);
	if (!bibp)
		return ENOENT; /* ??? */

	node = *rnode;
	node.sysctl_data = bibp->bootpath;
	node.sysctl_size = sizeof(bibp->bootpath);
	return sysctl_lookup(SYSCTLFN_CALL(&node));
}

static int
sysctl_machdep_bootmethod(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	char buf[5];

	node = *rnode;
	node.sysctl_data = buf;
	if (bootmethod_efi)
		memcpy(node.sysctl_data, "UEFI", 5);
	else
		memcpy(node.sysctl_data, "BIOS", 5);

	return sysctl_lookup(SYSCTLFN_CALL(&node));
}


static int
sysctl_machdep_diskinfo(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	extern struct bi_devmatch *x86_alldisks;
	extern int x86_ndisks;

	if (x86_alldisks == NULL)
		return EOPNOTSUPP;

	node = *rnode;
	node.sysctl_data = x86_alldisks;
	node.sysctl_size = sizeof(struct disklist) +
	    (x86_ndisks - 1) * sizeof(struct nativedisk_info);
	return sysctl_lookup(SYSCTLFN_CALL(&node));
}

#ifndef XENPV
static int
sysctl_machdep_tsc_enable(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error, val;

	val = *(int *)rnode->sysctl_data;

	node = *rnode;
	node.sysctl_data = &val;

	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	if (val == 1) {
		tsc_user_enable();
	} else if (val == 0) {
		tsc_user_disable();
	} else {
		error = EINVAL;
	}
	if (error)
		return error;

	*(int *)rnode->sysctl_data = val;

	return 0;
}
#endif

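/*
 * The handler above backs the machdep.tsc_user_enable sysctl (created
 * below): writing 1 re-enables the RDTSC instruction for user programs,
 * writing 0 disables it, and any other value fails with EINVAL, e.g.:
 *
 *	# sysctl -w machdep.tsc_user_enable=0
 *	machdep.tsc_user_enable: 1 -> 0
 */
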
static const char * const vm_guest_name[VM_LAST] = {
	[VM_GUEST_NO] = "none",
	[VM_GUEST_VM] = "generic",
	[VM_GUEST_XENPV] = "XenPV",
	[VM_GUEST_XENPVH] = "XenPVH",
	[VM_GUEST_XENHVM] = "XenHVM",
	[VM_GUEST_XENPVHVM] = "XenPVHVM",
	[VM_GUEST_HV] = "Hyper-V",
	[VM_GUEST_VMWARE] = "VMware",
	[VM_GUEST_KVM] = "KVM",
	[VM_GUEST_VIRTUALBOX] = "VirtualBox",
};

static int
sysctl_machdep_hypervisor(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	const char *t = NULL;
	char buf[64];

	node = *rnode;
	node.sysctl_data = buf;
	if (vm_guest >= VM_GUEST_NO && vm_guest < VM_LAST)
		t = vm_guest_name[vm_guest];
	if (t == NULL)
		t = "unknown";
	strlcpy(buf, t, sizeof(buf));
	return sysctl_lookup(SYSCTLFN_CALL(&node));
}

static void
const_sysctl(struct sysctllog **clog, const char *name, int type,
    u_quad_t value, int tag)
{
	(sysctl_createv)(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
	    type, name, NULL, NULL, value, NULL, 0,
	    CTL_MACHDEP, tag, CTL_EOL);
}

SYSCTL_SETUP(sysctl_machdep_setup, "sysctl machdep subtree setup")
{
	extern uint64_t tsc_freq;
#ifndef XENPV
	extern int tsc_user_enabled;
#endif
	extern int sparse_dump;

	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0,
	    CTL_MACHDEP, CTL_EOL);

	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "console_device", NULL,
	    sysctl_consdev, 0, NULL, sizeof(dev_t),
	    CTL_MACHDEP, CPU_CONSDEV, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRING, "booted_kernel", NULL,
	    sysctl_machdep_booted_kernel, 0, NULL, 0,
	    CTL_MACHDEP, CPU_BOOTED_KERNEL, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRING, "bootmethod", NULL,
	    sysctl_machdep_bootmethod, 0, NULL, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "diskinfo", NULL,
	    sysctl_machdep_diskinfo, 0, NULL, 0,
	    CTL_MACHDEP, CPU_DISKINFO, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRING, "cpu_brand", NULL,
	    NULL, 0, cpu_brand_string, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "sparse_dump", NULL,
	    NULL, 0, &sparse_dump, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_QUAD, "tsc_freq", NULL,
	    NULL, 0, &tsc_freq, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_INT, "pae",
	    SYSCTL_DESCR("Whether the kernel uses PAE"),
	    NULL, 0, &use_pae, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
#ifndef XENPV
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_READWRITE,
	    CTLTYPE_INT, "tsc_user_enable",
	    SYSCTL_DESCR("RDTSC instruction enabled in usermode"),
	    sysctl_machdep_tsc_enable, 0, &tsc_user_enabled, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
#endif
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRING, "hypervisor", NULL,
	    sysctl_machdep_hypervisor, 0, NULL, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
#ifdef SVS
	const struct sysctlnode *svs_rnode = NULL;
	sysctl_createv(clog, 0, NULL, &svs_rnode,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "svs", NULL,
	    NULL, 0, NULL, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &svs_rnode, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_BOOL, "enabled",
	    SYSCTL_DESCR("Whether the kernel uses SVS"),
	    NULL, 0, &svs_enabled, 0,
	    CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &svs_rnode, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_BOOL, "pcid",
	    SYSCTL_DESCR("Whether SVS uses PCID"),
	    NULL, 0, &svs_pcid, 0,
	    CTL_CREATE, CTL_EOL);
#endif

	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_READWRITE,
	    CTLTYPE_BOOL, "user_ldt",
	    SYSCTL_DESCR("Whether USER_LDT is enabled"),
	    NULL, 0, &x86_user_ldt_enabled, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);

#ifndef XENPV
	void sysctl_speculation_init(struct sysctllog **);
	sysctl_speculation_init(clog);
#endif

	/* None of these can ever change once the system has booted */
	const_sysctl(clog, "fpu_present", CTLTYPE_INT, i386_fpu_present,
	    CPU_FPU_PRESENT);
	const_sysctl(clog, "osfxsr", CTLTYPE_INT, i386_use_fxsave,
	    CPU_OSFXSR);
	const_sysctl(clog, "sse", CTLTYPE_INT, i386_has_sse,
	    CPU_SSE);
	const_sysctl(clog, "sse2", CTLTYPE_INT, i386_has_sse2,
	    CPU_SSE2);

	const_sysctl(clog, "fpu_save", CTLTYPE_INT, x86_fpu_save,
	    CPU_FPU_SAVE);
	const_sysctl(clog, "fpu_save_size", CTLTYPE_INT, x86_fpu_save_size,
	    CPU_FPU_SAVE_SIZE);
	const_sysctl(clog, "xsave_features", CTLTYPE_QUAD, x86_xsave_features,
	    CPU_XSAVE_FEATURES);

#ifndef XENPV
	const_sysctl(clog, "biosbasemem", CTLTYPE_INT, biosbasemem,
	    CPU_BIOSBASEMEM);
	const_sysctl(clog, "biosextmem", CTLTYPE_INT, biosextmem,
	    CPU_BIOSEXTMEM);
#endif
}

/* Here for want of a better place */
#if defined(DOM0OPS) || !defined(XENPV)
struct pic *
intr_findpic(int num)
{
#if NIOAPIC > 0
	struct ioapic_softc *pic;

	pic = ioapic_find_bybase(num);
	if (pic != NULL)
		return &pic->sc_pic;
#endif
	if (num < NUM_LEGACY_IRQS)
		return &i8259_pic;

	return NULL;
}
#endif

void
cpu_initclocks(void)
{

	/*
	 * Re-calibrate TSC on boot CPU using most accurate time source,
	 * thus making accurate TSC available for x86_initclock_func().
	 */
	cpu_get_tsc_freq(curcpu());

	/* Now start the clocks on this CPU (the boot CPU). */
	(*x86_initclock_func)();
}

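/*
 * x86_cpu_is_lcall() below checks whether the instruction at 'ip' is the
 * 7-byte far call "lcall $7,$0" (opcode 0x9a, 32-bit offset 0, selector
 * 0x0007) historically used as the NetBSD system call gate, or its BSD/OS
 * counterpart through selector 0x0087; anything else is rejected with
 * EINVAL.
 */
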
int
x86_cpu_is_lcall(const void *ip)
{
	static const uint8_t lcall[] = { 0x9a, 0, 0, 0, 0 };
	int error;
	const size_t sz = sizeof(lcall) + 2;
	uint8_t tmp[sizeof(lcall) + 2];

	if ((error = copyin(ip, tmp, sz)) != 0)
		return error;

	if (memcmp(tmp, lcall, sizeof(lcall)) != 0 || tmp[sz - 1] != 0)
		return EINVAL;

	switch (tmp[sz - 2]) {
	case (uint8_t)0x07: /* NetBSD */
	case (uint8_t)0x87: /* BSD/OS */
		return 0;
	default:
		return EINVAL;
	}
}