/*	$NetBSD: cpu.c,v 1.40 2010/01/08 19:43:26 dyoung Exp $	*/
/* NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp */

/*-
 * Copyright (c) 2000 The NetBSD Foundation, Inc.
 * Copyright (c) 2002, 2006, 2007 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by RedBack Networks Inc.
 *
 * Author: Bill Sommerfeld
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1999 Stefan Grefen
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the NetBSD
 *      Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.40 2010/01/08 19:43:26 dyoung Exp $");

#include "opt_ddb.h"
#include "opt_multiprocessor.h"
#include "opt_mpbios.h"		/* for MPDEBUG */
#include "opt_mtrr.h"
#include "opt_xen.h"

#include "lapic.h"
#include "ioapic.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/kmem.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/reboot.h>

#include <uvm/uvm_extern.h>

#include <machine/cpufunc.h>
#include <machine/cpuvar.h>
#include <machine/pmap.h>
#include <machine/vmparam.h>
#include <machine/mpbiosvar.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <machine/segments.h>
#include <machine/gdt.h>
#include <machine/mtrr.h>
#include <machine/pio.h>

#include <xen/vcpuvar.h>

#if NLAPIC > 0
#include <machine/apicvar.h>
#include <machine/i82489reg.h>
#include <machine/i82489var.h>
#endif

#include <dev/ic/mc146818reg.h>
#include <dev/isa/isareg.h>

#if MAXCPUS > 32
#error cpu_info contains 32-bit bitmasks
#endif

int	cpu_match(device_t, cfdata_t, void *);
void	cpu_attach(device_t, device_t, void *);
int	vcpu_match(device_t, cfdata_t, void *);
void	vcpu_attach(device_t, device_t, void *);
void	cpu_attach_common(device_t, device_t, void *);
void	cpu_offline_md(void);

struct cpu_softc {
	device_t sc_dev;		/* device tree glue */
	struct cpu_info *sc_info;	/* pointer to CPU info */
	bool sc_wasonline;
};

int	mp_cpu_start(struct cpu_info *, paddr_t);
void	mp_cpu_start_cleanup(struct cpu_info *);
const struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL,
					    mp_cpu_start_cleanup };

CFATTACH_DECL_NEW(cpu, sizeof(struct cpu_softc),
    cpu_match, cpu_attach, NULL, NULL);
CFATTACH_DECL_NEW(vcpu, sizeof(struct cpu_softc),
    vcpu_match, vcpu_attach, NULL, NULL);

/*
 * Statically-allocated CPU info for the primary CPU (or the only
 * CPU, on uniprocessors).  The CPU info list is initialized to
 * point at it.
 */
#ifdef TRAPLOG
#include <machine/tlog.h>
struct tlog tlog_primary;
#endif
struct cpu_info cpu_info_primary __aligned(CACHE_LINE_SIZE) = {
	.ci_dev = 0,
	.ci_self = &cpu_info_primary,
	.ci_idepth = -1,
	.ci_curlwp = &lwp0,
	.ci_curldt = -1,
#ifdef TRAPLOG
	.ci_tlog = &tlog_primary,
#endif
};

struct cpu_info phycpu_info_primary __aligned(CACHE_LINE_SIZE) = {
	.ci_dev = 0,
	.ci_self = &phycpu_info_primary,
};

struct cpu_info *cpu_info_list = &cpu_info_primary;
struct cpu_info *phycpu_info_list = &phycpu_info_primary;

static void	cpu_set_tss_gates(struct cpu_info *ci);

uint32_t cpus_attached = 0;
uint32_t cpus_running = 0;

uint32_t phycpus_attached = 0;
uint32_t phycpus_running = 0;

bool x86_mp_online;
paddr_t mp_trampoline_paddr = MP_TRAMPOLINE;

#if defined(MULTIPROCESSOR)
void		cpu_hatch(void *);
static void	cpu_boot_secondary(struct cpu_info *ci);
static void	cpu_start_secondary(struct cpu_info *ci);
static void	cpu_copy_trampoline(void);

/*
 * Runs once per boot, after the multiprocessor goo has been detected
 * and the local APIC on the boot processor has been mapped.
 *
 * Called from lapic_boot_init() (from mpbios_scan()).
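 *
 * Among other things, this copies the AP boot trampoline into place.
 * Secondary CPUs begin execution in real mode, so (as the comments in
 * mp_cpu_start() note below) the bootstrap code must live at a
 * page-aligned physical address that is addressable in real mode;
 * MP_TRAMPOLINE is presumably chosen to satisfy that.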
 */
void
cpu_init_first(void)
{

	cpu_info_primary.ci_cpuid = lapic_cpu_number();
	cpu_copy_trampoline();
}
#endif	/* MULTIPROCESSOR */

int
cpu_match(device_t parent, cfdata_t match, void *aux)
{

	return 1;
}

void
cpu_attach(device_t parent, device_t self, void *aux)
{
	struct cpu_softc *sc = device_private(self);
	struct cpu_attach_args *caa = aux;
	struct cpu_info *ci;
	uintptr_t ptr;
	static bool again = false;

	sc->sc_dev = self;

	if (phycpus_attached == ~0) {
		aprint_error(": increase MAXCPUS\n");
		return;
	}

	/*
	 * If we're an Application Processor, allocate a cpu_info
	 * structure, otherwise use the primary's.
	 */
	if (caa->cpu_role == CPU_ROLE_AP) {
		if ((boothowto & RB_MD1) != 0) {
			aprint_error(": multiprocessor boot disabled\n");
			if (!pmf_device_register(self, NULL, NULL))
				aprint_error_dev(self,
				    "couldn't establish power handler\n");
			return;
		}
		aprint_naive(": Application Processor\n");
		ptr = (uintptr_t)kmem_zalloc(sizeof(*ci) + CACHE_LINE_SIZE - 1,
		    KM_SLEEP);
		ci = (struct cpu_info *)((ptr + CACHE_LINE_SIZE - 1) &
		    ~(CACHE_LINE_SIZE - 1));
		ci->ci_curldt = -1;
	} else {
		aprint_naive(": %s Processor\n",
		    caa->cpu_role == CPU_ROLE_SP ? "Single" : "Boot");
		ci = &phycpu_info_primary;
	}

	ci->ci_self = ci;
	sc->sc_info = ci;

	ci->ci_dev = self;
	ci->ci_cpuid = caa->cpu_number;
	ci->ci_vcpu = NULL;

	/*
	 * Boot processor may not be attached first, but the below
	 * must be done to allow booting other processors.
	 */
	if (!again) {
		atomic_or_32(&ci->ci_flags, CPUF_PRESENT | CPUF_PRIMARY);
		/* Basic init. */
		again = true;
	}

	printf(": ");
	switch (caa->cpu_role) {
	case CPU_ROLE_SP:
		printf("(uniprocessor)\n");
		atomic_or_32(&ci->ci_flags, CPUF_SP);
		break;

	case CPU_ROLE_BP:
		printf("(boot processor)\n");
		atomic_or_32(&ci->ci_flags, CPUF_BSP);
		break;

	case CPU_ROLE_AP:
		/*
		 * Report on an AP.
		 */
		printf("(application processor)\n");
		if (ci->ci_flags & CPUF_PRESENT) {
			struct cpu_info *tmp;

			tmp = phycpu_info_list;
			while (tmp->ci_next)
				tmp = tmp->ci_next;

			tmp->ci_next = ci;
		}
		break;

	default:
		panic("unknown processor type??\n");
	}

	atomic_or_32(&phycpus_attached, ci->ci_cpumask);

	return;
}

int
vcpu_match(device_t parent, cfdata_t match, void *aux)
{
	struct vcpu_attach_args *vcaa = aux;

	if (strcmp(vcaa->vcaa_name, match->cf_name) == 0)
		return 1;
	return 0;
}

void
vcpu_attach(device_t parent, device_t self, void *aux)
{
	struct vcpu_attach_args *vcaa = aux;

	cpu_attach_common(parent, self, &vcaa->vcaa_caa);
}

static void
cpu_vm_init(struct cpu_info *ci)
{
	int ncolors = 2, i;

	for (i = CAI_ICACHE; i <= CAI_L2CACHE; i++) {
		struct x86_cache_info *cai;
		int tcolors;

		cai = &ci->ci_cinfo[i];

		tcolors = atop(cai->cai_totalsize);
		switch (cai->cai_associativity) {
		case 0xff:
			tcolors = 1; /* fully associative */
			break;
		case 0:
		case 1:
			break;
		default:
			tcolors /= cai->cai_associativity;
		}
		ncolors = max(ncolors, tcolors);
	}

	/*
	 * Knowing the size of the largest cache on this CPU, re-color
	 * our pages.
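	 *
	 * For each cache, tcolors above is the cache size in pages
	 * divided by the associativity; e.g. a hypothetical 512KB,
	 * 8-way L2 with 4KB pages gives atop(512KB) = 128 page-sized
	 * chunks, / 8 ways = 16 colors: physical pages fall into 16
	 * equivalence classes with respect to where they can sit in
	 * that cache.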
	 */
	if (ncolors <= uvmexp.ncolors)
		return;
	aprint_debug_dev(ci->ci_dev, "%d page colors\n", ncolors);
	uvm_page_recolor(ncolors);
}

void
cpu_attach_common(device_t parent, device_t self, void *aux)
{
	struct cpu_softc *sc = device_private(self);
	struct cpu_attach_args *caa = aux;
	struct cpu_info *ci;
	uintptr_t ptr;
	int cpunum = caa->cpu_number;
	static bool again = false;

	sc->sc_dev = self;

	/*
	 * If we're an Application Processor, allocate a cpu_info
	 * structure, otherwise use the primary's.
	 */
	if (caa->cpu_role == CPU_ROLE_AP) {
		aprint_naive(": Application Processor\n");
		ptr = (uintptr_t)kmem_alloc(sizeof(*ci) + CACHE_LINE_SIZE - 1,
		    KM_SLEEP);
		ci = (struct cpu_info *)((ptr + CACHE_LINE_SIZE - 1) &
		    ~(CACHE_LINE_SIZE - 1));
		memset(ci, 0, sizeof(*ci));
#ifdef TRAPLOG
		ci->ci_tlog_base = kmem_zalloc(sizeof(struct tlog), KM_SLEEP);
#endif
	} else {
		aprint_naive(": %s Processor\n",
		    caa->cpu_role == CPU_ROLE_SP ? "Single" : "Boot");
		ci = &cpu_info_primary;
#if NLAPIC > 0
		if (cpunum != lapic_cpu_number()) {
			/* XXX should be done earlier */
			uint32_t reg;
			aprint_verbose("\n");
			aprint_verbose_dev(self, "running CPU at apic %d"
			    " instead of at expected %d", lapic_cpu_number(),
			    cpunum);
			reg = i82489_readreg(LAPIC_ID);
			i82489_writereg(LAPIC_ID, (reg & ~LAPIC_ID_MASK) |
			    (cpunum << LAPIC_ID_SHIFT));
		}
		if (cpunum != lapic_cpu_number()) {
			aprint_error_dev(self, "unable to reset apic id\n");
		}
#endif
	}

	ci->ci_self = ci;
	sc->sc_info = ci;
	ci->ci_dev = self;
	ci->ci_cpuid = cpunum;

	KASSERT(HYPERVISOR_shared_info != NULL);
	ci->ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[cpunum];

	ci->ci_func = caa->cpu_func;

	/* Must be called before mi_cpu_attach(). */
	cpu_vm_init(ci);

	if (caa->cpu_role == CPU_ROLE_AP) {
		int error;

		error = mi_cpu_attach(ci);
		if (error != 0) {
			aprint_normal("\n");
			aprint_error_dev(self,
			    "mi_cpu_attach failed with %d\n", error);
			return;
		}
	} else {
		KASSERT(ci->ci_data.cpu_idlelwp != NULL);
	}

	ci->ci_cpumask = (1 << cpu_index(ci));
	pmap_reference(pmap_kernel());
	ci->ci_pmap = pmap_kernel();
	ci->ci_tlbstate = TLBSTATE_STALE;

	/*
	 * Boot processor may not be attached first, but the below
	 * must be done to allow booting other processors.
	 */
	if (!again) {
		atomic_or_32(&ci->ci_flags, CPUF_PRESENT | CPUF_PRIMARY);
		/* Basic init. */
		cpu_intr_init(ci);
		cpu_get_tsc_freq(ci);
		cpu_init(ci);
		cpu_set_tss_gates(ci);
		pmap_cpu_init_late(ci);
#if NLAPIC > 0
		if (caa->cpu_role != CPU_ROLE_SP) {
			/* Enable lapic. */
			lapic_enable();
			lapic_set_lvt();
			lapic_calibrate_timer();
		}
#endif
		/* Make sure DELAY() is initialized. */
		DELAY(1);
		again = true;
	}

	/*
	 * Further PCB init is done later.
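	 * (For example, the idle lwp's pcb_cr0 is set in
	 * cpu_init_idle_lwps(), and each secondary fills in its own
	 * pcb_cr3 in cpu_hatch().)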
	 */

	switch (caa->cpu_role) {
	case CPU_ROLE_SP:
		atomic_or_32(&ci->ci_flags, CPUF_SP);
		cpu_identify(ci);
#if 0
		x86_errata();
#endif
		x86_cpu_idle_init();
		break;

	case CPU_ROLE_BP:
		atomic_or_32(&ci->ci_flags, CPUF_BSP);
		cpu_identify(ci);
		cpu_init(ci);
#if 0
		x86_errata();
#endif
		x86_cpu_idle_init();
		break;

	case CPU_ROLE_AP:
		/*
		 * Report on an AP.
		 */
#if defined(MULTIPROCESSOR)
		cpu_intr_init(ci);
		gdt_alloc_cpu(ci);
		cpu_set_tss_gates(ci);
		pmap_cpu_init_early(ci);
		pmap_cpu_init_late(ci);
		cpu_start_secondary(ci);
		if (ci->ci_flags & CPUF_PRESENT) {
			struct cpu_info *tmp;

			identifycpu(ci);
			tmp = cpu_info_list;
			while (tmp->ci_next)
				tmp = tmp->ci_next;

			tmp->ci_next = ci;
		}
#else
		aprint_error_dev(self, "not started\n");
#endif
		break;

	default:
		aprint_normal("\n");
		panic("unknown processor type??\n");
	}

	atomic_or_32(&cpus_attached, ci->ci_cpumask);

#if 0
	if (!pmf_device_register(self, cpu_suspend, cpu_resume))
		aprint_error_dev(self, "couldn't establish power handler\n");
#endif

#if defined(MULTIPROCESSOR)
	if (mp_verbose) {
		struct lwp *l = ci->ci_data.cpu_idlelwp;
		struct pcb *pcb = lwp_getpcb(l);

		aprint_verbose_dev(self,
		    "idle lwp at %p, idle sp at %p\n",
		    l,
#ifdef i386
		    (void *)pcb->pcb_esp
#else
		    (void *)pcb->pcb_rsp
#endif
		);
	}
#endif
}

/*
 * Initialize the processor appropriately.
 */
void
cpu_init(struct cpu_info *ci)
{

	/*
	 * On a P6 or above, enable global TLB caching if the
	 * hardware supports it.
	 */
	if (cpu_feature & CPUID_PGE)
		lcr4(rcr4() | CR4_PGE);	/* enable global TLB caching */

#ifdef XXXMTRR
	/*
	 * On a P6 or above, initialize MTRRs if the hardware supports them.
	 */
	if (cpu_feature & CPUID_MTRR) {
		if ((ci->ci_flags & CPUF_AP) == 0)
			i686_mtrr_init_first();
		mtrr_init_cpu(ci);
	}
#endif
	/*
	 * If we have FXSAVE/FXRSTOR, use them.
	 */
	if (cpu_feature & CPUID_FXSR) {
		lcr4(rcr4() | CR4_OSFXSR);

		/*
		 * If we have SSE/SSE2, enable XMM exceptions.
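		 * (CR4_OSXMMEXCPT advertises an OS #XM handler;
		 * without it, unmasked SSE exceptions would be
		 * delivered as #UD rather than #XM.)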
		 */
		if (cpu_feature & (CPUID_SSE|CPUID_SSE2))
			lcr4(rcr4() | CR4_OSXMMEXCPT);
	}

	atomic_or_32(&cpus_running, ci->ci_cpumask);
	atomic_or_32(&ci->ci_flags, CPUF_RUNNING);
}


#ifdef MULTIPROCESSOR
void
cpu_boot_secondary_processors(void)
{
	struct cpu_info *ci;
	u_long i;

	for (i = 0; i < maxcpus; i++) {
		ci = cpu_lookup(i);
		if (ci == NULL)
			continue;
		if (ci->ci_data.cpu_idlelwp == NULL)
			continue;
		if ((ci->ci_flags & CPUF_PRESENT) == 0)
			continue;
		if (ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY))
			continue;
		cpu_boot_secondary(ci);
	}

	x86_mp_online = true;
}

static void
cpu_init_idle_lwp(struct cpu_info *ci)
{
	struct lwp *l = ci->ci_data.cpu_idlelwp;
	struct pcb *pcb = lwp_getpcb(l);

	pcb->pcb_cr0 = rcr0();
}

void
cpu_init_idle_lwps(void)
{
	struct cpu_info *ci;
	u_long i;

	for (i = 0; i < maxcpus; i++) {
		ci = cpu_lookup(i);
		if (ci == NULL)
			continue;
		if (ci->ci_data.cpu_idlelwp == NULL)
			continue;
		if ((ci->ci_flags & CPUF_PRESENT) == 0)
			continue;
		cpu_init_idle_lwp(ci);
	}
}

void
cpu_start_secondary(struct cpu_info *ci)
{
	int i;
	struct pmap *kpm = pmap_kernel();
	extern uint32_t mp_pdirpa;

	mp_pdirpa = kpm->pm_pdirpa; /* XXX move elsewhere, not per CPU. */

	atomic_or_32(&ci->ci_flags, CPUF_AP);

	aprint_debug_dev(ci->ci_dev, "starting\n");

	ci->ci_curlwp = ci->ci_data.cpu_idlelwp;
	if (CPU_STARTUP(ci, mp_trampoline_paddr) != 0)
		return;

	/*
	 * Wait for it to become ready.
	 */
	for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT)) && i > 0; i--) {
#ifdef MPDEBUG
		extern int cpu_trace[3];
		static int otrace[3];
		if (memcmp(otrace, cpu_trace, sizeof(otrace)) != 0) {
			aprint_debug_dev(ci->ci_dev, "trace %02x %02x %02x\n",
			    cpu_trace[0], cpu_trace[1], cpu_trace[2]);
			memcpy(otrace, cpu_trace, sizeof(otrace));
		}
#endif
		delay(10);
	}
	if ((ci->ci_flags & CPUF_PRESENT) == 0) {
		aprint_error_dev(ci->ci_dev, "failed to become ready\n");
#if defined(MPDEBUG) && defined(DDB)
		printf("dropping into debugger; continue from here to resume boot\n");
		Debugger();
#endif
	}

	CPU_START_CLEANUP(ci);
}

void
cpu_boot_secondary(struct cpu_info *ci)
{
	int i;

	atomic_or_32(&ci->ci_flags, CPUF_GO);
	for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i > 0; i--) {
		delay(10);
	}
	if ((ci->ci_flags & CPUF_RUNNING) == 0) {
		aprint_error_dev(ci->ci_dev, "CPU failed to start\n");
#if defined(MPDEBUG) && defined(DDB)
		printf("dropping into debugger; continue from here to resume boot\n");
		Debugger();
#endif
	}
}

/*
 * The CPU ends up here when it's ready to run.
 * This is called from code in mptramp.s; at this point, we are running
 * in the idle pcb/idle stack of the new CPU.  When this function returns,
 * this processor will enter the idle loop and start looking for work.
 *
 * XXX should share some of this with init386 in machdep.c
 */
void
cpu_hatch(void *v)
{
	struct cpu_info *ci = (struct cpu_info *)v;
	struct pcb *pcb;
	uint32_t blacklist_features;
	int s, i;

#ifdef __x86_64__
	cpu_init_msrs(ci, true);
#endif

	cpu_probe(ci);

	/*
	 * Not on Xen...
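	 * These features either are unavailable to, or may not be
	 * managed directly by, a paravirtualized guest (page tables,
	 * CR4 and the MTRRs are under hypervisor control), which is
	 * presumably why they are masked out of cpu_feature before
	 * anything tests for them.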
	 */
	blacklist_features = ~(CPUID_PGE|CPUID_PSE|CPUID_MTRR|CPUID_FXSR|
	    CPUID_NOX);		/* XXX add CPUID_SVM */

	cpu_feature &= blacklist_features;

	KDASSERT((ci->ci_flags & CPUF_PRESENT) == 0);
	atomic_or_32(&ci->ci_flags, CPUF_PRESENT);
	while ((ci->ci_flags & CPUF_GO) == 0) {
		/* Don't use delay, boot CPU may be patching the text. */
		for (i = 10000; i != 0; i--)
			x86_pause();
	}

	/* Because the text may have been patched in x86_patch(). */
	wbinvd();
	x86_flush();

	KASSERT((ci->ci_flags & CPUF_RUNNING) == 0);

	pcb = lwp_getpcb(curlwp);
	lcr3(pmap_kernel()->pm_pdirpa);
	pcb->pcb_cr3 = pmap_kernel()->pm_pdirpa;
	pcb = lwp_getpcb(ci->ci_data.cpu_idlelwp);
	lcr0(pcb->pcb_cr0);

	cpu_init_idt();
	gdt_init_cpu(ci);
	lapic_enable();
	lapic_set_lvt();
	lapic_initclocks();

#ifdef i386
	npxinit(ci);
#else
	fpuinit(ci);
#endif

	lldt(GSEL(GLDT_SEL, SEL_KPL));
	ltr(ci->ci_tss_sel);

	cpu_init(ci);
	cpu_get_tsc_freq(ci);

	s = splhigh();
#ifdef i386
	lapic_tpr = 0;
#else
	lcr8(0);
#endif
	x86_enable_intr();
	splx(s);
#if 0
	x86_errata();
#endif

	aprint_debug_dev(ci->ci_dev, "CPU %ld running\n",
	    (long)ci->ci_cpuid);
}

#if defined(DDB)

#include <ddb/db_output.h>
#include <machine/db_machdep.h>

/*
 * Dump CPU information from ddb.
 */
void
cpu_debug_dump(void)
{
	struct cpu_info *ci;
	CPU_INFO_ITERATOR cii;

	db_printf("addr		dev	id	flags	ipis	curlwp		fpcurlwp\n");
	for (CPU_INFO_FOREACH(cii, ci)) {
		db_printf("%p	%s	%ld	%x	%x	%10p	%10p\n",
		    ci,
		    ci->ci_dev == NULL ? "BOOT" : device_xname(ci->ci_dev),
		    (long)ci->ci_cpuid,
		    ci->ci_flags, ci->ci_ipis,
		    ci->ci_curlwp,
		    ci->ci_fpcurlwp);
	}
}
#endif	/* DDB */

static void
cpu_copy_trampoline(void)
{
	/*
	 * Copy boot code.
	 */
	extern u_char cpu_spinup_trampoline[];
	extern u_char cpu_spinup_trampoline_end[];

	vaddr_t mp_trampoline_vaddr;

	mp_trampoline_vaddr = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_VAONLY);

	pmap_kenter_pa(mp_trampoline_vaddr, mp_trampoline_paddr,
	    VM_PROT_READ | VM_PROT_WRITE, 0);
	pmap_update(pmap_kernel());
	memcpy((void *)mp_trampoline_vaddr,
	    cpu_spinup_trampoline,
	    cpu_spinup_trampoline_end - cpu_spinup_trampoline);

	pmap_kremove(mp_trampoline_vaddr, PAGE_SIZE);
	pmap_update(pmap_kernel());
	uvm_km_free(kernel_map, mp_trampoline_vaddr, PAGE_SIZE, UVM_KMF_VAONLY);
}

#endif	/* MULTIPROCESSOR */

#ifdef i386
#if defined(DDB) && defined(MULTIPROCESSOR)
static void
tss_init(struct i386tss *tss, void *stack, void *func)
{
	memset(tss, 0, sizeof *tss);
	tss->tss_esp0 = tss->tss_esp = (int)((char *)stack + USPACE - 16);
	tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	tss->__tss_cs = GSEL(GCODE_SEL, SEL_KPL);
	tss->tss_fs = GSEL(GCPU_SEL, SEL_KPL);
	tss->tss_gs = tss->__tss_es = tss->__tss_ds =
	    tss->__tss_ss = GSEL(GDATA_SEL, SEL_KPL);
	tss->tss_cr3 = pmap_kernel()->pm_pdirpa;
	tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
	/*
	 * XXX not needed?
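	 * The CPU sets NT itself when a task is entered through a
	 * task gate, so presetting it in the saved image is
	 * presumably redundant.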
	 */
	tss->__tss_eflags = PSL_MBO | PSL_NT;
	tss->__tss_eip = (int)func;
}
#endif

/* XXX */
#define IDTVEC(name) __CONCAT(X, name)
typedef void (vector)(void);
extern vector IDTVEC(tss_trap08);
#ifdef DDB
extern vector Xintrddbipi;
extern int ddb_vec;
#endif

static void
cpu_set_tss_gates(struct cpu_info *ci)
{
#if 0
	struct segment_descriptor sd;

	ci->ci_doubleflt_stack = (char *)uvm_km_alloc(kernel_map, USPACE, 0,
	    UVM_KMF_WIRED);
	tss_init(&ci->ci_doubleflt_tss, ci->ci_doubleflt_stack,
	    IDTVEC(tss_trap08));
	setsegment(&sd, &ci->ci_doubleflt_tss, sizeof(struct i386tss) - 1,
	    SDT_SYS386TSS, SEL_KPL, 0, 0);
	ci->ci_gdt[GTRAPTSS_SEL].sd = sd;
	setgate(&idt[8], NULL, 0, SDT_SYSTASKGT, SEL_KPL,
	    GSEL(GTRAPTSS_SEL, SEL_KPL));
#endif

#if defined(DDB) && defined(MULTIPROCESSOR)
	/*
	 * Set up a separate handler for the DDB IPI, so that it doesn't
	 * stomp on a possibly corrupted stack.
	 *
	 * XXX overwriting the gate set in db_machine_init.
	 * Should rearrange the code so that it's set only once.
	 */
	struct segment_descriptor sd;

	ci->ci_ddbipi_stack = (char *)uvm_km_alloc(kernel_map, USPACE, 0,
	    UVM_KMF_WIRED);
	tss_init(&ci->ci_ddbipi_tss, ci->ci_ddbipi_stack, Xintrddbipi);

	setsegment(&sd, &ci->ci_ddbipi_tss, sizeof(struct i386tss) - 1,
	    SDT_SYS386TSS, SEL_KPL, 0, 0);
	ci->ci_gdt[GIPITSS_SEL].sd = sd;

	setgate(&idt[ddb_vec], NULL, 0, SDT_SYSTASKGT, SEL_KPL,
	    GSEL(GIPITSS_SEL, SEL_KPL));
#endif
}
#else
static void
cpu_set_tss_gates(struct cpu_info *ci)
{

}
#endif	/* i386 */

int
mp_cpu_start(struct cpu_info *ci, paddr_t target)
{
#if 0
#if NLAPIC > 0
	int error;
#endif
	unsigned short dwordptr[2];

	/*
	 * Bootstrap code must be addressable in real mode
	 * and it must be page aligned.
	 */
	KASSERT(target < 0x10000 && target % PAGE_SIZE == 0);

	/*
	 * "The BSP must initialize CMOS shutdown code to 0Ah ..."
	 */

	outb(IO_RTC, NVRAM_RESET);
	outb(IO_RTC+1, NVRAM_RESET_JUMP);

	/*
	 * "and the warm reset vector (DWORD based at 40:67) to point
	 * to the AP startup code ..."
	 */

	dwordptr[0] = 0;
	dwordptr[1] = target >> 4;

	pmap_kenter_pa(0, 0, VM_PROT_READ|VM_PROT_WRITE, 0);
	memcpy((uint8_t *)0x467, dwordptr, 4);
	pmap_kremove(0, PAGE_SIZE);

#if NLAPIC > 0
	/*
	 * ... prior to executing the following sequence:"
	 */

	if (ci->ci_flags & CPUF_AP) {
		if ((error = x86_ipi_init(ci->ci_cpuid)) != 0)
			return error;

		delay(10000);

		if (cpu_feature & CPUID_APIC) {
			error = x86_ipi_init(ci->ci_cpuid);
			if (error != 0) {
				aprint_error_dev(ci->ci_dev,
				    "%s: IPI not taken (1)\n", __func__);
				return error;
			}

			delay(10000);

			error = x86_ipi(target / PAGE_SIZE, ci->ci_cpuid,
			    LAPIC_DLMODE_STARTUP);
			if (error != 0) {
				aprint_error_dev(ci->ci_dev,
				    "%s: IPI not taken (2)\n", __func__);
				return error;
			}
			delay(200);

			error = x86_ipi(target / PAGE_SIZE, ci->ci_cpuid,
			    LAPIC_DLMODE_STARTUP);
			if (error != 0) {
				aprint_error_dev(ci->ci_dev,
				    "%s: IPI not taken (3)\n", __func__);
				return error;
			}
			delay(200);
		}
	}
#endif
#endif /* 0 */
	return 0;
}

void
mp_cpu_start_cleanup(struct cpu_info *ci)
{
#if 0
	/*
	 * Ensure the NVRAM reset byte contains something vaguely sane.
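	 * mp_cpu_start() pointed it at the warm-reset vector
	 * (NVRAM_RESET_JUMP) so that freshly started APs would jump
	 * into the trampoline; NVRAM_RESET_RST presumably restores
	 * the normal power-on reset path.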
	 */

	outb(IO_RTC, NVRAM_RESET);
	outb(IO_RTC+1, NVRAM_RESET_RST);
#endif
}

#ifdef __x86_64__

void
cpu_init_msrs(struct cpu_info *ci, bool full)
{
	/* On Xen, the FS/GS base MSRs are loaded via hypercall. */
	if (full) {
		HYPERVISOR_set_segment_base(SEGBASE_FS, 0);
		HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, (uint64_t)ci);
		HYPERVISOR_set_segment_base(SEGBASE_GS_USER, 0);
	}
}
#endif	/* __x86_64__ */

void
cpu_offline_md(void)
{
	int s;

	s = splhigh();
#ifdef __i386__
	npxsave_cpu(true);
#else
	fpusave_cpu(true);
#endif
	splx(s);
}

#if 0
/* XXX joerg restructure and restart CPUs individually */
static bool
cpu_suspend(device_t dv, pmf_qual_t qual)
{
	struct cpu_softc *sc = device_private(dv);
	struct cpu_info *ci = sc->sc_info;
	int err;

	if (ci->ci_flags & CPUF_PRIMARY)
		return true;
	if (ci->ci_data.cpu_idlelwp == NULL)
		return true;
	if ((ci->ci_flags & CPUF_PRESENT) == 0)
		return true;

	sc->sc_wasonline = !(ci->ci_schedstate.spc_flags & SPCF_OFFLINE);

	if (sc->sc_wasonline) {
		mutex_enter(&cpu_lock);
		err = cpu_setstate(ci, false);
		mutex_exit(&cpu_lock);

		if (err)
			return false;
	}

	return true;
}

static bool
cpu_resume(device_t dv, pmf_qual_t qual)
{
	struct cpu_softc *sc = device_private(dv);
	struct cpu_info *ci = sc->sc_info;
	int err = 0;

	if (ci->ci_flags & CPUF_PRIMARY)
		return true;
	if (ci->ci_data.cpu_idlelwp == NULL)
		return true;
	if ((ci->ci_flags & CPUF_PRESENT) == 0)
		return true;

	if (sc->sc_wasonline) {
		mutex_enter(&cpu_lock);
		err = cpu_setstate(ci, true);
		mutex_exit(&cpu_lock);
	}

	return err == 0;
}
#endif

void
cpu_get_tsc_freq(struct cpu_info *ci)
{
	const volatile vcpu_time_info_t *tinfo = &ci->ci_vcpu->time;
	uint64_t freq;

	delay(1000000);

	/*
	 * Invert Xen's time record: system time advances by
	 * (tsc_delta << tsc_shift) * tsc_to_system_mul / 2^32 ns,
	 * so the TSC frequency is (10^9 << 32) / tsc_to_system_mul,
	 * corrected by tsc_shift in the opposite direction.
	 */
	freq = (1000000000ULL << 32) / (uint64_t)tinfo->tsc_to_system_mul;
	if (tinfo->tsc_shift < 0)
		freq = freq << -tinfo->tsc_shift;
	else
		freq = freq >> tinfo->tsc_shift;
	ci->ci_data.cpu_cc_freq = freq;
}

void
x86_cpu_idle_xen(void)
{
	struct cpu_info *ci = curcpu();

	KASSERT(ci->ci_ilevel == IPL_NONE);

	/*
	 * Disable interrupts before checking ci_want_resched so a
	 * wakeup cannot slip in between the check and the block;
	 * idle_block() is expected to re-enable them when it yields
	 * to the hypervisor.
	 */
	x86_disable_intr();
	if (!__predict_false(ci->ci_want_resched)) {
		idle_block();
	} else {
		x86_enable_intr();
	}
}