1 /* $NetBSD: cpu.c,v 1.77 2012/01/09 04:39:14 cherry Exp $ */ 2 /* NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp */ 3 4 /*- 5 * Copyright (c) 2000 The NetBSD Foundation, Inc. 6 * Copyright (c) 2002, 2006, 2007 YAMAMOTO Takashi, 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to The NetBSD Foundation 10 * by RedBack Networks Inc. 11 * 12 * Author: Bill Sommerfeld 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /* 37 * Copyright (c) 1999 Stefan Grefen 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. All advertising materials mentioning features or use of this software 48 * must display the following acknowledgement: 49 * This product includes software developed by the NetBSD 50 * Foundation, Inc. and its contributors. 51 * 4. Neither the name of The NetBSD Foundation nor the names of its 52 * contributors may be used to endorse or promote products derived 53 * from this software without specific prior written permission. 54 * 55 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY 56 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 57 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 58 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE 59 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 60 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 61 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 62 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 63 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 64 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 65 * SUCH DAMAGE. 66 */ 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.77 2012/01/09 04:39:14 cherry Exp $"); 70 71 #include "opt_ddb.h" 72 #include "opt_multiprocessor.h" 73 #include "opt_mpbios.h" /* for MPDEBUG */ 74 #include "opt_mtrr.h" 75 #include "opt_xen.h" 76 77 #include "lapic.h" 78 #include "ioapic.h" 79 80 #include <sys/param.h> 81 #include <sys/proc.h> 82 #include <sys/systm.h> 83 #include <sys/device.h> 84 #include <sys/kmem.h> 85 #include <sys/cpu.h> 86 #include <sys/cpufreq.h> 87 #include <sys/atomic.h> 88 #include <sys/reboot.h> 89 #include <sys/idle.h> 90 91 #include <uvm/uvm.h> 92 93 #include <machine/cpufunc.h> 94 #include <machine/cpuvar.h> 95 #include <machine/pmap.h> 96 #include <machine/vmparam.h> 97 #include <machine/mpbiosvar.h> 98 #include <machine/pcb.h> 99 #include <machine/specialreg.h> 100 #include <machine/segments.h> 101 #include <machine/gdt.h> 102 #include <machine/mtrr.h> 103 #include <machine/pio.h> 104 105 #ifdef i386 106 #include <machine/npx.h> 107 #else 108 #include <machine/fpu.h> 109 #endif 110 111 #include <xen/xen.h> 112 #include <xen/xen-public/vcpu.h> 113 #include <xen/vcpuvar.h> 114 115 #if NLAPIC > 0 116 #include <machine/apicvar.h> 117 #include <machine/i82489reg.h> 118 #include <machine/i82489var.h> 119 #endif 120 121 #include <dev/ic/mc146818reg.h> 122 #include <dev/isa/isareg.h> 123 124 #if MAXCPUS > 32 125 #error cpu_info contains 32bit bitmasks 126 #endif 127 128 static int cpu_match(device_t, cfdata_t, void *); 129 static void cpu_attach(device_t, device_t, void *); 130 static void cpu_defer(device_t); 131 static int cpu_rescan(device_t, const char *, const int *); 132 static void cpu_childdetached(device_t, device_t); 133 static int vcpu_match(device_t, cfdata_t, void *); 134 static void vcpu_attach(device_t, device_t, void *); 135 static void cpu_attach_common(device_t, device_t, void *); 136 void cpu_offline_md(void); 137 138 struct cpu_softc { 139 device_t sc_dev; /* device tree glue */ 140 struct cpu_info *sc_info; /* pointer to CPU info */ 141 bool sc_wasonline; 142 }; 143 144 int mp_cpu_start(struct cpu_info *, vaddr_t); 145 void mp_cpu_start_cleanup(struct cpu_info *); 146 const struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL, 147 mp_cpu_start_cleanup }; 148 149 CFATTACH_DECL2_NEW(cpu, sizeof(struct cpu_softc), 150 cpu_match, cpu_attach, NULL, NULL, cpu_rescan, cpu_childdetached); 151 152 CFATTACH_DECL_NEW(vcpu, sizeof(struct cpu_softc), 153 vcpu_match, vcpu_attach, NULL, NULL); 154 155 /* 156 * Statically-allocated CPU info for the primary CPU (or the only 157 * CPU, on uniprocessors). The CPU info list is initialized to 158 * point at it. 159 */ 160 #ifdef TRAPLOG 161 #include <machine/tlog.h> 162 struct tlog tlog_primary; 163 #endif 164 struct cpu_info cpu_info_primary __aligned(CACHE_LINE_SIZE) = { 165 .ci_dev = 0, 166 .ci_self = &cpu_info_primary, 167 .ci_idepth = -1, 168 .ci_curlwp = &lwp0, 169 .ci_curldt = -1, 170 .ci_cpumask = 1, 171 #ifdef TRAPLOG 172 .ci_tlog = &tlog_primary, 173 #endif 174 175 }; 176 struct cpu_info phycpu_info_primary __aligned(CACHE_LINE_SIZE) = { 177 .ci_dev = 0, 178 .ci_self = &phycpu_info_primary, 179 }; 180 181 struct cpu_info *cpu_info_list = &cpu_info_primary; 182 struct cpu_info *phycpu_info_list = &phycpu_info_primary; 183 184 uint32_t cpus_attached = 1; 185 uint32_t cpus_running = 1; 186 187 uint32_t phycpus_attached = 0; 188 uint32_t phycpus_running = 0; 189 190 uint32_t cpu_feature[5]; /* X86 CPUID feature bits 191 * [0] basic features %edx 192 * [1] basic features %ecx 193 * [2] extended features %edx 194 * [3] extended features %ecx 195 * [4] VIA padlock features 196 */ 197 198 bool x86_mp_online; 199 paddr_t mp_trampoline_paddr = MP_TRAMPOLINE; 200 201 #if defined(MULTIPROCESSOR) 202 void cpu_hatch(void *); 203 static void cpu_boot_secondary(struct cpu_info *ci); 204 static void cpu_start_secondary(struct cpu_info *ci); 205 #endif /* MULTIPROCESSOR */ 206 207 static int 208 cpu_match(device_t parent, cfdata_t match, void *aux) 209 { 210 211 return 1; 212 } 213 214 static void 215 cpu_attach(device_t parent, device_t self, void *aux) 216 { 217 struct cpu_softc *sc = device_private(self); 218 struct cpu_attach_args *caa = aux; 219 struct cpu_info *ci; 220 uintptr_t ptr; 221 static int nphycpu = 0; 222 223 sc->sc_dev = self; 224 225 if (phycpus_attached == ~0) { 226 aprint_error(": increase MAXCPUS\n"); 227 return; 228 } 229 230 /* 231 * If we're an Application Processor, allocate a cpu_info 232 * If we're the first attached CPU use the primary cpu_info, 233 * otherwise allocate a new one 234 */ 235 aprint_naive("\n"); 236 aprint_normal("\n"); 237 if (nphycpu > 0) { 238 struct cpu_info *tmp; 239 ptr = (uintptr_t)kmem_zalloc(sizeof(*ci) + CACHE_LINE_SIZE - 1, 240 KM_SLEEP); 241 ci = (struct cpu_info *)roundup2(ptr, CACHE_LINE_SIZE); 242 ci->ci_curldt = -1; 243 244 tmp = phycpu_info_list; 245 while (tmp->ci_next) 246 tmp = tmp->ci_next; 247 248 tmp->ci_next = ci; 249 } else { 250 ci = &phycpu_info_primary; 251 } 252 253 ci->ci_self = ci; 254 sc->sc_info = ci; 255 256 ci->ci_dev = self; 257 ci->ci_acpiid = caa->cpu_id; 258 ci->ci_cpuid = caa->cpu_number; 259 ci->ci_vcpu = NULL; 260 ci->ci_index = nphycpu++; 261 ci->ci_cpumask = (1 << cpu_index(ci)); 262 263 atomic_or_32(&phycpus_attached, ci->ci_cpumask); 264 265 if (!pmf_device_register(self, NULL, NULL)) 266 aprint_error_dev(self, "couldn't establish power handler\n"); 267 268 (void)config_defer(self, cpu_defer); 269 } 270 271 static void 272 cpu_defer(device_t self) 273 { 274 cpu_rescan(self, NULL, NULL); 275 } 276 277 static int 278 cpu_rescan(device_t self, const char *ifattr, const int *locators) 279 { 280 struct cpu_softc *sc = device_private(self); 281 struct cpufeature_attach_args cfaa; 282 struct cpu_info *ci = sc->sc_info; 283 284 memset(&cfaa, 0, sizeof(cfaa)); 285 cfaa.ci = ci; 286 287 if (ifattr_match(ifattr, "cpufeaturebus")) { 288 289 if (ci->ci_frequency == NULL) { 290 cfaa.name = "frequency"; 291 ci->ci_frequency = config_found_ia(self, 292 "cpufeaturebus", &cfaa, NULL); 293 } 294 } 295 296 return 0; 297 } 298 299 static void 300 cpu_childdetached(device_t self, device_t child) 301 { 302 struct cpu_softc *sc = device_private(self); 303 struct cpu_info *ci = sc->sc_info; 304 305 if (ci->ci_frequency == child) 306 ci->ci_frequency = NULL; 307 } 308 309 static int 310 vcpu_match(device_t parent, cfdata_t match, void *aux) 311 { 312 struct vcpu_attach_args *vcaa = aux; 313 struct vcpu_runstate_info vcr; 314 int error; 315 316 if (strcmp(vcaa->vcaa_name, match->cf_name) == 0) { 317 error = HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, 318 vcaa->vcaa_caa.cpu_number, 319 &vcr); 320 switch (error) { 321 case 0: 322 return 1; 323 case -ENOENT: 324 return 0; 325 default: 326 panic("Unknown hypervisor error %d returned on vcpu runstate probe\n", error); 327 } 328 } 329 330 return 0; 331 } 332 333 static void 334 vcpu_attach(device_t parent, device_t self, void *aux) 335 { 336 struct vcpu_attach_args *vcaa = aux; 337 338 KASSERT(vcaa->vcaa_caa.cpu_func == NULL); 339 vcaa->vcaa_caa.cpu_func = &mp_cpu_funcs; 340 cpu_attach_common(parent, self, &vcaa->vcaa_caa); 341 342 if (!pmf_device_register(self, NULL, NULL)) 343 aprint_error_dev(self, "couldn't establish power handler\n"); 344 } 345 346 static int 347 vcpu_is_up(struct cpu_info *ci) 348 { 349 KASSERT(ci != NULL); 350 return HYPERVISOR_vcpu_op(VCPUOP_is_up, ci->ci_cpuid, NULL); 351 } 352 353 static void 354 cpu_vm_init(struct cpu_info *ci) 355 { 356 int ncolors = 2, i; 357 358 for (i = CAI_ICACHE; i <= CAI_L2CACHE; i++) { 359 struct x86_cache_info *cai; 360 int tcolors; 361 362 cai = &ci->ci_cinfo[i]; 363 364 tcolors = atop(cai->cai_totalsize); 365 switch(cai->cai_associativity) { 366 case 0xff: 367 tcolors = 1; /* fully associative */ 368 break; 369 case 0: 370 case 1: 371 break; 372 default: 373 tcolors /= cai->cai_associativity; 374 } 375 ncolors = max(ncolors, tcolors); 376 } 377 378 /* 379 * Knowing the size of the largest cache on this CPU, potentially 380 * re-color our pages. 381 */ 382 aprint_debug_dev(ci->ci_dev, "%d page colors\n", ncolors); 383 uvm_page_recolor(ncolors); 384 } 385 386 static void 387 cpu_attach_common(device_t parent, device_t self, void *aux) 388 { 389 struct cpu_softc *sc = device_private(self); 390 struct cpu_attach_args *caa = aux; 391 struct cpu_info *ci; 392 uintptr_t ptr; 393 int cpunum = caa->cpu_number; 394 static bool again = false; 395 396 sc->sc_dev = self; 397 398 /* 399 * If we're an Application Processor, allocate a cpu_info 400 * structure, otherwise use the primary's. 401 */ 402 if (caa->cpu_role == CPU_ROLE_AP) { 403 aprint_naive(": Application Processor\n"); 404 ptr = (uintptr_t)kmem_alloc(sizeof(*ci) + CACHE_LINE_SIZE - 1, 405 KM_SLEEP); 406 ci = (struct cpu_info *)roundup2(ptr, CACHE_LINE_SIZE); 407 memset(ci, 0, sizeof(*ci)); 408 #ifdef TRAPLOG 409 ci->ci_tlog_base = kmem_zalloc(sizeof(struct tlog), KM_SLEEP); 410 #endif 411 } else { 412 aprint_naive(": %s Processor\n", 413 caa->cpu_role == CPU_ROLE_SP ? "Single" : "Boot"); 414 ci = &cpu_info_primary; 415 } 416 417 ci->ci_self = ci; 418 sc->sc_info = ci; 419 ci->ci_dev = self; 420 ci->ci_cpuid = cpunum; 421 422 KASSERT(HYPERVISOR_shared_info != NULL); 423 ci->ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[cpunum]; 424 425 KASSERT(ci->ci_func == 0); 426 ci->ci_func = caa->cpu_func; 427 428 /* Must be called before mi_cpu_attach(). */ 429 cpu_vm_init(ci); 430 431 if (caa->cpu_role == CPU_ROLE_AP) { 432 int error; 433 434 error = mi_cpu_attach(ci); 435 436 KASSERT(ci->ci_data.cpu_idlelwp != NULL); 437 if (error != 0) { 438 aprint_normal("\n"); 439 aprint_error_dev(self, 440 "mi_cpu_attach failed with %d\n", error); 441 return; 442 } 443 444 } else { 445 KASSERT(ci->ci_data.cpu_idlelwp != NULL); 446 } 447 448 ci->ci_cpumask = (1 << cpu_index(ci)); 449 pmap_reference(pmap_kernel()); 450 ci->ci_pmap = pmap_kernel(); 451 ci->ci_tlbstate = TLBSTATE_STALE; 452 453 /* 454 * Boot processor may not be attached first, but the below 455 * must be done to allow booting other processors. 456 */ 457 if (!again) { 458 atomic_or_32(&ci->ci_flags, CPUF_PRESENT | CPUF_PRIMARY); 459 /* Basic init. */ 460 cpu_intr_init(ci); 461 cpu_get_tsc_freq(ci); 462 cpu_init(ci); 463 pmap_cpu_init_late(ci); /* XXX: cosmetic */ 464 465 /* Every processor needs to init it's own ipi h/w (similar to lapic) */ 466 xen_ipi_init(); 467 /* XXX: clock_init() */ 468 469 /* Make sure DELAY() is initialized. */ 470 DELAY(1); 471 again = true; 472 } 473 474 /* further PCB init done later. */ 475 476 switch (caa->cpu_role) { 477 case CPU_ROLE_SP: 478 atomic_or_32(&ci->ci_flags, CPUF_SP); 479 cpu_identify(ci); 480 #if 0 481 x86_errata(); 482 #endif 483 x86_cpu_idle_init(); 484 485 break; 486 487 case CPU_ROLE_BP: 488 atomic_or_32(&ci->ci_flags, CPUF_BSP); 489 cpu_identify(ci); 490 cpu_init(ci); 491 #if 0 492 x86_errata(); 493 #endif 494 x86_cpu_idle_init(); 495 496 break; 497 498 case CPU_ROLE_AP: 499 atomic_or_32(&ci->ci_flags, CPUF_AP); 500 501 /* 502 * report on an AP 503 */ 504 505 #if defined(MULTIPROCESSOR) 506 /* interrupt handler stack */ 507 cpu_intr_init(ci); 508 509 /* Setup per-cpu memory for gdt */ 510 gdt_alloc_cpu(ci); 511 512 pmap_cpu_init_late(ci); 513 cpu_start_secondary(ci); 514 515 if (ci->ci_flags & CPUF_PRESENT) { 516 struct cpu_info *tmp; 517 518 cpu_identify(ci); 519 tmp = cpu_info_list; 520 while (tmp->ci_next) 521 tmp = tmp->ci_next; 522 523 tmp->ci_next = ci; 524 } 525 #else 526 aprint_error(": not started\n"); 527 #endif 528 break; 529 530 default: 531 aprint_normal("\n"); 532 panic("unknown processor type??\n"); 533 } 534 535 pat_init(ci); 536 atomic_or_32(&cpus_attached, ci->ci_cpumask); 537 538 #ifdef MPVERBOSE 539 if (mp_verbose) { 540 struct lwp *l = ci->ci_data.cpu_idlelwp; 541 struct pcb *pcb = lwp_getpcb(l); 542 543 aprint_verbose_dev(self, 544 "idle lwp at %p, idle sp at 0x%p\n", 545 l, 546 #ifdef i386 547 (void *)pcb->pcb_esp 548 #else /* i386 */ 549 (void *)pcb->pcb_rsp 550 #endif /* i386 */ 551 ); 552 553 } 554 #endif /* MPVERBOSE */ 555 } 556 557 /* 558 * Initialize the processor appropriately. 559 */ 560 561 void 562 cpu_init(struct cpu_info *ci) 563 { 564 565 /* 566 * On a P6 or above, enable global TLB caching if the 567 * hardware supports it. 568 */ 569 if (cpu_feature[0] & CPUID_PGE) 570 lcr4(rcr4() | CR4_PGE); /* enable global TLB caching */ 571 572 #ifdef XXXMTRR 573 /* 574 * On a P6 or above, initialize MTRR's if the hardware supports them. 575 */ 576 if (cpu_feature[0] & CPUID_MTRR) { 577 if ((ci->ci_flags & CPUF_AP) == 0) 578 i686_mtrr_init_first(); 579 mtrr_init_cpu(ci); 580 } 581 #endif 582 /* 583 * If we have FXSAVE/FXRESTOR, use them. 584 */ 585 if (cpu_feature[0] & CPUID_FXSR) { 586 lcr4(rcr4() | CR4_OSFXSR); 587 588 /* 589 * If we have SSE/SSE2, enable XMM exceptions. 590 */ 591 if (cpu_feature[0] & (CPUID_SSE|CPUID_SSE2)) 592 lcr4(rcr4() | CR4_OSXMMEXCPT); 593 } 594 595 #ifdef __x86_64__ 596 /* No user PGD mapped for this CPU yet */ 597 ci->ci_xen_current_user_pgd = 0; 598 #endif 599 600 atomic_or_32(&cpus_running, ci->ci_cpumask); 601 atomic_or_32(&ci->ci_flags, CPUF_RUNNING); 602 603 /* XXX: register vcpu_register_runstate_memory_area, and figure out how to make sure this VCPU is running ? */ 604 } 605 606 607 #ifdef MULTIPROCESSOR 608 609 void 610 cpu_boot_secondary_processors(void) 611 { 612 struct cpu_info *ci; 613 u_long i; 614 for (i = 0; i < maxcpus; i++) { 615 ci = cpu_lookup(i); 616 if (ci == NULL) 617 continue; 618 if (ci->ci_data.cpu_idlelwp == NULL) 619 continue; 620 if ((ci->ci_flags & CPUF_PRESENT) == 0) 621 continue; 622 if (ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY)) 623 continue; 624 cpu_boot_secondary(ci); 625 } 626 627 x86_mp_online = true; 628 } 629 630 static void 631 cpu_init_idle_lwp(struct cpu_info *ci) 632 { 633 struct lwp *l = ci->ci_data.cpu_idlelwp; 634 struct pcb *pcb = lwp_getpcb(l); 635 636 pcb->pcb_cr0 = rcr0(); 637 } 638 639 void 640 cpu_init_idle_lwps(void) 641 { 642 struct cpu_info *ci; 643 u_long i; 644 645 for (i = 0; i < maxcpus; i++) { 646 ci = cpu_lookup(i); 647 if (ci == NULL) 648 continue; 649 if (ci->ci_data.cpu_idlelwp == NULL) 650 continue; 651 if ((ci->ci_flags & CPUF_PRESENT) == 0) 652 continue; 653 cpu_init_idle_lwp(ci); 654 } 655 } 656 657 static void 658 cpu_start_secondary(struct cpu_info *ci) 659 { 660 int i; 661 662 aprint_debug_dev(ci->ci_dev, "starting\n"); 663 664 ci->ci_curlwp = ci->ci_data.cpu_idlelwp; 665 666 if (CPU_STARTUP(ci, (vaddr_t) cpu_hatch) != 0) { 667 return; 668 } 669 670 /* 671 * wait for it to become ready 672 */ 673 for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT)) && i > 0; i--) { 674 delay(10); 675 } 676 if ((ci->ci_flags & CPUF_PRESENT) == 0) { 677 aprint_error_dev(ci->ci_dev, "failed to become ready\n"); 678 #if defined(MPDEBUG) && defined(DDB) 679 printf("dropping into debugger; continue from here to resume boot\n"); 680 Debugger(); 681 #endif 682 } 683 684 CPU_START_CLEANUP(ci); 685 } 686 687 void 688 cpu_boot_secondary(struct cpu_info *ci) 689 { 690 int i; 691 atomic_or_32(&ci->ci_flags, CPUF_GO); 692 for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i > 0; i--) { 693 delay(10); 694 } 695 if ((ci->ci_flags & CPUF_RUNNING) == 0) { 696 aprint_error_dev(ci->ci_dev, "CPU failed to start\n"); 697 #if defined(MPDEBUG) && defined(DDB) 698 printf("dropping into debugger; continue from here to resume boot\n"); 699 Debugger(); 700 #endif 701 } 702 } 703 704 /* 705 * APs end up here immediately after initialisation and VCPUOP_up in 706 * mp_cpu_start(). 707 * At this point, we are running in the idle pcb/idle stack of the new 708 * CPU. This function jumps to the idle loop and starts looking for 709 * work. 710 */ 711 extern void x86_64_tls_switch(struct lwp *); 712 void 713 cpu_hatch(void *v) 714 { 715 struct cpu_info *ci = (struct cpu_info *)v; 716 struct pcb *pcb; 717 int s, i; 718 719 /* Setup TLS and kernel GS/FS */ 720 cpu_init_msrs(ci, true); 721 cpu_init_idt(); 722 gdt_init_cpu(ci); 723 724 cpu_probe(ci); 725 726 atomic_or_32(&ci->ci_flags, CPUF_PRESENT); 727 728 while ((ci->ci_flags & CPUF_GO) == 0) { 729 /* Don't use delay, boot CPU may be patching the text. */ 730 for (i = 10000; i != 0; i--) 731 x86_pause(); 732 } 733 734 /* Because the text may have been patched in x86_patch(). */ 735 x86_flush(); 736 tlbflushg(); 737 738 KASSERT((ci->ci_flags & CPUF_RUNNING) == 0); 739 740 pcb = lwp_getpcb(curlwp); 741 pcb->pcb_cr3 = pmap_pdirpa(pmap_kernel(), 0); /* XXX: consider using pmap_load() ? */ 742 pcb = lwp_getpcb(ci->ci_data.cpu_idlelwp); 743 744 xen_ipi_init(); 745 746 xen_initclocks(); 747 748 /* XXX: lapic_initclocks(); */ 749 750 #ifdef __x86_64__ 751 fpuinit(ci); 752 #endif 753 754 lldt(GSEL(GLDT_SEL, SEL_KPL)); 755 756 cpu_init(ci); 757 cpu_get_tsc_freq(ci); 758 759 s = splhigh(); 760 x86_enable_intr(); 761 splx(s); 762 #if 0 763 x86_errata(); 764 #endif 765 766 aprint_debug_dev(ci->ci_dev, "running\n"); 767 768 cpu_switchto(NULL, ci->ci_data.cpu_idlelwp, true); 769 770 panic("switch to idle_loop context returned!\n"); 771 /* NOTREACHED */ 772 } 773 774 #if defined(DDB) 775 776 #include <ddb/db_output.h> 777 #include <machine/db_machdep.h> 778 779 /* 780 * Dump CPU information from ddb. 781 */ 782 void 783 cpu_debug_dump(void) 784 { 785 struct cpu_info *ci; 786 CPU_INFO_ITERATOR cii; 787 788 db_printf("addr dev id flags ipis curlwp fpcurlwp\n"); 789 for (CPU_INFO_FOREACH(cii, ci)) { 790 db_printf("%p %s %ld %x %x %10p %10p\n", 791 ci, 792 ci->ci_dev == NULL ? "BOOT" : device_xname(ci->ci_dev), 793 (long)ci->ci_cpuid, 794 ci->ci_flags, ci->ci_ipis, 795 ci->ci_curlwp, 796 ci->ci_fpcurlwp); 797 } 798 } 799 #endif /* DDB */ 800 801 #endif /* MULTIPROCESSOR */ 802 803 extern void hypervisor_callback(void); 804 extern void failsafe_callback(void); 805 #ifdef __x86_64__ 806 typedef void (vector)(void); 807 extern vector Xsyscall, Xsyscall32; 808 #endif 809 810 /* 811 * Setup the "trampoline". On Xen, we setup nearly all cpu context 812 * outside a trampoline, so we prototype and call targetip like so: 813 * void targetip(struct cpu_info *); 814 */ 815 816 static void 817 gdt_prepframes(paddr_t *frames, vaddr_t base, uint32_t entries) 818 { 819 int i; 820 for (i = 0; i < roundup(entries, PAGE_SIZE) >> PAGE_SHIFT; i++) { 821 822 frames[i] = ((paddr_t) xpmap_ptetomach( 823 (pt_entry_t *) (base + (i << PAGE_SHIFT)))) 824 >> PAGE_SHIFT; 825 826 /* Mark Read-only */ 827 pmap_pte_clearbits(kvtopte(base + (i << PAGE_SHIFT)), 828 PG_RW); 829 } 830 } 831 832 #ifdef __x86_64__ 833 extern char *ldtstore; /* XXX: Xen MP todo */ 834 835 static void 836 xen_init_amd64_vcpuctxt(struct cpu_info *ci, 837 struct vcpu_guest_context *initctx, 838 void targetrip(struct cpu_info *)) 839 { 840 /* page frames to point at GDT */ 841 extern int gdt_size; 842 paddr_t frames[16]; 843 psize_t gdt_ents; 844 845 struct lwp *l; 846 struct pcb *pcb; 847 848 volatile struct vcpu_info *vci; 849 850 KASSERT(ci != NULL); 851 KASSERT(ci != &cpu_info_primary); 852 KASSERT(initctx != NULL); 853 KASSERT(targetrip != NULL); 854 855 memset(initctx, 0, sizeof *initctx); 856 857 gdt_ents = roundup(gdt_size, PAGE_SIZE) >> PAGE_SHIFT; /* XXX: re-investigate roundup(gdt_size... ) for gdt_ents. */ 858 KASSERT(gdt_ents <= 16); 859 860 gdt_prepframes(frames, (vaddr_t) ci->ci_gdt, gdt_ents); 861 862 /* XXX: The stuff in here is amd64 specific. move to mptramp.[Sc] ? */ 863 864 /* Initialise the vcpu context: We use idle_loop()'s pcb context. */ 865 866 l = ci->ci_data.cpu_idlelwp; 867 868 KASSERT(l != NULL); 869 pcb = lwp_getpcb(l); 870 KASSERT(pcb != NULL); 871 872 /* resume with interrupts off */ 873 vci = ci->ci_vcpu; 874 vci->evtchn_upcall_mask = 1; 875 xen_mb(); 876 877 /* resume in kernel-mode */ 878 initctx->flags = VGCF_in_kernel | VGCF_online; 879 880 /* Stack and entry points: 881 * We arrange for the stack frame for cpu_hatch() to 882 * appear as a callee frame of lwp_trampoline(). Being a 883 * leaf frame prevents trampling on any of the MD stack setup 884 * that x86/vm_machdep.c:cpu_lwp_fork() does for idle_loop() 885 */ 886 887 initctx->user_regs.rdi = (uint64_t) ci; /* targetrip(ci); */ 888 initctx->user_regs.rip = (vaddr_t) targetrip; 889 890 initctx->user_regs.cs = GSEL(GCODE_SEL, SEL_KPL); 891 892 initctx->user_regs.rflags = pcb->pcb_flags; 893 initctx->user_regs.rsp = pcb->pcb_rsp; 894 895 /* Data segments */ 896 initctx->user_regs.ss = GSEL(GDATA_SEL, SEL_KPL); 897 initctx->user_regs.es = GSEL(GDATA_SEL, SEL_KPL); 898 initctx->user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); 899 900 /* GDT */ 901 memcpy(initctx->gdt_frames, frames, sizeof frames); 902 initctx->gdt_ents = gdt_ents; 903 904 /* LDT */ 905 initctx->ldt_base = (unsigned long) ldtstore; 906 initctx->ldt_ents = LDT_SIZE >> 3; 907 908 /* Kernel context state */ 909 initctx->kernel_ss = GSEL(GDATA_SEL, SEL_KPL); 910 initctx->kernel_sp = pcb->pcb_rsp0; 911 initctx->ctrlreg[0] = pcb->pcb_cr0; 912 initctx->ctrlreg[1] = 0; /* "resuming" from kernel - no User cr3. */ 913 initctx->ctrlreg[2] = pcb->pcb_cr2; /* XXX: */ 914 /* 915 * Use pmap_kernel() L4 PD directly, until we setup the 916 * per-cpu L4 PD in pmap_cpu_init_late() 917 */ 918 initctx->ctrlreg[3] = xen_pfn_to_cr3(x86_btop(xpmap_ptom(ci->ci_kpm_pdirpa))); 919 initctx->ctrlreg[4] = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT; 920 921 922 /* Xen callbacks */ 923 initctx->event_callback_eip = (unsigned long) hypervisor_callback; 924 initctx->failsafe_callback_eip = (unsigned long) failsafe_callback; 925 initctx->syscall_callback_eip = (unsigned long) Xsyscall; 926 927 return; 928 } 929 #else /* i386 */ 930 extern union descriptor *ldt; 931 extern void Xsyscall(void); 932 933 static void 934 xen_init_i386_vcpuctxt(struct cpu_info *ci, 935 struct vcpu_guest_context *initctx, 936 void targeteip(struct cpu_info *)) 937 { 938 /* page frames to point at GDT */ 939 extern int gdt_size; 940 paddr_t frames[16]; 941 psize_t gdt_ents; 942 943 struct lwp *l; 944 struct pcb *pcb; 945 946 volatile struct vcpu_info *vci; 947 948 KASSERT(ci != NULL); 949 KASSERT(ci != &cpu_info_primary); 950 KASSERT(initctx != NULL); 951 KASSERT(targeteip != NULL); 952 953 memset(initctx, 0, sizeof *initctx); 954 955 gdt_ents = roundup(gdt_size, PAGE_SIZE) >> PAGE_SHIFT; /* XXX: re-investigate roundup(gdt_size... ) for gdt_ents. */ 956 KASSERT(gdt_ents <= 16); 957 958 gdt_prepframes(frames, (vaddr_t) ci->ci_gdt, gdt_ents); 959 960 /* 961 * Initialise the vcpu context: 962 * We use this cpu's idle_loop() pcb context. 963 */ 964 965 l = ci->ci_data.cpu_idlelwp; 966 967 KASSERT(l != NULL); 968 pcb = lwp_getpcb(l); 969 KASSERT(pcb != NULL); 970 971 /* resume with interrupts off */ 972 vci = ci->ci_vcpu; 973 vci->evtchn_upcall_mask = 1; 974 xen_mb(); 975 976 /* resume in kernel-mode */ 977 initctx->flags = VGCF_in_kernel | VGCF_online; 978 979 /* Stack frame setup for cpu_hatch(): 980 * We arrange for the stack frame for cpu_hatch() to 981 * appear as a callee frame of lwp_trampoline(). Being a 982 * leaf frame prevents trampling on any of the MD stack setup 983 * that x86/vm_machdep.c:cpu_lwp_fork() does for idle_loop() 984 */ 985 986 initctx->user_regs.esp = pcb->pcb_esp - 4; /* Leave word for 987 arg1 */ 988 { /* targeteip(ci); */ 989 uint32_t *arg = (uint32_t *) initctx->user_regs.esp; 990 arg[1] = (uint32_t) ci; /* arg1 */ 991 992 } 993 994 initctx->user_regs.eip = (vaddr_t) targeteip; 995 initctx->user_regs.cs = GSEL(GCODE_SEL, SEL_KPL); 996 initctx->user_regs.eflags |= pcb->pcb_iopl; 997 998 /* Data segments */ 999 initctx->user_regs.ss = GSEL(GDATA_SEL, SEL_KPL); 1000 initctx->user_regs.es = GSEL(GDATA_SEL, SEL_KPL); 1001 initctx->user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); 1002 initctx->user_regs.fs = GSEL(GDATA_SEL, SEL_KPL); 1003 1004 /* GDT */ 1005 memcpy(initctx->gdt_frames, frames, sizeof frames); 1006 initctx->gdt_ents = gdt_ents; 1007 1008 /* LDT */ 1009 initctx->ldt_base = (unsigned long) ldt; 1010 initctx->ldt_ents = NLDT; 1011 1012 /* Kernel context state */ 1013 initctx->kernel_ss = GSEL(GDATA_SEL, SEL_KPL); 1014 initctx->kernel_sp = pcb->pcb_esp0; 1015 initctx->ctrlreg[0] = pcb->pcb_cr0; 1016 initctx->ctrlreg[1] = 0; /* "resuming" from kernel - no User cr3. */ 1017 initctx->ctrlreg[2] = pcb->pcb_cr2; /* XXX: */ 1018 #ifdef PAE 1019 initctx->ctrlreg[3] = xen_pfn_to_cr3(x86_btop(xpmap_ptom(ci->ci_pae_l3_pdirpa))); 1020 #else /* PAE */ 1021 initctx->ctrlreg[3] = xen_pfn_to_cr3(x86_btop(xpmap_ptom(pcb->pcb_cr3))); 1022 #endif /* PAE */ 1023 initctx->ctrlreg[4] = /* CR4_PAE | */CR4_OSFXSR | CR4_OSXMMEXCPT; 1024 1025 1026 /* Xen callbacks */ 1027 initctx->event_callback_eip = (unsigned long) hypervisor_callback; 1028 initctx->event_callback_cs = GSEL(GCODE_SEL, SEL_KPL); 1029 initctx->failsafe_callback_eip = (unsigned long) failsafe_callback; 1030 initctx->failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL); 1031 1032 return; 1033 } 1034 #endif /* __x86_64__ */ 1035 1036 int 1037 mp_cpu_start(struct cpu_info *ci, vaddr_t target) 1038 { 1039 1040 int hyperror; 1041 struct vcpu_guest_context vcpuctx; 1042 1043 KASSERT(ci != NULL); 1044 KASSERT(ci != &cpu_info_primary); 1045 KASSERT(ci->ci_flags & CPUF_AP); 1046 1047 #ifdef __x86_64__ 1048 xen_init_amd64_vcpuctxt(ci, &vcpuctx, (void (*)(struct cpu_info *))target); 1049 #else /* i386 */ 1050 xen_init_i386_vcpuctxt(ci, &vcpuctx, (void (*)(struct cpu_info *))target); 1051 #endif /* __x86_64__ */ 1052 1053 /* Initialise the given vcpu to execute cpu_hatch(ci); */ 1054 if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_initialise, ci->ci_cpuid, &vcpuctx))) { 1055 aprint_error(": context initialisation failed. errno = %d\n", hyperror); 1056 return hyperror; 1057 } 1058 1059 /* Start it up */ 1060 1061 /* First bring it down */ 1062 if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_down, ci->ci_cpuid, NULL))) { 1063 aprint_error(": VCPUOP_down hypervisor command failed. errno = %d\n", hyperror); 1064 return hyperror; 1065 } 1066 1067 if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_up, ci->ci_cpuid, NULL))) { 1068 aprint_error(": VCPUOP_up hypervisor command failed. errno = %d\n", hyperror); 1069 return hyperror; 1070 } 1071 1072 if (!vcpu_is_up(ci)) { 1073 aprint_error(": did not come up\n"); 1074 return -1; 1075 } 1076 1077 return 0; 1078 } 1079 1080 void 1081 mp_cpu_start_cleanup(struct cpu_info *ci) 1082 { 1083 #if 0 1084 /* 1085 * Ensure the NVRAM reset byte contains something vaguely sane. 1086 */ 1087 1088 outb(IO_RTC, NVRAM_RESET); 1089 outb(IO_RTC+1, NVRAM_RESET_RST); 1090 #endif 1091 if (vcpu_is_up(ci)) { 1092 aprint_debug_dev(ci->ci_dev, "is started.\n"); 1093 } 1094 else { 1095 aprint_error_dev(ci->ci_dev, "did not start up.\n"); 1096 } 1097 1098 } 1099 1100 /* curcpu() uses %fs - shim for until cpu_init_msrs(), below */ 1101 static struct cpu_info *cpu_primary(void) 1102 { 1103 return &cpu_info_primary; 1104 } 1105 /* XXX: rename to something more generic. users other than xpq exist */ 1106 struct cpu_info * (*xpq_cpu)(void) = cpu_primary; 1107 1108 void 1109 cpu_init_msrs(struct cpu_info *ci, bool full) 1110 { 1111 #ifdef __x86_64__ 1112 if (full) { 1113 HYPERVISOR_set_segment_base (SEGBASE_FS, 0); 1114 HYPERVISOR_set_segment_base (SEGBASE_GS_KERNEL, (uint64_t) ci); 1115 HYPERVISOR_set_segment_base (SEGBASE_GS_USER, 0); 1116 xpq_cpu = x86_curcpu; 1117 } 1118 #endif /* __x86_64__ */ 1119 1120 if (cpu_feature[2] & CPUID_NOX) 1121 wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE); 1122 1123 } 1124 1125 void 1126 cpu_offline_md(void) 1127 { 1128 int s; 1129 1130 s = splhigh(); 1131 #ifdef __i386__ 1132 npxsave_cpu(true); 1133 #else 1134 fpusave_cpu(true); 1135 #endif 1136 splx(s); 1137 } 1138 1139 void 1140 cpu_get_tsc_freq(struct cpu_info *ci) 1141 { 1142 uint32_t vcpu_tversion; 1143 const volatile vcpu_time_info_t *tinfo = &ci->ci_vcpu->time; 1144 1145 vcpu_tversion = tinfo->version; 1146 while (tinfo->version == vcpu_tversion); /* Wait for a time update. XXX: timeout ? */ 1147 1148 uint64_t freq = 1000000000ULL << 32; 1149 freq = freq / (uint64_t)tinfo->tsc_to_system_mul; 1150 if ( tinfo->tsc_shift < 0 ) 1151 freq = freq << -tinfo->tsc_shift; 1152 else 1153 freq = freq >> tinfo->tsc_shift; 1154 ci->ci_data.cpu_cc_freq = freq; 1155 } 1156 1157 void 1158 x86_cpu_idle_xen(void) 1159 { 1160 struct cpu_info *ci = curcpu(); 1161 1162 KASSERT(ci->ci_ilevel == IPL_NONE); 1163 1164 x86_disable_intr(); 1165 if (!__predict_false(ci->ci_want_resched)) { 1166 idle_block(); 1167 } else { 1168 x86_enable_intr(); 1169 } 1170 } 1171 1172 /* 1173 * Loads pmap for the current CPU. 1174 */ 1175 void 1176 cpu_load_pmap(struct pmap *pmap) 1177 { 1178 #ifdef i386 1179 #ifdef PAE 1180 int i, s; 1181 struct cpu_info *ci; 1182 1183 s = splvm(); /* just to be safe */ 1184 ci = curcpu(); 1185 paddr_t l3_pd = xpmap_ptom_masked(ci->ci_pae_l3_pdirpa); 1186 /* don't update the kernel L3 slot */ 1187 for (i = 0 ; i < PDP_SIZE - 1; i++) { 1188 xpq_queue_pte_update(l3_pd + i * sizeof(pd_entry_t), 1189 xpmap_ptom(pmap->pm_pdirpa[i]) | PG_V); 1190 } 1191 splx(s); 1192 tlbflush(); 1193 #else /* PAE */ 1194 lcr3(pmap_pdirpa(pmap, 0)); 1195 #endif /* PAE */ 1196 #endif /* i386 */ 1197 1198 #ifdef __x86_64__ 1199 int i, s; 1200 pd_entry_t *new_pgd; 1201 struct cpu_info *ci; 1202 paddr_t l4_pd_ma; 1203 1204 ci = curcpu(); 1205 l4_pd_ma = xpmap_ptom_masked(ci->ci_kpm_pdirpa); 1206 1207 /* 1208 * Map user space address in kernel space and load 1209 * user cr3 1210 */ 1211 s = splvm(); 1212 new_pgd = pmap->pm_pdir; 1213 1214 /* Copy user pmap L4 PDEs (in user addr. range) to per-cpu L4 */ 1215 for (i = 0; i < PDIR_SLOT_PTE; i++) { 1216 xpq_queue_pte_update(l4_pd_ma + i * sizeof(pd_entry_t), new_pgd[i]); 1217 } 1218 1219 if (__predict_true(pmap != pmap_kernel())) { 1220 xen_set_user_pgd(pmap_pdirpa(pmap, 0)); 1221 ci->ci_xen_current_user_pgd = pmap_pdirpa(pmap, 0); 1222 } 1223 else { 1224 xpq_queue_pt_switch(l4_pd_ma); 1225 ci->ci_xen_current_user_pgd = 0; 1226 } 1227 1228 tlbflush(); 1229 splx(s); 1230 1231 #endif /* __x86_64__ */ 1232 } 1233 1234 /* 1235 * pmap_cpu_init_late: perform late per-CPU initialization. 1236 * Short note about percpu PDIR pages: 1237 * Both the PAE and __x86_64__ architectures have per-cpu PDIR 1238 * tables. This is to get around Xen's pagetable setup constraints for 1239 * PAE (multiple L3[3]s cannot point to the same L2 - Xen 1240 * will refuse to pin a table setup this way.) and for multiple cpus 1241 * to map in different user pmaps on __x86_64__ (see: cpu_load_pmap()) 1242 * 1243 * What this means for us is that the PDIR of the pmap_kernel() is 1244 * considered to be a canonical "SHADOW" PDIR with the following 1245 * properties: 1246 * - Its recursive mapping points to itself 1247 * - per-cpu recurseive mappings point to themselves 1248 * - per-cpu L4 pages' kernel entries are expected to be in sync with 1249 * the shadow 1250 * - APDP_PDE_SHADOW accesses the shadow pdir 1251 * - APDP_PDE accesses the per-cpu pdir 1252 * - alternate mappings are considered per-cpu - however, x86 pmap 1253 * currently partially consults the shadow - this works because the 1254 * shadow PDE is updated together with the per-cpu entry (see: 1255 * xen_pmap.c: pmap_map_ptes(), and the pmap is locked while the 1256 * alternate ptes are mapped in. 1257 */ 1258 1259 void 1260 pmap_cpu_init_late(struct cpu_info *ci) 1261 { 1262 #if defined(PAE) || defined(__x86_64__) 1263 /* 1264 * The BP has already its own PD page allocated during early 1265 * MD startup. 1266 */ 1267 1268 if (ci == &cpu_info_primary) 1269 return; 1270 1271 KASSERT(ci != NULL); 1272 1273 #if defined(PAE) 1274 cpu_alloc_l3_page(ci); 1275 KASSERT(ci->ci_pae_l3_pdirpa != 0); 1276 1277 /* Initialise L2 entries 0 - 2: Point them to pmap_kernel() */ 1278 int i; 1279 for (i = 0 ; i < PDP_SIZE - 1; i++) { 1280 ci->ci_pae_l3_pdir[i] = 1281 xpmap_ptom_masked(pmap_kernel()->pm_pdirpa[i]) | PG_V; 1282 } 1283 #endif /* PAE */ 1284 1285 ci->ci_kpm_pdir = (pd_entry_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 1286 UVM_KMF_WIRED | UVM_KMF_ZERO | UVM_KMF_NOWAIT); 1287 1288 if (ci->ci_kpm_pdir == NULL) { 1289 panic("%s: failed to allocate L4 per-cpu PD for CPU %d\n", 1290 __func__, cpu_index(ci)); 1291 } 1292 ci->ci_kpm_pdirpa = vtophys((vaddr_t) ci->ci_kpm_pdir); 1293 KASSERT(ci->ci_kpm_pdirpa != 0); 1294 1295 #if defined(__x86_64__) 1296 /* 1297 * Copy over the pmap_kernel() shadow L4 entries 1298 */ 1299 1300 memcpy(ci->ci_kpm_pdir, pmap_kernel()->pm_pdir, PAGE_SIZE); 1301 1302 /* Recursive kernel mapping */ 1303 ci->ci_kpm_pdir[PDIR_SLOT_PTE] = xpmap_ptom_masked(ci->ci_kpm_pdirpa) | PG_k | PG_V; 1304 #elif defined(PAE) 1305 /* Copy over the pmap_kernel() shadow L2 entries that map the kernel */ 1306 memcpy(ci->ci_kpm_pdir, pmap_kernel()->pm_pdir + PDIR_SLOT_KERN, nkptp[PTP_LEVELS - 1] * sizeof(pd_entry_t)); 1307 #endif /* __x86_64__ else PAE */ 1308 1309 /* Xen wants R/O */ 1310 pmap_kenter_pa((vaddr_t)ci->ci_kpm_pdir, ci->ci_kpm_pdirpa, 1311 VM_PROT_READ, 0); 1312 1313 #if defined(PAE) 1314 /* Initialise L3 entry 3. This mapping is shared across all 1315 * pmaps and is static, ie; loading a new pmap will not update 1316 * this entry. 1317 */ 1318 1319 ci->ci_pae_l3_pdir[3] = xpmap_ptom_masked(ci->ci_kpm_pdirpa) | PG_k | PG_V; 1320 1321 /* Mark L3 R/O (Xen wants this) */ 1322 pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa, 1323 VM_PROT_READ, 0); 1324 1325 xpq_queue_pin_l3_table(xpmap_ptom_masked(ci->ci_pae_l3_pdirpa)); 1326 1327 #elif defined(__x86_64__) 1328 xpq_queue_pin_l4_table(xpmap_ptom_masked(ci->ci_kpm_pdirpa)); 1329 #endif /* PAE */ 1330 #endif /* defined(PAE) || defined(__x86_64__) */ 1331 } 1332 1333 /* 1334 * Notify all other cpus to halt. 1335 */ 1336 1337 void 1338 cpu_broadcast_halt(void) 1339 { 1340 xen_broadcast_ipi(XEN_IPI_HALT); 1341 } 1342 1343 /* 1344 * Send a dummy ipi to a cpu. 1345 */ 1346 1347 void 1348 cpu_kick(struct cpu_info *ci) 1349 { 1350 (void)xen_send_ipi(ci, XEN_IPI_KICK); 1351 } 1352