/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */

#include <sys/types.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/cpu.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/note.h>
#include <sys/asm_linkage.h>
#include <sys/x_call.h>
#include <sys/systm.h>
#include <sys/var.h>
#include <sys/vtrace.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <sys/segments.h>
#include <sys/kmem.h>
#include <sys/stack.h>
#include <sys/smp_impldefs.h>
#include <sys/x86_archext.h>
#include <sys/machsystm.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/cpc_impl.h>
#include <sys/pg.h>
#include <sys/cmt.h>
#include <sys/dtrace.h>
#include <sys/archsystm.h>
#include <sys/fp.h>
#include <sys/reboot.h>
#include <sys/kdi_machimpl.h>
#include <vm/hat_i86.h>
#include <vm/vm_dep.h>
#include <sys/memnode.h>
#include <sys/pci_cfgspace.h>
#include <sys/mach_mmu.h>
#include <sys/sysmacros.h>
#if defined(__xpv)
#include <sys/hypervisor.h>
#endif
#include <sys/cpu_module.h>

struct cpu	cpus[1];			/* CPU data */
struct cpu	*cpu[NCPU] = {&cpus[0]};	/* pointers to all CPUs */
struct cpu	*cpu_free_list;			/* list for released CPUs */
cpu_core_t	cpu_core[NCPU];			/* cpu_core structures */

/*
 * cpu_free_list is threaded through the cpu_prev field of released cpu
 * structures; a structure on the free list is off the active cpu list,
 * so that link can be safely reused.
 */
#define	cpu_next_free	cpu_prev

/*
 * Useful for disabling MP bring-up on an MP-capable system.
 */
int use_mp = 1;

/*
 * To be set by a PSM to indicate which cpus are present on the system.
 */
cpuset_t mp_cpus;

/*
 * This variable is used by the hat layer to decide whether or not
 * critical sections are needed to prevent race conditions.  It is set
 * once enough MP initialization has been done to allow cross calls.
 */
int flushes_require_xcalls;

cpuset_t cpu_ready_set;		/* initialized in startup() */
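
/*
 * mp_startup_boot() and mp_startup_hotplug() are the first C functions
 * run by a newly started CPU; mp_cpu_configure_common() points the new
 * CPU's startup thread at one or the other depending on whether the CPU
 * is brought up at boot time or by a later DR (hotplug) operation.
 */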

static void mp_startup_boot(void);
static void mp_startup_hotplug(void);

static void cpu_sep_enable(void);
static void cpu_sep_disable(void);
static void cpu_asysc_enable(void);
static void cpu_asysc_disable(void);

/*
 * Init CPU info - get CPU type info for processor_info system call.
 */
void
init_cpu_info(struct cpu *cp)
{
	processor_info_t *pi = &cp->cpu_type_info;

	/*
	 * Get clock-frequency property for the CPU.
	 */
	pi->pi_clock = cpu_freq;

	/*
	 * Current frequency in Hz.
	 */
	cp->cpu_curr_clock = cpu_freq_hz;

	/*
	 * Supported frequencies.
	 */
	if (cp->cpu_supp_freqs == NULL) {
		cpu_set_supp_freqs(cp, NULL);
	}

	(void) strcpy(pi->pi_processor_type, "i386");
	if (fpu_exists)
		(void) strcpy(pi->pi_fputypes, "i387 compatible");

	cp->cpu_idstr = kmem_zalloc(CPU_IDSTRLEN, KM_SLEEP);
	cp->cpu_brandstr = kmem_zalloc(CPU_IDSTRLEN, KM_SLEEP);

	/*
	 * If called for the BSP, cp is equal to current CPU.
	 * For non-BSPs, cpuid info of cp is not ready yet, so use cpuid info
	 * of current CPU as default values for cpu_idstr and cpu_brandstr.
	 * They will be corrected in mp_startup_common() after cpuid_pass1()
	 * has been invoked on target CPU.
	 */
	(void) cpuid_getidstr(CPU, cp->cpu_idstr, CPU_IDSTRLEN);
	(void) cpuid_getbrandstr(CPU, cp->cpu_brandstr, CPU_IDSTRLEN);
}

/*
 * Configure syscall support on this CPU.
 */
/*ARGSUSED*/
void
init_cpu_syscall(struct cpu *cp)
{
	kpreempt_disable();

#if defined(__amd64)
	if ((x86_feature & (X86_MSR | X86_ASYSC)) == (X86_MSR | X86_ASYSC)) {

#if !defined(__lint)
		/*
		 * The syscall instruction imposes a certain ordering on
		 * segment selectors, so we double-check that ordering
		 * here.
		 */
		ASSERT(KDS_SEL == KCS_SEL + 8);
		ASSERT(UDS_SEL == U32CS_SEL + 8);
		ASSERT(UCS_SEL == U32CS_SEL + 16);
#endif
		/*
		 * Turn syscall/sysret extensions on.
		 */
		cpu_asysc_enable();

		/*
		 * Program the magic registers: STAR[47:32] holds the kernel
		 * %cs selector loaded by syscall (with %ss taken at +8), and
		 * STAR[63:48] the base selector from which sysret rebuilds
		 * the user %cs/%ss (hence the selector ASSERTs above).
		 */
		wrmsr(MSR_AMD_STAR,
		    ((uint64_t)(U32CS_SEL << 16 | KCS_SEL)) << 32);
		wrmsr(MSR_AMD_LSTAR, (uint64_t)(uintptr_t)sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uint64_t)(uintptr_t)sys_syscall32);

		/*
		 * This list of flags is masked off the incoming
		 * %rfl when we enter the kernel.
		 */
		wrmsr(MSR_AMD_SFMASK, (uint64_t)(uintptr_t)(PS_IE | PS_T));
	}
#endif

	/*
	 * On 32-bit kernels, we use sysenter/sysexit because it's too
	 * hard to use syscall/sysret, and it is more portable anyway.
	 *
	 * On 64-bit kernels on Nocona machines, the 32-bit syscall
	 * variant isn't available to 32-bit applications, but sysenter is.
	 */
	if ((x86_feature & (X86_MSR | X86_SEP)) == (X86_MSR | X86_SEP)) {

#if !defined(__lint)
		/*
		 * The sysenter instruction imposes a certain ordering on
		 * segment selectors, so we double-check that ordering
		 * here.  See "sysenter" in Intel document 245471-012, "IA-32
See "sysenter" in Intel document 245471-012, "IA-32 2150Sstevel@tonic-gate * Intel Architecture Software Developer's Manual Volume 2: 2160Sstevel@tonic-gate * Instruction Set Reference" 2170Sstevel@tonic-gate */ 2180Sstevel@tonic-gate ASSERT(KDS_SEL == KCS_SEL + 8); 2190Sstevel@tonic-gate 2200Sstevel@tonic-gate ASSERT32(UCS_SEL == ((KCS_SEL + 16) | 3)); 2210Sstevel@tonic-gate ASSERT32(UDS_SEL == UCS_SEL + 8); 2220Sstevel@tonic-gate 2230Sstevel@tonic-gate ASSERT64(U32CS_SEL == ((KCS_SEL + 16) | 3)); 2240Sstevel@tonic-gate ASSERT64(UDS_SEL == U32CS_SEL + 8); 2250Sstevel@tonic-gate #endif 2260Sstevel@tonic-gate 2270Sstevel@tonic-gate cpu_sep_enable(); 2280Sstevel@tonic-gate 2290Sstevel@tonic-gate /* 2300Sstevel@tonic-gate * resume() sets this value to the base of the threads stack 2310Sstevel@tonic-gate * via a context handler. 2320Sstevel@tonic-gate */ 2333446Smrj wrmsr(MSR_INTC_SEP_ESP, 0); 234770Skucharsk wrmsr(MSR_INTC_SEP_EIP, (uint64_t)(uintptr_t)sys_sysenter); 2350Sstevel@tonic-gate } 2360Sstevel@tonic-gate 2370Sstevel@tonic-gate kpreempt_enable(); 2380Sstevel@tonic-gate } 2390Sstevel@tonic-gate 2400Sstevel@tonic-gate /* 2410Sstevel@tonic-gate * Multiprocessor initialization. 2420Sstevel@tonic-gate * 2430Sstevel@tonic-gate * Allocate and initialize the cpu structure, TRAPTRACE buffer, and the 2440Sstevel@tonic-gate * startup and idle threads for the specified CPU. 245*12004Sjiang.liu@intel.com * Parameter boot is true for boot time operations and is false for CPU 246*12004Sjiang.liu@intel.com * DR operations. 2470Sstevel@tonic-gate */ 248*12004Sjiang.liu@intel.com static struct cpu * 249*12004Sjiang.liu@intel.com mp_cpu_configure_common(int cpun, boolean_t boot) 2500Sstevel@tonic-gate { 2510Sstevel@tonic-gate struct cpu *cp; 2520Sstevel@tonic-gate kthread_id_t tp; 2530Sstevel@tonic-gate caddr_t sp; 2540Sstevel@tonic-gate proc_t *procp; 2555084Sjohnlev #if !defined(__xpv) 2565045Sbholler extern int idle_cpu_prefer_mwait; 2578906SEric.Saxe@Sun.COM extern void cpu_idle_mwait(); 2585084Sjohnlev #endif 2590Sstevel@tonic-gate extern void idle(); 2608906SEric.Saxe@Sun.COM extern void cpu_idle(); 2610Sstevel@tonic-gate 2620Sstevel@tonic-gate #ifdef TRAPTRACE 2630Sstevel@tonic-gate trap_trace_ctl_t *ttc = &trap_trace_ctl[cpun]; 2640Sstevel@tonic-gate #endif 2650Sstevel@tonic-gate 266*12004Sjiang.liu@intel.com ASSERT(MUTEX_HELD(&cpu_lock)); 2670Sstevel@tonic-gate ASSERT(cpun < NCPU && cpu[cpun] == NULL); 2680Sstevel@tonic-gate 269*12004Sjiang.liu@intel.com if (cpu_free_list == NULL) { 270*12004Sjiang.liu@intel.com cp = kmem_zalloc(sizeof (*cp), KM_SLEEP); 271*12004Sjiang.liu@intel.com } else { 272*12004Sjiang.liu@intel.com cp = cpu_free_list; 273*12004Sjiang.liu@intel.com cpu_free_list = cp->cpu_next_free; 274*12004Sjiang.liu@intel.com } 2754481Sbholler 27611330SFrank.Vanderlinden@Sun.COM cp->cpu_m.mcpu_istamp = cpun << 16; 27711330SFrank.Vanderlinden@Sun.COM 278*12004Sjiang.liu@intel.com /* Create per CPU specific threads in the process p0. */ 279*12004Sjiang.liu@intel.com procp = &p0; 2800Sstevel@tonic-gate 2810Sstevel@tonic-gate /* 2820Sstevel@tonic-gate * Initialize the dispatcher first. 2830Sstevel@tonic-gate */ 2840Sstevel@tonic-gate disp_cpu_init(cp); 2850Sstevel@tonic-gate 286414Skchow cpu_vm_data_init(cp); 287414Skchow 2880Sstevel@tonic-gate /* 2890Sstevel@tonic-gate * Allocate and initialize the startup thread for this CPU. 2900Sstevel@tonic-gate * Interrupt and process switch stacks get allocated later 2910Sstevel@tonic-gate * when the CPU starts running. 

	/*
	 * Allocate and initialize the startup thread for this CPU.
	 * Interrupt and process switch stacks get allocated later
	 * when the CPU starts running.
	 */
	tp = thread_create(NULL, 0, NULL, NULL, 0, procp,
	    TS_STOPPED, maxclsyspri);

	/*
	 * Set state to TS_ONPROC since this thread will start running
	 * as soon as the CPU comes online.
	 *
	 * All the other fields of the thread structure are setup by
	 * thread_create().
	 */
	THREAD_ONPROC(tp, cp);
	tp->t_preempt = 1;
	tp->t_bound_cpu = cp;
	tp->t_affinitycnt = 1;
	tp->t_cpu = cp;
	tp->t_disp_queue = cp->cpu_disp;

	/*
	 * Setup thread to start in mp_startup_common.
	 */
	sp = tp->t_stk;
	tp->t_sp = (uintptr_t)(sp - MINFRAME);
#if defined(__amd64)
	tp->t_sp -= STACK_ENTRY_ALIGN;		/* fake a call */
#endif
	/*
	 * Setup thread start entry point for boot or hotplug.
	 */
	if (boot) {
		tp->t_pc = (uintptr_t)mp_startup_boot;
	} else {
		tp->t_pc = (uintptr_t)mp_startup_hotplug;
	}

	cp->cpu_id = cpun;
	cp->cpu_self = cp;
	cp->cpu_thread = tp;
	cp->cpu_lwp = NULL;
	cp->cpu_dispthread = tp;
	cp->cpu_dispatch_pri = DISP_PRIO(tp);

	/*
	 * cpu_base_spl must be set explicitly here to prevent any blocking
	 * operations in mp_startup_common from causing the spl of the cpu
	 * to drop to 0 (allowing device interrupts before we're ready) in
	 * resume().
	 * cpu_base_spl MUST remain at LOCK_LEVEL until the cpu is CPU_READY.
	 * As an extra bit of security on DEBUG kernels, this is enforced with
	 * an assertion in mp_startup_common() -- before cpu_base_spl is set
	 * to its proper value.
	 */
	cp->cpu_base_spl = ipltospl(LOCK_LEVEL);

	/*
	 * Now, initialize per-CPU idle thread for this CPU.
	 */
	tp = thread_create(NULL, PAGESIZE, idle, NULL, 0, procp, TS_ONPROC, -1);

	cp->cpu_idle_thread = tp;

	tp->t_preempt = 1;
	tp->t_bound_cpu = cp;
	tp->t_affinitycnt = 1;
	tp->t_cpu = cp;
	tp->t_disp_queue = cp->cpu_disp;

	/*
	 * Bootstrap the CPU's PG data
	 */
	pg_cpu_bootstrap(cp);

	/*
	 * Perform CPC initialization on the new CPU.
	 */
	kcpc_hw_init(cp);

	/*
	 * Allocate virtual addresses for cpu_caddr1 and cpu_caddr2
	 * for each CPU.
	 */
	setup_vaddr_for_ppcopy(cp);

	/*
	 * Allocate page for new GDT and initialize from current GDT.
	 */
#if !defined(__lint)
	ASSERT((sizeof (*cp->cpu_gdt) * NGDT) <= PAGESIZE);
#endif
	cp->cpu_gdt = kmem_zalloc(PAGESIZE, KM_SLEEP);
	bcopy(CPU->cpu_gdt, cp->cpu_gdt, (sizeof (*cp->cpu_gdt) * NGDT));

#if defined(__i386)
	/*
	 * setup kernel %gs.
	 */
	set_usegd(&cp->cpu_gdt[GDT_GS], cp, sizeof (struct cpu) -1, SDT_MEMRWA,
	    SEL_KPL, 0, 1);
#endif

	/*
	 * If we have more than one node, each cpu gets a copy of IDT
	 * local to its node. If this is a Pentium box, we use cpu 0's
	 * IDT. cpu 0's IDT has been made read-only to work around the
	 * cmpxchgl register bug
	 */
	if (system_hardware.hd_nodes && x86_type != X86_TYPE_P5) {
#if !defined(__lint)
		ASSERT((sizeof (*CPU->cpu_idt) * NIDT) <= PAGESIZE);
#endif
		cp->cpu_idt = kmem_zalloc(PAGESIZE, KM_SLEEP);
		bcopy(CPU->cpu_idt, cp->cpu_idt, PAGESIZE);
	} else {
		cp->cpu_idt = CPU->cpu_idt;
	}

	/*
	 * Get interrupt priority data from cpu 0.
	 */
	cp->cpu_pri_data = CPU->cpu_pri_data;

	/*
	 * alloc space for cpuid info
	 */
	cpuid_alloc_space(cp);
#if !defined(__xpv)
	/*
	 * Prefer the MONITOR/MWAIT-based idle loop when the CPU supports
	 * it (and the idle_cpu_prefer_mwait tunable allows it).
	 */
	if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait) {
		cp->cpu_m.mcpu_mwait = cpuid_mwait_alloc(cp);
		cp->cpu_m.mcpu_idle_cpu = cpu_idle_mwait;
	} else
#endif
		cp->cpu_m.mcpu_idle_cpu = cpu_idle;

	init_cpu_info(cp);

	/*
	 * alloc space for ucode_info
	 */
	ucode_alloc_space(cp);
	xc_init_cpu(cp);
	hat_cpu_online(cp);

#ifdef TRAPTRACE
	/*
	 * If this is a TRAPTRACE kernel, allocate TRAPTRACE buffers
	 */
	ttc->ttc_first = (uintptr_t)kmem_zalloc(trap_trace_bufsize, KM_SLEEP);
	ttc->ttc_next = ttc->ttc_first;
	ttc->ttc_limit = ttc->ttc_first + trap_trace_bufsize;
#endif

	/*
	 * Record that we have another CPU.
	 */
	/*
	 * Initialize the interrupt threads for this CPU
	 */
	cpu_intr_alloc(cp, NINTR_THREADS);

	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
	cpu_set_state(cp);

	/*
	 * Add CPU to list of available CPUs.  It'll be on the active list
	 * after mp_startup_common().
	 */
	cpu_add_unit(cp);

	return (cp);
}

/*
 * Undo what was done in mp_cpu_configure_common
 */
static void
mp_cpu_unconfigure_common(struct cpu *cp, int error)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Remove the CPU from the list of available CPUs.
	 */
	cpu_del_unit(cp->cpu_id);

	if (error == ETIMEDOUT) {
		/*
		 * The cpu was started, but never *seemed* to run any
		 * code in the kernel; it's probably off spinning in its
		 * own private world, though with potential references to
		 * our kmem-allocated IDTs and GDTs (for example).
		 *
		 * Worse still, it may actually wake up some time later,
		 * so rather than guess what it might or might not do, we
		 * leave the fundamental data structures intact.
		 */
		cp->cpu_flags = 0;
		return;
	}

	/*
	 * At this point, the only threads bound to this CPU should be
	 * special per-cpu threads: its idle thread, its pause threads,
	 * and its interrupt threads.  Clean these up.
	 */
	cpu_destroy_bound_threads(cp);
	cp->cpu_idle_thread = NULL;

	/*
	 * Free the interrupt stack.
	 */
	segkp_release(segkp,
	    cp->cpu_intr_stack - (INTR_STACK_SIZE - SA(MINFRAME)));
	cp->cpu_intr_stack = NULL;

#ifdef TRAPTRACE
	/*
	 * Discard the trap trace buffer
	 */
	{
		trap_trace_ctl_t *ttc = &trap_trace_ctl[cp->cpu_id];

		kmem_free((void *)ttc->ttc_first, trap_trace_bufsize);
		ttc->ttc_first = NULL;
	}
#endif

	hat_cpu_offline(cp);

	ucode_free_space(cp);

	/* Free CPU ID string and brand string. */
	if (cp->cpu_idstr) {
		kmem_free(cp->cpu_idstr, CPU_IDSTRLEN);
		cp->cpu_idstr = NULL;
	}
	if (cp->cpu_brandstr) {
		kmem_free(cp->cpu_brandstr, CPU_IDSTRLEN);
		cp->cpu_brandstr = NULL;
	}

#if !defined(__xpv)
	if (cp->cpu_m.mcpu_mwait != NULL) {
		cpuid_mwait_free(cp);
		cp->cpu_m.mcpu_mwait = NULL;
	}
#endif
	cpuid_free_space(cp);

	if (cp->cpu_idt != CPU->cpu_idt)
		kmem_free(cp->cpu_idt, PAGESIZE);
	cp->cpu_idt = NULL;

	kmem_free(cp->cpu_gdt, PAGESIZE);
	cp->cpu_gdt = NULL;

	if (cp->cpu_supp_freqs != NULL) {
		size_t len = strlen(cp->cpu_supp_freqs) + 1;
		kmem_free(cp->cpu_supp_freqs, len);
		cp->cpu_supp_freqs = NULL;
	}

	teardown_vaddr_for_ppcopy(cp);

	kcpc_hw_fini(cp);

	cp->cpu_dispthread = NULL;
	cp->cpu_thread = NULL;	/* discarded by cpu_destroy_bound_threads() */

	cpu_vm_data_destroy(cp);

	xc_fini_cpu(cp);
	disp_cpu_fini(cp);

	ASSERT(cp != CPU0);
	bzero(cp, sizeof (*cp));
	cp->cpu_next_free = cpu_free_list;
	cpu_free_list = cp;
}

/*
 * Apply workarounds for known errata, and warn about those that are absent.
 *
 * System vendors occasionally create configurations which contain different
 * revisions of the CPUs that are almost but not exactly the same.  At the
 * time of writing, this meant that their clock rates were the same, their
 * feature sets were the same, but the required workarounds were -not-
 * necessarily the same.  So, this routine is invoked on -every- CPU soon
 * after starting to make sure that the resulting system contains the most
 * pessimal set of workarounds needed to cope with *any* of the CPUs in the
 * system.
 *
 * workaround_errata is invoked early in mlsetup() for CPU 0, and in
 * mp_startup_common() for all slave CPUs. Slaves process workaround_errata
 * prior to acknowledging their readiness to the master, so this routine will
 * never be executed by multiple CPUs in parallel, thus making updates to
 * global data safe.
 *
 * These workarounds are based on Rev 3.57 of the Revision Guide for
 * AMD Athlon(tm) 64 and AMD Opteron(tm) Processors, August 2005.
 */
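
/*
 * Each opteron_erratum_* / opteron_workaround_* counter below is bumped
 * on every CPU found to be affected; workaround_errata_end() later
 * reports which workarounds were applied and warns where a BIOS update
 * is still required.
 */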

#if defined(OPTERON_ERRATUM_88)
int opteron_erratum_88;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_91)
int opteron_erratum_91;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_93)
int opteron_erratum_93;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_95)
int opteron_erratum_95;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_100)
int opteron_erratum_100;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_108)
int opteron_erratum_108;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_109)
int opteron_erratum_109;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_121)
int opteron_erratum_121;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_122)
int opteron_erratum_122;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_123)
int opteron_erratum_123;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_131)
int opteron_erratum_131;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_WORKAROUND_6336786)
int opteron_workaround_6336786;		/* non-zero -> WA relevant and applied */
int opteron_workaround_6336786_UP = 0;	/* Not needed for UP */
#endif

#if defined(OPTERON_WORKAROUND_6323525)
int opteron_workaround_6323525;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_298)
int opteron_erratum_298;
#endif

static void
workaround_warning(cpu_t *cp, uint_t erratum)
{
	cmn_err(CE_WARN, "cpu%d: no workaround for erratum %u",
	    cp->cpu_id, erratum);
}

static void
workaround_applied(uint_t erratum)
{
	if (erratum > 1000000)
		cmn_err(CE_CONT, "?workaround applied for cpu issue #%d\n",
		    erratum);
	else
		cmn_err(CE_CONT, "?workaround applied for cpu erratum #%d\n",
		    erratum);
}

static void
msr_warning(cpu_t *cp, const char *rw, uint_t msr, int error)
{
	cmn_err(CE_WARN, "cpu%d: couldn't %smsr 0x%x, error %d",
	    cp->cpu_id, rw, msr, error);
}

/*
 * Determine the number of nodes in a Hammer / Greyhound / Griffin family
 * system.
 */
static uint_t
opteron_get_nnodes(void)
{
	static uint_t nnodes = 0;

	if (nnodes == 0) {
#ifdef	DEBUG
		uint_t family;

		/*
		 * This routine uses a PCI config space based mechanism
		 * for retrieving the number of nodes in the system.
		 * Device 24, function 0, offset 0x60 as used here is not
		 * AMD processor architectural, and may not work on processor
		 * families other than those listed below.
		 *
		 * Callers of this routine must ensure that we're running on
		 * a processor which supports this mechanism.
		 * The assertion below is meant to catch calls on unsupported
		 * processors.
		 */
		family = cpuid_getfamily(CPU);
		ASSERT(family == 0xf || family == 0x10 || family == 0x11);
#endif	/* DEBUG */

		/*
		 * Obtain the number of nodes in the system from
		 * bits [6:4] of the Node ID register on node 0.
		 *
		 * The actual node count is NodeID[6:4] + 1
		 *
		 * The Node ID register is accessed via function 0,
		 * offset 0x60. Node 0 is device 24.
		 */
		nnodes = ((pci_getl_func(0, 24, 0, 0x60) & 0x70) >> 4) + 1;
	}
	return (nnodes);
}
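
/*
 * Detect erratum 298 and verify that the BIOS workaround for it is in
 * place, preferring the OSVW (OS Visible Workaround) interface and
 * falling back to cpuid-based detection.  Returns the number of missing
 * workarounds (0 or 1).
 */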
uint_t
do_erratum_298(struct cpu *cpu)
{
	static int osvwrc = -3;	/* -3: not yet probed; cached 0, 1 or -1 after */
	extern int osvw_opteron_erratum(cpu_t *, uint_t);

	/*
	 * L2 Eviction May Occur During Processor Operation To Set
	 * Accessed or Dirty Bit.
	 */
	if (osvwrc == -3) {
		osvwrc = osvw_opteron_erratum(cpu, 298);
	} else {
		/* osvw return codes should be consistent for all cpus */
		ASSERT(osvwrc == osvw_opteron_erratum(cpu, 298));
	}

	switch (osvwrc) {
	case 0:		/* erratum is not present: do nothing */
		break;
	case 1:		/* erratum is present: BIOS workaround applied */
		/*
		 * check if workaround is actually in place and issue warning
		 * if not.
		 */
		if (((rdmsr(MSR_AMD_HWCR) & AMD_HWCR_TLBCACHEDIS) == 0) ||
		    ((rdmsr(MSR_AMD_BU_CFG) & AMD_BU_CFG_E298) == 0)) {
#if defined(OPTERON_ERRATUM_298)
			opteron_erratum_298++;
#else
			workaround_warning(cpu, 298);
			return (1);
#endif
		}
		break;
	case -1:	/* cannot determine via osvw: check cpuid */
		if ((cpuid_opteron_erratum(cpu, 298) > 0) &&
		    (((rdmsr(MSR_AMD_HWCR) & AMD_HWCR_TLBCACHEDIS) == 0) ||
		    ((rdmsr(MSR_AMD_BU_CFG) & AMD_BU_CFG_E298) == 0))) {
#if defined(OPTERON_ERRATUM_298)
			opteron_erratum_298++;
#else
			workaround_warning(cpu, 298);
			return (1);
#endif
		}
		break;
	}
	return (0);
}

uint_t
workaround_errata(struct cpu *cpu)
{
	uint_t missing = 0;

	ASSERT(cpu == CPU);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 88) > 0) {
		/*
		 * SWAPGS May Fail To Read Correct GS Base
		 */
#if defined(OPTERON_ERRATUM_88)
		/*
		 * The workaround is an mfence in the relevant assembler code
		 */
		opteron_erratum_88++;
#else
		workaround_warning(cpu, 88);
		missing++;
#endif
	}

	if (cpuid_opteron_erratum(cpu, 91) > 0) {
		/*
		 * Software Prefetches May Report A Page Fault
		 */
#if defined(OPTERON_ERRATUM_91)
		/*
		 * fix is in trap.c
		 */
		opteron_erratum_91++;
#else
		workaround_warning(cpu, 91);
		missing++;
#endif
	}

	if (cpuid_opteron_erratum(cpu, 93) > 0) {
		/*
		 * RSM Auto-Halt Restart Returns to Incorrect RIP
		 */
#if defined(OPTERON_ERRATUM_93)
		/*
		 * fix is in trap.c
		 */
		opteron_erratum_93++;
#else
		workaround_warning(cpu, 93);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 95) > 0) {
		/*
		 * RET Instruction May Return to Incorrect EIP
		 */
#if defined(OPTERON_ERRATUM_95)
#if defined(_LP64)
		/*
		 * Workaround this by ensuring that 32-bit user code and
		 * 64-bit kernel code never occupy the same address
		 * range mod 4G.
		 */
		if (_userlimit32 > 0xc0000000ul)
			*(uintptr_t *)&_userlimit32 = 0xc0000000ul;

		/*LINTED*/
		ASSERT((uint32_t)COREHEAP_BASE == 0xc0000000u);
		opteron_erratum_95++;
#endif	/* _LP64 */
#else
		workaround_warning(cpu, 95);
		missing++;
#endif
	}

	if (cpuid_opteron_erratum(cpu, 100) > 0) {
		/*
		 * Compatibility Mode Branches Transfer to Illegal Address
		 */
#if defined(OPTERON_ERRATUM_100)
		/*
		 * fix is in trap.c
		 */
		opteron_erratum_100++;
#else
		workaround_warning(cpu, 100);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 108) > 0) {
		/*
		 * CPUID Instruction May Return Incorrect Model Number In
		 * Some Processors
		 */
#if defined(OPTERON_ERRATUM_108)
		/*
		 * (Our cpuid-handling code corrects the model number on
		 * those processors)
		 */
#else
		workaround_warning(cpu, 108);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 109) > 0) do {
		/*
		 * Certain Reverse REP MOVS May Produce Unpredictable Behavior
		 */
#if defined(OPTERON_ERRATUM_109)
		/*
		 * The "workaround" is to print a warning to upgrade the BIOS
		 */
		uint64_t value;
		const uint_t msr = MSR_AMD_PATCHLEVEL;
		int err;

		if ((err = checked_rdmsr(msr, &value)) != 0) {
			msr_warning(cpu, "rd", msr, err);
			workaround_warning(cpu, 109);
			missing++;
		}
		if (value == 0)
			opteron_erratum_109++;
#else
		workaround_warning(cpu, 109);
		missing++;
#endif
	/*CONSTANTCONDITION*/
	} while (0);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 121) > 0) {
		/*
		 * Sequential Execution Across Non_Canonical Boundary Caused
		 * Processor Hang
		 */
#if defined(OPTERON_ERRATUM_121)
#if defined(_LP64)
		/*
		 * Erratum 121 is only present in long (64 bit) mode.
		 * Workaround is to include the page immediately before the
		 * va hole to eliminate the possibility of system hangs due to
		 * sequential execution across the va hole boundary.
		 */
		if (opteron_erratum_121)
			opteron_erratum_121++;
		else {
			if (hole_start) {
				hole_start -= PAGESIZE;
			} else {
				/*
				 * hole_start not yet initialized by
				 * mmu_init. Initialize hole_start
				 * with value to be subtracted.
				 */
				hole_start = PAGESIZE;
			}
			opteron_erratum_121++;
		}
#endif	/* _LP64 */
#else
		workaround_warning(cpu, 121);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 122) > 0) do {
		/*
		 * TLB Flush Filter May Cause Coherency Problem in
		 * Multiprocessor Systems
		 */
#if defined(OPTERON_ERRATUM_122)
		uint64_t value;
		const uint_t msr = MSR_AMD_HWCR;
		int error;

		/*
		 * Erratum 122 is only present in MP configurations (multi-core
		 * or multi-processor).
		 */
#if defined(__xpv)
		if (!DOMAIN_IS_INITDOMAIN(xen_info))
			break;
		if (!opteron_erratum_122 && xpv_nr_phys_cpus() == 1)
			break;
#else
		if (!opteron_erratum_122 && opteron_get_nnodes() == 1 &&
		    cpuid_get_ncpu_per_chip(cpu) == 1)
			break;
#endif
		/* disable TLB Flush Filter */

		if ((error = checked_rdmsr(msr, &value)) != 0) {
			msr_warning(cpu, "rd", msr, error);
			workaround_warning(cpu, 122);
			missing++;
		} else {
			value |= (uint64_t)AMD_HWCR_FFDIS;
			if ((error = checked_wrmsr(msr, value)) != 0) {
				msr_warning(cpu, "wr", msr, error);
				workaround_warning(cpu, 122);
				missing++;
			}
		}
		opteron_erratum_122++;
#else
		workaround_warning(cpu, 122);
		missing++;
#endif
	/*CONSTANTCONDITION*/
	} while (0);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 123) > 0) do {
		/*
		 * Bypassed Reads May Cause Data Corruption or System Hang in
		 * Dual Core Processors
		 */
#if defined(OPTERON_ERRATUM_123)
		uint64_t value;
		const uint_t msr = MSR_AMD_PATCHLEVEL;
		int err;

		/*
		 * Erratum 123 applies only to multi-core cpus.
		 */
		if (cpuid_get_ncpu_per_chip(cpu) < 2)
			break;
#if defined(__xpv)
		if (!DOMAIN_IS_INITDOMAIN(xen_info))
			break;
#endif
		/*
		 * The "workaround" is to print a warning to upgrade the BIOS
		 */
		if ((err = checked_rdmsr(msr, &value)) != 0) {
			msr_warning(cpu, "rd", msr, err);
			workaround_warning(cpu, 123);
			missing++;
		}
		if (value == 0)
			opteron_erratum_123++;
#else
		workaround_warning(cpu, 123);
		missing++;
#endif
	/*CONSTANTCONDITION*/
	} while (0);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 131) > 0) do {
		/*
		 * Multiprocessor Systems with Four or More Cores May Deadlock
		 * Waiting for a Probe Response
		 */
#if defined(OPTERON_ERRATUM_131)
		uint64_t nbcfg;
		const uint_t msr = MSR_AMD_NB_CFG;
		const uint64_t wabits =
		    AMD_NB_CFG_SRQ_HEARTBEAT | AMD_NB_CFG_SRQ_SPR;
		int error;

		/*
		 * Erratum 131 applies to any system with four or more cores.
		 */
		if (opteron_erratum_131)
			break;
#if defined(__xpv)
		if (!DOMAIN_IS_INITDOMAIN(xen_info))
			break;
		if (xpv_nr_phys_cpus() < 4)
			break;
#else
		if (opteron_get_nnodes() * cpuid_get_ncpu_per_chip(cpu) < 4)
			break;
#endif
		/*
		 * Print a warning if neither of the workarounds for
		 * erratum 131 is present.
		 */
		if ((error = checked_rdmsr(msr, &nbcfg)) != 0) {
			msr_warning(cpu, "rd", msr, error);
			workaround_warning(cpu, 131);
			missing++;
		} else if ((nbcfg & wabits) == 0) {
			opteron_erratum_131++;
		} else {
			/* cannot have both workarounds set */
			ASSERT((nbcfg & wabits) != wabits);
		}
#else
		workaround_warning(cpu, 131);
		missing++;
#endif
	/*CONSTANTCONDITION*/
	} while (0);

	/*
	 * This isn't really an erratum, but for convenience the
	 * detection/workaround code lives here and in cpuid_opteron_erratum.
	 */
	if (cpuid_opteron_erratum(cpu, 6336786) > 0) {
#if defined(OPTERON_WORKAROUND_6336786)
		/*
		 * Disable C1-Clock ramping on multi-core/multi-processor
		 * K8 platforms to guard against TSC drift.
		 */
		if (opteron_workaround_6336786) {
			opteron_workaround_6336786++;
#if defined(__xpv)
		} else if ((DOMAIN_IS_INITDOMAIN(xen_info) &&
		    xpv_nr_phys_cpus() > 1) ||
		    opteron_workaround_6336786_UP) {
			/*
			 * XXPV	Hmm.  We can't walk the Northbridges on
			 *	the hypervisor; so just complain and drive
			 *	on.  This probably needs to be fixed in
			 *	the hypervisor itself.
			 */
			opteron_workaround_6336786++;
			workaround_warning(cpu, 6336786);
#else	/* __xpv */
		} else if ((opteron_get_nnodes() *
		    cpuid_get_ncpu_per_chip(cpu) > 1) ||
		    opteron_workaround_6336786_UP) {

			uint_t	node, nnodes;
			uint8_t data;

			nnodes = opteron_get_nnodes();
			for (node = 0; node < nnodes; node++) {
				/*
				 * Clear PMM7[1:0] (function 3, offset 0x87)
				 * Northbridge device is the node id + 24.
				 */
				data = pci_getb_func(0, node + 24, 3, 0x87);
				data &= 0xFC;
				pci_putb_func(0, node + 24, 3, 0x87, data);
			}
			opteron_workaround_6336786++;
#endif	/* __xpv */
		}
#else
		workaround_warning(cpu, 6336786);
		missing++;
#endif
	}

	/*LINTED*/
	/*
	 * Mutex primitives don't work as expected.
	 */
	if (cpuid_opteron_erratum(cpu, 6323525) > 0) {
#if defined(OPTERON_WORKAROUND_6323525)
		/*
		 * This problem only occurs with 2 or more cores. If bit in
		 * MSR_AMD_BU_CFG set, then not applicable. The workaround
		 * is to patch the semaphore routines with the lfence
		 * instruction to provide necessary load memory barrier with
		 * possible subsequent read-modify-write ops.
		 *
		 * It is too early in boot to call the patch routine so
		 * set erratum variable to be done in startup_end().
		 */
		if (opteron_workaround_6323525) {
			opteron_workaround_6323525++;
#if defined(__xpv)
		} else if (x86_feature & X86_SSE2) {
			if (DOMAIN_IS_INITDOMAIN(xen_info)) {
				/*
				 * XXPV	Use dom0_msr here when extended
				 *	operations are supported?
				 */
				if (xpv_nr_phys_cpus() > 1)
					opteron_workaround_6323525++;
			} else {
				/*
				 * We have no way to tell how many physical
				 * cpus there are, or even if this processor
				 * has the problem, so enable the workaround
				 * unconditionally (at some performance cost).
				 */
				opteron_workaround_6323525++;
			}
#else	/* __xpv */
		} else if ((x86_feature & X86_SSE2) && ((opteron_get_nnodes() *
		    cpuid_get_ncpu_per_chip(cpu)) > 1)) {
			if ((xrdmsr(MSR_AMD_BU_CFG) & (UINT64_C(1) << 33)) == 0)
				opteron_workaround_6323525++;
#endif	/* __xpv */
		}
#else
		workaround_warning(cpu, 6323525);
		missing++;
#endif
	}

	missing += do_erratum_298(cpu);

#ifdef __xpv
	return (0);
#else
	return (missing);
#endif
}

void
workaround_errata_end()
{
#if defined(OPTERON_ERRATUM_88)
	if (opteron_erratum_88)
		workaround_applied(88);
#endif
#if defined(OPTERON_ERRATUM_91)
	if (opteron_erratum_91)
		workaround_applied(91);
#endif
#if defined(OPTERON_ERRATUM_93)
	if (opteron_erratum_93)
		workaround_applied(93);
#endif
#if defined(OPTERON_ERRATUM_95)
	if (opteron_erratum_95)
		workaround_applied(95);
#endif
#if defined(OPTERON_ERRATUM_100)
	if (opteron_erratum_100)
		workaround_applied(100);
#endif
#if defined(OPTERON_ERRATUM_108)
	if (opteron_erratum_108)
		workaround_applied(108);
#endif
#if defined(OPTERON_ERRATUM_109)
	if (opteron_erratum_109) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)"
		    " processor\nerratum 109 was not detected; updating your"
		    " system's BIOS to a version\ncontaining this"
		    " microcode patch is HIGHLY recommended or erroneous"
		    " system\noperation may occur.\n");
	}
#endif
#if defined(OPTERON_ERRATUM_121)
	if (opteron_erratum_121)
		workaround_applied(121);
#endif
#if defined(OPTERON_ERRATUM_122)
	if (opteron_erratum_122)
		workaround_applied(122);
#endif
#if defined(OPTERON_ERRATUM_123)
	if (opteron_erratum_123) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)"
		    " processor\nerratum 123 was not detected; updating your"
		    " system's BIOS to a version\ncontaining this"
		    " microcode patch is HIGHLY recommended or erroneous"
		    " system\noperation may occur.\n");
	}
#endif
#if defined(OPTERON_ERRATUM_131)
	if (opteron_erratum_131) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)"
		    " processor\nerratum 131 was not detected; updating your"
		    " system's BIOS to a version\ncontaining this"
		    " microcode patch is HIGHLY recommended or erroneous"
		    " system\noperation may occur.\n");
	}
#endif
#if defined(OPTERON_WORKAROUND_6336786)
	if (opteron_workaround_6336786)
		workaround_applied(6336786);
#endif
#if defined(OPTERON_WORKAROUND_6323525)
	if (opteron_workaround_6323525)
		workaround_applied(6323525);
#endif
#if defined(OPTERON_ERRATUM_298)
	if (opteron_erratum_298) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD 64/Opteron(tm)"
		    " processor\nerratum 298 was not detected; updating your"
		    " system's BIOS to a version\ncontaining this"
		    " microcode patch is HIGHLY recommended or erroneous"
		    " system\noperation may occur.\n");
	}
#endif
}
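
/*
 * CPU startup uses a simple rendezvous protocol: one side atomically
 * adds its cpuid to a cpuset and spins until the other side atomically
 * removes it (see mp_startup_signal() and mp_startup_wait() below), so
 * each crossing of procset_slave or procset_master is a full
 * synchronization point between the control and target CPUs.
 */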

/*
 * The procset_slave and procset_master are used to synchronize
 * between the control CPU and the target CPU when starting CPUs.
 */
static cpuset_t procset_slave, procset_master;

static void
mp_startup_wait(cpuset_t *sp, processorid_t cpuid)
{
	cpuset_t tempset;

	for (tempset = *sp; !CPU_IN_SET(tempset, cpuid);
	    tempset = *(volatile cpuset_t *)sp) {
		SMT_PAUSE();
	}
	CPUSET_ATOMIC_DEL(*(cpuset_t *)sp, cpuid);
}

static void
mp_startup_signal(cpuset_t *sp, processorid_t cpuid)
{
	cpuset_t tempset;

	CPUSET_ATOMIC_ADD(*(cpuset_t *)sp, cpuid);
	for (tempset = *sp; CPU_IN_SET(tempset, cpuid);
	    tempset = *(volatile cpuset_t *)sp) {
		SMT_PAUSE();
	}
}

int
mp_start_cpu_common(cpu_t *cp, boolean_t boot)
{
	_NOTE(ARGUNUSED(boot));

	void *ctx;
	int delays;
	int error = 0;
	cpuset_t tempset;
	processorid_t cpuid;
#ifndef __xpv
	extern void cpupm_init(cpu_t *);
#endif

	ASSERT(cp != NULL);
	cpuid = cp->cpu_id;
	ctx = mach_cpucontext_alloc(cp);
	if (ctx == NULL) {
		cmn_err(CE_WARN,
		    "cpu%d: failed to allocate context", cp->cpu_id);
		return (EAGAIN);
	}
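
	/*
	 * Hand the context to the PSM layer to actually kick the target
	 * CPU; on bare metal this ultimately sends the INIT/SIPI sequence
	 * that starts the CPU in the real-mode startup trampoline.
	 */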
	error = mach_cpu_start(cp, ctx);
	if (error != 0) {
		cmn_err(CE_WARN,
		    "cpu%d: failed to start, error %d", cp->cpu_id, error);
		mach_cpucontext_free(cp, ctx, error);
		return (error);
	}

	for (delays = 0, tempset = procset_slave; !CPU_IN_SET(tempset, cpuid);
	    delays++) {
		if (delays == 500) {
			/*
			 * After five seconds, things are probably looking
			 * a bit bleak - explain the hang.
			 */
			cmn_err(CE_NOTE, "cpu%d: started, "
			    "but not running in the kernel yet", cpuid);
		} else if (delays > 2000) {
			/*
			 * We waited at least 20 seconds, bail ..
			 */
			error = ETIMEDOUT;
			cmn_err(CE_WARN, "cpu%d: timed out", cpuid);
			mach_cpucontext_free(cp, ctx, error);
			return (error);
		}

		/*
		 * wait at least 10ms, then check again..
		 */
		delay(USEC_TO_TICK_ROUNDUP(10000));
		tempset = *((volatile cpuset_t *)&procset_slave);
	}
	CPUSET_ATOMIC_DEL(procset_slave, cpuid);

	mach_cpucontext_free(cp, ctx, 0);

#ifndef __xpv
	if (tsc_gethrtime_enable)
		tsc_sync_master(cpuid);	/* slave side runs tsc_sync_slave() */
#endif

	if (dtrace_cpu_init != NULL) {
		(*dtrace_cpu_init)(cpuid);
	}

	/*
	 * During CPU DR operations, the cpu_lock is held by current
	 * (the control) thread. We can't release the cpu_lock here
	 * because that will break the CPU DR logic.
	 * On the other hand, CPUPM and processor group initialization
	 * routines need to access the cpu_lock. So we invoke those
	 * routines here on behalf of mp_startup_common().
	 *
	 * CPUPM and processor group initialization routines depend
	 * on the cpuid probing results. Wait for mp_startup_common()
	 * to signal that cpuid probing is done.
	 */
	mp_startup_wait(&procset_slave, cpuid);
#ifndef __xpv
	cpupm_init(cp);
#endif
	(void) pg_cpu_init(cp, B_FALSE);
	cpu_set_state(cp);
	mp_startup_signal(&procset_master, cpuid);

	return (0);
}
/*
 * Start a single cpu, assuming that the kernel context is available
 * to successfully start another cpu.
 *
 * (For example, real mode code is mapped into the right place
 * in memory and is ready to be run.)
 */
int
start_cpu(processorid_t who)
{
	cpu_t *cp;
	int error = 0;
	cpuset_t tempset;

	ASSERT(who != 0);

	/*
	 * Check if there's at least a Mbyte of kmem available
	 * before attempting to start the cpu.
	 */
	if (kmem_avail() < 1024 * 1024) {
		/*
		 * Kick off a reap in case that helps us with
		 * later attempts ..
		 */
		kmem_reap();
		return (ENOMEM);
	}

	/*
	 * First configure cpu.
	 */
	cp = mp_cpu_configure_common(who, B_TRUE);
	ASSERT(cp != NULL);

	/*
	 * Then start cpu.
	 */
	error = mp_start_cpu_common(cp, B_TRUE);
	if (error != 0) {
		mp_cpu_unconfigure_common(cp, error);
		return (error);
	}

	mutex_exit(&cpu_lock);
	tempset = cpu_ready_set;
	while (!CPU_IN_SET(tempset, who)) {
		drv_usecwait(1);
		tempset = *((volatile cpuset_t *)&cpu_ready_set);
	}
	mutex_enter(&cpu_lock);

	return (0);
}
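
/*
 * Callers of start_cpu() must hold cpu_lock; start_cpu() itself drops and
 * re-acquires it while spinning on cpu_ready_set.  A minimal calling
 * sequence, modeled on start_other_cpus() below:
 *
 *	mutex_enter(&cpu_lock);
 *	if (start_cpu(who) != 0)
 *		CPUSET_DEL(mp_cpus, who);	(give up on this cpu)
 *	mutex_exit(&cpu_lock);
 */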
void
start_other_cpus(int cprboot)
{
	_NOTE(ARGUNUSED(cprboot));

	uint_t who;
	uint_t bootcpuid = 0;

	/*
	 * Initialize our own cpu_info.
	 */
	init_cpu_info(CPU);

	cmn_err(CE_CONT, "?cpu%d: %s\n", CPU->cpu_id, CPU->cpu_idstr);
	cmn_err(CE_CONT, "?cpu%d: %s\n", CPU->cpu_id, CPU->cpu_brandstr);

	/*
	 * Initialize our syscall handlers
	 */
	init_cpu_syscall(CPU);

	/*
	 * Take the boot cpu out of the mp_cpus set because we know
	 * it's already running.  Add it to the cpu_ready_set for
	 * precisely the same reason.
	 */
	CPUSET_DEL(mp_cpus, bootcpuid);
	CPUSET_ADD(cpu_ready_set, bootcpuid);

	/*
	 * skip the rest of this if
	 * .	only 1 cpu detected and system isn't hotplug-capable
	 * .	not using MP
	 */
	if ((CPUSET_ISNULL(mp_cpus) && plat_dr_support_cpu() == 0) ||
	    use_mp == 0) {
		if (use_mp == 0)
			cmn_err(CE_CONT, "?***** Not in MP mode\n");
		goto done;
	}

	/*
	 * perform such initialization as is needed
	 * to be able to take CPUs on- and off-line.
	 */
	cpu_pause_init();

	xc_init_cpu(CPU);		/* initialize processor crosscalls */

	if (mach_cpucontext_init() != 0)
		goto done;

	flushes_require_xcalls = 1;

	/*
	 * We lock our affinity to the master CPU to ensure that all slave
	 * CPUs do their TSC syncs with the same CPU.
	 */
	affinity_set(CPU_CURRENT);

	for (who = 0; who < NCPU; who++) {
		if (!CPU_IN_SET(mp_cpus, who))
			continue;
		ASSERT(who != bootcpuid);

		mutex_enter(&cpu_lock);
		if (start_cpu(who) != 0)
			CPUSET_DEL(mp_cpus, who);
		cpu_state_change_notify(who, CPU_SETUP);
		mutex_exit(&cpu_lock);
	}

	/* Free the space allocated to hold the microcode file */
	ucode_cleanup();

	affinity_clear();

	mach_cpucontext_fini();

done:
	if (get_hwenv() == HW_NATIVE)
		workaround_errata_end();
	cmi_post_mpstartup();

	if (use_mp && ncpus != boot_max_ncpus) {
		cmn_err(CE_NOTE,
		    "System detected %d cpus, but "
		    "only %d cpu(s) were enabled during boot.",
		    boot_max_ncpus, ncpus);
		cmn_err(CE_NOTE,
		    "Use \"boot-ncpus\" parameter to enable more CPU(s). "
		    "See eeprom(1M).");
	}
}
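
/*
 * As the note above suggests, the boot-time CPU count can be raised from a
 * running system with eeprom(1M); for example (the count itself is
 * site-specific):
 *
 *	# eeprom boot-ncpus=8
 *
 * takes effect at the next reboot.
 */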
" 1536*12004Sjiang.liu@intel.com "See eeprom(1M)."); 1537*12004Sjiang.liu@intel.com } 15380Sstevel@tonic-gate } 15390Sstevel@tonic-gate 15400Sstevel@tonic-gate int 15410Sstevel@tonic-gate mp_cpu_configure(int cpuid) 15420Sstevel@tonic-gate { 1543*12004Sjiang.liu@intel.com cpu_t *cp; 1544*12004Sjiang.liu@intel.com 1545*12004Sjiang.liu@intel.com if (use_mp == 0 || plat_dr_support_cpu() == 0) { 1546*12004Sjiang.liu@intel.com return (ENOTSUP); 1547*12004Sjiang.liu@intel.com } 1548*12004Sjiang.liu@intel.com 1549*12004Sjiang.liu@intel.com cp = cpu_get(cpuid); 1550*12004Sjiang.liu@intel.com if (cp != NULL) { 1551*12004Sjiang.liu@intel.com return (EALREADY); 1552*12004Sjiang.liu@intel.com } 1553*12004Sjiang.liu@intel.com 1554*12004Sjiang.liu@intel.com /* 1555*12004Sjiang.liu@intel.com * Check if there's at least a Mbyte of kmem available 1556*12004Sjiang.liu@intel.com * before attempting to start the cpu. 1557*12004Sjiang.liu@intel.com */ 1558*12004Sjiang.liu@intel.com if (kmem_avail() < 1024 * 1024) { 1559*12004Sjiang.liu@intel.com /* 1560*12004Sjiang.liu@intel.com * Kick off a reap in case that helps us with 1561*12004Sjiang.liu@intel.com * later attempts .. 1562*12004Sjiang.liu@intel.com */ 1563*12004Sjiang.liu@intel.com kmem_reap(); 1564*12004Sjiang.liu@intel.com return (ENOMEM); 1565*12004Sjiang.liu@intel.com } 1566*12004Sjiang.liu@intel.com 1567*12004Sjiang.liu@intel.com cp = mp_cpu_configure_common(cpuid, B_FALSE); 1568*12004Sjiang.liu@intel.com ASSERT(cp != NULL && cpu_get(cpuid) == cp); 1569*12004Sjiang.liu@intel.com 1570*12004Sjiang.liu@intel.com return (cp != NULL ? 0 : EAGAIN); 15710Sstevel@tonic-gate } 15720Sstevel@tonic-gate 15730Sstevel@tonic-gate int 15740Sstevel@tonic-gate mp_cpu_unconfigure(int cpuid) 15750Sstevel@tonic-gate { 1576*12004Sjiang.liu@intel.com cpu_t *cp; 1577*12004Sjiang.liu@intel.com 1578*12004Sjiang.liu@intel.com if (use_mp == 0 || plat_dr_support_cpu() == 0) { 1579*12004Sjiang.liu@intel.com return (ENOTSUP); 1580*12004Sjiang.liu@intel.com } else if (cpuid < 0 || cpuid >= max_ncpus) { 1581*12004Sjiang.liu@intel.com return (EINVAL); 1582*12004Sjiang.liu@intel.com } 1583*12004Sjiang.liu@intel.com 1584*12004Sjiang.liu@intel.com cp = cpu_get(cpuid); 1585*12004Sjiang.liu@intel.com if (cp == NULL) { 1586*12004Sjiang.liu@intel.com return (ENODEV); 1587*12004Sjiang.liu@intel.com } 1588*12004Sjiang.liu@intel.com mp_cpu_unconfigure_common(cp, 0); 1589*12004Sjiang.liu@intel.com 1590*12004Sjiang.liu@intel.com return (0); 15910Sstevel@tonic-gate } 15920Sstevel@tonic-gate 15930Sstevel@tonic-gate /* 15940Sstevel@tonic-gate * Startup function for 'other' CPUs (besides boot cpu). 15952985Sdmick * Called from real_mode_start. 15961251Skchow * 1597*12004Sjiang.liu@intel.com * WARNING: until CPU_READY is set, mp_startup_common and routines called by 1598*12004Sjiang.liu@intel.com * mp_startup_common should not call routines (e.g. kmem_free) that could call 15991251Skchow * hat_unload which requires CPU_READY to be set. 
/*
 * Startup function for 'other' CPUs (besides boot cpu).
 * Called from real_mode_start.
 *
 * WARNING: until CPU_READY is set, mp_startup_common and routines called by
 * mp_startup_common should not call routines (e.g. kmem_free) that could call
 * hat_unload which requires CPU_READY to be set.
 */
static void
mp_startup_common(boolean_t boot)
{
	cpu_t *cp = CPU;
	uint_t new_x86_feature;
	const char *fmt = "?cpu%d: %b\n";
	extern void cpu_event_init_cpu(cpu_t *);

	/*
	 * We need to get TSC on this proc synced (i.e., any delta
	 * from cpu0 accounted for) as soon as we can, because many
	 * many things use gethrtime/pc_gethrestime, including
	 * interrupts, cmn_err, etc.
	 */

	/* Let the control CPU continue into tsc_sync_master() */
	mp_startup_signal(&procset_slave, cp->cpu_id);

#ifndef __xpv
	if (tsc_gethrtime_enable)
		tsc_sync_slave();
#endif

	/*
	 * Once this was done from assembly, but it's safer here; if
	 * it blocks, we need to be able to swtch() to and from, and
	 * since we get here by calling t_pc, we need to do that call
	 * before swtch() overwrites it.
	 */
	(void) (*ap_mlsetup)();

	new_x86_feature = cpuid_pass1(cp);

#ifndef __xpv
	/*
	 * Program this cpu's PAT
	 */
	if (x86_feature & X86_PAT)
		pat_sync();
#endif

	/*
	 * Set up TSC_AUX to contain the cpuid for this processor
	 * for the rdtscp instruction.
	 */
	if (x86_feature & X86_TSCP)
		(void) wrmsr(MSR_AMD_TSCAUX, cp->cpu_id);

	/*
	 * Initialize this CPU's syscall handlers
	 */
	init_cpu_syscall(cp);

	/*
	 * Enable interrupts with spl set to LOCK_LEVEL. LOCK_LEVEL is the
	 * highest level at which a routine is permitted to block on
	 * an adaptive mutex (allows for cpu poke interrupt in case
	 * the cpu is blocked on a mutex and halts). Setting LOCK_LEVEL blocks
	 * device interrupts that may end up in the hat layer issuing cross
	 * calls before CPU_READY is set.
	 */
	splx(ipltospl(LOCK_LEVEL));
	sti();

	/*
	 * Do a sanity check to make sure this new CPU is a sane thing
	 * to add to the collection of processors running this system.
	 *
	 * XXX	Clearly this needs to get more sophisticated, if x86
	 * systems start to get built out of heterogeneous CPUs; as is
	 * likely to happen once the number of processors in a configuration
	 * gets large enough.
	 */
	if ((x86_feature & new_x86_feature) != x86_feature) {
		cmn_err(CE_CONT, fmt, cp->cpu_id, new_x86_feature,
		    FMT_X86_FEATURE);
		cmn_err(CE_WARN, "cpu%d feature mismatch", cp->cpu_id);
	}
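
	/*
	 * For example, if the boot CPU established x86_feature with
	 * X86_SSE2 set but this CPU's cpuid pass lacks that bit, then
	 * (x86_feature & new_x86_feature) drops X86_SSE2 and no longer
	 * equals x86_feature, which triggers the warning above.
	 */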
	/*
	 * We do not support cpus with mixed monitor/mwait support if the
	 * boot cpu supports monitor/mwait.
	 */
	if ((x86_feature & ~new_x86_feature) & X86_MWAIT)
		panic("unsupported mixed cpu monitor/mwait support detected");

	/*
	 * We could be more sophisticated here, and just mark the CPU
	 * as "faulted" but at this point we'll opt for the easier
	 * answer of dying horribly.  Provided the boot cpu is ok,
	 * the system can be recovered by booting with use_mp set to zero.
	 */
	if (workaround_errata(cp) != 0)
		panic("critical workaround(s) missing for cpu%d", cp->cpu_id);

	/*
	 * We can touch cpu_flags without acquiring the cpu_lock here
	 * because the cpu_lock is held by the control CPU which is running
	 * mp_start_cpu_common().
	 * Need to clear CPU_QUIESCED flag before calling any function which
	 * may cause thread context switching, such as kmem_alloc() etc.
	 * The idle thread checks for CPU_QUIESCED flag and loops forever if
	 * it's set. So the startup thread may have no chance to switch back
	 * again if it's switched away with CPU_QUIESCED set.
	 */
	cp->cpu_flags &= ~(CPU_POWEROFF | CPU_QUIESCED);

	cpuid_pass2(cp);
	cpuid_pass3(cp);
	(void) cpuid_pass4(cp);

	/*
	 * Correct cpu_idstr and cpu_brandstr on target CPU after
	 * cpuid_pass1() is done.
	 */
	(void) cpuid_getidstr(cp, cp->cpu_idstr, CPU_IDSTRLEN);
	(void) cpuid_getbrandstr(cp, cp->cpu_brandstr, CPU_IDSTRLEN);

	cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_EXISTS;

	post_startup_cpu_fixups();

	cpu_event_init_cpu(cp);

	/*
	 * Enable preemption here so that contention for any locks acquired
	 * later in mp_startup_common may be preempted if the thread owning
	 * those locks is continuously executing on other CPUs (for example,
	 * this CPU must be preemptible to allow other CPUs to pause it during
	 * their startup phases).  It's safe to enable preemption here because
	 * the CPU state is pretty-much fully constructed.
	 */
	curthread->t_preempt = 0;
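
	/*
	 * (Until this point the startup thread has run with preemption
	 * disabled, i.e. with t_preempt raised since the thread was
	 * created; the store above is the matching enable.)
	 */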

	/* The base spl should still be at LOCK LEVEL here */
	ASSERT(cp->cpu_base_spl == ipltospl(LOCK_LEVEL));
	set_base_spl();		/* Restore the spl to its proper value */

	pghw_physid_create(cp);
	/*
	 * Delegate initialization tasks, which need to access the cpu_lock,
	 * to mp_start_cpu_common() because we can't acquire the cpu_lock here
	 * during CPU DR operations.
	 */
	mp_startup_signal(&procset_slave, cp->cpu_id);
	mp_startup_wait(&procset_master, cp->cpu_id);
	pg_cmt_cpu_startup(cp);

	if (boot) {
		mutex_enter(&cpu_lock);
		cp->cpu_flags &= ~CPU_OFFLINE;
		cpu_enable_intr(cp);
		cpu_add_active(cp);
		mutex_exit(&cpu_lock);
	}

	/* Enable interrupts */
	(void) spl0();

	/*
	 * Fill out cpu_ucode_info.  Update microcode if necessary.
	 */
	ucode_check(cp);

#ifndef __xpv
	{
		/*
		 * Set up the CPU module for this CPU.  This can't be done
		 * before this CPU is made CPU_READY, because we may (in
		 * heterogeneous systems) need to go load another CPU module.
		 * The act of attempting to load a module may trigger a
		 * cross-call, which will ASSERT unless this cpu is CPU_READY.
		 */
		cmi_hdl_t hdl;

		if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(CPU),
		    cmi_ntv_hwcoreid(CPU), cmi_ntv_hwstrandid(CPU))) != NULL) {
			if (x86_feature & X86_MCA)
				cmi_mca_init(hdl);
			cp->cpu_m.mcpu_cmi_hdl = hdl;
		}
	}
#endif /* __xpv */
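
	/*
	 * The handle cached in cp->cpu_m.mcpu_cmi_hdl above is picked up
	 * later by mp_cpu_faulted_enter() and mp_cpu_faulted_exit() below,
	 * saving them a cmi_hdl_lookup() in the common case.
	 */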

	if (boothowto & RB_DEBUG)
		kdi_cpu_init();

	/*
	 * Setting the bit in cpu_ready_set must be the last operation in
	 * processor initialization; the boot CPU will continue to boot once
	 * it sees this bit set for all active CPUs.
	 */
	CPUSET_ATOMIC_ADD(cpu_ready_set, cp->cpu_id);

	(void) mach_cpu_create_device_node(cp, NULL);

	cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_idstr);
	cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_brandstr);
	cmn_err(CE_CONT, "?cpu%d initialization complete - online\n",
	    cp->cpu_id);

	/*
	 * Now we are done with the startup thread, so free it up.
	 */
	thread_exit();
	panic("mp_startup: cannot return");
	/*NOTREACHED*/
}

/*
 * Startup function for 'other' CPUs at boot time (besides boot cpu).
 */
static void
mp_startup_boot(void)
{
	mp_startup_common(B_TRUE);
}

/*
 * Startup function for hotplug CPUs at runtime.
 */
static void
mp_startup_hotplug(void)
{
	mp_startup_common(B_FALSE);
}

/*
 * Start CPU on user request.
 */
/* ARGSUSED */
int
mp_cpu_start(struct cpu *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (0);
}

/*
 * Stop CPU on user request.
 */
int
mp_cpu_stop(struct cpu *cp)
{
	extern int cbe_psm_timer_mode;

	ASSERT(MUTEX_HELD(&cpu_lock));

#ifdef __xpv
	/*
	 * We can't offline vcpu0.
	 */
	if (cp->cpu_id == 0)
		return (EBUSY);
#endif

	/*
	 * If TIMER_PERIODIC mode is used, CPU0 is the one running it;
	 * can't stop it.  (This is true only for machines with no TSC.)
	 */
	if ((cbe_psm_timer_mode == TIMER_PERIODIC) && (cp->cpu_id == 0))
		return (EBUSY);

	return (0);
}
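
/*
 * mp_cpu_start() and mp_cpu_stop() back the administrative online/offline
 * path (e.g. "psradm -f 1" to take processor 1 offline, "psradm -n 1" to
 * bring it back; see psradm(1M)).  The generic CPU code holds cpu_lock
 * across both calls, hence the ASSERTs above.
 */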

/*
 * Take the specified CPU out of participation in interrupts.
 */
int
cpu_disable_intr(struct cpu *cp)
{
	if (psm_disable_intr(cp->cpu_id) != DDI_SUCCESS)
		return (EBUSY);

	cp->cpu_flags &= ~CPU_ENABLE;
	return (0);
}

/*
 * Allow the specified CPU to participate in interrupts.
 */
void
cpu_enable_intr(struct cpu *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	cp->cpu_flags |= CPU_ENABLE;
	psm_enable_intr(cp->cpu_id);
}

void
mp_cpu_faulted_enter(struct cpu *cp)
{
#ifdef __xpv
	_NOTE(ARGUNUSED(cp));
#else
	cmi_hdl_t hdl = cp->cpu_m.mcpu_cmi_hdl;

	if (hdl != NULL) {
		cmi_hdl_hold(hdl);
	} else {
		hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
		    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));
	}
	if (hdl != NULL) {
		cmi_faulted_enter(hdl);
		cmi_hdl_rele(hdl);
	}
#endif
}

void
mp_cpu_faulted_exit(struct cpu *cp)
{
#ifdef __xpv
	_NOTE(ARGUNUSED(cp));
#else
	cmi_hdl_t hdl = cp->cpu_m.mcpu_cmi_hdl;

	if (hdl != NULL) {
		cmi_hdl_hold(hdl);
	} else {
		hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
		    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));
	}
	if (hdl != NULL) {
		cmi_faulted_exit(hdl);
		cmi_hdl_rele(hdl);
	}
#endif
}
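
/*
 * Both routines above follow the same handle discipline: prefer the handle
 * cached at startup in cpu_m.mcpu_cmi_hdl (taking an extra hold on it),
 * fall back to a fresh cmi_hdl_lookup(), and drop the reference with
 * cmi_hdl_rele() once the faulted-state transition is done.
 */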

/*
 * The following two routines are used as context operators on threads
 * belonging to processes with a private LDT (see sysi86).  Due to the
 * rarity of such processes, these routines are currently written for best
 * code readability and organization rather than speed.  We could avoid
 * checking x86_feature at every context switch by installing different
 * context ops, depending on the x86_feature flags, at LDT creation time --
 * one for each combination of fast syscall feature flags.
 */

/*ARGSUSED*/
void
cpu_fast_syscall_disable(void *arg)
{
	if ((x86_feature & (X86_MSR | X86_SEP)) == (X86_MSR | X86_SEP))
		cpu_sep_disable();
	if ((x86_feature & (X86_MSR | X86_ASYSC)) == (X86_MSR | X86_ASYSC))
		cpu_asysc_disable();
}

/*ARGSUSED*/
void
cpu_fast_syscall_enable(void *arg)
{
	if ((x86_feature & (X86_MSR | X86_SEP)) == (X86_MSR | X86_SEP))
		cpu_sep_enable();
	if ((x86_feature & (X86_MSR | X86_ASYSC)) == (X86_MSR | X86_ASYSC))
		cpu_asysc_enable();
}

static void
cpu_sep_enable(void)
{
	ASSERT(x86_feature & X86_SEP);
	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);

	wrmsr(MSR_INTC_SEP_CS, (uint64_t)(uintptr_t)KCS_SEL);
}

static void
cpu_sep_disable(void)
{
	ASSERT(x86_feature & X86_SEP);
	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);

	/*
	 * Setting the SYSENTER_CS_MSR register to 0 causes software executing
	 * the sysenter or sysexit instruction to trigger a #gp fault.
	 */
	wrmsr(MSR_INTC_SEP_CS, 0);
}

static void
cpu_asysc_enable(void)
{
	ASSERT(x86_feature & X86_ASYSC);
	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);

	wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) |
	    (uint64_t)(uintptr_t)AMD_EFER_SCE);
}

static void
cpu_asysc_disable(void)
{
	ASSERT(x86_feature & X86_ASYSC);
	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);

	/*
	 * Turn off the SCE (syscall enable) bit in the EFER register. Software
	 * executing syscall or sysret with this bit off will incur a #ud trap.
	 */
	wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) &
	    ~((uint64_t)(uintptr_t)AMD_EFER_SCE));
}