/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */

/*
 * Welcome to the world of the "real mode platter".
 * See also startup.c, mpcore.s and apic.c for related routines.
 */
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
#include <sys/cpu_module.h>
#include <sys/kmem.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/controlregs.h>
#include <sys/x86_archext.h>
#include <sys/smp_impldefs.h>
#include <sys/sysmacros.h>
#include <sys/mach_mmu.h>
#include <sys/promif.h>
#include <sys/cpu.h>
#include <sys/cpu_event.h>
#include <sys/sunndi.h>
#include <sys/fs/dv_node.h>
#include <vm/hat_i86.h>
#include <vm/as.h>

extern cpuset_t cpu_ready_set;

extern int mp_start_cpu_common(cpu_t *cp, boolean_t boot);
extern void real_mode_start_cpu(void);
extern void real_mode_start_cpu_end(void);
extern void real_mode_stop_cpu_stage1(void);
extern void real_mode_stop_cpu_stage1_end(void);
extern void real_mode_stop_cpu_stage2(void);
extern void real_mode_stop_cpu_stage2_end(void);
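/*
 * The real_mode_* symbols above delimit real mode code blobs (presumably
 * in mpcore.s, per the header comment) that get copied into the real mode
 * platter below.
 */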
extern void *(*cpu_pause_func)(void *);

void rmp_gdt_init(rm_platter_t *);

/*
 * Fill up the real mode platter to make it easy for the real mode code to
 * kick things off. This area should really be one passed by the boot loader
 * to the kernel, guaranteed to be below 1MB and aligned to 16 bytes. It
 * should also have identical physical and virtual addresses in paged mode.
 */
static ushort_t *warm_reset_vector = NULL;
int
mach_cpucontext_init(void)
{
	ushort_t *vec;
	ulong_t addr;
	struct rm_platter *rm = (struct rm_platter *)rm_platter_va;

	if (!(vec = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
	    sizeof (vec), PROT_READ | PROT_WRITE)))
		return (-1);

	/*
	 * Set up the secondary CPU BIOS warm-reset boot vector:
	 * write the page offset to 0x467 and the page frame number to 0x469.
	 */
	addr = (ulong_t)((caddr_t)rm->rm_code - (caddr_t)rm) + rm_platter_pa;
	vec[0] = (ushort_t)(addr & PAGEOFFSET);
	vec[1] = (ushort_t)((addr & (0xfffff & PAGEMASK)) >> 4);
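	/*
	 * Together these form a real mode far pointer: a 16-bit offset
	 * followed by a 16-bit segment (the paragraph number of the startup
	 * code, hence the shift right by 4).
	 */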
	warm_reset_vector = vec;

	/* Map the real mode platter into kas so the kernel can access it. */
	hat_devload(kas.a_hat,
	    (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
	    btop(rm_platter_pa), PROT_READ | PROT_WRITE | PROT_EXEC,
	    HAT_LOAD_NOCONSIST);

	/* Copy the CPU startup code to the rm_platter if still booting. */
	if (!plat_dr_enabled()) {
		ASSERT((size_t)real_mode_start_cpu_end -
		    (size_t)real_mode_start_cpu <= RM_PLATTER_CODE_SIZE);
		bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
		    (size_t)real_mode_start_cpu_end -
		    (size_t)real_mode_start_cpu);
	}

	return (0);
}

void
mach_cpucontext_fini(void)
{
	if (warm_reset_vector)
		psm_unmap_phys((caddr_t)warm_reset_vector,
		    sizeof (warm_reset_vector));
	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
	    HAT_UNLOAD);
}

#if defined(__amd64)
extern void *long_mode_64(void);
#endif	/* __amd64 */

/*ARGSUSED*/
void
rmp_gdt_init(rm_platter_t *rm)
{

#if defined(__amd64)
	/* Use the kas address space for the CPU startup thread. */
	if (MAKECR3(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL)
		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
		    "located above 4G in physical memory (@ 0x%lx)",
		    MAKECR3(kas.a_hat->hat_htable->ht_pfn));

	/*
	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
	 * by code in real_mode_start_cpu():
	 *
	 * GDT[0]:  NULL selector
	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
	 *
	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
	 * a course of action as any other, though it may cause the entire
	 * platform to reset in some cases...
	 */
	rm->rm_temp_gdt[0] = 0;
	rm->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
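	/*
	 * The descriptor value above decodes to P = 1, DPL = 0, S = 1,
	 * an execute-only code type, and L = 1 (64-bit mode), matching
	 * the comment above.
	 */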

	rm->rm_temp_gdt_lim = (ushort_t)(sizeof (rm->rm_temp_gdt) - 1);
	rm->rm_temp_gdt_base = rm_platter_pa +
	    (uint32_t)offsetof(rm_platter_t, rm_temp_gdt);
	rm->rm_temp_idt_lim = 0;
	rm->rm_temp_idt_base = 0;

	/*
	 * Since the CPU needs to jump to protected mode using an identity
	 * mapped address, we need to calculate it here.
	 */
	rm->rm_longmode64_addr = rm_platter_pa +
	    ((uint32_t)long_mode_64 - (uint32_t)real_mode_start_cpu);
#endif	/* __amd64 */
}

static void *
mach_cpucontext_alloc_tables(struct cpu *cp)
{
	struct tss *ntss;
	struct cpu_tables *ct;

	/*
	 * Allocate space for stack, tss, gdt and idt. We round the size
	 * allotted for cpu_tables up, so that the TSS is on a unique page.
	 * This is more efficient when running in virtual machines.
	 */
	ct = kmem_zalloc(P2ROUNDUP(sizeof (*ct), PAGESIZE), KM_SLEEP);
	if ((uintptr_t)ct & PAGEOFFSET)
		panic("mach_cpucontext_alloc_tables: cpu%d misaligned tables",
		    cp->cpu_id);

	ntss = cp->cpu_tss = &ct->ct_tss;

#if defined(__amd64)

	/*
	 * #DF (double fault).
	 */
	ntss->tss_ist1 = (uint64_t)&ct->ct_stack[sizeof (ct->ct_stack)];

#elif defined(__i386)

	ntss->tss_esp0 = ntss->tss_esp1 = ntss->tss_esp2 = ntss->tss_esp =
	    (uint32_t)&ct->ct_stack[sizeof (ct->ct_stack)];

	ntss->tss_ss0 = ntss->tss_ss1 = ntss->tss_ss2 = ntss->tss_ss = KDS_SEL;

	ntss->tss_eip = (uint32_t)cp->cpu_thread->t_pc;

	ntss->tss_cs = KCS_SEL;
	ntss->tss_ds = ntss->tss_es = KDS_SEL;
	ntss->tss_fs = KFS_SEL;
	ntss->tss_gs = KGS_SEL;

#endif	/* __i386 */

	/*
	 * Set the I/O bitmap offset equal to the TSS segment limit, which
	 * specifies no I/O permission map. This causes all user I/O
	 * instructions to generate a #gp fault.
	 */
	ntss->tss_bitmapbase = sizeof (*ntss);

	/*
	 * Setup kernel tss.
	 */
	set_syssegd((system_desc_t *)&cp->cpu_gdt[GDT_KTSS], cp->cpu_tss,
	    sizeof (*cp->cpu_tss) - 1, SDT_SYSTSS, SEL_KPL);

	return (ct);
}

void *
mach_cpucontext_xalloc(struct cpu *cp, int optype)
{
	size_t len;
	struct cpu_tables *ct;
	rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
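	/* Set once the stage 2 halt code has been copied into the platter. */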
	static int cpu_halt_code_ready;

	if (optype == MACH_CPUCONTEXT_OP_STOP) {
		ASSERT(plat_dr_enabled());

		/*
		 * The WARM_RESET_VECTOR has a limitation that the physical
		 * address written to it must be page-aligned. To work around
		 * this limitation, the CPU stop code has been split into
		 * two stages.
		 * The stage 2 code, which implements the real logic to halt
		 * CPUs, is copied to the rm_cpu_halt_code field in the real
		 * mode platter. The stage 1 code, which simply jumps to the
		 * stage 2 code in the rm_cpu_halt_code field, is copied to
		 * the rm_code field in the real mode platter and may be
		 * overwritten after the CPU has been stopped.
		 */
		if (!cpu_halt_code_ready) {
			/*
			 * The rm_cpu_halt_code field in the real mode platter
			 * is used by the CPU stop code only. So only copy the
			 * CPU stop stage 2 code into the rm_cpu_halt_code
			 * field on the first call.
			 */
			len = (size_t)real_mode_stop_cpu_stage2_end -
			    (size_t)real_mode_stop_cpu_stage2;
			ASSERT(len <= RM_PLATTER_CPU_HALT_CODE_SIZE);
			bcopy((caddr_t)real_mode_stop_cpu_stage2,
			    (caddr_t)rm->rm_cpu_halt_code, len);
			cpu_halt_code_ready = 1;
		}

		/*
		 * The rm_code field in the real mode platter is shared by
		 * the CPU start, CPU stop, CPR and fast reboot code. So copy
		 * the CPU stop stage 1 code into the rm_code field every time.
		 */
		len = (size_t)real_mode_stop_cpu_stage1_end -
		    (size_t)real_mode_stop_cpu_stage1;
		ASSERT(len <= RM_PLATTER_CODE_SIZE);
		bcopy((caddr_t)real_mode_stop_cpu_stage1,
		    (caddr_t)rm->rm_code, len);
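		/*
		 * Reset the handshake flag; the stage 2 halt code sets it to
		 * 0xdead once the target CPU has been parked (see
		 * mp_cpu_poweroff()).
		 */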
		rm->rm_cpu_halted = 0;

		return (cp->cpu_m.mcpu_mach_ctx_ptr);
	} else if (optype != MACH_CPUCONTEXT_OP_START) {
		return (NULL);
	}

	/*
	 * Only need to allocate tables when starting CPU.
	 * Tables allocated when starting CPU will be reused when stopping CPU.
	 */
	ct = mach_cpucontext_alloc_tables(cp);
	if (ct == NULL) {
		return (NULL);
	}

	/* Copy CPU startup code to rm_platter for CPU hot-add operations. */
	if (plat_dr_enabled()) {
		bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
		    (size_t)real_mode_start_cpu_end -
		    (size_t)real_mode_start_cpu);
	}

	/*
	 * Now copy all that we've set up onto the real mode platter
	 * for the real mode code to digest as part of starting the cpu.
	 */
	rm->rm_idt_base = cp->cpu_idt;
	rm->rm_idt_lim = sizeof (*cp->cpu_idt) * NIDT - 1;
	rm->rm_gdt_base = cp->cpu_gdt;
	rm->rm_gdt_lim = sizeof (*cp->cpu_gdt) * NGDT - 1;

	/*
	 * The CPU needs to access the kernel address space after powering on.
	 * When hot-adding a CPU at runtime, use the top level page table of
	 * kas directly rather than the return value of getcr3(). getcr3()
	 * returns the current process's top level page table, which may
	 * differ from the one of kas.
	 */
	rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn);
	rm->rm_cpu = cp->cpu_id;

	/*
	 * For hot-adding a CPU at runtime, Machine Check and Performance
	 * Counter should be disabled. They will be enabled on demand after
	 * the CPU powers on successfully.
	 */
	rm->rm_cr4 = getcr4();
	rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE);

	rmp_gdt_init(rm);

	return (ct);
}

void
mach_cpucontext_xfree(struct cpu *cp, void *arg, int err, int optype)
{
	struct cpu_tables *ct = arg;

	ASSERT(&ct->ct_tss == cp->cpu_tss);
	if (optype == MACH_CPUCONTEXT_OP_START) {
		switch (err) {
		case 0:
			/*
			 * Save pointer for reuse when stopping CPU.
			 */
			cp->cpu_m.mcpu_mach_ctx_ptr = arg;
			break;
		case ETIMEDOUT:
			/*
			 * The processor was poked, but failed to start before
			 * we gave up waiting for it. In case it starts later,
			 * don't free anything.
			 */
			cp->cpu_m.mcpu_mach_ctx_ptr = arg;
			break;
		default:
			/*
			 * Some other, passive, error occurred.
			 */
			kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
			cp->cpu_tss = NULL;
			break;
		}
	} else if (optype == MACH_CPUCONTEXT_OP_STOP) {
		switch (err) {
		case 0:
			/*
			 * Free resources allocated when starting CPU.
			 */
			kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
			cp->cpu_tss = NULL;
			cp->cpu_m.mcpu_mach_ctx_ptr = NULL;
			break;
		default:
			/*
			 * Don't touch table pointer in case of failure.
			 */
			break;
		}
	} else {
		ASSERT(0);
	}
}

void *
mach_cpucontext_alloc(struct cpu *cp)
{
	return (mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_START));
}

void
mach_cpucontext_free(struct cpu *cp, void *arg, int err)
{
	mach_cpucontext_xfree(cp, arg, err, MACH_CPUCONTEXT_OP_START);
}

/*
 * "Enter monitor."  Called via cross-call from stop_other_cpus().
 */
void
mach_cpu_halt(char *msg)
{
	if (msg)
		prom_printf("%s\n", msg);

	/*CONSTANTCONDITION*/
	while (1)
		;
}

void
mach_cpu_idle(void)
{
	i86_halt();
}

void
mach_cpu_pause(volatile char *safe)
{
	/*
	 * This cpu is now safe.
	 */
	*safe = PAUSE_WAIT;
	membar_enter();		/* make sure stores are flushed */

	/*
	 * Now we wait. When we are allowed to continue, safe
	 * will be set to PAUSE_IDLE.
	 */
	while (*safe != PAUSE_IDLE)
		SMT_PAUSE();
}

/*
 * Power on the target CPU.
 */
int
mp_cpu_poweron(struct cpu *cp)
{
	int error;
	cpuset_t tempset;
	processorid_t cpuid;

	ASSERT(cp != NULL);
	cpuid = cp->cpu_id;
	if (use_mp == 0 || plat_dr_support_cpu() == 0) {
		return (ENOTSUP);
	} else if (cpuid < 0 || cpuid >= max_ncpus) {
		return (EINVAL);
	}

	/*
	 * The current x86 implementation of mp_cpu_configure() and
	 * mp_cpu_poweron() has a limitation: mp_cpu_poweron() may only be
	 * called once after calling mp_cpu_configure() for a specific CPU,
	 * because mp_cpu_poweron() destroys data structures created by
	 * mp_cpu_configure(). So reject the request if the CPU has already
	 * been powered on once after calling mp_cpu_configure().
	 * This limitation only affects the p_online syscall; the DR driver
	 * won't be affected because it always invokes the public CPU
	 * management interfaces in the predefined order:
	 * cpu_configure()->cpu_poweron()...->cpu_poweroff()->cpu_unconfigure()
	 */
	if (cpuid_checkpass(cp, 4) || cp->cpu_thread == cp->cpu_idle_thread) {
		return (ENOTSUP);
	}

	/*
	 * Check if there's at least a megabyte of kmem available
	 * before attempting to start the cpu.
	 */
	if (kmem_avail() < 1024 * 1024) {
		/*
		 * Kick off a reap in case that helps us with
		 * later attempts ..
		 */
		kmem_reap();
		return (ENOMEM);
	}

	affinity_set(CPU->cpu_id);

	/*
	 * Start the target CPU. No need to call mach_cpucontext_fini()
	 * if mach_cpucontext_init() fails.
	 */
	if ((error = mach_cpucontext_init()) == 0) {
		error = mp_start_cpu_common(cp, B_FALSE);
		mach_cpucontext_fini();
	}
	if (error != 0) {
		affinity_clear();
		return (error);
	}

	/* Wait for the target cpu to reach READY state. */
	tempset = cpu_ready_set;
	while (!CPU_IN_SET(tempset, cpuid)) {
		delay(1);
		tempset = *((volatile cpuset_t *)&cpu_ready_set);
	}

	/* Mark the target CPU as available for mp operation. */
	CPUSET_ATOMIC_ADD(mp_cpus, cpuid);

	/* Free the space allocated to hold the microcode file */
	ucode_cleanup();

	affinity_clear();

	return (0);
}

#define	MP_CPU_DETACH_MAX_TRIES		5
#define	MP_CPU_DETACH_DELAY		100

static int
mp_cpu_detach_driver(dev_info_t *dip)
{
	int i;
	int rv = EBUSY;
	dev_info_t *pdip;

	pdip = ddi_get_parent(dip);
	ASSERT(pdip != NULL);
	/*
	 * Check if caller holds pdip busy - can cause deadlocks in
	 * e_ddi_branch_unconfigure(), which calls devfs_clean().
	 */
	if (DEVI_BUSY_OWNED(pdip)) {
		return (EDEADLOCK);
	}

	for (i = 0; i < MP_CPU_DETACH_MAX_TRIES; i++) {
		if (e_ddi_branch_unconfigure(dip, NULL, 0) == 0) {
			rv = 0;
			break;
		}
		DELAY(MP_CPU_DETACH_DELAY);
	}

	return (rv);
}

/*
 * Power off the target CPU.
 * Note: cpu_lock will be released and then reacquired.
 */
int
mp_cpu_poweroff(struct cpu *cp)
{
	int rv = 0;
	void *ctx;
	dev_info_t *dip = NULL;
	rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
	extern void cpupm_start(cpu_t *);
	extern void cpupm_stop(cpu_t *);

	ASSERT(cp != NULL);
	ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
	ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);

	if (use_mp == 0 || plat_dr_support_cpu() == 0) {
		return (ENOTSUP);
	}
	/*
	 * There is no support for powering off cpu0 yet.
	 * There are many pieces of code which have a hard dependency on cpu0.
	 */
	if (cp->cpu_id == 0) {
		return (ENOTSUP);
	}

	if (mach_cpu_get_device_node(cp, &dip) != PSM_SUCCESS) {
		return (ENXIO);
	}
	ASSERT(dip != NULL);
	if (mp_cpu_detach_driver(dip) != 0) {
		rv = EBUSY;
		goto out_online;
	}

	/* Allocate CPU context for stopping */
	if (mach_cpucontext_init() != 0) {
		rv = ENXIO;
		goto out_online;
	}
	ctx = mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_STOP);
	if (ctx == NULL) {
		rv = ENXIO;
		goto out_context_fini;
	}

	cpupm_stop(cp);
	cpu_event_fini_cpu(cp);

	if (cp->cpu_m.mcpu_cmi_hdl != NULL) {
		cmi_fini(cp->cpu_m.mcpu_cmi_hdl);
		cp->cpu_m.mcpu_cmi_hdl = NULL;
	}

	rv = mach_cpu_stop(cp, ctx);
	if (rv != 0) {
		goto out_enable_cmi;
	}

	/*
	 * Wait until the target CPU has been halted; the stage 2 halt code
	 * writes 0xdead to rm_cpu_halted as its final action.
	 */
	while (*(volatile ushort_t *)&(rm->rm_cpu_halted) != 0xdead) {
		delay(1);
	}
	rm->rm_cpu_halted = 0xffff;

	/* CPU_READY has been cleared by mach_cpu_stop. */
	ASSERT((cp->cpu_flags & CPU_READY) == 0);
	ASSERT((cp->cpu_flags & CPU_RUNNING) == 0);
	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
	CPUSET_ATOMIC_DEL(mp_cpus, cp->cpu_id);

	mach_cpucontext_xfree(cp, ctx, 0, MACH_CPUCONTEXT_OP_STOP);
	mach_cpucontext_fini();

	return (0);

out_enable_cmi:
	{
		cmi_hdl_t hdl;

		if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
		    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp))) != NULL) {
			if (is_x86_feature(x86_featureset, X86FSET_MCA))
				cmi_mca_init(hdl);
			cp->cpu_m.mcpu_cmi_hdl = hdl;
		}
	}
	cpu_event_init_cpu(cp);
	cpupm_start(cp);
	mach_cpucontext_xfree(cp, ctx, rv, MACH_CPUCONTEXT_OP_STOP);

out_context_fini:
	mach_cpucontext_fini();

out_online:
	(void) e_ddi_branch_configure(dip, NULL, 0);

	if (rv != EAGAIN && rv != ETIME) {
		rv = ENXIO;
	}

	return (rv);
}

/*
 * Return the vcpu state. Since this could be a virtual environment that we
 * are unaware of, return "unknown".
 */
/* ARGSUSED */
int
vcpu_on_pcpu(processorid_t cpu)
{
	return (VCPU_STATE_UNKNOWN);
}