xref: /onnv-gate/usr/src/uts/i86pc/os/mp_pc.c (revision 12826:fca99d9e3f2f)
13446Smrj /*
23446Smrj  * CDDL HEADER START
33446Smrj  *
43446Smrj  * The contents of this file are subject to the terms of the
53446Smrj  * Common Development and Distribution License (the "License").
63446Smrj  * You may not use this file except in compliance with the License.
73446Smrj  *
83446Smrj  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
93446Smrj  * or http://www.opensolaris.org/os/licensing.
103446Smrj  * See the License for the specific language governing permissions
113446Smrj  * and limitations under the License.
123446Smrj  *
133446Smrj  * When distributing Covered Code, include this CDDL HEADER in each
143446Smrj  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
153446Smrj  * If applicable, add the following below this CDDL HEADER, with the
163446Smrj  * fields enclosed by brackets "[]" replaced with your own identifying
173446Smrj  * information: Portions Copyright [yyyy] [name of copyright owner]
183446Smrj  *
193446Smrj  * CDDL HEADER END
203446Smrj  */
213446Smrj /*
22*12826Skuriakose.kuruvilla@oracle.com  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
233446Smrj  */
2412004Sjiang.liu@intel.com /*
2512004Sjiang.liu@intel.com  * Copyright (c) 2010, Intel Corporation.
2612004Sjiang.liu@intel.com  * All rights reserved.
2712004Sjiang.liu@intel.com  */
283446Smrj 
293446Smrj /*
303446Smrj  * Welcome to the world of the "real mode platter".
313446Smrj  * See also startup.c, mpcore.s and apic.c for related routines.
323446Smrj  */
333446Smrj 
343446Smrj #include <sys/types.h>
353446Smrj #include <sys/systm.h>
363446Smrj #include <sys/cpuvar.h>
3712004Sjiang.liu@intel.com #include <sys/cpu_module.h>
383446Smrj #include <sys/kmem.h>
393446Smrj #include <sys/archsystm.h>
403446Smrj #include <sys/machsystm.h>
413446Smrj #include <sys/controlregs.h>
423446Smrj #include <sys/x86_archext.h>
433446Smrj #include <sys/smp_impldefs.h>
443446Smrj #include <sys/sysmacros.h>
453446Smrj #include <sys/mach_mmu.h>
463446Smrj #include <sys/promif.h>
473446Smrj #include <sys/cpu.h>
4812004Sjiang.liu@intel.com #include <sys/cpu_event.h>
4912004Sjiang.liu@intel.com #include <sys/sunndi.h>
5012004Sjiang.liu@intel.com #include <sys/fs/dv_node.h>
514191Sjosephb #include <vm/hat_i86.h>
5212004Sjiang.liu@intel.com #include <vm/as.h>
5312004Sjiang.liu@intel.com 
5412004Sjiang.liu@intel.com extern cpuset_t cpu_ready_set;
553446Smrj 
5612004Sjiang.liu@intel.com extern int  mp_start_cpu_common(cpu_t *cp, boolean_t boot);
5712004Sjiang.liu@intel.com extern void real_mode_start_cpu(void);
5812004Sjiang.liu@intel.com extern void real_mode_start_cpu_end(void);
5912004Sjiang.liu@intel.com extern void real_mode_stop_cpu_stage1(void);
6012004Sjiang.liu@intel.com extern void real_mode_stop_cpu_stage1_end(void);
6112004Sjiang.liu@intel.com extern void real_mode_stop_cpu_stage2(void);
6212004Sjiang.liu@intel.com extern void real_mode_stop_cpu_stage2_end(void);
635295Srandyf extern void *(*cpu_pause_func)(void *);
645295Srandyf 
655295Srandyf void rmp_gdt_init(rm_platter_t *);
663446Smrj 
673446Smrj /*
683446Smrj  * Fill up the real mode platter to make it easy for real mode code to
693446Smrj  * kick it off. This area should really be one passed by boot to kernel
703446Smrj  * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
713446Smrj  * have identical physical and virtual address in paged mode.
723446Smrj  */
733446Smrj static ushort_t *warm_reset_vector = NULL;
743446Smrj 
/*
 * Prepare the machine for starting secondary CPUs via the BIOS warm
 * reset path: point the warm reset vector at the real mode platter's
 * startup code and map the platter page into the kernel address space.
 *
 * Returns 0 on success, -1 if the warm reset vector cannot be mapped.
 */
int
mach_cpucontext_init(void)
{
	ushort_t *vec;
	ulong_t addr;
	struct rm_platter *rm = (struct rm_platter *)rm_platter_va;

	/*
	 * Map the BIOS warm reset vector so we can write to it.
	 * NOTE(review): the length passed is sizeof (vec) -- the size of
	 * the pointer, not of the 4-byte vector itself.  It is large
	 * enough on both ILP32 and LP64, but sizeof (*vec) * 2 would
	 * express the intent; confirm before changing.
	 */
	if (!(vec = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
	    sizeof (vec), PROT_READ | PROT_WRITE)))
		return (-1);

	/*
	 * setup secondary cpu bios boot up vector
	 * Write page offset to 0x467 and page frame number to 0x469.
	 */
	addr = (ulong_t)((caddr_t)rm->rm_code - (caddr_t)rm) + rm_platter_pa;
	vec[0] = (ushort_t)(addr & PAGEOFFSET);
	vec[1] = (ushort_t)((addr & (0xfffff & PAGEMASK)) >> 4);
	warm_reset_vector = vec;

	/* Map real mode platter into kas so kernel can access it. */
	hat_devload(kas.a_hat,
	    (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
	    btop(rm_platter_pa), PROT_READ | PROT_WRITE | PROT_EXEC,
	    HAT_LOAD_NOCONSIST);

	/* Copy CPU startup code to rm_platter if it's still during boot. */
	if (!plat_dr_enabled()) {
		ASSERT((size_t)real_mode_start_cpu_end -
		    (size_t)real_mode_start_cpu <= RM_PLATTER_CODE_SIZE);
		bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
		    (size_t)real_mode_start_cpu_end -
		    (size_t)real_mode_start_cpu);
	}

	return (0);
}
1123446Smrj 
1133446Smrj void
mach_cpucontext_fini(void)1143446Smrj mach_cpucontext_fini(void)
1153446Smrj {
1163446Smrj 	if (warm_reset_vector)
1173446Smrj 		psm_unmap_phys((caddr_t)warm_reset_vector,
1183446Smrj 		    sizeof (warm_reset_vector));
1193446Smrj 	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
1203446Smrj 	    HAT_UNLOAD);
1213446Smrj }
1223446Smrj 
1233446Smrj #if defined(__amd64)
1243446Smrj extern void *long_mode_64(void);
1253446Smrj #endif	/* __amd64 */
1263446Smrj 
/*
 * Finish real mode platter setup for 64-bit CPU startup: install the
 * temporary GDT/IDT pseudo-descriptors and the identity-mapped address
 * of long_mode_64() used by real_mode_start_cpu().  Nothing to do for
 * 32-bit kernels.
 */
/*ARGSUSED*/
void
rmp_gdt_init(rm_platter_t *rm)
{

#if defined(__amd64)
	/* Use the kas address space for the CPU startup thread. */
	if (MAKECR3(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL)
		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
		    "located above 4G in physical memory (@ 0x%lx)",
		    MAKECR3(kas.a_hat->hat_htable->ht_pfn));

	/*
	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
	 * by code in real_mode_start_cpu():
	 *
	 * GDT[0]:  NULL selector
	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
	 *
	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
	 * a course of action as any other, though it may cause the entire
	 * platform to reset in some cases...
	 */
	rm->rm_temp_gdt[0] = 0;
	rm->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;

	rm->rm_temp_gdt_lim = (ushort_t)(sizeof (rm->rm_temp_gdt) - 1);
	rm->rm_temp_gdt_base = rm_platter_pa +
	    (uint32_t)offsetof(rm_platter_t, rm_temp_gdt);
	rm->rm_temp_idt_lim = 0;
	rm->rm_temp_idt_base = 0;

	/*
	 * Since the CPU needs to jump to protected mode using an identity
	 * mapped address, we need to calculate it here.
	 */
	rm->rm_longmode64_addr = rm_platter_pa +
	    ((uint32_t)long_mode_64 - (uint32_t)real_mode_start_cpu);
#endif	/* __amd64 */
}
16812004Sjiang.liu@intel.com 
/*
 * Allocate the per-CPU tables (startup stack and TSS) needed to start
 * cp, and install the TSS descriptor in the CPU's GDT.  Returns a
 * pointer to the allocated struct cpu_tables, which the caller owns
 * (freed via mach_cpucontext_xfree()).
 */
static void *
mach_cpucontext_alloc_tables(struct cpu *cp)
{
	struct tss *ntss;
	struct cpu_tables *ct;

	/*
	 * Allocate space for stack, tss, gdt and idt. We round the size
	 * allotted for cpu_tables up, so that the TSS is on a unique page.
	 * This is more efficient when running in virtual machines.
	 */
	ct = kmem_zalloc(P2ROUNDUP(sizeof (*ct), PAGESIZE), KM_SLEEP);
	if ((uintptr_t)ct & PAGEOFFSET)
		panic("mach_cpucontext_alloc_tables: cpu%d misaligned tables",
		    cp->cpu_id);

	ntss = cp->cpu_tss = &ct->ct_tss;

#if defined(__amd64)

	/*
	 * #DF (double fault): take it on IST1, at the top of the freshly
	 * allocated startup stack.
	 */
	ntss->tss_ist1 = (uint64_t)&ct->ct_stack[sizeof (ct->ct_stack)];

#elif defined(__i386)

	/* Kernel stack pointer for each privilege ring. */
	ntss->tss_esp0 = ntss->tss_esp1 = ntss->tss_esp2 = ntss->tss_esp =
	    (uint32_t)&ct->ct_stack[sizeof (ct->ct_stack)];

	ntss->tss_ss0 = ntss->tss_ss1 = ntss->tss_ss2 = ntss->tss_ss = KDS_SEL;

	ntss->tss_eip = (uint32_t)cp->cpu_thread->t_pc;

	ntss->tss_cs = KCS_SEL;
	ntss->tss_ds = ntss->tss_es = KDS_SEL;
	ntss->tss_fs = KFS_SEL;
	ntss->tss_gs = KGS_SEL;

#endif	/* __i386 */

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will cause all user I/O
	 * instructions to generate #gp fault.
	 */
	ntss->tss_bitmapbase = sizeof (*ntss);

	/*
	 * Setup kernel tss.
	 */
	set_syssegd((system_desc_t *)&cp->cpu_gdt[GDT_KTSS], cp->cpu_tss,
	    sizeof (*cp->cpu_tss) - 1, SDT_SYSTSS, SEL_KPL);

	return (ct);
}
22512004Sjiang.liu@intel.com 
/*
 * Prepare the real mode platter and per-CPU context for either starting
 * (MACH_CPUCONTEXT_OP_START) or stopping (MACH_CPUCONTEXT_OP_STOP) the
 * given CPU.  Returns an opaque context pointer (actually a struct
 * cpu_tables *) on success, NULL on failure or an unknown optype.
 */
void *
mach_cpucontext_xalloc(struct cpu *cp, int optype)
{
	size_t len;
	struct cpu_tables *ct;
	rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
	static int cpu_halt_code_ready;	/* stage 2 halt code copied once */

	if (optype == MACH_CPUCONTEXT_OP_STOP) {
		ASSERT(plat_dr_enabled());

		/*
		 * The WARM_RESET_VECTOR has a limitation that the physical
		 * address written to it must be page-aligned. To work around
		 * this limitation, the CPU stop code has been split into
		 * two stages.
		 * The stage 2 code, which implements the real logic to halt
		 * CPUs, is copied to the rm_cpu_halt_code field in the real
		 * mode platter. The stage 1 code, which simply jumps to the
		 * stage 2 code in the rm_cpu_halt_code field, is copied to
		 * rm_code field in the real mode platter and it may be
		 * overwritten after the CPU has been stopped.
		 */
		if (!cpu_halt_code_ready) {
			/*
			 * The rm_cpu_halt_code field in the real mode platter
			 * is used by the CPU stop code only. So only copy the
			 * CPU stop stage 2 code into the rm_cpu_halt_code
			 * field on the first call.
			 */
			len = (size_t)real_mode_stop_cpu_stage2_end -
			    (size_t)real_mode_stop_cpu_stage2;
			ASSERT(len <= RM_PLATTER_CPU_HALT_CODE_SIZE);
			bcopy((caddr_t)real_mode_stop_cpu_stage2,
			    (caddr_t)rm->rm_cpu_halt_code, len);
			cpu_halt_code_ready = 1;
		}

		/*
		 * The rm_code field in the real mode platter is shared by
		 * the CPU start, CPU stop, CPR and fast reboot code. So copy
		 * the CPU stop stage 1 code into the rm_code field every time.
		 */
		len = (size_t)real_mode_stop_cpu_stage1_end -
		    (size_t)real_mode_stop_cpu_stage1;
		ASSERT(len <= RM_PLATTER_CODE_SIZE);
		bcopy((caddr_t)real_mode_stop_cpu_stage1,
		    (caddr_t)rm->rm_code, len);
		rm->rm_cpu_halted = 0;

		/* Reuse the context allocated when the CPU was started. */
		return (cp->cpu_m.mcpu_mach_ctx_ptr);
	} else if (optype != MACH_CPUCONTEXT_OP_START) {
		return (NULL);
	}

	/*
	 * Only need to allocate tables when starting CPU.
	 * Tables allocated when starting CPU will be reused when stopping CPU.
	 */
	ct = mach_cpucontext_alloc_tables(cp);
	if (ct == NULL) {
		return (NULL);
	}

	/* Copy CPU startup code to rm_platter for CPU hot-add operations. */
	if (plat_dr_enabled()) {
		bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
		    (size_t)real_mode_start_cpu_end -
		    (size_t)real_mode_start_cpu);
	}

	/*
	 * Now copy all that we've set up onto the real mode platter
	 * for the real mode code to digest as part of starting the cpu.
	 */
	rm->rm_idt_base = cp->cpu_idt;
	rm->rm_idt_lim = sizeof (*cp->cpu_idt) * NIDT - 1;
	rm->rm_gdt_base = cp->cpu_gdt;
	rm->rm_gdt_lim = sizeof (*cp->cpu_gdt) * NGDT - 1;

	/*
	 * CPU needs to access kernel address space after powering on.
	 * When hot-adding CPU at runtime, directly use top level page table
	 * of kas other than the return value of getcr3(). getcr3() returns
	 * current process's top level page table, which may be different from
	 * the one of kas.
	 */
	rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn);
	rm->rm_cpu = cp->cpu_id;

	/*
	 * For hot-adding CPU at runtime, Machine Check and Performance Counter
	 * should be disabled. They will be enabled on demand after CPU powers
	 * on successfully
	 */
	rm->rm_cr4 = getcr4();
	rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE);

	rmp_gdt_init(rm);

	return (ct);
}
3285295Srandyf 
3295295Srandyf void
mach_cpucontext_xfree(struct cpu * cp,void * arg,int err,int optype)33012004Sjiang.liu@intel.com mach_cpucontext_xfree(struct cpu *cp, void *arg, int err, int optype)
3313446Smrj {
3323446Smrj 	struct cpu_tables *ct = arg;
3333446Smrj 
3343446Smrj 	ASSERT(&ct->ct_tss == cp->cpu_tss);
33512004Sjiang.liu@intel.com 	if (optype == MACH_CPUCONTEXT_OP_START) {
33612004Sjiang.liu@intel.com 		switch (err) {
33712004Sjiang.liu@intel.com 		case 0:
33812004Sjiang.liu@intel.com 			/*
33912004Sjiang.liu@intel.com 			 * Save pointer for reuse when stopping CPU.
34012004Sjiang.liu@intel.com 			 */
34112004Sjiang.liu@intel.com 			cp->cpu_m.mcpu_mach_ctx_ptr = arg;
34212004Sjiang.liu@intel.com 			break;
34312004Sjiang.liu@intel.com 		case ETIMEDOUT:
34412004Sjiang.liu@intel.com 			/*
34512004Sjiang.liu@intel.com 			 * The processor was poked, but failed to start before
34612004Sjiang.liu@intel.com 			 * we gave up waiting for it.  In case it starts later,
34712004Sjiang.liu@intel.com 			 * don't free anything.
34812004Sjiang.liu@intel.com 			 */
34912004Sjiang.liu@intel.com 			cp->cpu_m.mcpu_mach_ctx_ptr = arg;
35012004Sjiang.liu@intel.com 			break;
35112004Sjiang.liu@intel.com 		default:
35212004Sjiang.liu@intel.com 			/*
35312004Sjiang.liu@intel.com 			 * Some other, passive, error occurred.
35412004Sjiang.liu@intel.com 			 */
35512004Sjiang.liu@intel.com 			kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
35612004Sjiang.liu@intel.com 			cp->cpu_tss = NULL;
35712004Sjiang.liu@intel.com 			break;
35812004Sjiang.liu@intel.com 		}
35912004Sjiang.liu@intel.com 	} else if (optype == MACH_CPUCONTEXT_OP_STOP) {
36012004Sjiang.liu@intel.com 		switch (err) {
36112004Sjiang.liu@intel.com 		case 0:
36212004Sjiang.liu@intel.com 			/*
36312004Sjiang.liu@intel.com 			 * Free resources allocated when starting CPU.
36412004Sjiang.liu@intel.com 			 */
36512004Sjiang.liu@intel.com 			kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
36612004Sjiang.liu@intel.com 			cp->cpu_tss = NULL;
36712004Sjiang.liu@intel.com 			cp->cpu_m.mcpu_mach_ctx_ptr = NULL;
36812004Sjiang.liu@intel.com 			break;
36912004Sjiang.liu@intel.com 		default:
37012004Sjiang.liu@intel.com 			/*
37112004Sjiang.liu@intel.com 			 * Don't touch table pointer in case of failure.
37212004Sjiang.liu@intel.com 			 */
37312004Sjiang.liu@intel.com 			break;
37412004Sjiang.liu@intel.com 		}
37512004Sjiang.liu@intel.com 	} else {
37612004Sjiang.liu@intel.com 		ASSERT(0);
37712004Sjiang.liu@intel.com 	}
37812004Sjiang.liu@intel.com }
3793446Smrj 
38012004Sjiang.liu@intel.com void *
mach_cpucontext_alloc(struct cpu * cp)38112004Sjiang.liu@intel.com mach_cpucontext_alloc(struct cpu *cp)
38212004Sjiang.liu@intel.com {
38312004Sjiang.liu@intel.com 	return (mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_START));
38412004Sjiang.liu@intel.com }
38512004Sjiang.liu@intel.com 
38612004Sjiang.liu@intel.com void
mach_cpucontext_free(struct cpu * cp,void * arg,int err)38712004Sjiang.liu@intel.com mach_cpucontext_free(struct cpu *cp, void *arg, int err)
38812004Sjiang.liu@intel.com {
38912004Sjiang.liu@intel.com 	mach_cpucontext_xfree(cp, arg, err, MACH_CPUCONTEXT_OP_START);
3903446Smrj }
3913446Smrj 
3923446Smrj /*
3933446Smrj  * "Enter monitor."  Called via cross-call from stop_other_cpus().
3943446Smrj  */
3953446Smrj void
mach_cpu_halt(char * msg)3963446Smrj mach_cpu_halt(char *msg)
3973446Smrj {
3983446Smrj 	if (msg)
3993446Smrj 		prom_printf("%s\n", msg);
4003446Smrj 
4013446Smrj 	/*CONSTANTCONDITION*/
4023446Smrj 	while (1)
4033446Smrj 		;
4043446Smrj }
4053446Smrj 
/*
 * Default idle routine: halt the CPU until the next interrupt arrives.
 */
void
mach_cpu_idle(void)
{
	i86_halt();
}
4113446Smrj 
4123446Smrj void
mach_cpu_pause(volatile char * safe)4133446Smrj mach_cpu_pause(volatile char *safe)
4143446Smrj {
4153446Smrj 	/*
4163446Smrj 	 * This cpu is now safe.
4173446Smrj 	 */
4183446Smrj 	*safe = PAUSE_WAIT;
4193446Smrj 	membar_enter(); /* make sure stores are flushed */
4203446Smrj 
4213446Smrj 	/*
4223446Smrj 	 * Now we wait.  When we are allowed to continue, safe
4233446Smrj 	 * will be set to PAUSE_IDLE.
4243446Smrj 	 */
4253446Smrj 	while (*safe != PAUSE_IDLE)
4263446Smrj 		SMT_PAUSE();
4273446Smrj }
4283446Smrj 
/*
 * Power on the target CPU.
 *
 * Returns 0 on success; ENOTSUP if MP/DR is not supported or the CPU
 * has already been powered on since mp_cpu_configure(); EINVAL for a
 * bad cpu id; ENOMEM when kmem is too low; otherwise the error from
 * mach_cpucontext_init() / mp_start_cpu_common().
 */
int
mp_cpu_poweron(struct cpu *cp)
{
	int error;
	cpuset_t tempset;
	processorid_t cpuid;

	ASSERT(cp != NULL);
	cpuid = cp->cpu_id;
	if (use_mp == 0 || plat_dr_support_cpu() == 0) {
		return (ENOTSUP);
	} else if (cpuid < 0 || cpuid >= max_ncpus) {
		return (EINVAL);
	}

	/*
	 * The current x86 implementation of mp_cpu_configure() and
	 * mp_cpu_poweron() have a limitation that mp_cpu_poweron() could only
	 * be called once after calling mp_cpu_configure() for a specific CPU.
	 * It's because mp_cpu_poweron() will destroy data structure created
	 * by mp_cpu_configure(). So reject the request if the CPU has already
	 * been powered on once after calling mp_cpu_configure().
	 * This limitation only affects the p_online syscall and the DR driver
	 * won't be affected because the DR driver always invoke public CPU
	 * management interfaces in the predefined order:
	 * cpu_configure()->cpu_poweron()...->cpu_poweroff()->cpu_unconfigure()
	 */
	if (cpuid_checkpass(cp, 4) || cp->cpu_thread == cp->cpu_idle_thread) {
		return (ENOTSUP);
	}

	/*
	 * Check if there's at least a Mbyte of kmem available
	 * before attempting to start the cpu.
	 */
	if (kmem_avail() < 1024 * 1024) {
		/*
		 * Kick off a reap in case that helps us with
		 * later attempts ..
		 */
		kmem_reap();
		return (ENOMEM);
	}

	/* Stay on the current CPU while poking the target. */
	affinity_set(CPU->cpu_id);

	/*
	 * Start the target CPU. No need to call mach_cpucontext_fini()
	 * if mach_cpucontext_init() fails.
	 */
	if ((error = mach_cpucontext_init()) == 0) {
		error = mp_start_cpu_common(cp, B_FALSE);
		mach_cpucontext_fini();
	}
	if (error != 0) {
		affinity_clear();
		return (error);
	}

	/* Wait for the target cpu to reach READY state. */
	tempset = cpu_ready_set;
	while (!CPU_IN_SET(tempset, cpuid)) {
		delay(1);
		tempset = *((volatile cpuset_t *)&cpu_ready_set);
	}

	/* Mark the target CPU as available for mp operation. */
	CPUSET_ATOMIC_ADD(mp_cpus, cpuid);

	/* Free the space allocated to hold the microcode file */
	ucode_cleanup();

	affinity_clear();

	return (0);
}
50812004Sjiang.liu@intel.com 
50912004Sjiang.liu@intel.com #define	MP_CPU_DETACH_MAX_TRIES		5
51012004Sjiang.liu@intel.com #define	MP_CPU_DETACH_DELAY		100
51112004Sjiang.liu@intel.com 
51212004Sjiang.liu@intel.com static int
mp_cpu_detach_driver(dev_info_t * dip)51312004Sjiang.liu@intel.com mp_cpu_detach_driver(dev_info_t *dip)
51412004Sjiang.liu@intel.com {
51512004Sjiang.liu@intel.com 	int i;
51612004Sjiang.liu@intel.com 	int rv = EBUSY;
51712004Sjiang.liu@intel.com 	dev_info_t *pdip;
51812004Sjiang.liu@intel.com 
51912004Sjiang.liu@intel.com 	pdip = ddi_get_parent(dip);
52012004Sjiang.liu@intel.com 	ASSERT(pdip != NULL);
52112004Sjiang.liu@intel.com 	/*
52212004Sjiang.liu@intel.com 	 * Check if caller holds pdip busy - can cause deadlocks in
52312004Sjiang.liu@intel.com 	 * e_ddi_branch_unconfigure(), which calls devfs_clean().
52412004Sjiang.liu@intel.com 	 */
52512004Sjiang.liu@intel.com 	if (DEVI_BUSY_OWNED(pdip)) {
52612004Sjiang.liu@intel.com 		return (EDEADLOCK);
52712004Sjiang.liu@intel.com 	}
52812004Sjiang.liu@intel.com 
52912004Sjiang.liu@intel.com 	for (i = 0; i < MP_CPU_DETACH_MAX_TRIES; i++) {
53012004Sjiang.liu@intel.com 		if (e_ddi_branch_unconfigure(dip, NULL, 0) == 0) {
53112004Sjiang.liu@intel.com 			rv = 0;
53212004Sjiang.liu@intel.com 			break;
53312004Sjiang.liu@intel.com 		}
53412004Sjiang.liu@intel.com 		DELAY(MP_CPU_DETACH_DELAY);
53512004Sjiang.liu@intel.com 	}
53612004Sjiang.liu@intel.com 
53712004Sjiang.liu@intel.com 	return (rv);
5383446Smrj }
5393446Smrj 
5403446Smrj /*
54112004Sjiang.liu@intel.com  * Power off the target CPU.
54212004Sjiang.liu@intel.com  * Note: cpu_lock will be released and then reacquired.
5433446Smrj  */
5443446Smrj int
mp_cpu_poweroff(struct cpu * cp)5453446Smrj mp_cpu_poweroff(struct cpu *cp)
5463446Smrj {
54712004Sjiang.liu@intel.com 	int rv = 0;
54812004Sjiang.liu@intel.com 	void *ctx;
54912004Sjiang.liu@intel.com 	dev_info_t *dip = NULL;
55012004Sjiang.liu@intel.com 	rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
55112004Sjiang.liu@intel.com 	extern void cpupm_start(cpu_t *);
55212004Sjiang.liu@intel.com 	extern void cpupm_stop(cpu_t *);
55312004Sjiang.liu@intel.com 
55412004Sjiang.liu@intel.com 	ASSERT(cp != NULL);
55512004Sjiang.liu@intel.com 	ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
55612004Sjiang.liu@intel.com 	ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
55712004Sjiang.liu@intel.com 
55812004Sjiang.liu@intel.com 	if (use_mp == 0 || plat_dr_support_cpu() == 0) {
55912004Sjiang.liu@intel.com 		return (ENOTSUP);
56012004Sjiang.liu@intel.com 	}
56112004Sjiang.liu@intel.com 	/*
56212004Sjiang.liu@intel.com 	 * There is no support for powering off cpu0 yet.
56312004Sjiang.liu@intel.com 	 * There are many pieces of code which have a hard dependency on cpu0.
56412004Sjiang.liu@intel.com 	 */
56512004Sjiang.liu@intel.com 	if (cp->cpu_id == 0) {
56612004Sjiang.liu@intel.com 		return (ENOTSUP);
56712004Sjiang.liu@intel.com 	};
56812004Sjiang.liu@intel.com 
56912004Sjiang.liu@intel.com 	if (mach_cpu_get_device_node(cp, &dip) != PSM_SUCCESS) {
57012004Sjiang.liu@intel.com 		return (ENXIO);
57112004Sjiang.liu@intel.com 	}
57212004Sjiang.liu@intel.com 	ASSERT(dip != NULL);
57312004Sjiang.liu@intel.com 	if (mp_cpu_detach_driver(dip) != 0) {
57412004Sjiang.liu@intel.com 		rv = EBUSY;
57512004Sjiang.liu@intel.com 		goto out_online;
57612004Sjiang.liu@intel.com 	}
57712004Sjiang.liu@intel.com 
57812004Sjiang.liu@intel.com 	/* Allocate CPU context for stopping */
57912004Sjiang.liu@intel.com 	if (mach_cpucontext_init() != 0) {
58012004Sjiang.liu@intel.com 		rv = ENXIO;
58112004Sjiang.liu@intel.com 		goto out_online;
58212004Sjiang.liu@intel.com 	}
58312004Sjiang.liu@intel.com 	ctx = mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_STOP);
58412004Sjiang.liu@intel.com 	if (ctx == NULL) {
58512004Sjiang.liu@intel.com 		rv = ENXIO;
58612004Sjiang.liu@intel.com 		goto out_context_fini;
58712004Sjiang.liu@intel.com 	}
58812004Sjiang.liu@intel.com 
58912004Sjiang.liu@intel.com 	cpupm_stop(cp);
59012004Sjiang.liu@intel.com 	cpu_event_fini_cpu(cp);
59112004Sjiang.liu@intel.com 
59212004Sjiang.liu@intel.com 	if (cp->cpu_m.mcpu_cmi_hdl != NULL) {
59312004Sjiang.liu@intel.com 		cmi_fini(cp->cpu_m.mcpu_cmi_hdl);
59412004Sjiang.liu@intel.com 		cp->cpu_m.mcpu_cmi_hdl = NULL;
59512004Sjiang.liu@intel.com 	}
59612004Sjiang.liu@intel.com 
59712004Sjiang.liu@intel.com 	rv = mach_cpu_stop(cp, ctx);
59812004Sjiang.liu@intel.com 	if (rv != 0) {
59912004Sjiang.liu@intel.com 		goto out_enable_cmi;
60012004Sjiang.liu@intel.com 	}
60112004Sjiang.liu@intel.com 
60212004Sjiang.liu@intel.com 	/* Wait until the target CPU has been halted. */
60312004Sjiang.liu@intel.com 	while (*(volatile ushort_t *)&(rm->rm_cpu_halted) != 0xdead) {
60412004Sjiang.liu@intel.com 		delay(1);
60512004Sjiang.liu@intel.com 	}
60612004Sjiang.liu@intel.com 	rm->rm_cpu_halted = 0xffff;
60712004Sjiang.liu@intel.com 
60812004Sjiang.liu@intel.com 	/* CPU_READY has been cleared by mach_cpu_stop. */
60912004Sjiang.liu@intel.com 	ASSERT((cp->cpu_flags & CPU_READY) == 0);
61012004Sjiang.liu@intel.com 	ASSERT((cp->cpu_flags & CPU_RUNNING) == 0);
61112004Sjiang.liu@intel.com 	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
61212004Sjiang.liu@intel.com 	CPUSET_ATOMIC_DEL(mp_cpus, cp->cpu_id);
61312004Sjiang.liu@intel.com 
61412004Sjiang.liu@intel.com 	mach_cpucontext_xfree(cp, ctx, 0, MACH_CPUCONTEXT_OP_STOP);
61512004Sjiang.liu@intel.com 	mach_cpucontext_fini();
61612004Sjiang.liu@intel.com 
61712004Sjiang.liu@intel.com 	return (0);
61812004Sjiang.liu@intel.com 
61912004Sjiang.liu@intel.com out_enable_cmi:
62012004Sjiang.liu@intel.com 	{
62112004Sjiang.liu@intel.com 		cmi_hdl_t hdl;
62212004Sjiang.liu@intel.com 
62312004Sjiang.liu@intel.com 		if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
62412004Sjiang.liu@intel.com 		    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp))) != NULL) {
625*12826Skuriakose.kuruvilla@oracle.com 			if (is_x86_feature(x86_featureset, X86FSET_MCA))
62612004Sjiang.liu@intel.com 				cmi_mca_init(hdl);
62712004Sjiang.liu@intel.com 			cp->cpu_m.mcpu_cmi_hdl = hdl;
62812004Sjiang.liu@intel.com 		}
62912004Sjiang.liu@intel.com 	}
63012004Sjiang.liu@intel.com 	cpu_event_init_cpu(cp);
63112004Sjiang.liu@intel.com 	cpupm_start(cp);
63212004Sjiang.liu@intel.com 	mach_cpucontext_xfree(cp, ctx, rv, MACH_CPUCONTEXT_OP_STOP);
63312004Sjiang.liu@intel.com 
63412004Sjiang.liu@intel.com out_context_fini:
63512004Sjiang.liu@intel.com 	mach_cpucontext_fini();
63612004Sjiang.liu@intel.com 
63712004Sjiang.liu@intel.com out_online:
63812004Sjiang.liu@intel.com 	(void) e_ddi_branch_configure(dip, NULL, 0);
63912004Sjiang.liu@intel.com 
64012004Sjiang.liu@intel.com 	if (rv != EAGAIN && rv != ETIME) {
64112004Sjiang.liu@intel.com 		rv = ENXIO;
64212004Sjiang.liu@intel.com 	}
64312004Sjiang.liu@intel.com 
64412004Sjiang.liu@intel.com 	return (rv);
6453446Smrj }
6465529Ssmaybe 
6475529Ssmaybe /*
6485529Ssmaybe  * Return vcpu state, since this could be a virtual environment that we
6495529Ssmaybe  * are unaware of, return "unknown".
6505529Ssmaybe  */
6515529Ssmaybe /* ARGSUSED */
6525529Ssmaybe int
vcpu_on_pcpu(processorid_t cpu)6535529Ssmaybe vcpu_on_pcpu(processorid_t cpu)
6545529Ssmaybe {
6555529Ssmaybe 	return (VCPU_STATE_UNKNOWN);
6565529Ssmaybe }
657