1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * Platform specific implementation code
27 * Currently only suspend to RAM is supported (ACPI S3)
28 */
29
30 #define SUNDDI_IMPL
31
32 #include <sys/types.h>
33 #include <sys/promif.h>
34 #include <sys/prom_isa.h>
35 #include <sys/prom_plat.h>
36 #include <sys/cpuvar.h>
37 #include <sys/pte.h>
38 #include <vm/hat.h>
39 #include <vm/page.h>
40 #include <vm/as.h>
41 #include <sys/cpr.h>
42 #include <sys/kmem.h>
43 #include <sys/clock.h>
44 #include <sys/kmem.h>
45 #include <sys/panic.h>
46 #include <vm/seg_kmem.h>
47 #include <sys/cpu_module.h>
48 #include <sys/callb.h>
49 #include <sys/machsystm.h>
50 #include <sys/vmsystm.h>
51 #include <sys/systm.h>
52 #include <sys/archsystm.h>
53 #include <sys/stack.h>
54 #include <sys/fs/ufs_fs.h>
55 #include <sys/memlist.h>
56 #include <sys/bootconf.h>
57 #include <sys/thread.h>
58 #include <sys/x_call.h>
59 #include <sys/smp_impldefs.h>
60 #include <vm/vm_dep.h>
61 #include <sys/psm.h>
62 #include <sys/epm.h>
63 #include <sys/cpr_wakecode.h>
64 #include <sys/x86_archext.h>
65 #include <sys/reboot.h>
66 #include <sys/acpi/acpi.h>
67 #include <sys/acpica.h>
68 #include <sys/fp.h>
69
/* Conversion specifier spliced into the prt_other_cpus() debug format. */
#define AFMT "%lx"

/* Kernel globals defined outside this file and used by the resume path. */
extern int flushes_require_xcalls;
extern cpuset_t cpu_ready_set;

#if defined(__amd64)
/* Only its address is used here, to compute an offset from wc_rm_start. */
extern void *wc_long_mode_64(void);
#endif /* __amd64 */
extern int tsc_gethrtime_enable;
extern void i_cpr_start_cpu(void);

ushort_t cpr_mach_type = CPR_MACHTYPE_X86;
void (*cpr_start_cpu_func)(void) = i_cpr_start_cpu;

/*
 * Table of max_ncpus saved cpu contexts, allocated by i_cpr_alloc_cpus()
 * and released by i_cpr_free_cpus().
 */
static wc_cpu_t *wc_other_cpus = NULL;

/*
 * Set of cpus that have reached i_cpr_start_cpu() during resume;
 * i_cpr_pre_resume_cpus() polls it through wait_for_set().
 */
static cpuset_t procset;

static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt);

static int i_cpr_platform_alloc(psm_state_request_t *req);
static void i_cpr_platform_free(psm_state_request_t *req);
static int i_cpr_save_apic(psm_state_request_t *req);
static int i_cpr_restore_apic(psm_state_request_t *req);
static int wait_for_set(cpuset_t *set, int who);

static void i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu);
void i_cpr_restore_stack(kthread_t *t, greg_t *save_stack);

/*
 * Bounds of a thread's stack as used by i_cpr_save_stack() and
 * i_cpr_restore_stack(): the copy runs from START up to END, so which
 * of t_stk/t_stkbase is which depends on the stack growth direction.
 */
#ifdef STACK_GROWTH_DOWN
#define CPR_GET_STACK_START(t) ((t)->t_stkbase)
#define CPR_GET_STACK_END(t) ((t)->t_stk)
#else
#define CPR_GET_STACK_START(t) ((t)->t_stk)
#define CPR_GET_STACK_END(t) ((t)->t_stkbase)
#endif /* STACK_GROWTH_DOWN */
106
107 /*
108 * restart paused slave cpus
109 */
110 void
i_cpr_machdep_setup(void)111 i_cpr_machdep_setup(void)
112 {
113 if (ncpus > 1) {
114 CPR_DEBUG(CPR_DEBUG1, ("MP restarted...\n"));
115 mutex_enter(&cpu_lock);
116 start_cpus();
117 mutex_exit(&cpu_lock);
118 }
119 }
120
121
/*
 * Stop all interrupt activity in the system by calling spl7(); the
 * value it returns is deliberately discarded.
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}
130
/*
 * Let the machine take interrupts again by calling spl0(); the value
 * it returns is deliberately discarded.
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}
139
/*
 * Save miscellaneous information which needs to be written to the
 * state file. This information is required to re-initialize
 * kernel/prom handshaking.
 *
 * This implementation saves nothing: the body only asserts on a value
 * that is always zero, so reaching this function trips the ASSERT in
 * builds where ASSERT is active.
 */
void
i_cpr_save_machdep_info(void)
{
	/* always false: flags any call to this routine as unexpected */
	int notcalled = 0;
	ASSERT(notcalled);
}
151
152
/*
 * Intentionally empty in this implementation.
 */
void
i_cpr_set_tbr(void)
{
	/* nothing to do */
}
157
158
159 processorid_t
i_cpr_bootcpuid(void)160 i_cpr_bootcpuid(void)
161 {
162 return (0);
163 }
164
165 /*
166 * cpu0 should contain bootcpu info
167 */
168 cpu_t *
i_cpr_bootcpu(void)169 i_cpr_bootcpu(void)
170 {
171 ASSERT(MUTEX_HELD(&cpu_lock));
172
173 return (cpu_get(i_cpr_bootcpuid()));
174 }
175
/*
 * Save context for the specified CPU
 *
 * arg is the cpu's index into the wc_other_cpus table, passed as a
 * pointer-sized integer.  The function always returns NULL.
 *
 * The PSM (APIC) state and the current thread's stack are saved first,
 * then wc_save_context() records the cpu context.  Control comes back
 * from wc_save_context() twice; see the comment at the call below.
 */
void *
i_cpr_save_context(void *arg)
{
	long index = (long)arg;
	psm_state_request_t *papic_state;
	int resuming;
	int ret;
	wc_cpu_t *wc_cpu = wc_other_cpus + index;

	PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))

	ASSERT(index < NCPU);

	papic_state = &(wc_cpu)->wc_apic_state;

	ret = i_cpr_platform_alloc(papic_state);
	ASSERT(ret == 0);

	ret = i_cpr_save_apic(papic_state);
	ASSERT(ret == 0);

	i_cpr_save_stack(curthread, wc_cpu);

	/*
	 * wc_save_context() returns twice, once when suspending and
	 * once when resuming.  As the test below (and the matching test
	 * in i_cpr_power_down()) shows, a return value of 0 means we are
	 * resuming; a non-zero value means we are on the suspend path.
	 */
	resuming = (wc_save_context(wc_cpu) == 0);

	/*
	 * do NOT call any functions after this point, because doing so
	 * will modify the stack that we are running on
	 */

	if (resuming) {

		ret = i_cpr_restore_apic(papic_state);
		ASSERT(ret == 0);

		i_cpr_platform_free(papic_state);

		/*
		 * Enable interrupts on this cpu.
		 * Do not bind interrupts to this CPU's local APIC until
		 * the CPU is ready to receive interrupts.
		 */
		ASSERT(CPU->cpu_id != i_cpr_bootcpuid());
		mutex_enter(&cpu_lock);
		cpu_enable_intr(CPU);
		mutex_exit(&cpu_lock);

		/*
		 * Setting the bit in cpu_ready_set must be the last operation
		 * in processor initialization; the boot CPU will continue to
		 * boot once it sees this bit set for all active CPUs.
		 */
		CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);

		PMD(PMD_SX,
		    ("i_cpr_save_context() resuming cpu %d in cpu_ready_set\n",
		    CPU->cpu_id))
	} else {
		/*
		 * Disable interrupts on this CPU so that PSM knows not to bind
		 * interrupts here on resume until the CPU has executed
		 * cpu_enable_intr() (above) in the resume path.
		 * We explicitly do not grab cpu_lock here because at this point
		 * in the suspend process, the boot cpu owns cpu_lock and all
		 * other cpus are also executing in the pause thread (only
		 * modifying their respective CPU structure).
		 */
		(void) cpu_disable_intr(CPU);
	}

	/*
	 * Note that the value logged here is the derived "resuming" flag,
	 * not the raw return value of wc_save_context().
	 */
	PMD(PMD_SX, ("i_cpr_save_context: wc_save_context returns %d\n",
	    resuming))

	return (NULL);
}
259
260 static ushort_t *warm_reset_vector = NULL;
261
262 static ushort_t *
map_warm_reset_vector()263 map_warm_reset_vector()
264 {
265 /*LINTED*/
266 if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
267 sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
268 return (NULL);
269
270 /*
271 * setup secondary cpu bios boot up vector
272 */
273 *warm_reset_vector = (ushort_t)((caddr_t)
274 /*LINTED*/
275 ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
276 + ((ulong_t)rm_platter_va & 0xf));
277 warm_reset_vector++;
278 *warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);
279
280 --warm_reset_vector;
281 return (warm_reset_vector);
282 }
283
284 void
i_cpr_pre_resume_cpus()285 i_cpr_pre_resume_cpus()
286 {
287 /*
288 * this is a cut down version of start_other_cpus()
289 * just do the initialization to wake the other cpus
290 */
291 unsigned who;
292 int boot_cpuid = i_cpr_bootcpuid();
293 uint32_t code_length = 0;
294 caddr_t wakevirt = rm_platter_va;
295 /*LINTED*/
296 wakecode_t *wp = (wakecode_t *)wakevirt;
297 char *str = "i_cpr_pre_resume_cpus";
298 extern int get_tsc_ready();
299 int err;
300
301 /*LINTED*/
302 rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
303
304 /*
305 * If startup wasn't able to find a page under 1M, we cannot
306 * proceed.
307 */
308 if (rm_platter_va == 0) {
309 cmn_err(CE_WARN, "Cannot suspend the system because no "
310 "memory below 1M could be found for processor startup");
311 return;
312 }
313
314 /*
315 * Copy the real mode code at "real_mode_start" to the
316 * page at rm_platter_va.
317 */
318 warm_reset_vector = map_warm_reset_vector();
319 if (warm_reset_vector == NULL) {
320 PMD(PMD_SX, ("i_cpr_pre_resume_cpus() returning #2\n"))
321 return;
322 }
323
324 flushes_require_xcalls = 1;
325
326 /*
327 * We lock our affinity to the master CPU to ensure that all slave CPUs
328 * do their TSC syncs with the same CPU.
329 */
330
331 affinity_set(CPU_CURRENT);
332
333 /*
334 * Mark the boot cpu as being ready and in the procset, since we are
335 * running on that cpu.
336 */
337 CPUSET_ONLY(cpu_ready_set, boot_cpuid);
338 CPUSET_ONLY(procset, boot_cpuid);
339
340 for (who = 0; who < max_ncpus; who++) {
341
342 wc_cpu_t *cpup = wc_other_cpus + who;
343 wc_desctbr_t gdt;
344
345 if (who == boot_cpuid)
346 continue;
347
348 if (!CPU_IN_SET(mp_cpus, who))
349 continue;
350
351 PMD(PMD_SX, ("%s() waking up %d cpu\n", str, who))
352
353 bcopy(cpup, &(wp->wc_cpu), sizeof (wc_cpu_t));
354
355 gdt.base = cpup->wc_gdt_base;
356 gdt.limit = cpup->wc_gdt_limit;
357
358 #if defined(__amd64)
359 code_length = (uint32_t)wc_long_mode_64 - (uint32_t)wc_rm_start;
360 #else
361 code_length = 0;
362 #endif
363
364 init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);
365
366 mutex_enter(&cpu_lock);
367 err = mach_cpuid_start(who, rm_platter_va);
368 mutex_exit(&cpu_lock);
369 if (err != 0) {
370 cmn_err(CE_WARN, "cpu%d: failed to start during "
371 "suspend/resume error %d", who, err);
372 continue;
373 }
374
375 PMD(PMD_SX, ("%s() #1 waiting for %d in procset\n", str, who))
376
377 if (!wait_for_set(&procset, who))
378 continue;
379
380 PMD(PMD_SX, ("%s() %d cpu started\n", str, who))
381
382 PMD(PMD_SX, ("%s() tsc_ready = %d\n", str, get_tsc_ready()))
383
384 if (tsc_gethrtime_enable) {
385 PMD(PMD_SX, ("%s() calling tsc_sync_master\n", str))
386 tsc_sync_master(who);
387 }
388
389 PMD(PMD_SX, ("%s() waiting for %d in cpu_ready_set\n", str,
390 who))
391 /*
392 * Wait for cpu to declare that it is ready, we want the
393 * cpus to start serially instead of in parallel, so that
394 * they do not contend with each other in wc_rm_start()
395 */
396 if (!wait_for_set(&cpu_ready_set, who))
397 continue;
398
399 /*
400 * do not need to re-initialize dtrace using dtrace_cpu_init
401 * function
402 */
403 PMD(PMD_SX, ("%s() cpu %d now ready\n", str, who))
404 }
405
406 affinity_clear();
407
408 PMD(PMD_SX, ("%s() all cpus now ready\n", str))
409
410 }
411
412 static void
unmap_warm_reset_vector(ushort_t * warm_reset_vector)413 unmap_warm_reset_vector(ushort_t *warm_reset_vector)
414 {
415 psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
416 }
417
418 /*
419 * We need to setup a 1:1 (virtual to physical) mapping for the
420 * page containing the wakeup code.
421 */
422 static struct as *save_as; /* when switching to kas */
423
424 static void
unmap_wakeaddr_1to1(uint64_t wakephys)425 unmap_wakeaddr_1to1(uint64_t wakephys)
426 {
427 uintptr_t wp = (uintptr_t)wakephys;
428 hat_setup(save_as->a_hat, 0); /* switch back from kernel hat */
429 hat_unload(kas.a_hat, (caddr_t)wp, PAGESIZE, HAT_UNLOAD);
430 }
431
432 void
i_cpr_post_resume_cpus()433 i_cpr_post_resume_cpus()
434 {
435 uint64_t wakephys = rm_platter_pa;
436
437 if (warm_reset_vector != NULL)
438 unmap_warm_reset_vector(warm_reset_vector);
439
440 hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
441 HAT_UNLOAD);
442
443 /*
444 * cmi_post_mpstartup() is only required upon boot not upon
445 * resume from RAM
446 */
447
448 PT(PT_UNDO1to1);
449 /* Tear down 1:1 mapping for wakeup code */
450 unmap_wakeaddr_1to1(wakephys);
451 }
452
/*
 * Intentionally empty in this implementation; flag is ignored.
 */
/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
	/* nothing to do */
}
458
/*
 * Report whether reusable statefiles are supported: always 0 (no) here.
 */
int
i_cpr_reusable_supported(void)
{
	return (0);
}
464 static void
map_wakeaddr_1to1(uint64_t wakephys)465 map_wakeaddr_1to1(uint64_t wakephys)
466 {
467 uintptr_t wp = (uintptr_t)wakephys;
468 hat_devload(kas.a_hat, (caddr_t)wp, PAGESIZE, btop(wakephys),
469 (PROT_READ|PROT_WRITE|PROT_EXEC|HAT_STORECACHING_OK|HAT_NOSYNC),
470 HAT_LOAD);
471 save_as = curthread->t_procp->p_as;
472 hat_setup(kas.a_hat, 0); /* switch to kernel-only hat */
473 }
474
475
476 void
prt_other_cpus()477 prt_other_cpus()
478 {
479 int who;
480
481 if (ncpus == 1) {
482 PMD(PMD_SX, ("prt_other_cpus() other cpu table empty for "
483 "uniprocessor machine\n"))
484 return;
485 }
486
487 for (who = 0; who < max_ncpus; who++) {
488
489 wc_cpu_t *cpup = wc_other_cpus + who;
490
491 if (!CPU_IN_SET(mp_cpus, who))
492 continue;
493
494 PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
495 "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
496 AFMT ", sp=%lx\n", who,
497 (void *)cpup->wc_gdt_base, cpup->wc_gdt_limit,
498 (void *)cpup->wc_idt_base, cpup->wc_idt_limit,
499 (long)cpup->wc_ldt, (long)cpup->wc_tr,
500 (long)cpup->wc_kgsbase, (long)cpup->wc_rsp))
501 }
502 }
503
504 /*
505 * Power down the system.
506 */
507 int
i_cpr_power_down(int sleeptype)508 i_cpr_power_down(int sleeptype)
509 {
510 caddr_t wakevirt = rm_platter_va;
511 uint64_t wakephys = rm_platter_pa;
512 ulong_t saved_intr;
513 uint32_t code_length = 0;
514 wc_desctbr_t gdt;
515 /*LINTED*/
516 wakecode_t *wp = (wakecode_t *)wakevirt;
517 /*LINTED*/
518 rm_platter_t *wcpp = (rm_platter_t *)wakevirt;
519 wc_cpu_t *cpup = &(wp->wc_cpu);
520 dev_info_t *ppm;
521 int ret = 0;
522 power_req_t power_req;
523 char *str = "i_cpr_power_down";
524 #if defined(__amd64)
525 /*LINTED*/
526 rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
527 #endif
528 extern int cpr_suspend_succeeded;
529 extern void kernel_wc_code();
530
531 ASSERT(sleeptype == CPR_TORAM);
532 ASSERT(CPU->cpu_id == 0);
533
534 if ((ppm = PPM(ddi_root_node())) == NULL) {
535 PMD(PMD_SX, ("%s: root node not claimed\n", str))
536 return (ENOTTY);
537 }
538
539 PMD(PMD_SX, ("Entering %s()\n", str))
540
541 PT(PT_IC);
542 saved_intr = intr_clear();
543
544 PT(PT_1to1);
545 /* Setup 1:1 mapping for wakeup code */
546 map_wakeaddr_1to1(wakephys);
547
548 PMD(PMD_SX, ("ncpus=%d\n", ncpus))
549
550 PMD(PMD_SX, ("wc_rm_end - wc_rm_start=%lx WC_CODESIZE=%x\n",
551 ((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)), WC_CODESIZE))
552
553 PMD(PMD_SX, ("wakevirt=%p, wakephys=%x\n",
554 (void *)wakevirt, (uint_t)wakephys))
555
556 ASSERT(((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)) <
557 WC_CODESIZE);
558
559 bzero(wakevirt, PAGESIZE);
560
561 /* Copy code to rm_platter */
562 bcopy((caddr_t)wc_rm_start, wakevirt,
563 (size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start));
564
565 prt_other_cpus();
566
567 #if defined(__amd64)
568
569 PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
570 (ulong_t)real_mode_platter->rm_cr4, (ulong_t)getcr4()))
571 PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
572 (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
573
574 real_mode_platter->rm_cr4 = getcr4();
575 real_mode_platter->rm_pdbr = getcr3();
576
577 rmp_gdt_init(real_mode_platter);
578
579 /*
580 * Since the CPU needs to jump to protected mode using an identity
581 * mapped address, we need to calculate it here.
582 */
583 real_mode_platter->rm_longmode64_addr = rm_platter_pa +
584 ((uint32_t)wc_long_mode_64 - (uint32_t)wc_rm_start);
585
586 PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
587 (ulong_t)real_mode_platter->rm_cr4, getcr4()))
588
589 PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
590 (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
591
592 PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
593 (ulong_t)real_mode_platter->rm_longmode64_addr))
594
595 #endif
596
597 PT(PT_SC);
598 if (wc_save_context(cpup)) {
599
600 ret = i_cpr_platform_alloc(&(wc_other_cpus->wc_apic_state));
601 if (ret != 0)
602 return (ret);
603
604 ret = i_cpr_save_apic(&(wc_other_cpus->wc_apic_state));
605 PMD(PMD_SX, ("%s: i_cpr_save_apic() returned %d\n", str, ret))
606 if (ret != 0)
607 return (ret);
608
609 PMD(PMD_SX, ("wakephys=%x, kernel_wc_code=%p\n",
610 (uint_t)wakephys, (void *)&kernel_wc_code))
611 PMD(PMD_SX, ("virtaddr=%lx, retaddr=%lx\n",
612 (long)cpup->wc_virtaddr, (long)cpup->wc_retaddr))
613 PMD(PMD_SX, ("ebx=%x, edi=%x, esi=%x, ebp=%x, esp=%x\n",
614 cpup->wc_ebx, cpup->wc_edi, cpup->wc_esi, cpup->wc_ebp,
615 cpup->wc_esp))
616 PMD(PMD_SX, ("cr0=%lx, cr3=%lx, cr4=%lx\n",
617 (long)cpup->wc_cr0, (long)cpup->wc_cr3,
618 (long)cpup->wc_cr4))
619 PMD(PMD_SX, ("cs=%x, ds=%x, es=%x, ss=%x, fs=%lx, gs=%lx, "
620 "flgs=%lx\n", cpup->wc_cs, cpup->wc_ds, cpup->wc_es,
621 cpup->wc_ss, (long)cpup->wc_fs, (long)cpup->wc_gs,
622 (long)cpup->wc_eflags))
623
624 PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
625 "kgbase=%lx\n", (void *)cpup->wc_gdt_base,
626 cpup->wc_gdt_limit, (void *)cpup->wc_idt_base,
627 cpup->wc_idt_limit, (long)cpup->wc_ldt,
628 (long)cpup->wc_tr, (long)cpup->wc_kgsbase))
629
630 gdt.base = cpup->wc_gdt_base;
631 gdt.limit = cpup->wc_gdt_limit;
632
633 #if defined(__amd64)
634 code_length = (uint32_t)wc_long_mode_64 -
635 (uint32_t)wc_rm_start;
636 #else
637 code_length = 0;
638 #endif
639
640 init_real_mode_platter(0, code_length, cpup->wc_cr4, gdt);
641
642 #if defined(__amd64)
643 PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
644 (ulong_t)wcpp->rm_cr4, getcr4()))
645
646 PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
647 (ulong_t)wcpp->rm_pdbr, getcr3()))
648
649 PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
650 (ulong_t)wcpp->rm_longmode64_addr))
651
652 PMD(PMD_SX,
653 ("real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64]=%lx\n",
654 (ulong_t)wcpp->rm_temp_gdt[TEMPGDT_KCODE64]))
655 #endif
656
657 PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
658 "kgsbase=%lx\n", (void *)wcpp->rm_gdt_base,
659 wcpp->rm_gdt_lim, (void *)wcpp->rm_idt_base,
660 wcpp->rm_idt_lim, (long)cpup->wc_ldt, (long)cpup->wc_tr,
661 (long)cpup->wc_kgsbase))
662
663 power_req.request_type = PMR_PPM_ENTER_SX;
664 power_req.req.ppm_power_enter_sx_req.sx_state = S3;
665 power_req.req.ppm_power_enter_sx_req.test_point =
666 cpr_test_point;
667 power_req.req.ppm_power_enter_sx_req.wakephys = wakephys;
668
669 PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_ENTER_SX\n", str))
670 PT(PT_PPMCTLOP);
671 (void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
672 &power_req, &ret);
673 PMD(PMD_SX, ("%s: returns %d\n", str, ret))
674
675 /*
676 * If it works, we get control back to the else branch below
677 * If we get control back here, it didn't work.
678 * XXX return EINVAL here?
679 */
680
681 unmap_wakeaddr_1to1(wakephys);
682 intr_restore(saved_intr);
683
684 return (ret);
685 } else {
686 cpr_suspend_succeeded = 1;
687
688 power_req.request_type = PMR_PPM_EXIT_SX;
689 power_req.req.ppm_power_enter_sx_req.sx_state = S3;
690
691 PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_EXIT_SX\n", str))
692 PT(PT_PPMCTLOP);
693 (void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
694 &power_req, &ret);
695 PMD(PMD_SX, ("%s: returns %d\n", str, ret))
696
697 ret = i_cpr_restore_apic(&(wc_other_cpus->wc_apic_state));
698 /*
699 * the restore should never fail, if the saved suceeded
700 */
701 ASSERT(ret == 0);
702
703 i_cpr_platform_free(&(wc_other_cpus->wc_apic_state));
704
705 /*
706 * Enable interrupts on boot cpu.
707 */
708 ASSERT(CPU->cpu_id == i_cpr_bootcpuid());
709 mutex_enter(&cpu_lock);
710 cpu_enable_intr(CPU);
711 mutex_exit(&cpu_lock);
712
713 PT(PT_INTRRESTORE);
714 intr_restore(saved_intr);
715 PT(PT_CPU);
716
717 return (ret);
718 }
719 }
720
721 /*
722 * Stop all other cpu's before halting or rebooting. We pause the cpu's
723 * instead of sending a cross call.
724 * Stolen from sun4/os/mp_states.c
725 */
726
727 static int cpu_are_paused; /* sic */
728
729 void
i_cpr_stop_other_cpus(void)730 i_cpr_stop_other_cpus(void)
731 {
732 mutex_enter(&cpu_lock);
733 if (cpu_are_paused) {
734 mutex_exit(&cpu_lock);
735 return;
736 }
737 pause_cpus(NULL);
738 cpu_are_paused = 1;
739
740 mutex_exit(&cpu_lock);
741 }
742
743 int
i_cpr_is_supported(int sleeptype)744 i_cpr_is_supported(int sleeptype)
745 {
746 extern int cpr_supported_override;
747 extern int cpr_platform_enable;
748 extern int pm_S3_enabled;
749
750 if (sleeptype != CPR_TORAM)
751 return (0);
752
753 /*
754 * The next statement tests if a specific platform has turned off
755 * cpr support.
756 */
757 if (cpr_supported_override)
758 return (0);
759
760 /*
761 * If a platform has specifically turned on cpr support ...
762 */
763 if (cpr_platform_enable)
764 return (1);
765
766 return (pm_S3_enabled);
767 }
768
/*
 * Intentionally empty in this implementation.
 */
void
i_cpr_bitmap_cleanup(void)
{
	/* nothing to do */
}
773
/*
 * Intentionally empty in this implementation.
 */
void
i_cpr_free_memory_resources(void)
{
	/* nothing to do */
}
778
779 /*
780 * Needed only for S3 so far
781 */
782 static int
i_cpr_platform_alloc(psm_state_request_t * req)783 i_cpr_platform_alloc(psm_state_request_t *req)
784 {
785 #ifdef DEBUG
786 char *str = "i_cpr_platform_alloc";
787 #endif
788
789 PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
790
791 if (psm_state == NULL) {
792 PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
793 return (0);
794 }
795
796 req->psr_cmd = PSM_STATE_ALLOC;
797 return ((*psm_state)(req));
798 }
799
800 /*
801 * Needed only for S3 so far
802 */
803 static void
i_cpr_platform_free(psm_state_request_t * req)804 i_cpr_platform_free(psm_state_request_t *req)
805 {
806 #ifdef DEBUG
807 char *str = "i_cpr_platform_free";
808 #endif
809
810 PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
811
812 if (psm_state == NULL) {
813 PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
814 return;
815 }
816
817 req->psr_cmd = PSM_STATE_FREE;
818 (void) (*psm_state)(req);
819 }
820
821 static int
i_cpr_save_apic(psm_state_request_t * req)822 i_cpr_save_apic(psm_state_request_t *req)
823 {
824 #ifdef DEBUG
825 char *str = "i_cpr_save_apic";
826 #endif
827
828 if (psm_state == NULL) {
829 PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
830 return (0);
831 }
832
833 req->psr_cmd = PSM_STATE_SAVE;
834 return ((*psm_state)(req));
835 }
836
837 static int
i_cpr_restore_apic(psm_state_request_t * req)838 i_cpr_restore_apic(psm_state_request_t *req)
839 {
840 #ifdef DEBUG
841 char *str = "i_cpr_restore_apic";
842 #endif
843
844 if (psm_state == NULL) {
845 PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
846 return (0);
847 }
848
849 req->psr_cmd = PSM_STATE_RESTORE;
850 return ((*psm_state)(req));
851 }
852
853
854 /* stop lint complaining about offset not being used in 32bit mode */
855 #if !defined(__amd64)
856 /*ARGSUSED*/
857 #endif
858 static void
init_real_mode_platter(int cpun,uint32_t offset,uint_t cr4,wc_desctbr_t gdt)859 init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
860 {
861 /*LINTED*/
862 rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
863
864 /*
865 * Fill up the real mode platter to make it easy for real mode code to
866 * kick it off. This area should really be one passed by boot to kernel
867 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
868 * have identical physical and virtual address in paged mode.
869 */
870
871 real_mode_platter->rm_pdbr = getcr3();
872 real_mode_platter->rm_cpu = cpun;
873 real_mode_platter->rm_cr4 = cr4;
874
875 real_mode_platter->rm_gdt_base = gdt.base;
876 real_mode_platter->rm_gdt_lim = gdt.limit;
877
878 #if defined(__amd64)
879 if (getcr3() > 0xffffffffUL)
880 panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
881 "located above 4G in physical memory (@ 0x%llx).",
882 (unsigned long long)getcr3());
883
884 /*
885 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
886 * by code in real_mode_start():
887 *
888 * GDT[0]: NULL selector
889 * GDT[1]: 64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
890 *
891 * Clear the IDT as interrupts will be off and a limit of 0 will cause
892 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
893 * a course of action as any other, though it may cause the entire
894 * platform to reset in some cases...
895 */
896 real_mode_platter->rm_temp_gdt[0] = 0ULL;
897 real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
898
899 real_mode_platter->rm_temp_gdt_lim = (ushort_t)
900 (sizeof (real_mode_platter->rm_temp_gdt) - 1);
901 real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
902 (uint32_t)(&((rm_platter_t *)0)->rm_temp_gdt);
903
904 real_mode_platter->rm_temp_idt_lim = 0;
905 real_mode_platter->rm_temp_idt_base = 0;
906
907 /*
908 * Since the CPU needs to jump to protected mode using an identity
909 * mapped address, we need to calculate it here.
910 */
911 real_mode_platter->rm_longmode64_addr = rm_platter_pa + offset;
912 #endif /* __amd64 */
913
914 /* return; */
915 }
916
917 void
i_cpr_start_cpu(void)918 i_cpr_start_cpu(void)
919 {
920
921 struct cpu *cp = CPU;
922
923 char *str = "i_cpr_start_cpu";
924 extern void init_cpu_syscall(struct cpu *cp);
925
926 PMD(PMD_SX, ("%s() called\n", str))
927
928 PMD(PMD_SX, ("%s() #0 cp->cpu_base_spl %d\n", str,
929 cp->cpu_base_spl))
930
931 mutex_enter(&cpu_lock);
932 if (cp == i_cpr_bootcpu()) {
933 mutex_exit(&cpu_lock);
934 PMD(PMD_SX,
935 ("%s() called on bootcpu nothing to do!\n", str))
936 return;
937 }
938 mutex_exit(&cpu_lock);
939
940 /*
941 * We need to Sync PAT with cpu0's PAT. We have to do
942 * this with interrupts disabled.
943 */
944 if (is_x86_feature(x86_featureset, X86FSET_PAT))
945 pat_sync();
946
947 /*
948 * If we use XSAVE, we need to restore XFEATURE_ENABLE_MASK register.
949 */
950 if (fp_save_mech == FP_XSAVE) {
951 setup_xfem();
952 }
953
954 /*
955 * Initialize this CPU's syscall handlers
956 */
957 init_cpu_syscall(cp);
958
959 PMD(PMD_SX, ("%s() #1 cp->cpu_base_spl %d\n", str, cp->cpu_base_spl))
960
961 /*
962 * Do not need to call cpuid_pass2(), cpuid_pass3(), cpuid_pass4() or
963 * init_cpu_info(), since the work that they do is only needed to
964 * be done once at boot time
965 */
966
967
968 mutex_enter(&cpu_lock);
969 CPUSET_ADD(procset, cp->cpu_id);
970 mutex_exit(&cpu_lock);
971
972 PMD(PMD_SX, ("%s() #2 cp->cpu_base_spl %d\n", str,
973 cp->cpu_base_spl))
974
975 if (tsc_gethrtime_enable) {
976 PMD(PMD_SX, ("%s() calling tsc_sync_slave\n", str))
977 tsc_sync_slave();
978 }
979
980 PMD(PMD_SX, ("%s() cp->cpu_id %d, cp->cpu_intr_actv %d\n", str,
981 cp->cpu_id, cp->cpu_intr_actv))
982 PMD(PMD_SX, ("%s() #3 cp->cpu_base_spl %d\n", str,
983 cp->cpu_base_spl))
984
985 (void) spl0(); /* enable interrupts */
986
987 PMD(PMD_SX, ("%s() #4 cp->cpu_base_spl %d\n", str,
988 cp->cpu_base_spl))
989
990 /*
991 * Set up the CPU module for this CPU. This can't be done before
992 * this CPU is made CPU_READY, because we may (in heterogeneous systems)
993 * need to go load another CPU module. The act of attempting to load
994 * a module may trigger a cross-call, which will ASSERT unless this
995 * cpu is CPU_READY.
996 */
997
998 /*
999 * cmi already been init'd (during boot), so do not need to do it again
1000 */
1001 #ifdef PM_REINITMCAONRESUME
1002 if (is_x86_feature(x86_featureset, X86FSET_MCA))
1003 cmi_mca_init();
1004 #endif
1005
1006 PMD(PMD_SX, ("%s() returning\n", str))
1007
1008 /* return; */
1009 }
1010
1011 void
i_cpr_alloc_cpus(void)1012 i_cpr_alloc_cpus(void)
1013 {
1014 char *str = "i_cpr_alloc_cpus";
1015
1016 PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
1017 /*
1018 * we allocate this only when we actually need it to save on
1019 * kernel memory
1020 */
1021
1022 if (wc_other_cpus == NULL) {
1023 wc_other_cpus = kmem_zalloc(max_ncpus * sizeof (wc_cpu_t),
1024 KM_SLEEP);
1025 }
1026
1027 }
1028
1029 void
i_cpr_free_cpus(void)1030 i_cpr_free_cpus(void)
1031 {
1032 int index;
1033 wc_cpu_t *wc_cpu;
1034
1035 if (wc_other_cpus != NULL) {
1036 for (index = 0; index < max_ncpus; index++) {
1037 wc_cpu = wc_other_cpus + index;
1038 if (wc_cpu->wc_saved_stack != NULL) {
1039 kmem_free(wc_cpu->wc_saved_stack,
1040 wc_cpu->wc_saved_stack_size);
1041 }
1042 }
1043
1044 kmem_free((void *) wc_other_cpus,
1045 max_ncpus * sizeof (wc_cpu_t));
1046 wc_other_cpus = NULL;
1047 }
1048 }
1049
1050 /*
1051 * wrapper for acpica_ddi_save_resources()
1052 */
1053 void
i_cpr_save_configuration(dev_info_t * dip)1054 i_cpr_save_configuration(dev_info_t *dip)
1055 {
1056 acpica_ddi_save_resources(dip);
1057 }
1058
1059 /*
1060 * wrapper for acpica_ddi_restore_resources()
1061 */
1062 void
i_cpr_restore_configuration(dev_info_t * dip)1063 i_cpr_restore_configuration(dev_info_t *dip)
1064 {
1065 acpica_ddi_restore_resources(dip);
1066 }
1067
1068 static int
wait_for_set(cpuset_t * set,int who)1069 wait_for_set(cpuset_t *set, int who)
1070 {
1071 int delays;
1072 char *str = "wait_for_set";
1073
1074 for (delays = 0; !CPU_IN_SET(*set, who); delays++) {
1075 if (delays == 500) {
1076 /*
1077 * After five seconds, things are probably
1078 * looking a bit bleak - explain the hang.
1079 */
1080 cmn_err(CE_NOTE, "cpu%d: started, "
1081 "but not running in the kernel yet", who);
1082 PMD(PMD_SX, ("%s() %d cpu started "
1083 "but not running in the kernel yet\n",
1084 str, who))
1085 } else if (delays > 2000) {
1086 /*
1087 * We waited at least 20 seconds, bail ..
1088 */
1089 cmn_err(CE_WARN, "cpu%d: timed out", who);
1090 PMD(PMD_SX, ("%s() %d cpu timed out\n",
1091 str, who))
1092 return (0);
1093 }
1094
1095 /*
1096 * wait at least 10ms, then check again..
1097 */
1098 drv_usecwait(10000);
1099 }
1100
1101 return (1);
1102 }
1103
1104 static void
i_cpr_save_stack(kthread_t * t,wc_cpu_t * wc_cpu)1105 i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu)
1106 {
1107 size_t stack_size; /* size of stack */
1108 caddr_t start = CPR_GET_STACK_START(t); /* stack start */
1109 caddr_t end = CPR_GET_STACK_END(t); /* stack end */
1110
1111 stack_size = (size_t)end - (size_t)start;
1112
1113 if (wc_cpu->wc_saved_stack_size < stack_size) {
1114 if (wc_cpu->wc_saved_stack != NULL) {
1115 kmem_free(wc_cpu->wc_saved_stack,
1116 wc_cpu->wc_saved_stack_size);
1117 }
1118 wc_cpu->wc_saved_stack = kmem_zalloc(stack_size, KM_SLEEP);
1119 wc_cpu->wc_saved_stack_size = stack_size;
1120 }
1121
1122 bcopy(start, wc_cpu->wc_saved_stack, stack_size);
1123 }
1124
1125 void
i_cpr_restore_stack(kthread_t * t,greg_t * save_stack)1126 i_cpr_restore_stack(kthread_t *t, greg_t *save_stack)
1127 {
1128 size_t stack_size; /* size of stack */
1129 caddr_t start = CPR_GET_STACK_START(t); /* stack start */
1130 caddr_t end = CPR_GET_STACK_END(t); /* stack end */
1131
1132 stack_size = (size_t)end - (size_t)start;
1133
1134 bcopy(save_stack, start, stack_size);
1135 }
1136