/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * This workaround inhibits prom_printf after the cpus are grabbed.
 * This can be removed when 4154263 is corrected.
 */
#define	Bug_4154263

/*
 * A CPR derivative specifically for sunfire
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/ddi.h>
#define	SUNDDI_IMPL
#include <sys/sunddi.h>
#include <sys/time.h>
#include <sys/kmem.h>
#include <nfs/lm.h>
#include <sys/ddi_impldefs.h>
#include <sys/obpdefs.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/callb.h>
#include <sys/clock.h>
#include <sys/x_call.h>
#include <sys/cpuvar.h>
#include <sys/epm.h>
#include <sys/vfs.h>
#include <sys/fhc.h>
#include <sys/sysctrl.h>
#include <sys/promif.h>
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/cyclic.h>
#include <sys/sunndi.h>
#include <sys/machsystm.h>	/* NOTE(review): duplicate of earlier include */

/*
 * How far the suspend sequence got before it stopped or completed; the
 * resume path switches on this and falls through so that only the stages
 * actually suspended get resumed.
 */
static enum sysctrl_suspend_state {
	SYSC_STATE_BEGIN = 0,
	SYSC_STATE_USER,
	SYSC_STATE_DAEMON,
	SYSC_STATE_DRIVER,
	SYSC_STATE_FULL } suspend_state;

static int	pstate_save;		/* PSTATE saved by disable_vec_intr() */
static uint_t	sysctrl_gate[NCPU];	/* per-cpu check-in/release flags */
int	sysctrl_quiesce_debug = FALSE;
static int	sysctrl_skip_kernel_threads = TRUE;

/*
 * sysctrl_skip_user_threads is used to control if user threads should
 * be suspended.  If sysctrl_skip_user_threads is true, the rest of the
 * flags are not used; if it is false, sysctrl_check_user_stop_result
 * will be used to control whether or not we need to check suspend
 * result, and sysctrl_allow_blocked_threads will be used to control
 * whether or not we allow suspend to continue if there are blocked
 * threads.  We allow all combinations of sysctrl_check_user_stop_result
 * and sysctrl_allow_block_threads, even though it might not make much
 * sense to not allow block threads when we don't even check stop
 * result.
 */
static int	sysctrl_skip_user_threads = 0;		/* default to FALSE */
static int	sysctrl_check_user_stop_result = 1;	/* default to TRUE */
static int	sysctrl_allow_blocked_threads = 1;	/* default to TRUE */

static int	sysc_watchdog_suspended;	/* nonzero if we disabled hw watchdog */

extern int	sysctrl_enable_detach_suspend;
static int	sysc_lastval;	/* saved value restored at resume */

#define	DEBUGP(p) { if (sysctrl_quiesce_debug) p; }
#define	errp	prom_printf

#define	SYSC_CPU_LOOP_MSEC	1000	/* per-cpu quiesce check-in deadline */

/*
 * Quiesce every other cpu: cross-call them into sysctrl_freeze(), spin
 * until each one checks in through its sysctrl_gate[] slot, then disable
 * our own vector interrupts (saving the state in pstate_save for
 * sysctrl_release_cpus()).  A cpu that fails to check in before the
 * SYSC_CPU_LOOP_MSEC deadline causes a panic, unless the system is
 * already panicking, in which case we just stop waiting.
 */
static void
sysctrl_grab_cpus(void)
{
	int i;
	cpuset_t others;
	extern cpuset_t cpu_ready_set;
	extern void sysctrl_freeze(void);
	uint64_t sysc_tick_limit;
	uint64_t sysc_current_tick;
	uint64_t sysc_tick_deadline;

	extern u_longlong_t gettick(void);

	for (i = 0; i < NCPU; i++)
		sysctrl_gate[i] = 0;

	/* tell other cpus to go quiet and wait for continue signal */
	others = cpu_ready_set;
	CPUSET_DEL(others, CPU->cpu_id);
	/*
	 * Each frozen cpu checks in through sysctrl_gate[] and then spins
	 * on our own gate slot, which sysctrl_release_cpus() sets.
	 */
	xt_some(others, (xcfunc_t *)sysctrl_freeze, (uint64_t)sysctrl_gate,
	    (uint64_t)(&sysctrl_gate[CPU->cpu_id]));

	/* convert the millisecond deadline into %tick units */
	sysc_tick_limit =
	    ((uint64_t)sys_tick_freq * SYSC_CPU_LOOP_MSEC) / 1000;

	/* wait for each cpu to check in */
	for (i = 0; i < NCPU; i++) {
		if (!CPU_IN_SET(others, i))
			continue;

		/*
		 * Get current tick value and calculate the deadline tick
		 */
		sysc_current_tick = gettick();
		sysc_tick_deadline = sysc_current_tick + sysc_tick_limit;

		while (sysctrl_gate[i] == 0) {
			/* If in panic, we just return */
			if (panicstr)
				break;

			/* Panic the system if cpu not responsed by deadline */
			sysc_current_tick = gettick();
			if (sysc_current_tick >= sysc_tick_deadline) {
				cmn_err(CE_PANIC, "sysctrl: cpu %d not "
				    "responding to quiesce command", i);
			}
		}
	}

	/* now even our interrupts are disabled -- really quiet now */
	pstate_save = disable_vec_intr();
}

/*
 * Undo sysctrl_grab_cpus(): open our gate slot so the frozen cpus stop
 * spinning, then restore the interrupt state saved in pstate_save.
 */
static void
sysctrl_release_cpus(void)
{
	/* let the other cpus go */
	sysctrl_gate[CPU->cpu_id] = 1;

	/* restore our interrupts too */
	enable_vec_intr(pstate_save);
}

/*
 * Suspend the cyclic subsystem with cpu_lock held and preemption
 * disabled.  The lock and preemption state are intentionally left held;
 * sysctrl_enable_intr() is the matching release.
 */
static void
sysctrl_stop_intr(void)
{
	mutex_enter(&cpu_lock);
	kpreempt_disable();
	cyclic_suspend();
}

/*
 * Counterpart of sysctrl_stop_intr(): resume cyclics, drop to spl0,
 * and release the preemption/cpu_lock state taken there.
 */
static void
sysctrl_enable_intr(void)
{
	cyclic_resume();
	(void) spl0();
	kpreempt_enable();
	mutex_exit(&cpu_lock);
}

/*
 * Decide whether a dip represents a device that must be suspended and
 * resumed.  Devices with no bound driver are skipped; PMC_NEEDS_SR /
 * PMC_PARENTAL_SR force inclusion and PMC_NO_SR forces exclusion;
 * otherwise any node with a "reg" property counts as a real device.
 */
static int
sysctrl_is_real_device(dev_info_t *dip)
{
	struct regspec *regbuf;
	int length;
	int rc;

	if (ddi_get_driver(dip) == NULL)
		return (FALSE);

	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
		return (TRUE);
	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
		return (FALSE);

	/*
	 * now the general case
	 */
	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
	    (caddr_t)&regbuf, &length);
	ASSERT(rc != DDI_PROP_NO_MEMORY);
	if (rc != DDI_PROP_SUCCESS) {
		return (FALSE);
	} else {
		kmem_free(regbuf, length);
		return (TRUE);
	}
}

/* dip that failed DDI_SUSPEND (held via ndi_hold_devi until resume) */
static dev_info_t *failed_driver;
static char device_path[MAXPATHLEN];

/*
 * Depth-first walk of the device tree from dip, suspending each real
 * device (children before parents).  On the first failure the failing
 * dip is recorded in failed_driver (held), the device path is copied
 * into pkt->errbuf, and ENXIO is returned so the caller can resume what
 * was already suspended.  Returns DDI_SUCCESS (0) when the whole subtree
 * suspended cleanly.
 */
static int
sysctrl_suspend_devices(dev_info_t *dip, sysc_cfga_pkt_t *pkt)
{
	int circ;

	ASSERT(dip == NULL || ddi_get_parent(dip) == NULL ||
	    DEVI_BUSY_OWNED(ddi_get_parent(dip)));

	failed_driver = NULL;
	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
		/*
		 * Hold parent busy while walking child list
		 */
		ndi_devi_enter(dip, &circ);
		if (sysctrl_suspend_devices(ddi_get_child(dip), pkt)) {
			ndi_devi_exit(dip, circ);
			return (ENXIO);
		}
		ndi_devi_exit(dip, circ);

		if (!sysctrl_is_real_device(dip))
			continue;

		/*
		 * Safe to call ddi_pathname() as parent is held busy
		 */
		(void) ddi_pathname(dip, device_path);
		DEBUGP(errp(" suspending device %s\n", device_path));
		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
			DEBUGP(errp(" unable to suspend device %s\n",
			    device_path));

			(void) strncpy(pkt->errbuf, device_path,
			    SYSC_OUTPUT_LEN);
			SYSC_ERR_SET(pkt, SYSC_ERR_SUSPEND);
			ndi_hold_devi(dip);
			failed_driver = dip;
			return (ENXIO);
		}
	}

	return (DDI_SUCCESS);
}

/*
 * Resume devices in reverse of the suspend order (parents before
 * children, last sibling first).  The walk stops resuming at
 * failed_driver -- devices after the failure point were never
 * suspended -- and releases the hold taken on it by
 * sysctrl_suspend_devices().  A device that fails DDI_RESUME panics
 * the system (see the XXX comment below).
 */
static void
sysctrl_resume_devices(dev_info_t *start, sysc_cfga_pkt_t *pkt)
{
	int circ;
	dev_info_t *dip, *next, *last = NULL;

	ASSERT(start == NULL || ddi_get_parent(start) == NULL ||
	    DEVI_BUSY_OWNED(ddi_get_parent(start)));

	/* attach in reverse device tree order */
	while (last != start) {
		dip = start;
		next = ddi_get_next_sibling(dip);
		while (next != last && dip != failed_driver) {
			dip = next;
			next = ddi_get_next_sibling(dip);
		}
		if (dip == failed_driver) {
			failed_driver = NULL;
			ndi_rele_devi(dip);
		} else if (sysctrl_is_real_device(dip) &&
		    failed_driver == NULL) {
			/*
			 * Parent dip is held busy, so ddi_pathname() can
			 * be safely called.
			 */
			(void) ddi_pathname(dip, device_path);
			DEBUGP(errp(" resuming device %s\n", device_path));
			if (devi_attach(dip, DDI_RESUME) != DDI_SUCCESS) {
				/*
				 * XXX - if in the future we decide not to
				 * panic the system, we need to set the error
				 * SYSC_ERR_RESUME here and also change the
				 * cfgadm platform library.
				 */
				cmn_err(CE_PANIC, "Unable to resume device %s",
				    device_path);
			}
		}
		ndi_devi_enter(dip, &circ);
		sysctrl_resume_devices(ddi_get_child(dip), pkt);
		ndi_devi_exit(dip, circ);

		last = dip;
	}
}

/*
 * True if thread is virtually stopped.  Similar to CPR_VSTOPPED
 * but from DR point of view.  These user threads are waiting in
 * the kernel.  Once they complete in the kernel, they will process
 * the stop signal and stop.
 */
#define	SYSCTRL_VSTOPPED(t)		\
	((t)->t_state == TS_SLEEP &&	\
	(t)->t_wchan != NULL &&		\
	(t)->t_astflag &&		\
	((t)->t_proc_flag & TP_CHKPT))

/*
 * Ask every user thread to stop (TP_CHKPT + AST, waking sleeping
 * threads so they notice), then wait and re-check, retrying up to
 * SYSCTRL_UTSTOP_RETRY times with a growing wait.  Returns DDI_SUCCESS
 * when all user threads are stopped (or virtually stopped, if
 * sysctrl_allow_blocked_threads permits); otherwise fills pkt->errbuf
 * with the offending process's details and returns ESRCH.
 */
static int
sysctrl_stop_user_threads(sysc_cfga_pkt_t *pkt)
{
	int		count;
	char		cache_psargs[PSARGSZ];
	kthread_id_t	cache_tp;
	uint_t		cache_t_state;
	int		bailout;
	pid_t		pid;

	extern void add_one_utstop();
	extern void utstop_timedwait(clock_t);
	extern void utstop_init(void);

#define	SYSCTRL_UTSTOP_RETRY	4
#define	SYSCTRL_UTSTOP_WAIT	hz

	if (sysctrl_skip_user_threads)
		return (DDI_SUCCESS);

	utstop_init();

	/* we need to try a few times to get past fork, etc. */
	for (count = 0; count < SYSCTRL_UTSTOP_RETRY; count++) {
		kthread_id_t tp;

		/* walk the entire threadlist */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			mutex_enter(&p->p_lock);
			thread_lock(tp);

			if (tp->t_state == TS_STOPPED) {
				/* add another reason to stop this thread */
				tp->t_schedflag &= ~TS_RESUME;
			} else {
				tp->t_proc_flag |= TP_CHKPT;

				/*
				 * add_one_utstop() must be called without
				 * the thread or process locks held, so
				 * drop and reacquire them around it.
				 */
				thread_unlock(tp);
				mutex_exit(&p->p_lock);
				add_one_utstop();
				mutex_enter(&p->p_lock);
				thread_lock(tp);

				aston(tp);

				if (ISWAKEABLE(tp) || ISWAITING(tp)) {
					setrun_locked(tp);
				}

			}

			/* grab thread if needed */
			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
				poke_cpu(tp->t_cpu->cpu_id);


			thread_unlock(tp);
			mutex_exit(&p->p_lock);
		}
		mutex_exit(&pidlock);


		/* let everything catch up */
		utstop_timedwait(count * count * SYSCTRL_UTSTOP_WAIT);


		/* now, walk the threadlist again to see if we are done */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next, bailout = 0;
		    bailout == 0 && tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			/*
			 * If this thread didn't stop, and we don't allow
			 * unstopped blocked threads, bail.
			 */
			/* did this thread stop? */
			thread_lock(tp);
			if (!CPR_ISTOPPED(tp) &&
			    !(sysctrl_allow_blocked_threads &&
			    SYSCTRL_VSTOPPED(tp))) {

				/* nope, cache the details for later */
				bcopy(p->p_user.u_psargs, cache_psargs,
				    sizeof (cache_psargs));
				cache_tp = tp;
				cache_t_state = tp->t_state;
				bailout = 1;
				pid = p->p_pidp->pid_id;
			}
			thread_unlock(tp);
		}
		mutex_exit(&pidlock);

		/* were all the threads stopped? */
		if (!bailout)
			break;
	}

	/* were we unable to stop all threads after a few tries? */
	if (bailout) {
		(void) sprintf(pkt->errbuf, "process: %s id: %d state: %x"
		    " thread descriptor: %p",
		    cache_psargs, (int)pid, cache_t_state,
		    (void *)cache_tp);

		SYSC_ERR_SET(pkt, SYSC_ERR_UTHREAD);

		return (ESRCH);
	}

	return (DDI_SUCCESS);
}

/*
 * Checkpoint the CB_CL_CPR_DAEMON callback class (stopping kernel
 * daemons) and verify every kernel thread is accounted for.  Returns
 * DDI_SUCCESS, or EBUSY with the offending thread's name in
 * pkt->errbuf.  No-op (DDI_SUCCESS) when sysctrl_skip_kernel_threads
 * is set.  On success the callback table is left locked; the resume
 * path unlocks it.
 */
static int
sysctrl_stop_kernel_threads(sysc_cfga_pkt_t *pkt)
{
	caddr_t	name;
	kthread_id_t tp;

	if (sysctrl_skip_kernel_threads) {
		return (DDI_SUCCESS);
	}

	/*
	 * Note: we unlock the table in resume.
	 * We only need to lock the callback table if we are actually
	 * suspending kernel threads.
	 */
	callb_lock_table();
	if ((name = callb_execute_class(CB_CL_CPR_DAEMON,
	    CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) {

		(void) strncpy(pkt->errbuf, name, SYSC_OUTPUT_LEN);
		SYSC_ERR_SET(pkt, SYSC_ERR_KTHREAD);
		return (EBUSY);
	}

	/*
	 * Verify that all threads are accounted for
	 */
	mutex_enter(&pidlock);
	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
		proc_t *p = ttoproc(tp);

		if (p->p_as != &kas)
			continue;

		if (tp->t_flag & T_INTR_THREAD)
			continue;

		if (!callb_is_stopped(tp, &name)) {
			mutex_exit(&pidlock);
			(void) strncpy(pkt->errbuf, name, SYSC_OUTPUT_LEN);
			SYSC_ERR_SET(pkt, SYSC_ERR_KTHREAD);
			return (EBUSY);
		}
	}

	mutex_exit(&pidlock);
	return (DDI_SUCCESS);
}

/*
 * Undo sysctrl_stop_user_threads(): clear TP_CHKPT on every user
 * thread and put the checkpoint-stopped ones back on the run queue.
 */
static void
sysctrl_start_user_threads(void)
{
	kthread_id_t tp;

	mutex_enter(&pidlock);

	/* walk all threads and release them */
	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
		proc_t *p = ttoproc(tp);

		/* skip kernel threads */
		if (ttoproc(tp)->p_as == &kas)
			continue;

		mutex_enter(&p->p_lock);
		tp->t_proc_flag &= ~TP_CHKPT;
		mutex_exit(&p->p_lock);

		thread_lock(tp);
		if (CPR_ISTOPPED(tp)) {
			/* back on the runq */
			tp->t_schedflag |= TS_RESUME;
			setrun_locked(tp);
		}
		thread_unlock(tp);
	}

	mutex_exit(&pidlock);
}

/*
 * Post signal sig to every user process (skipping zombies, init, and
 * the calling process), then delay one second to give them a chance
 * to act on it.
 */
static void
sysctrl_signal_user(int sig)
{
	struct proc *p;

	mutex_enter(&pidlock);

	for (p = practive; p != NULL; p = p->p_next) {
		/* only user threads */
		if (p->p_exec == NULL || p->p_stat == SZOMB ||
		    p == proc_init || p == ttoproc(curthread))
			continue;

		mutex_enter(&p->p_lock);
		sigtoproc(p, NULL, sig);
		mutex_exit(&p->p_lock);
	}

	mutex_exit(&pidlock);

	/* add a bit of delay */
	delay(hz);
}

/*
 * Resume the system after (a possibly partial) sysctrl_suspend().
 * Switches on suspend_state and deliberately falls through the cases
 * so that only the stages that were actually suspended are resumed,
 * in reverse order: cpus/interrupts, drivers, kernel daemons, user
 * threads, and finally SIGTHAW to whoever cares.
 */
void
sysctrl_resume(sysc_cfga_pkt_t *pkt)
{
#ifndef Bug_4154263
	DEBUGP(errp("resume system...\n"));
#endif
	switch (suspend_state) {
	case SYSC_STATE_FULL:
		/*
		 * release all the other cpus
		 */
#ifndef Bug_4154263
		DEBUGP(errp("release cpus..."));
#endif
		/*
		 * Prevent false alarm in tod_validate() due to tod
		 * value change between suspend and resume
		 */
		mutex_enter(&tod_lock);
		tod_fault_reset();
		mutex_exit(&tod_lock);

		sysctrl_release_cpus();
		DEBUGP(errp("cpus resumed...\n"));

		/*
		 * If we suspended hw watchdog at suspend,
		 * re-enable it now.
		 */
		if (sysc_watchdog_suspended) {
			mutex_enter(&tod_lock);
			tod_ops.tod_set_watchdog_timer(
			    watchdog_timeout_seconds);
			mutex_exit(&tod_lock);
		}

		/*
		 * resume callout
		 */
		(void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
		(void) callb_execute_class(CB_CL_CPR_CALLOUT,
		    CB_CODE_CPR_RESUME);
		sysctrl_enable_intr();
		/* FALLTHROUGH */

	case SYSC_STATE_DRIVER:
		/*
		 * resume drivers
		 */
		DEBUGP(errp("resume drivers..."));
		sysctrl_resume_devices(ddi_root_node(), pkt);
		DEBUGP(errp("done\n"));

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case SYSC_STATE_DAEMON:
		/*
		 * resume kernel daemons
		 */
		if (!sysctrl_skip_kernel_threads) {
			DEBUGP(errp("starting kernel daemons..."));
			(void) callb_execute_class(CB_CL_CPR_DAEMON,
			    CB_CODE_CPR_RESUME);
			/* drop the table lock taken by stop_kernel_threads */
			callb_unlock_table();
		}
		DEBUGP(errp("done\n"));

		/* FALLTHROUGH */

	case SYSC_STATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!sysctrl_skip_user_threads) {
			DEBUGP(errp("starting user threads..."));
			sysctrl_start_user_threads();
			DEBUGP(errp("done\n"));
		}
		/* FALLTHROUGH */

	case SYSC_STATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		DEBUGP(errp("sending SIGTHAW..."));
		sysctrl_signal_user(SIGTHAW);
		DEBUGP(errp("done\n"));
		break;
	}

	/* Restore sysctrl detach/suspend to its original value */
	sysctrl_enable_detach_suspend = sysc_lastval;

	DEBUGP(errp("system state restored\n"));
}

/*
 * Pre-load the klmmod module while still in normal system-call
 * context; see the comment below for why this cannot wait until
 * the suspend path itself needs lm_cprsuspend().
 */
void
sysctrl_suspend_prepare(void)
{
	/*
	 * We use a function, lm_cprsuspend(), in the suspend flow that
	 * is redirected to a module through the modstubs mechanism.
	 * If the module is currently not loaded, modstubs attempts
	 * the modload.  The context this happens in below causes the
	 * module load to block forever, so this function must be called
	 * in the normal system call context ahead of time.
	 */
	(void) modload("misc", "klmmod");
}

/*
 * Quiesce the system for sunfire DR: stop user threads, stop kernel
 * daemons, sync/suspend the lock manager and filesystems, suspend all
 * drivers, checkpoint callouts, disable the hw watchdog, and finally
 * freeze all other cpus.  suspend_state records how far we got; on any
 * failure sysctrl_resume(pkt) is called to unwind exactly that much and
 * the errno-style failure code is returned.  Returns DDI_SUCCESS (0)
 * with the system fully quiesced on success.
 */
int
sysctrl_suspend(sysc_cfga_pkt_t *pkt)
{
	int rc = DDI_SUCCESS;

	/* enable sysctrl detach/suspend function */
	sysc_lastval = sysctrl_enable_detach_suspend;
	sysctrl_enable_detach_suspend = 1;

	/*
	 * first, stop all user threads
	 */
	DEBUGP(errp("\nstopping user threads..."));
	suspend_state = SYSC_STATE_USER;
	if (((rc = sysctrl_stop_user_threads(pkt)) != DDI_SUCCESS) &&
	    sysctrl_check_user_stop_result) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * now stop daemon activities
	 */
	DEBUGP(errp("stopping kernel daemons..."));
	suspend_state = SYSC_STATE_DAEMON;
	if (rc = sysctrl_stop_kernel_threads(pkt)) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * This sync swap out all user pages
	 */
	vfs_sync(SYNC_ALL);

	/*
	 * special treatment for lock manager
	 */
	lm_cprsuspend();

	/*
	 * sync the file system in case we never make it back
	 */
	sync();

	/*
	 * now suspend drivers
	 */
	DEBUGP(errp("suspending drivers..."));
	suspend_state = SYSC_STATE_DRIVER;
	if (rc = sysctrl_suspend_devices(ddi_root_node(), pkt)) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * handle the callout table
	 */
	sysctrl_stop_intr();

	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);

	/*
	 * if watchdog was activated, disable it
	 */
	if (watchdog_activated) {
		mutex_enter(&tod_lock);
		tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
		sysc_watchdog_suspended = 1;
	} else {
		sysc_watchdog_suspended = 0;
	}

	/*
	 * finally, grab all cpus
	 */
	DEBUGP(errp("freezing all cpus...\n"));
	suspend_state = SYSC_STATE_FULL;
	sysctrl_grab_cpus();
#ifndef Bug_4154263
	DEBUGP(errp("done\n"));

	DEBUGP(errp("system is quiesced\n"));
#endif

	return (rc);
}