/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

/*
 * This workaround inhibits prom_printf after the cpus are grabbed.
 * This can be removed when 4154263 is corrected.
 */
#define	Bug_4154263

/*
 * A CPR derivative specifically for sunfire
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/ddi.h>
#define	SUNDDI_IMPL
#include <sys/sunddi.h>
#include <sys/time.h>
#include <sys/kmem.h>
#include <nfs/lm.h>
#include <sys/ddi_impldefs.h>
#include <sys/obpdefs.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/callb.h>
#include <sys/clock.h>
#include <sys/x_call.h>
#include <sys/cpuvar.h>
#include <sys/epm.h>
#include <sys/vfs.h>
#include <sys/fhc.h>
#include <sys/sysctrl.h>
#include <sys/promif.h>
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/cyclic.h>
#include <sys/sunndi.h>

static enum sysctrl_suspend_state {
	SYSC_STATE_BEGIN = 0,
	SYSC_STATE_USER,
	SYSC_STATE_DAEMON,
	SYSC_STATE_DRIVER,
	SYSC_STATE_FULL
} suspend_state;

static int pstate_save;
static uint_t sysctrl_gate[NCPU];
int sysctrl_quiesce_debug = FALSE;
static int sysctrl_skip_kernel_threads = TRUE;

/*
 * sysctrl_skip_user_threads is used to control whether user threads should
 * be suspended.  If sysctrl_skip_user_threads is true, the rest of the
 * flags are not used; if it is false, sysctrl_check_user_stop_result
 * will be used to control whether or not we need to check the suspend
 * result, and sysctrl_allow_blocked_threads will be used to control
 * whether or not we allow suspend to continue if there are blocked
 * threads.  We allow all combinations of sysctrl_check_user_stop_result
 * and sysctrl_allow_blocked_threads, even though it might not make much
 * sense to disallow blocked threads when we don't even check the stop
 * result.
 */
static int sysctrl_skip_user_threads = 0;		/* default to FALSE */
static int sysctrl_check_user_stop_result = 1;		/* default to TRUE */
static int sysctrl_allow_blocked_threads = 1;		/* default to TRUE */
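/*
 * Illustrative only: since the three flags above are kernel globals, they
 * could be tuned at boot from /etc/system (assuming this file is linked
 * into the sysctrl module), for example:
 *
 *	set sysctrl:sysctrl_skip_user_threads = 1
 *	set sysctrl:sysctrl_allow_blocked_threads = 0
 */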

static int sysc_watchdog_suspended;

extern int sysctrl_enable_detach_suspend;
static int sysc_lastval;

#define	DEBUGP(p)	{ if (sysctrl_quiesce_debug) p; }
#define	errp		prom_printf

#define	SYSC_CPU_LOOP_MSEC	1000

static void
sysctrl_grab_cpus(void)
{
	int i;
	cpuset_t others;
	extern cpuset_t cpu_ready_set;
	extern void sysctrl_freeze(void);
	uint64_t sysc_tick_limit;
	uint64_t sysc_current_tick;
	uint64_t sysc_tick_deadline;

	extern u_longlong_t gettick(void);

	for (i = 0; i < NCPU; i++)
		sysctrl_gate[i] = 0;

	/* tell other cpus to go quiet and wait for continue signal */
	others = cpu_ready_set;
	CPUSET_DEL(others, CPU->cpu_id);
	xt_some(others, (xcfunc_t *)sysctrl_freeze, (uint64_t)sysctrl_gate,
	    (uint64_t)(&sysctrl_gate[CPU->cpu_id]));

	sysc_tick_limit =
	    ((uint64_t)sys_tick_freq * SYSC_CPU_LOOP_MSEC) / 1000;

	/* wait for each cpu to check in */
	for (i = 0; i < NCPU; i++) {
		if (!CPU_IN_SET(others, i))
			continue;

		/*
		 * Get the current tick value and calculate the deadline tick
		 */
		sysc_current_tick = gettick();
		sysc_tick_deadline = sysc_current_tick + sysc_tick_limit;

		while (sysctrl_gate[i] == 0) {
			/* If in panic, we just return */
			if (panicstr)
				break;

			/*
			 * Panic the system if the cpu has not responded
			 * by the deadline.
			 */
			sysc_current_tick = gettick();
			if (sysc_current_tick >= sysc_tick_deadline) {
				cmn_err(CE_PANIC, "sysctrl: cpu %d not "
				    "responding to quiesce command", i);
			}
		}
	}

	/* now even our interrupts are disabled -- really quiet now */
	pstate_save = disable_vec_intr();
}
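/*
 * A sketch of the gate handshake driven by sysctrl_grab_cpus() above
 * (sysctrl_freeze itself is implemented elsewhere): each target cpu is
 * cross-called with the gate array and the address of the master's own
 * gate slot.  A target checks in by setting its slot in sysctrl_gate[]
 * and then spins, quiesced, until the master's slot becomes non-zero,
 * which is what sysctrl_release_cpus() below arranges.
 */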
static void
sysctrl_release_cpus(void)
{
	/* let the other cpus go */
	sysctrl_gate[CPU->cpu_id] = 1;

	/* restore our interrupts too */
	enable_vec_intr(pstate_save);
}

static void
sysctrl_stop_intr(void)
{
	mutex_enter(&cpu_lock);
	kpreempt_disable();
	cyclic_suspend();
}

static void
sysctrl_enable_intr(void)
{
	cyclic_resume();
	(void) spl0();
	kpreempt_enable();
	mutex_exit(&cpu_lock);
}

static int
sysctrl_is_real_device(dev_info_t *dip)
{
	struct regspec *regbuf;
	int length;
	int rc;

	if (ddi_get_driver(dip) == NULL)
		return (FALSE);

	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
		return (TRUE);
	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
		return (FALSE);

	/*
	 * now the general case
	 */
	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
	    (caddr_t)&regbuf, &length);
	ASSERT(rc != DDI_PROP_NO_MEMORY);
	if (rc != DDI_PROP_SUCCESS) {
		return (FALSE);
	} else {
		kmem_free(regbuf, length);
		return (TRUE);
	}
}

static dev_info_t *failed_driver;
static char device_path[MAXPATHLEN];

static int
sysctrl_suspend_devices(dev_info_t *dip, sysc_cfga_pkt_t *pkt)
{
	int circ;

	ASSERT(dip == NULL || ddi_get_parent(dip) == NULL ||
	    DEVI_BUSY_OWNED(ddi_get_parent(dip)));

	failed_driver = NULL;
	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
		/*
		 * Hold parent busy while walking child list
		 */
		ndi_devi_enter(dip, &circ);
		if (sysctrl_suspend_devices(ddi_get_child(dip), pkt)) {
			ndi_devi_exit(dip, circ);
			return (ENXIO);
		}
		ndi_devi_exit(dip, circ);

		if (!sysctrl_is_real_device(dip))
			continue;

		/*
		 * Safe to call ddi_pathname() as parent is held busy
		 */
		(void) ddi_pathname(dip, device_path);
		DEBUGP(errp(" suspending device %s\n", device_path));
		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
			DEBUGP(errp(" unable to suspend device %s\n",
			    device_path));

			(void) strncpy(pkt->errbuf, device_path,
			    SYSC_OUTPUT_LEN);
			SYSC_ERR_SET(pkt, SYSC_ERR_SUSPEND);
			ndi_hold_devi(dip);
			failed_driver = dip;
			return (ENXIO);
		}
	}

	return (DDI_SUCCESS);
}
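/*
 * Device resume is the mirror image of the suspend walk above: siblings
 * are resumed in reverse order and a parent is resumed before its
 * children, so the last device suspended is the first one resumed.  If a
 * partial suspend left failed_driver set, devices that were never
 * suspended (those past the failure point) are skipped until the walk
 * reaches failed_driver, where the hold taken at suspend time is dropped
 * and normal resume continues.
 */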
static void
sysctrl_resume_devices(dev_info_t *start, sysc_cfga_pkt_t *pkt)
{
	int circ;
	dev_info_t *dip, *next, *last = NULL;

	ASSERT(start == NULL || ddi_get_parent(start) == NULL ||
	    DEVI_BUSY_OWNED(ddi_get_parent(start)));

	/* attach in reverse device tree order */
	while (last != start) {
		dip = start;
		next = ddi_get_next_sibling(dip);
		while (next != last && dip != failed_driver) {
			dip = next;
			next = ddi_get_next_sibling(dip);
		}
		if (dip == failed_driver) {
			failed_driver = NULL;
			ndi_rele_devi(dip);
		} else if (sysctrl_is_real_device(dip) &&
		    failed_driver == NULL) {
			/*
			 * Parent dip is held busy, so ddi_pathname() can
			 * be safely called.
			 */
			(void) ddi_pathname(dip, device_path);
			DEBUGP(errp(" resuming device %s\n", device_path));
			if (devi_attach(dip, DDI_RESUME) != DDI_SUCCESS) {
				/*
				 * XXX - if in the future we decide not to
				 * panic the system, we need to set the error
				 * SYSC_ERR_RESUME here and also change the
				 * cfgadm platform library.
				 */
				cmn_err(CE_PANIC, "Unable to resume device %s",
				    device_path);
			}
		}
		ndi_devi_enter(dip, &circ);
		sysctrl_resume_devices(ddi_get_child(dip), pkt);
		ndi_devi_exit(dip, circ);

		last = dip;
	}
}

/*
 * True if the thread is virtually stopped.  Similar to CPR_VSTOPPED
 * but from the DR point of view.  These user threads are waiting in
 * the kernel.  Once they complete in the kernel, they will process
 * the stop signal and stop.
 */
#define	SYSCTRL_VSTOPPED(t)		\
	((t)->t_state == TS_SLEEP &&	\
	(t)->t_wchan != NULL &&		\
	(t)->t_astflag &&		\
	((t)->t_proc_flag & TP_CHKPT))
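/*
 * sysctrl_stop_user_threads() below makes up to SYSCTRL_UTSTOP_RETRY
 * passes, each in two phases: first it walks the thread list posting
 * TP_CHKPT plus an AST to every user thread (waking interruptible
 * sleepers so they notice), then it waits and re-walks the list to
 * verify that everything is either CPR-stopped or virtually stopped per
 * SYSCTRL_VSTOPPED above.  The retries are needed to get past fork/exec
 * races; the wait grows quadratically with each pass.
 */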
static int
sysctrl_stop_user_threads(sysc_cfga_pkt_t *pkt)
{
	int count;
	char cache_psargs[PSARGSZ];
	kthread_id_t cache_tp;
	uint_t cache_t_state;
	int bailout;
	pid_t pid;

	extern void add_one_utstop();
	extern void utstop_timedwait(clock_t);
	extern void utstop_init(void);

#define	SYSCTRL_UTSTOP_RETRY	4
#define	SYSCTRL_UTSTOP_WAIT	hz

	if (sysctrl_skip_user_threads)
		return (DDI_SUCCESS);

	utstop_init();

	/* we need to try a few times to get past fork, etc. */
	for (count = 0; count < SYSCTRL_UTSTOP_RETRY; count++) {
		kthread_id_t tp;

		/* walk the entire threadlist */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			mutex_enter(&p->p_lock);
			thread_lock(tp);

			if (tp->t_state == TS_STOPPED) {
				/* add another reason to stop this thread */
				tp->t_schedflag &= ~TS_RESUME;
			} else {
				tp->t_proc_flag |= TP_CHKPT;

				thread_unlock(tp);
				mutex_exit(&p->p_lock);
				add_one_utstop();
				mutex_enter(&p->p_lock);
				thread_lock(tp);

				aston(tp);

				if (tp->t_state == TS_SLEEP &&
				    (tp->t_flag & T_WAKEABLE)) {
					setrun_locked(tp);
				}
			}

			/* grab thread if needed */
			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
				poke_cpu(tp->t_cpu->cpu_id);

			thread_unlock(tp);
			mutex_exit(&p->p_lock);
		}
		mutex_exit(&pidlock);

		/* let everything catch up */
		utstop_timedwait(count * count * SYSCTRL_UTSTOP_WAIT);

		/* now, walk the threadlist again to see if we are done */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next, bailout = 0;
		    bailout == 0 && tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			/*
			 * If this thread didn't stop, and we don't allow
			 * unstopped blocked threads, bail.
			 */
			thread_lock(tp);
			if (!CPR_ISTOPPED(tp) &&
			    !(sysctrl_allow_blocked_threads &&
			    SYSCTRL_VSTOPPED(tp))) {
				/* nope, cache the details for later */
				bcopy(p->p_user.u_psargs, cache_psargs,
				    sizeof (cache_psargs));
				cache_tp = tp;
				cache_t_state = tp->t_state;
				bailout = 1;
				pid = p->p_pidp->pid_id;
			}
			thread_unlock(tp);
		}
		mutex_exit(&pidlock);

		/* were all the threads stopped? */
		if (!bailout)
			break;
	}

	/* were we unable to stop all threads after a few tries? */
	if (bailout) {
		(void) sprintf(pkt->errbuf, "process: %s id: %d state: %x"
		    " thread descriptor: %p", cache_psargs, (int)pid,
		    cache_t_state, (void *)cache_tp);

		SYSC_ERR_SET(pkt, SYSC_ERR_UTHREAD);

		return (ESRCH);
	}

	return (DDI_SUCCESS);
}
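/*
 * Kernel daemons are not stopped one thread at a time.  Instead,
 * sysctrl_stop_kernel_threads() below asks every CB_CL_CPR_DAEMON
 * callback to checkpoint itself and then verifies, thread by thread,
 * that each kernel thread (interrupt threads excepted) is known to the
 * callb framework and stopped.  Either failure returns the offending
 * name in pkt->errbuf.
 */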
static int
sysctrl_stop_kernel_threads(sysc_cfga_pkt_t *pkt)
{
	caddr_t name;
	kthread_id_t tp;

	if (sysctrl_skip_kernel_threads) {
		return (DDI_SUCCESS);
	}

	/*
	 * Note: we unlock the table in resume.
	 * We only need to lock the callback table if we are actually
	 * suspending kernel threads.
	 */
	callb_lock_table();
	if ((name = callb_execute_class(CB_CL_CPR_DAEMON,
	    CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) {
		(void) strncpy(pkt->errbuf, name, SYSC_OUTPUT_LEN);
		SYSC_ERR_SET(pkt, SYSC_ERR_KTHREAD);
		return (EBUSY);
	}

	/*
	 * Verify that all threads are accounted for
	 */
	mutex_enter(&pidlock);
	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
		proc_t *p = ttoproc(tp);

		if (p->p_as != &kas)
			continue;

		if (tp->t_flag & T_INTR_THREAD)
			continue;

		if (!callb_is_stopped(tp, &name)) {
			mutex_exit(&pidlock);
			(void) strncpy(pkt->errbuf, name, SYSC_OUTPUT_LEN);
			SYSC_ERR_SET(pkt, SYSC_ERR_KTHREAD);
			return (EBUSY);
		}
	}

	mutex_exit(&pidlock);
	return (DDI_SUCCESS);
}

static void
sysctrl_start_user_threads(void)
{
	kthread_id_t tp;

	mutex_enter(&pidlock);

	/* walk all threads and release them */
	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
		proc_t *p = ttoproc(tp);

		/* skip kernel threads */
		if (ttoproc(tp)->p_as == &kas)
			continue;

		mutex_enter(&p->p_lock);
		tp->t_proc_flag &= ~TP_CHKPT;
		mutex_exit(&p->p_lock);

		thread_lock(tp);
		if (CPR_ISTOPPED(tp)) {
			/* back on the runq */
			tp->t_schedflag |= TS_RESUME;
			setrun_locked(tp);
		}
		thread_unlock(tp);
	}

	mutex_exit(&pidlock);
}

static void
sysctrl_signal_user(int sig)
{
	struct proc *p;

	mutex_enter(&pidlock);

	for (p = practive; p != NULL; p = p->p_next) {
		/* only user threads */
		if (p->p_exec == NULL || p->p_stat == SZOMB ||
		    p == proc_init || p == ttoproc(curthread))
			continue;

		mutex_enter(&p->p_lock);
		sigtoproc(p, NULL, sig);
		mutex_exit(&p->p_lock);
	}

	mutex_exit(&pidlock);

	/* add a bit of delay */
	delay(hz);
}
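/*
 * Unwind a full or partial suspend.  suspend_state records how far
 * sysctrl_suspend() got; each case below undoes one layer and falls
 * through to the next, so a suspend that failed at any stage can be
 * backed out from exactly that point.
 */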
void
sysctrl_resume(sysc_cfga_pkt_t *pkt)
{
#ifndef Bug_4154263
	DEBUGP(errp("resume system...\n"));
#endif
	switch (suspend_state) {
	case SYSC_STATE_FULL:
		/*
		 * release all the other cpus
		 */
#ifndef Bug_4154263
		DEBUGP(errp("release cpus..."));
#endif
		sysctrl_release_cpus();
		DEBUGP(errp("cpus resumed...\n"));

		/*
		 * If we suspended the hw watchdog at suspend,
		 * re-enable it now.
		 */
		if (sysc_watchdog_suspended) {
			mutex_enter(&tod_lock);
			tod_ops.tod_set_watchdog_timer(
			    watchdog_timeout_seconds);
			mutex_exit(&tod_lock);
		}

		/*
		 * resume callout
		 */
		(void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
		(void) callb_execute_class(CB_CL_CPR_CALLOUT,
		    CB_CODE_CPR_RESUME);
		sysctrl_enable_intr();
		/* FALLTHROUGH */

	case SYSC_STATE_DRIVER:
		/*
		 * resume drivers
		 */
		DEBUGP(errp("resume drivers..."));
		sysctrl_resume_devices(ddi_root_node(), pkt);
		DEBUGP(errp("done\n"));

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case SYSC_STATE_DAEMON:
		/*
		 * resume kernel daemons
		 */
		if (!sysctrl_skip_kernel_threads) {
			DEBUGP(errp("starting kernel daemons..."));
			(void) callb_execute_class(CB_CL_CPR_DAEMON,
			    CB_CODE_CPR_RESUME);
			callb_unlock_table();
		}
		DEBUGP(errp("done\n"));

		/* FALLTHROUGH */

	case SYSC_STATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!sysctrl_skip_user_threads) {
			DEBUGP(errp("starting user threads..."));
			sysctrl_start_user_threads();
			DEBUGP(errp("done\n"));
		}
		/* FALLTHROUGH */

	case SYSC_STATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		DEBUGP(errp("sending SIGTHAW..."));
		sysctrl_signal_user(SIGTHAW);
		DEBUGP(errp("done\n"));
		break;
	}

	/* Restore sysctrl detach/suspend to its original value */
	sysctrl_enable_detach_suspend = sysc_lastval;

	DEBUGP(errp("system state restored\n"));
}
void
sysctrl_suspend_prepare(void)
{
	/*
	 * We use a function, lm_cprsuspend(), in the suspend flow that
	 * is redirected to a module through the modstubs mechanism.
	 * If the module is currently not loaded, modstubs attempts
	 * the modload.  The context this happens in below causes the
	 * module load to block forever, so this function must be called
	 * in the normal system call context ahead of time.
	 */
	(void) modload("misc", "klmmod");
}

int
sysctrl_suspend(sysc_cfga_pkt_t *pkt)
{
	int rc = DDI_SUCCESS;

	/* enable the sysctrl detach/suspend function */
	sysc_lastval = sysctrl_enable_detach_suspend;
	sysctrl_enable_detach_suspend = 1;

	/*
	 * first, stop all user threads
	 */
	DEBUGP(errp("\nstopping user threads..."));
	suspend_state = SYSC_STATE_USER;
	if (((rc = sysctrl_stop_user_threads(pkt)) != DDI_SUCCESS) &&
	    sysctrl_check_user_stop_result) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * now stop daemon activities
	 */
	DEBUGP(errp("stopping kernel daemons..."));
	suspend_state = SYSC_STATE_DAEMON;
	if (rc = sysctrl_stop_kernel_threads(pkt)) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * This sync swaps out all user pages
	 */
	vfs_sync(SYNC_ALL);

	/*
	 * special treatment for lock manager
	 */
	lm_cprsuspend();

	/*
	 * sync the file system in case we never make it back
	 */
	sync();

	/*
	 * now suspend drivers
	 */
	DEBUGP(errp("suspending drivers..."));
	suspend_state = SYSC_STATE_DRIVER;
	if (rc = sysctrl_suspend_devices(ddi_root_node(), pkt)) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * handle the callout table
	 */
	sysctrl_stop_intr();

	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);

	/*
	 * if the watchdog was activated, disable it
	 */
	if (watchdog_activated) {
		mutex_enter(&tod_lock);
		tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
		sysc_watchdog_suspended = 1;
	} else {
		sysc_watchdog_suspended = 0;
	}
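	/*
	 * From here on the machine goes fully quiet.  The DEBUGP calls
	 * after sysctrl_grab_cpus() are compiled out while the
	 * Bug_4154263 workaround (see the top of this file) is in place,
	 * since prom_printf is not safe once the other cpus are grabbed.
	 */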
	/*
	 * finally, grab all cpus
	 */
	DEBUGP(errp("freezing all cpus...\n"));
	suspend_state = SYSC_STATE_FULL;
	sysctrl_grab_cpus();
#ifndef Bug_4154263
	DEBUGP(errp("done\n"));

	DEBUGP(errp("system is quiesced\n"));
#endif

	return (rc);
}
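/*
 * Illustrative call sequence (not taken from this file): a DR flow
 * driving this interface would look roughly like
 *
 *	sysctrl_suspend_prepare();		(normal syscall context)
 *	if (sysctrl_suspend(pkt) == DDI_SUCCESS) {
 *		...perform the board operation...
 *		sysctrl_resume(pkt);
 *	}
 *
 * sysctrl_suspend() backs itself out via sysctrl_resume() on any
 * failure, so the caller resumes only after a successful suspend.
 */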