xref: /onnv-gate/usr/src/uts/common/cpr/cpr_uthread.c (revision 7240:c4957ab6a78e)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
53446Smrj  * Common Development and Distribution License (the "License").
63446Smrj  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*7240Srh87107  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate #include <sys/types.h>
290Sstevel@tonic-gate #include <sys/thread.h>
300Sstevel@tonic-gate #include <sys/conf.h>
310Sstevel@tonic-gate #include <sys/cpuvar.h>
320Sstevel@tonic-gate #include <sys/cpr.h>
330Sstevel@tonic-gate #include <sys/user.h>
340Sstevel@tonic-gate #include <sys/cmn_err.h>
350Sstevel@tonic-gate #include <sys/callb.h>
360Sstevel@tonic-gate 
370Sstevel@tonic-gate extern void utstop_init(void);
380Sstevel@tonic-gate extern void add_one_utstop(void);
390Sstevel@tonic-gate extern void utstop_timedwait(long ticks);
400Sstevel@tonic-gate 
410Sstevel@tonic-gate static void cpr_stop_user(int);
420Sstevel@tonic-gate static int cpr_check_user_threads(void);
430Sstevel@tonic-gate 
440Sstevel@tonic-gate /*
450Sstevel@tonic-gate  * CPR user thread related support routines
460Sstevel@tonic-gate  */
470Sstevel@tonic-gate void
cpr_signal_user(int sig)480Sstevel@tonic-gate cpr_signal_user(int sig)
490Sstevel@tonic-gate {
500Sstevel@tonic-gate /*
510Sstevel@tonic-gate  * The signal SIGTHAW and SIGFREEZE cannot be sent to every thread yet
520Sstevel@tonic-gate  * since openwin is catching every signal and default action is to exit.
530Sstevel@tonic-gate  * We also need to implement the true SIGFREEZE and SIGTHAW to stop threads.
540Sstevel@tonic-gate  */
550Sstevel@tonic-gate 	struct proc *p;
560Sstevel@tonic-gate 
570Sstevel@tonic-gate 	mutex_enter(&pidlock);
580Sstevel@tonic-gate 
590Sstevel@tonic-gate 	for (p = practive; p; p = p->p_next) {
600Sstevel@tonic-gate 		/* only user threads */
610Sstevel@tonic-gate 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
625295Srandyf 		    p == proc_init || p == ttoproc(curthread))
630Sstevel@tonic-gate 			continue;
640Sstevel@tonic-gate 
650Sstevel@tonic-gate 		mutex_enter(&p->p_lock);
660Sstevel@tonic-gate 		sigtoproc(p, NULL, sig);
670Sstevel@tonic-gate 		mutex_exit(&p->p_lock);
680Sstevel@tonic-gate 	}
690Sstevel@tonic-gate 	mutex_exit(&pidlock);
700Sstevel@tonic-gate 
710Sstevel@tonic-gate 	DELAY(MICROSEC);
720Sstevel@tonic-gate }
730Sstevel@tonic-gate 
740Sstevel@tonic-gate /* max wait time for user thread stop */
750Sstevel@tonic-gate #define	CPR_UTSTOP_WAIT		hz
760Sstevel@tonic-gate #define	CPR_UTSTOP_RETRY	4
770Sstevel@tonic-gate static int count;
780Sstevel@tonic-gate 
790Sstevel@tonic-gate int
cpr_stop_user_threads()800Sstevel@tonic-gate cpr_stop_user_threads()
810Sstevel@tonic-gate {
820Sstevel@tonic-gate 	utstop_init();
830Sstevel@tonic-gate 
840Sstevel@tonic-gate 	count = 0;
850Sstevel@tonic-gate 	do {
860Sstevel@tonic-gate 		if (++count > CPR_UTSTOP_RETRY)
870Sstevel@tonic-gate 			return (ESRCH);
880Sstevel@tonic-gate 		cpr_stop_user(count * count * CPR_UTSTOP_WAIT);
890Sstevel@tonic-gate 	} while (cpr_check_user_threads() &&
905295Srandyf 	    (count < CPR_UTSTOP_RETRY || CPR->c_fcn != AD_CPR_FORCE));
910Sstevel@tonic-gate 
920Sstevel@tonic-gate 	return (0);
930Sstevel@tonic-gate }
940Sstevel@tonic-gate 
950Sstevel@tonic-gate /*
960Sstevel@tonic-gate  * This routine tries to stop all user threads before we get rid of all
970Sstevel@tonic-gate  * its pages.It goes through allthreads list and set the TP_CHKPT flag
980Sstevel@tonic-gate  * for all user threads and make them runnable. If all of the threads
990Sstevel@tonic-gate  * can be stopped within the max wait time, CPR will proceed. Otherwise
1000Sstevel@tonic-gate  * CPR is aborted after a few of similiar retries.
1010Sstevel@tonic-gate  */
1020Sstevel@tonic-gate static void
cpr_stop_user(int wait)1030Sstevel@tonic-gate cpr_stop_user(int wait)
1040Sstevel@tonic-gate {
1050Sstevel@tonic-gate 	kthread_id_t tp;
1060Sstevel@tonic-gate 	proc_t *p;
1070Sstevel@tonic-gate 
1080Sstevel@tonic-gate 	/* The whole loop below needs to be atomic */
1090Sstevel@tonic-gate 	mutex_enter(&pidlock);
1100Sstevel@tonic-gate 
1110Sstevel@tonic-gate 	/* faster this way */
1120Sstevel@tonic-gate 	tp = curthread->t_next;
1130Sstevel@tonic-gate 	do {
1140Sstevel@tonic-gate 		/* kernel threads will be handled later */
1150Sstevel@tonic-gate 		p = ttoproc(tp);
1160Sstevel@tonic-gate 		if (p->p_as == &kas || p->p_stat == SZOMB)
1170Sstevel@tonic-gate 			continue;
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate 		/*
1200Sstevel@tonic-gate 		 * If the thread is stopped (by CPR) already, do nothing;
1210Sstevel@tonic-gate 		 * if running, mark TP_CHKPT;
1220Sstevel@tonic-gate 		 * if sleeping normally, mark TP_CHKPT and setrun;
1230Sstevel@tonic-gate 		 * if sleeping non-interruptable, mark TP_CHKPT only for now;
1240Sstevel@tonic-gate 		 * if sleeping with t_wchan0 != 0 etc, virtually stopped,
1250Sstevel@tonic-gate 		 * do nothing.
1260Sstevel@tonic-gate 		 */
1270Sstevel@tonic-gate 
1280Sstevel@tonic-gate 		/* p_lock is needed for modifying t_proc_flag */
1290Sstevel@tonic-gate 		mutex_enter(&p->p_lock);
1300Sstevel@tonic-gate 		thread_lock(tp); /* needed to check CPR_ISTOPPED */
1310Sstevel@tonic-gate 
1320Sstevel@tonic-gate 		if (tp->t_state == TS_STOPPED) {
1330Sstevel@tonic-gate 			/*
1340Sstevel@tonic-gate 			 * if already stopped by other reasons, add this new
1350Sstevel@tonic-gate 			 * reason to it.
1360Sstevel@tonic-gate 			 */
1370Sstevel@tonic-gate 			if (tp->t_schedflag & TS_RESUME)
1380Sstevel@tonic-gate 				tp->t_schedflag &= ~TS_RESUME;
1390Sstevel@tonic-gate 		} else {
1400Sstevel@tonic-gate 
1410Sstevel@tonic-gate 			tp->t_proc_flag |= TP_CHKPT;
1420Sstevel@tonic-gate 
1430Sstevel@tonic-gate 			thread_unlock(tp);
1440Sstevel@tonic-gate 			mutex_exit(&p->p_lock);
1450Sstevel@tonic-gate 			add_one_utstop();
1460Sstevel@tonic-gate 			mutex_enter(&p->p_lock);
1470Sstevel@tonic-gate 			thread_lock(tp);
1480Sstevel@tonic-gate 
1490Sstevel@tonic-gate 			aston(tp);
1500Sstevel@tonic-gate 
1513792Sakolb 			if (ISWAKEABLE(tp) || ISWAITING(tp)) {
1520Sstevel@tonic-gate 				setrun_locked(tp);
1530Sstevel@tonic-gate 			}
1540Sstevel@tonic-gate 		}
1550Sstevel@tonic-gate 		/*
1560Sstevel@tonic-gate 		 * force the thread into the kernel if it is not already there.
1570Sstevel@tonic-gate 		 */
1580Sstevel@tonic-gate 		if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
1590Sstevel@tonic-gate 			poke_cpu(tp->t_cpu->cpu_id);
1600Sstevel@tonic-gate 		thread_unlock(tp);
1610Sstevel@tonic-gate 		mutex_exit(&p->p_lock);
1620Sstevel@tonic-gate 
1630Sstevel@tonic-gate 	} while ((tp = tp->t_next) != curthread);
1640Sstevel@tonic-gate 	mutex_exit(&pidlock);
1650Sstevel@tonic-gate 
1660Sstevel@tonic-gate 	utstop_timedwait(wait);
1670Sstevel@tonic-gate }
1680Sstevel@tonic-gate 
1690Sstevel@tonic-gate /*
1700Sstevel@tonic-gate  * Checks and makes sure all user threads are stopped
1710Sstevel@tonic-gate  */
1720Sstevel@tonic-gate static int
cpr_check_user_threads()1730Sstevel@tonic-gate cpr_check_user_threads()
1740Sstevel@tonic-gate {
1750Sstevel@tonic-gate 	kthread_id_t tp;
1760Sstevel@tonic-gate 	int rc = 0;
1770Sstevel@tonic-gate 
1780Sstevel@tonic-gate 	mutex_enter(&pidlock);
1790Sstevel@tonic-gate 	tp = curthread->t_next;
1800Sstevel@tonic-gate 	do {
1810Sstevel@tonic-gate 		if (ttoproc(tp)->p_as == &kas || ttoproc(tp)->p_stat == SZOMB)
1820Sstevel@tonic-gate 			continue;
1830Sstevel@tonic-gate 
1840Sstevel@tonic-gate 		thread_lock(tp);
1850Sstevel@tonic-gate 		/*
1860Sstevel@tonic-gate 		 * make sure that we are off all the queues and in a stopped
1870Sstevel@tonic-gate 		 * state.
1880Sstevel@tonic-gate 		 */
1890Sstevel@tonic-gate 		if (!CPR_ISTOPPED(tp)) {
1900Sstevel@tonic-gate 			thread_unlock(tp);
1910Sstevel@tonic-gate 			mutex_exit(&pidlock);
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate 			if (count == CPR_UTSTOP_RETRY) {
1943446Smrj 			CPR_DEBUG(CPR_DEBUG1, "Suspend failed: "
1953446Smrj 			    "cannot stop uthread\n");
1960Sstevel@tonic-gate 			cpr_err(CE_WARN, "Suspend cannot stop "
1975295Srandyf 			    "process %s (%p:%x).",
1985295Srandyf 			    ttoproc(tp)->p_user.u_psargs, (void *)tp,
1995295Srandyf 			    tp->t_state);
2000Sstevel@tonic-gate 			cpr_err(CE_WARN, "Process may be waiting for"
2015295Srandyf 			    " network request, please try again.");
2020Sstevel@tonic-gate 			}
2030Sstevel@tonic-gate 
2043446Smrj 			CPR_DEBUG(CPR_DEBUG2, "cant stop t=%p state=%x pfg=%x "
205*7240Srh87107 			    "sched=%x\n", (void *)tp, tp->t_state,
206*7240Srh87107 			    tp->t_proc_flag, tp->t_schedflag);
2073446Smrj 			CPR_DEBUG(CPR_DEBUG2, "proc %p state=%x pid=%d\n",
208*7240Srh87107 			    (void *)ttoproc(tp), ttoproc(tp)->p_stat,
2093446Smrj 			    ttoproc(tp)->p_pidp->pid_id);
2100Sstevel@tonic-gate 			return (1);
2110Sstevel@tonic-gate 		}
2120Sstevel@tonic-gate 		thread_unlock(tp);
2130Sstevel@tonic-gate 
2140Sstevel@tonic-gate 	} while ((tp = tp->t_next) != curthread && rc == 0);
2150Sstevel@tonic-gate 
2160Sstevel@tonic-gate 	mutex_exit(&pidlock);
2170Sstevel@tonic-gate 	return (0);
2180Sstevel@tonic-gate }
2190Sstevel@tonic-gate 
2200Sstevel@tonic-gate 
2210Sstevel@tonic-gate /*
2220Sstevel@tonic-gate  * start all threads that were stopped for checkpoint.
2230Sstevel@tonic-gate  */
2240Sstevel@tonic-gate void
cpr_start_user_threads()2250Sstevel@tonic-gate cpr_start_user_threads()
2260Sstevel@tonic-gate {
2270Sstevel@tonic-gate 	kthread_id_t tp;
2280Sstevel@tonic-gate 	proc_t *p;
2290Sstevel@tonic-gate 
2300Sstevel@tonic-gate 	mutex_enter(&pidlock);
2310Sstevel@tonic-gate 	tp = curthread->t_next;
2320Sstevel@tonic-gate 	do {
2330Sstevel@tonic-gate 		p = ttoproc(tp);
2340Sstevel@tonic-gate 		/*
2350Sstevel@tonic-gate 		 * kernel threads are callback'ed rather than setrun.
2360Sstevel@tonic-gate 		 */
2370Sstevel@tonic-gate 		if (ttoproc(tp)->p_as == &kas) continue;
2380Sstevel@tonic-gate 		/*
2390Sstevel@tonic-gate 		 * t_proc_flag should have been cleared. Just to make sure here
2400Sstevel@tonic-gate 		 */
2410Sstevel@tonic-gate 		mutex_enter(&p->p_lock);
2420Sstevel@tonic-gate 		tp->t_proc_flag &= ~TP_CHKPT;
2430Sstevel@tonic-gate 		mutex_exit(&p->p_lock);
2440Sstevel@tonic-gate 
2450Sstevel@tonic-gate 		thread_lock(tp);
2460Sstevel@tonic-gate 		if (CPR_ISTOPPED(tp)) {
2470Sstevel@tonic-gate 
2480Sstevel@tonic-gate 			/*
2490Sstevel@tonic-gate 			 * put it back on the runq
2500Sstevel@tonic-gate 			 */
2510Sstevel@tonic-gate 			tp->t_schedflag |= TS_RESUME;
2520Sstevel@tonic-gate 			setrun_locked(tp);
2530Sstevel@tonic-gate 		}
2540Sstevel@tonic-gate 		thread_unlock(tp);
2550Sstevel@tonic-gate 		/*
2560Sstevel@tonic-gate 		 * DEBUG - Keep track of current and next thread pointer.
2570Sstevel@tonic-gate 		 */
2580Sstevel@tonic-gate 	} while ((tp = tp->t_next) != curthread);
2590Sstevel@tonic-gate 
2600Sstevel@tonic-gate 	mutex_exit(&pidlock);
2610Sstevel@tonic-gate }
2620Sstevel@tonic-gate 
2630Sstevel@tonic-gate 
2640Sstevel@tonic-gate /*
2650Sstevel@tonic-gate  * re/start kernel threads
2660Sstevel@tonic-gate  */
2670Sstevel@tonic-gate void
cpr_start_kernel_threads(void)2680Sstevel@tonic-gate cpr_start_kernel_threads(void)
2690Sstevel@tonic-gate {
2703446Smrj 	CPR_DEBUG(CPR_DEBUG1, "starting kernel daemons...");
2710Sstevel@tonic-gate 	(void) callb_execute_class(CB_CL_CPR_DAEMON, CB_CODE_CPR_RESUME);
2723446Smrj 	CPR_DEBUG(CPR_DEBUG1, "done\n");
2730Sstevel@tonic-gate 
2740Sstevel@tonic-gate 	/* see table lock below */
2750Sstevel@tonic-gate 	callb_unlock_table();
2760Sstevel@tonic-gate }
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate 
2790Sstevel@tonic-gate /*
2800Sstevel@tonic-gate  * Stop kernel threads by using the callback mechanism.  If any thread
2810Sstevel@tonic-gate  * cannot be stopped, return failure.
2820Sstevel@tonic-gate  */
2830Sstevel@tonic-gate int
cpr_stop_kernel_threads(void)2840Sstevel@tonic-gate cpr_stop_kernel_threads(void)
2850Sstevel@tonic-gate {
2860Sstevel@tonic-gate 	caddr_t	name;
2870Sstevel@tonic-gate 
2880Sstevel@tonic-gate 	callb_lock_table();	/* Note: we unlock the table in resume. */
2890Sstevel@tonic-gate 
2903446Smrj 	CPR_DEBUG(CPR_DEBUG1, "stopping kernel daemons...");
2910Sstevel@tonic-gate 	if ((name = callb_execute_class(CB_CL_CPR_DAEMON,
2920Sstevel@tonic-gate 	    CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) {
2930Sstevel@tonic-gate 		cpr_err(CE_WARN,
2940Sstevel@tonic-gate 		    "Could not stop \"%s\" kernel thread.  "
2950Sstevel@tonic-gate 		    "Please try again later.", name);
2960Sstevel@tonic-gate 		return (EBUSY);
2970Sstevel@tonic-gate 	}
2980Sstevel@tonic-gate 
2995295Srandyf 	CPR_DEBUG(CPR_DEBUG1, ("done\n"));
3005295Srandyf 	return (0);
3015295Srandyf }
3025295Srandyf 
3035295Srandyf /*
3045295Srandyf  * Check to see that kernel threads are stopped.
3055295Srandyf  * This should be called while CPU's are paused, and the caller is
3065295Srandyf  * effectively running single user, or else we are virtually guaranteed
3075295Srandyf  * to fail.  The routine should not ASSERT on the paused state or spl
3085295Srandyf  * level, as there may be a use for this to verify that things are running
3095295Srandyf  * again.
3105295Srandyf  */
3115295Srandyf int
cpr_threads_are_stopped(void)3125295Srandyf cpr_threads_are_stopped(void)
3135295Srandyf {
3145295Srandyf 	caddr_t	name;
3155295Srandyf 	kthread_id_t tp;
3165295Srandyf 	proc_t *p;
3175295Srandyf 
3180Sstevel@tonic-gate 	/*
3190Sstevel@tonic-gate 	 * We think we stopped all the kernel threads.  Just in case
3200Sstevel@tonic-gate 	 * someone is not playing by the rules, take a spin through
3210Sstevel@tonic-gate 	 * the threadlist and see if we can account for everybody.
3220Sstevel@tonic-gate 	 */
3230Sstevel@tonic-gate 	mutex_enter(&pidlock);
3240Sstevel@tonic-gate 	tp = curthread->t_next;
3250Sstevel@tonic-gate 	do {
3260Sstevel@tonic-gate 		p = ttoproc(tp);
3270Sstevel@tonic-gate 		if (p->p_as != &kas)
3280Sstevel@tonic-gate 			continue;
3290Sstevel@tonic-gate 
3300Sstevel@tonic-gate 		if (tp->t_flag & T_INTR_THREAD)
3310Sstevel@tonic-gate 			continue;
3320Sstevel@tonic-gate 
3330Sstevel@tonic-gate 		if (! callb_is_stopped(tp, &name)) {
3340Sstevel@tonic-gate 			mutex_exit(&pidlock);
3350Sstevel@tonic-gate 			cpr_err(CE_WARN,
3360Sstevel@tonic-gate 			    "\"%s\" kernel thread not stopped.", name);
3370Sstevel@tonic-gate 			return (EBUSY);
3380Sstevel@tonic-gate 		}
3390Sstevel@tonic-gate 	} while ((tp = tp->t_next) != curthread);
3405295Srandyf 
3410Sstevel@tonic-gate 	mutex_exit(&pidlock);
3420Sstevel@tonic-gate 	return (0);
3430Sstevel@tonic-gate }
344