xref: /onnv-gate/usr/src/uts/sun4u/os/cpr_impl.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate /*
30*0Sstevel@tonic-gate  * Platform specific implementation code
31*0Sstevel@tonic-gate  */
32*0Sstevel@tonic-gate 
33*0Sstevel@tonic-gate #define	SUNDDI_IMPL
34*0Sstevel@tonic-gate 
35*0Sstevel@tonic-gate #include <sys/types.h>
36*0Sstevel@tonic-gate #include <sys/promif.h>
37*0Sstevel@tonic-gate #include <sys/prom_isa.h>
38*0Sstevel@tonic-gate #include <sys/prom_plat.h>
39*0Sstevel@tonic-gate #include <sys/mmu.h>
40*0Sstevel@tonic-gate #include <vm/hat_sfmmu.h>
41*0Sstevel@tonic-gate #include <sys/iommu.h>
42*0Sstevel@tonic-gate #include <sys/scb.h>
43*0Sstevel@tonic-gate #include <sys/cpuvar.h>
44*0Sstevel@tonic-gate #include <sys/intreg.h>
45*0Sstevel@tonic-gate #include <sys/pte.h>
46*0Sstevel@tonic-gate #include <vm/hat.h>
47*0Sstevel@tonic-gate #include <vm/page.h>
48*0Sstevel@tonic-gate #include <vm/as.h>
49*0Sstevel@tonic-gate #include <sys/cpr.h>
50*0Sstevel@tonic-gate #include <sys/kmem.h>
51*0Sstevel@tonic-gate #include <sys/clock.h>
52*0Sstevel@tonic-gate #include <sys/kmem.h>
53*0Sstevel@tonic-gate #include <sys/panic.h>
54*0Sstevel@tonic-gate #include <vm/seg_kmem.h>
55*0Sstevel@tonic-gate #include <sys/cpu_module.h>
56*0Sstevel@tonic-gate #include <sys/callb.h>
57*0Sstevel@tonic-gate #include <sys/machsystm.h>
58*0Sstevel@tonic-gate #include <sys/vmsystm.h>
59*0Sstevel@tonic-gate #include <sys/systm.h>
60*0Sstevel@tonic-gate #include <sys/archsystm.h>
61*0Sstevel@tonic-gate #include <sys/stack.h>
62*0Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
63*0Sstevel@tonic-gate #include <sys/memlist.h>
64*0Sstevel@tonic-gate #include <sys/bootconf.h>
65*0Sstevel@tonic-gate #include <sys/thread.h>
66*0Sstevel@tonic-gate 
67*0Sstevel@tonic-gate extern	void cpr_clear_bitmaps(void);
68*0Sstevel@tonic-gate extern	void dtlb_wr_entry(uint_t, tte_t *, uint64_t *);
69*0Sstevel@tonic-gate extern	void itlb_wr_entry(uint_t, tte_t *, uint64_t *);
70*0Sstevel@tonic-gate 
71*0Sstevel@tonic-gate static	int i_cpr_storage_desc_alloc(csd_t **, pgcnt_t *, csd_t **, int);
72*0Sstevel@tonic-gate static	void i_cpr_storage_desc_init(csd_t *, pgcnt_t, csd_t *);
73*0Sstevel@tonic-gate static	caddr_t i_cpr_storage_data_alloc(pgcnt_t, pgcnt_t *, int);
74*0Sstevel@tonic-gate static	int cpr_dump_sensitive(vnode_t *, csd_t *);
75*0Sstevel@tonic-gate static	void i_cpr_clear_entries(uint64_t, uint64_t);
76*0Sstevel@tonic-gate static	void i_cpr_xcall(xcfunc_t);
77*0Sstevel@tonic-gate 
78*0Sstevel@tonic-gate void	i_cpr_storage_free(void);
79*0Sstevel@tonic-gate 
80*0Sstevel@tonic-gate extern void *i_cpr_data_page;
81*0Sstevel@tonic-gate extern int cpr_test_mode;
82*0Sstevel@tonic-gate extern int cpr_nbitmaps;
83*0Sstevel@tonic-gate extern char cpr_default_path[];
84*0Sstevel@tonic-gate extern caddr_t textva, datava;
85*0Sstevel@tonic-gate 
86*0Sstevel@tonic-gate static struct cpr_map_info cpr_prom_retain[CPR_PROM_RETAIN_CNT];
87*0Sstevel@tonic-gate caddr_t cpr_vaddr = NULL;
88*0Sstevel@tonic-gate 
89*0Sstevel@tonic-gate static	uint_t sensitive_pages_saved;
90*0Sstevel@tonic-gate static	uint_t sensitive_size_saved;
91*0Sstevel@tonic-gate 
92*0Sstevel@tonic-gate caddr_t	i_cpr_storage_data_base;
93*0Sstevel@tonic-gate caddr_t	i_cpr_storage_data_end;
94*0Sstevel@tonic-gate csd_t *i_cpr_storage_desc_base;
95*0Sstevel@tonic-gate csd_t *i_cpr_storage_desc_end;		/* one byte beyond last used descp */
96*0Sstevel@tonic-gate csd_t *i_cpr_storage_desc_last_used;	/* last used descriptor */
97*0Sstevel@tonic-gate caddr_t sensitive_write_ptr;		/* position for next storage write */
98*0Sstevel@tonic-gate 
99*0Sstevel@tonic-gate size_t	i_cpr_sensitive_bytes_dumped;
100*0Sstevel@tonic-gate pgcnt_t	i_cpr_sensitive_pgs_dumped;
101*0Sstevel@tonic-gate pgcnt_t	i_cpr_storage_data_sz;		/* in pages */
102*0Sstevel@tonic-gate pgcnt_t	i_cpr_storage_desc_pgcnt;	/* in pages */
103*0Sstevel@tonic-gate 
104*0Sstevel@tonic-gate ushort_t cpr_mach_type = CPR_MACHTYPE_4U;
105*0Sstevel@tonic-gate static	csu_md_t m_info;
106*0Sstevel@tonic-gate 
107*0Sstevel@tonic-gate 
108*0Sstevel@tonic-gate #define	MAX_STORAGE_RETRY	3
109*0Sstevel@tonic-gate #define	MAX_STORAGE_ALLOC_RETRY	3
110*0Sstevel@tonic-gate #define	INITIAL_ALLOC_PCNT	40	/* starting allocation percentage */
111*0Sstevel@tonic-gate #define	INTEGRAL		100	/* to get 1% precision */
112*0Sstevel@tonic-gate 
113*0Sstevel@tonic-gate #define	EXTRA_RATE		2	/* add EXTRA_RATE% extra space */
114*0Sstevel@tonic-gate #define	EXTRA_DESCS		10
115*0Sstevel@tonic-gate 
116*0Sstevel@tonic-gate #define	CPR_NO_STORAGE_DESC	1
117*0Sstevel@tonic-gate #define	CPR_NO_STORAGE_DATA	2
118*0Sstevel@tonic-gate 
119*0Sstevel@tonic-gate #define	CIF_SPLICE		0
120*0Sstevel@tonic-gate #define	CIF_UNLINK		1
121*0Sstevel@tonic-gate 
122*0Sstevel@tonic-gate 
123*0Sstevel@tonic-gate /*
124*0Sstevel@tonic-gate  * CPR miscellaneous support routines
125*0Sstevel@tonic-gate  */
126*0Sstevel@tonic-gate #define	cpr_open(path, mode,  vpp)	(vn_open(path, UIO_SYSSPACE, \
127*0Sstevel@tonic-gate 		mode, 0600, vpp, CRCREAT, 0))
128*0Sstevel@tonic-gate #define	cpr_rdwr(rw, vp, basep, cnt)	(vn_rdwr(rw, vp,  (caddr_t)(basep), \
129*0Sstevel@tonic-gate 		cnt, 0LL, UIO_SYSSPACE, 0, (rlim64_t)MAXOFF_T, CRED(), \
130*0Sstevel@tonic-gate 		(ssize_t *)NULL))
131*0Sstevel@tonic-gate 
132*0Sstevel@tonic-gate /*
133*0Sstevel@tonic-gate  * definitions for saving/restoring prom pages
134*0Sstevel@tonic-gate  */
135*0Sstevel@tonic-gate static void	*ppage_buf;
136*0Sstevel@tonic-gate static pgcnt_t	ppage_count;
137*0Sstevel@tonic-gate static pfn_t	*pphys_list;
138*0Sstevel@tonic-gate static size_t	pphys_list_size;
139*0Sstevel@tonic-gate 
140*0Sstevel@tonic-gate typedef void (*tlb_rw_t)(uint_t, tte_t *, uint64_t *);
141*0Sstevel@tonic-gate typedef void (*tlb_filter_t)(int, tte_t *, uint64_t, void *);
142*0Sstevel@tonic-gate 
143*0Sstevel@tonic-gate /*
144*0Sstevel@tonic-gate  * private struct for tlb handling
145*0Sstevel@tonic-gate  */
146*0Sstevel@tonic-gate struct cpr_trans_info {
147*0Sstevel@tonic-gate 	sutlb_t		*dst;
148*0Sstevel@tonic-gate 	sutlb_t		*tail;
149*0Sstevel@tonic-gate 	tlb_rw_t	reader;
150*0Sstevel@tonic-gate 	tlb_rw_t	writer;
151*0Sstevel@tonic-gate 	tlb_filter_t	filter;
152*0Sstevel@tonic-gate 	int		index;
153*0Sstevel@tonic-gate 	uint64_t	skip;		/* assumes TLB <= 64 locked entries */
154*0Sstevel@tonic-gate };
155*0Sstevel@tonic-gate typedef struct cpr_trans_info cti_t;
156*0Sstevel@tonic-gate 
157*0Sstevel@tonic-gate 
158*0Sstevel@tonic-gate /*
159*0Sstevel@tonic-gate  * special handling for tlb info
160*0Sstevel@tonic-gate  */
161*0Sstevel@tonic-gate #define	WITHIN_OFW(va) \
162*0Sstevel@tonic-gate 	(((va) > (uint64_t)OFW_START_ADDR) && ((va) < (uint64_t)OFW_END_ADDR))
163*0Sstevel@tonic-gate 
164*0Sstevel@tonic-gate #define	WITHIN_NUCLEUS(va, base) \
165*0Sstevel@tonic-gate 	(((va) >= (base)) && \
166*0Sstevel@tonic-gate 	(((va) + MMU_PAGESIZE) <= ((base) + MMU_PAGESIZE4M)))
167*0Sstevel@tonic-gate 
168*0Sstevel@tonic-gate #define	IS_BIGKTSB(va) \
169*0Sstevel@tonic-gate 	(enable_bigktsb && \
170*0Sstevel@tonic-gate 	((va) >= (uint64_t)ktsb_base) && \
171*0Sstevel@tonic-gate 	((va) < (uint64_t)(ktsb_base + ktsb_sz)))
172*0Sstevel@tonic-gate 
173*0Sstevel@tonic-gate 
174*0Sstevel@tonic-gate /*
175*0Sstevel@tonic-gate  * WARNING:
176*0Sstevel@tonic-gate  * the text from this file is linked to follow cpr_resume_setup.o;
177*0Sstevel@tonic-gate  * only add text between here and i_cpr_end_jumpback when it needs
178*0Sstevel@tonic-gate  * to be called during resume before we switch back to the kernel
179*0Sstevel@tonic-gate  * trap table.  all the text in this range must fit within a page.
180*0Sstevel@tonic-gate  */
181*0Sstevel@tonic-gate 
182*0Sstevel@tonic-gate 
183*0Sstevel@tonic-gate /*
184*0Sstevel@tonic-gate  * each time a machine is reset, the prom uses an inconsistent set of phys
185*0Sstevel@tonic-gate  * pages and the cif cookie may differ as well.  so prior to restoring the
186*0Sstevel@tonic-gate  * original prom, we have to use the new/tmp prom's translations
187*0Sstevel@tonic-gate  * when requesting prom services.
188*0Sstevel@tonic-gate  *
189*0Sstevel@tonic-gate  * cif_handler starts out as the original prom cookie, and that gets used
190*0Sstevel@tonic-gate  * by client_handler() to jump into the prom.  here we splice-in a wrapper
191*0Sstevel@tonic-gate  * routine by writing cif_handler; client_handler() will now jump to the
192*0Sstevel@tonic-gate  * wrapper which switches the %tba to the new/tmp prom's trap table then
193*0Sstevel@tonic-gate  * jumps to the new cookie.
194*0Sstevel@tonic-gate  */
195*0Sstevel@tonic-gate void
196*0Sstevel@tonic-gate i_cpr_cif_setup(int action)
197*0Sstevel@tonic-gate {
198*0Sstevel@tonic-gate 	extern void *i_cpr_orig_cif, *cif_handler;
199*0Sstevel@tonic-gate 	extern int i_cpr_cif_wrapper(void *);
200*0Sstevel@tonic-gate 
201*0Sstevel@tonic-gate 	/*
202*0Sstevel@tonic-gate 	 * save the original cookie and change the current cookie to the
203*0Sstevel@tonic-gate 	 * wrapper routine.  later we just restore the original cookie.
204*0Sstevel@tonic-gate 	 */
205*0Sstevel@tonic-gate 	if (action == CIF_SPLICE) {
206*0Sstevel@tonic-gate 		i_cpr_orig_cif = cif_handler;
207*0Sstevel@tonic-gate 		cif_handler = (void *)i_cpr_cif_wrapper;
208*0Sstevel@tonic-gate 	} else if (action == CIF_UNLINK)
209*0Sstevel@tonic-gate 		cif_handler = i_cpr_orig_cif;
210*0Sstevel@tonic-gate }
211*0Sstevel@tonic-gate 
212*0Sstevel@tonic-gate 
213*0Sstevel@tonic-gate /*
214*0Sstevel@tonic-gate  * launch slave cpus into kernel text, pause them,
215*0Sstevel@tonic-gate  * and restore the original prom pages
216*0Sstevel@tonic-gate  */
/*
 * launch slave cpus into kernel text, pause them, flush stale tlb
 * entries on every cpu, then restore the original prom pages.
 * the ordering below (cif splice -> trap table -> cpu restart ->
 * cif unlink -> prom page restore) is load-bearing; see WARNING.
 */
void
i_cpr_mp_setup(void)
{
	extern void restart_other_cpu(int);
	ihandle_t tmpout = 0;
	char *str;
	cpu_t *cp;

	/*
	 * reset cpu_ready_set so x_calls work properly;
	 * only the boot cpu is ready at this point
	 */
	CPUSET_ZERO(cpu_ready_set);
	CPUSET_ADD(cpu_ready_set, getprocessorid());

	/*
	 * setup cif to use the cookie from the new/tmp prom
	 * and setup tmp handling for calling prom services.
	 */
	i_cpr_cif_setup(CIF_SPLICE);

	/*
	 * at this point, only the nucleus and a few cpr pages are
	 * mapped in.  once we switch to the kernel trap table,
	 * we can access the rest of kernel space.
	 */
	prom_set_traptable(&trap_table);

	if (ncpus > 1) {
		sfmmu_init_tsbs();

		/* debug output goes through the prom's stdout ihandle */
		if (cpr_debug & LEVEL1) {
			prom_interpret("stdout @ swap l!", (uintptr_t)&tmpout,
			    0, 0, 0, 0);
			str = "MP startup...\r\n";
			(void) prom_write(tmpout, str, strlen(str), 0, 0);
		}

		mutex_enter(&cpu_lock);
		/*
		 * The slave cpus are not ready at this time,
		 * yet the cpu structures have various cpu_flags set;
		 * clear cpu_flags and mutex_ready.
		 * Since we are coming up from a CPU suspend, the slave cpus
		 * are frozen.
		 */
		for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next) {
			cp->cpu_flags = CPU_FROZEN;
			cp->cpu_m.mutex_ready = 0;
		}

		/* second pass: actually start each slave */
		for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next)
			restart_other_cpu(cp->cpu_id);

		/* hold the slaves paused while prom pages are restored */
		pause_cpus(NULL);
		mutex_exit(&cpu_lock);

		if (cpr_debug & LEVEL1) {
			str = "MP paused...\r\n";
			(void) prom_write(tmpout, str, strlen(str), 0, 0);
		}

		/* run the tlb cleanup on every ready cpu */
		i_cpr_xcall(i_cpr_clear_entries);
	} else
		i_cpr_clear_entries(0, 0);

	/*
	 * now unlink the cif wrapper;  WARNING: do not call any
	 * prom_xxx() routines until after prom pages are restored.
	 */
	i_cpr_cif_setup(CIF_UNLINK);

	(void) i_cpr_prom_pages(CPR_PROM_RESTORE);
}
290*0Sstevel@tonic-gate 
291*0Sstevel@tonic-gate 
292*0Sstevel@tonic-gate /*
293*0Sstevel@tonic-gate  * end marker for jumpback page;
294*0Sstevel@tonic-gate  * this symbol is used to check the size of i_cpr_resume_setup()
295*0Sstevel@tonic-gate  * and the above text.  For simplicity, the Makefile needs to
296*0Sstevel@tonic-gate  * link i_cpr_resume_setup.o and cpr_impl.o consecutively.
297*0Sstevel@tonic-gate  */
void
i_cpr_end_jumpback(void)
{
	/* intentionally empty: used only as an end-of-text address marker */
}
302*0Sstevel@tonic-gate 
303*0Sstevel@tonic-gate 
304*0Sstevel@tonic-gate /*
305*0Sstevel@tonic-gate  * scan tlb entries with reader; when valid entries are found,
306*0Sstevel@tonic-gate  * the filter routine will selectively save/clear them
307*0Sstevel@tonic-gate  */
308*0Sstevel@tonic-gate static void
309*0Sstevel@tonic-gate i_cpr_scan_tlb(cti_t *ctip)
310*0Sstevel@tonic-gate {
311*0Sstevel@tonic-gate 	uint64_t va_tag;
312*0Sstevel@tonic-gate 	int tlb_index;
313*0Sstevel@tonic-gate 	tte_t tte;
314*0Sstevel@tonic-gate 
315*0Sstevel@tonic-gate 	for (tlb_index = ctip->index; tlb_index >= 0; tlb_index--) {
316*0Sstevel@tonic-gate 		(*ctip->reader)((uint_t)tlb_index, &tte, &va_tag);
317*0Sstevel@tonic-gate 		if (va_tag && TTE_IS_VALID(&tte))
318*0Sstevel@tonic-gate 			(*ctip->filter)(tlb_index, &tte, va_tag, ctip);
319*0Sstevel@tonic-gate 	}
320*0Sstevel@tonic-gate }
321*0Sstevel@tonic-gate 
322*0Sstevel@tonic-gate 
323*0Sstevel@tonic-gate /*
324*0Sstevel@tonic-gate  * filter for locked tlb entries that reference the text/data nucleus
325*0Sstevel@tonic-gate  * and any bigktsb's; these will be reinstalled by cprboot on all cpus
326*0Sstevel@tonic-gate  */
327*0Sstevel@tonic-gate /* ARGSUSED */
328*0Sstevel@tonic-gate static void
329*0Sstevel@tonic-gate i_cpr_lnb(int index, tte_t *ttep, uint64_t va_tag, void *ctrans)
330*0Sstevel@tonic-gate {
331*0Sstevel@tonic-gate 	cti_t *ctip;
332*0Sstevel@tonic-gate 
333*0Sstevel@tonic-gate 	/*
334*0Sstevel@tonic-gate 	 * record tlb data at ctip->dst; the target tlb index starts
335*0Sstevel@tonic-gate 	 * at the highest tlb offset and moves towards 0.  the prom
336*0Sstevel@tonic-gate 	 * reserves both dtlb and itlb index 0.  any selected entry
337*0Sstevel@tonic-gate 	 * also gets marked to prevent being flushed during resume
338*0Sstevel@tonic-gate 	 */
339*0Sstevel@tonic-gate 	if (TTE_IS_LOCKED(ttep) && (va_tag == (uint64_t)textva ||
340*0Sstevel@tonic-gate 	    va_tag == (uint64_t)datava || IS_BIGKTSB(va_tag))) {
341*0Sstevel@tonic-gate 		ctip = ctrans;
342*0Sstevel@tonic-gate 		while ((1 << ctip->index) & ctip->skip)
343*0Sstevel@tonic-gate 			ctip->index--;
344*0Sstevel@tonic-gate 		ASSERT(ctip->index > 0);
345*0Sstevel@tonic-gate 		ASSERT(ctip->dst < ctip->tail);
346*0Sstevel@tonic-gate 		ctip->dst->tte.ll = ttep->ll;
347*0Sstevel@tonic-gate 		ctip->dst->va_tag = va_tag;
348*0Sstevel@tonic-gate 		ctip->dst->index = ctip->index--;
349*0Sstevel@tonic-gate 		ctip->dst->tmp = 0;
350*0Sstevel@tonic-gate 		ctip->dst++;
351*0Sstevel@tonic-gate 	}
352*0Sstevel@tonic-gate }
353*0Sstevel@tonic-gate 
354*0Sstevel@tonic-gate 
355*0Sstevel@tonic-gate /*
356*0Sstevel@tonic-gate  * some tlb entries are stale, filter for unlocked entries
357*0Sstevel@tonic-gate  * within the prom virt range and clear them
358*0Sstevel@tonic-gate  */
359*0Sstevel@tonic-gate static void
360*0Sstevel@tonic-gate i_cpr_ufw(int index, tte_t *ttep, uint64_t va_tag, void *ctrans)
361*0Sstevel@tonic-gate {
362*0Sstevel@tonic-gate 	sutlb_t clr;
363*0Sstevel@tonic-gate 	cti_t *ctip;
364*0Sstevel@tonic-gate 
365*0Sstevel@tonic-gate 	if (!TTE_IS_LOCKED(ttep) && WITHIN_OFW(va_tag)) {
366*0Sstevel@tonic-gate 		ctip = ctrans;
367*0Sstevel@tonic-gate 		bzero(&clr, sizeof (clr));
368*0Sstevel@tonic-gate 		(*ctip->writer)((uint_t)index, &clr.tte, &clr.va_tag);
369*0Sstevel@tonic-gate 	}
370*0Sstevel@tonic-gate }
371*0Sstevel@tonic-gate 
372*0Sstevel@tonic-gate 
373*0Sstevel@tonic-gate /*
374*0Sstevel@tonic-gate  * some of the entries installed by cprboot are needed only on a
375*0Sstevel@tonic-gate  * short-term basis and need to be flushed to avoid clogging the tlbs.
376*0Sstevel@tonic-gate  * scan the dtte/itte arrays for items marked as temporary and clear
377*0Sstevel@tonic-gate  * dtlb/itlb entries using wrfunc.
378*0Sstevel@tonic-gate  */
379*0Sstevel@tonic-gate static void
380*0Sstevel@tonic-gate i_cpr_clear_tmp(sutlb_t *listp, int max, tlb_rw_t wrfunc)
381*0Sstevel@tonic-gate {
382*0Sstevel@tonic-gate 	sutlb_t clr, *tail;
383*0Sstevel@tonic-gate 
384*0Sstevel@tonic-gate 	bzero(&clr, sizeof (clr));
385*0Sstevel@tonic-gate 	for (tail = listp + max; listp < tail && listp->va_tag; listp++) {
386*0Sstevel@tonic-gate 		if (listp->tmp)
387*0Sstevel@tonic-gate 			(*wrfunc)((uint_t)listp->index, &clr.tte, &clr.va_tag);
388*0Sstevel@tonic-gate 	}
389*0Sstevel@tonic-gate }
390*0Sstevel@tonic-gate 
391*0Sstevel@tonic-gate 
/*
 * clear stale tlb entries on the current cpu after resume; runs
 * directly on a uniprocessor or via x-call on each cpu otherwise.
 * args are unused -- the xcfunc_t signature requires them.
 */
/* ARGSUSED */
static void
i_cpr_clear_entries(uint64_t arg1, uint64_t arg2)
{
	extern void demap_all(void);
	cti_t cti;

	/* first flush the short-term entries cprboot installed for resume */
	i_cpr_clear_tmp(m_info.dtte, CPR_MAX_TLB, dtlb_wr_entry);
	i_cpr_clear_tmp(m_info.itte, CPR_MAX_TLB, itlb_wr_entry);

	/*
	 * for newer cpus that implement DEMAP_ALL_TYPE, demap_all is
	 * a second label for vtag_flushall.  the call is made using
	 * vtag_flushall() instead of demap_all() due to runtime and
	 * krtld results with both older and newer cpu modules.
	 * (&demap_all != 0 tests whether the weak symbol is resolved.)
	 */
	if (&demap_all != 0) {
		vtag_flushall();
		return;
	}

	/*
	 * for older V9 cpus, scan tlbs and clear stale entries
	 */
	bzero(&cti, sizeof (cti));
	cti.filter = i_cpr_ufw;

	/* data tlb pass */
	cti.index = cpunodes[CPU->cpu_id].dtlb_size - 1;
	cti.reader = dtlb_rd_entry;
	cti.writer = dtlb_wr_entry;
	i_cpr_scan_tlb(&cti);

	/* instruction tlb pass */
	cti.index = cpunodes[CPU->cpu_id].itlb_size - 1;
	cti.reader = itlb_rd_entry;
	cti.writer = itlb_wr_entry;
	i_cpr_scan_tlb(&cti);
}
429*0Sstevel@tonic-gate 
430*0Sstevel@tonic-gate 
431*0Sstevel@tonic-gate /*
432*0Sstevel@tonic-gate  * craft tlb info for tmp use during resume; this data gets used by
433*0Sstevel@tonic-gate  * cprboot to install tlb entries.  we also mark each struct as tmp
434*0Sstevel@tonic-gate  * so those tlb entries will get flushed after switching to the kernel
435*0Sstevel@tonic-gate  * trap table.  no data needs to be recorded for vaddr when it falls
436*0Sstevel@tonic-gate  * within the nucleus since we've already recorded nucleus ttes and
437*0Sstevel@tonic-gate  * a 8K tte would conflict with a 4MB tte.  eg: the cpr module
437*0Sstevel@tonic-gate  * an 8K tte would conflict with a 4MB tte.  eg: the cpr module
439*0Sstevel@tonic-gate  */
440*0Sstevel@tonic-gate static void
441*0Sstevel@tonic-gate i_cpr_make_tte(cti_t *ctip, void *vaddr, caddr_t nbase)
442*0Sstevel@tonic-gate {
443*0Sstevel@tonic-gate 	pfn_t ppn;
444*0Sstevel@tonic-gate 	uint_t rw;
445*0Sstevel@tonic-gate 
446*0Sstevel@tonic-gate 	if (WITHIN_NUCLEUS((caddr_t)vaddr, nbase))
447*0Sstevel@tonic-gate 		return;
448*0Sstevel@tonic-gate 
449*0Sstevel@tonic-gate 	while ((1 << ctip->index) & ctip->skip)
450*0Sstevel@tonic-gate 		ctip->index--;
451*0Sstevel@tonic-gate 	ASSERT(ctip->index > 0);
452*0Sstevel@tonic-gate 	ASSERT(ctip->dst < ctip->tail);
453*0Sstevel@tonic-gate 
454*0Sstevel@tonic-gate 	/*
455*0Sstevel@tonic-gate 	 * without any global service available to lookup
456*0Sstevel@tonic-gate 	 * a tte by vaddr, we craft our own here:
457*0Sstevel@tonic-gate 	 */
458*0Sstevel@tonic-gate 	ppn = va_to_pfn(vaddr);
459*0Sstevel@tonic-gate 	rw = (nbase == datava) ? TTE_HWWR_INT : 0;
460*0Sstevel@tonic-gate 	ctip->dst->tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(ppn);
461*0Sstevel@tonic-gate 	ctip->dst->tte.tte_intlo = TTE_PFN_INTLO(ppn) | TTE_LCK_INT |
462*0Sstevel@tonic-gate 	    TTE_CP_INT | TTE_PRIV_INT | rw;
463*0Sstevel@tonic-gate 	ctip->dst->va_tag = ((uintptr_t)vaddr & MMU_PAGEMASK);
464*0Sstevel@tonic-gate 	ctip->dst->index = ctip->index--;
465*0Sstevel@tonic-gate 	ctip->dst->tmp = 1;
466*0Sstevel@tonic-gate 	ctip->dst++;
467*0Sstevel@tonic-gate }
468*0Sstevel@tonic-gate 
469*0Sstevel@tonic-gate 
470*0Sstevel@tonic-gate static void
471*0Sstevel@tonic-gate i_cpr_xcall(xcfunc_t func)
472*0Sstevel@tonic-gate {
473*0Sstevel@tonic-gate 	uint_t pil, reset_pil;
474*0Sstevel@tonic-gate 
475*0Sstevel@tonic-gate 	pil = getpil();
476*0Sstevel@tonic-gate 	if (pil < XCALL_PIL)
477*0Sstevel@tonic-gate 		reset_pil = 0;
478*0Sstevel@tonic-gate 	else {
479*0Sstevel@tonic-gate 		reset_pil = 1;
480*0Sstevel@tonic-gate 		setpil(XCALL_PIL - 1);
481*0Sstevel@tonic-gate 	}
482*0Sstevel@tonic-gate 	xc_some(cpu_ready_set, func, 0, 0);
483*0Sstevel@tonic-gate 	if (reset_pil)
484*0Sstevel@tonic-gate 		setpil(pil);
485*0Sstevel@tonic-gate }
486*0Sstevel@tonic-gate 
487*0Sstevel@tonic-gate 
488*0Sstevel@tonic-gate /*
489*0Sstevel@tonic-gate  * restart paused slave cpus
490*0Sstevel@tonic-gate  */
491*0Sstevel@tonic-gate void
492*0Sstevel@tonic-gate i_cpr_machdep_setup(void)
493*0Sstevel@tonic-gate {
494*0Sstevel@tonic-gate 	if (ncpus > 1) {
495*0Sstevel@tonic-gate 		DEBUG1(errp("MP restarted...\n"));
496*0Sstevel@tonic-gate 		mutex_enter(&cpu_lock);
497*0Sstevel@tonic-gate 		start_cpus();
498*0Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
499*0Sstevel@tonic-gate 	}
500*0Sstevel@tonic-gate }
501*0Sstevel@tonic-gate 
502*0Sstevel@tonic-gate 
503*0Sstevel@tonic-gate /*
504*0Sstevel@tonic-gate  * Stop all interrupt activities in the system
505*0Sstevel@tonic-gate  */
void
i_cpr_stop_intr(void)
{
	/* raise the pil to level 7 to block remaining interrupt activity */
	(void) spl7();
}
511*0Sstevel@tonic-gate 
512*0Sstevel@tonic-gate /*
513*0Sstevel@tonic-gate  * Set machine up to take interrupts
514*0Sstevel@tonic-gate  */
void
i_cpr_enable_intr(void)
{
	/* drop the pil to 0 so all interrupt levels are accepted again */
	(void) spl0();
}
520*0Sstevel@tonic-gate 
521*0Sstevel@tonic-gate 
522*0Sstevel@tonic-gate /*
523*0Sstevel@tonic-gate  * record cpu nodes and ids
524*0Sstevel@tonic-gate  */
525*0Sstevel@tonic-gate static void
526*0Sstevel@tonic-gate i_cpr_save_cpu_info(void)
527*0Sstevel@tonic-gate {
528*0Sstevel@tonic-gate 	struct sun4u_cpu_info *scip;
529*0Sstevel@tonic-gate 	cpu_t *cp;
530*0Sstevel@tonic-gate 
531*0Sstevel@tonic-gate 	scip = m_info.sci;
532*0Sstevel@tonic-gate 	cp = CPU;
533*0Sstevel@tonic-gate 	do {
534*0Sstevel@tonic-gate 		ASSERT(scip < &m_info.sci[NCPU]);
535*0Sstevel@tonic-gate 		scip->cpu_id = cp->cpu_id;
536*0Sstevel@tonic-gate 		scip->node = cpunodes[cp->cpu_id].nodeid;
537*0Sstevel@tonic-gate 		scip++;
538*0Sstevel@tonic-gate 	} while ((cp = cp->cpu_next) != CPU);
539*0Sstevel@tonic-gate }
540*0Sstevel@tonic-gate 
541*0Sstevel@tonic-gate 
542*0Sstevel@tonic-gate /*
543*0Sstevel@tonic-gate  * Write necessary machine dependent information to cpr state file,
544*0Sstevel@tonic-gate  * eg. sun4u mmu ctx secondary for the current running process (cpr) ...
545*0Sstevel@tonic-gate  */
/*
 * returns 0 on success, or the first failing cpr_write() error code.
 * on-disk layout written here: cmd_t descriptor, then m_info,
 * then the ustr forth words (including its trailing NUL).
 */
int
i_cpr_write_machdep(vnode_t *vp)
{
	extern uint_t getpstate(), getwstate();
	extern uint_t i_cpr_tstack_size;
	const char ustr[] = ": unix-tte 2drop false ;";
	uintptr_t tinfo;
	label_t *ltp;
	cmd_t cmach;
	char *fmt;
	int rc;

	/*
	 * ustr[] is used as temporary forth words during
	 * slave startup sequence, see sfmmu_mp_startup()
	 */

	cmach.md_magic = (uint_t)CPR_MACHDEP_MAGIC;
	cmach.md_size = sizeof (m_info) + sizeof (ustr);

	/* the descriptor precedes the machdep payload in the state file */
	if (rc = cpr_write(vp, (caddr_t)&cmach, sizeof (cmach))) {
		cpr_err(CE_WARN, "Failed to write descriptor.");
		return (rc);
	}

	/*
	 * m_info is now cleared in i_cpr_dump_setup()
	 */
	m_info.ksb = (uint32_t)STACK_BIAS;
	m_info.kpstate = (uint16_t)getpstate();
	m_info.kwstate = (uint16_t)getwstate();
	DEBUG1(errp("stack bias 0x%x, pstate 0x%x, wstate 0x%x\n",
	    m_info.ksb, m_info.kpstate, m_info.kwstate));

	/* resume will longjmp back through the quiesce save area */
	ltp = &ttolwp(curthread)->lwp_qsav;
	m_info.qsav_pc = (cpr_ext)ltp->val[0];
	m_info.qsav_sp = (cpr_ext)ltp->val[1];

	/*
	 * Set secondary context to INVALID_CONTEXT to force the HAT
	 * to re-setup the MMU registers and locked TTEs it needs for
	 * TLB miss handling.
	 */
	m_info.mmu_ctx_sec = INVALID_CONTEXT;
	m_info.mmu_ctx_pri = sfmmu_getctx_pri();

	tinfo = (uintptr_t)curthread;
	m_info.thrp = (cpr_ptr)tinfo;

	/* entry point cprboot jumps to on resume */
	tinfo = (uintptr_t)i_cpr_resume_setup;
	m_info.func = (cpr_ptr)tinfo;

	/*
	 * i_cpr_data_page is comprised of a 4K stack area and a few
	 * trailing data symbols; the page is shared by the prom and
	 * kernel during resume.  the stack size is recorded here
	 * and used by cprboot to set %sp
	 */
	tinfo = (uintptr_t)&i_cpr_data_page;
	m_info.tmp_stack = (cpr_ptr)tinfo;
	m_info.tmp_stacksize = i_cpr_tstack_size;

	m_info.test_mode = cpr_test_mode;

	i_cpr_save_cpu_info();

	if (rc = cpr_write(vp, (caddr_t)&m_info, sizeof (m_info))) {
		cpr_err(CE_WARN, "Failed to write machdep info.");
		return (rc);
	}

	/* sizeof (ustr) includes the terminating NUL */
	fmt = "error writing %s forth info";
	if (rc = cpr_write(vp, (caddr_t)ustr, sizeof (ustr)))
		cpr_err(CE_WARN, fmt, "unix-tte");

	return (rc);
}
623*0Sstevel@tonic-gate 
624*0Sstevel@tonic-gate 
625*0Sstevel@tonic-gate /*
626*0Sstevel@tonic-gate  * Save miscellaneous information which needs to be written to the
627*0Sstevel@tonic-gate  * state file.  This information is required to re-initialize
628*0Sstevel@tonic-gate  * kernel/prom handshaking.
629*0Sstevel@tonic-gate  */
630*0Sstevel@tonic-gate void
631*0Sstevel@tonic-gate i_cpr_save_machdep_info(void)
632*0Sstevel@tonic-gate {
633*0Sstevel@tonic-gate 	DEBUG5(errp("jumpback size = 0x%lx\n",
634*0Sstevel@tonic-gate 	    (uintptr_t)&i_cpr_end_jumpback -
635*0Sstevel@tonic-gate 	    (uintptr_t)i_cpr_resume_setup));
636*0Sstevel@tonic-gate 
637*0Sstevel@tonic-gate 	/*
638*0Sstevel@tonic-gate 	 * Verify the jumpback code all falls in one page.
639*0Sstevel@tonic-gate 	 */
640*0Sstevel@tonic-gate 	if (((uintptr_t)&i_cpr_end_jumpback & MMU_PAGEMASK) !=
641*0Sstevel@tonic-gate 	    ((uintptr_t)i_cpr_resume_setup & MMU_PAGEMASK))
642*0Sstevel@tonic-gate 		cpr_err(CE_PANIC, "jumpback code exceeds one page.");
643*0Sstevel@tonic-gate }
644*0Sstevel@tonic-gate 
645*0Sstevel@tonic-gate 
void
i_cpr_set_tbr(void)
{
	/*
	 * intentionally empty -- apparently a no-op stub kept to
	 * satisfy the common cpr machdep interface (verify against
	 * callers in the common cpr code)
	 */
}
650*0Sstevel@tonic-gate 
651*0Sstevel@tonic-gate 
652*0Sstevel@tonic-gate /*
653*0Sstevel@tonic-gate  * cpu0 should contain bootcpu info
654*0Sstevel@tonic-gate  */
cpu_t *
i_cpr_bootcpu(void)
{
	/* cpu0 always describes the boot cpu; see comment above */
	return (&cpu0);
}
660*0Sstevel@tonic-gate 
661*0Sstevel@tonic-gate 
662*0Sstevel@tonic-gate /*
663*0Sstevel@tonic-gate  * Return the virtual address of the mapping area
664*0Sstevel@tonic-gate  */
665*0Sstevel@tonic-gate caddr_t
666*0Sstevel@tonic-gate i_cpr_map_setup(void)
667*0Sstevel@tonic-gate {
668*0Sstevel@tonic-gate 	/*
669*0Sstevel@tonic-gate 	 * Allocate a virtual memory range spanned by an hmeblk.
670*0Sstevel@tonic-gate 	 * This would be 8 hments or 64k bytes.  Starting VA
671*0Sstevel@tonic-gate 	 * must be 64k (8-page) aligned.
672*0Sstevel@tonic-gate 	 */
673*0Sstevel@tonic-gate 	cpr_vaddr = vmem_xalloc(heap_arena,
674*0Sstevel@tonic-gate 	    mmu_ptob(NHMENTS), mmu_ptob(NHMENTS),
675*0Sstevel@tonic-gate 	    0, 0, NULL, NULL, VM_NOSLEEP);
676*0Sstevel@tonic-gate 	return (cpr_vaddr);
677*0Sstevel@tonic-gate }
678*0Sstevel@tonic-gate 
679*0Sstevel@tonic-gate /*
680*0Sstevel@tonic-gate  * create tmp locked tlb entries for a group of phys pages;
681*0Sstevel@tonic-gate  *
682*0Sstevel@tonic-gate  * i_cpr_mapin/i_cpr_mapout should always be called in pairs,
683*0Sstevel@tonic-gate  * otherwise would fill up a tlb with locked entries
684*0Sstevel@tonic-gate  */
685*0Sstevel@tonic-gate void
686*0Sstevel@tonic-gate i_cpr_mapin(caddr_t vaddr, uint_t pages, pfn_t ppn)
687*0Sstevel@tonic-gate {
688*0Sstevel@tonic-gate 	tte_t tte;
689*0Sstevel@tonic-gate 	extern pfn_t curthreadpfn;
690*0Sstevel@tonic-gate 	extern int curthreadremapped;
691*0Sstevel@tonic-gate 
692*0Sstevel@tonic-gate 	curthreadremapped = (ppn <= curthreadpfn && curthreadpfn < ppn + pages);
693*0Sstevel@tonic-gate 
694*0Sstevel@tonic-gate 	for (; pages--; ppn++, vaddr += MMU_PAGESIZE) {
695*0Sstevel@tonic-gate 		tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(ppn);
696*0Sstevel@tonic-gate 		tte.tte_intlo = TTE_PFN_INTLO(ppn) | TTE_LCK_INT |
697*0Sstevel@tonic-gate 		    TTE_CP_INT | TTE_PRIV_INT | TTE_HWWR_INT;
698*0Sstevel@tonic-gate 		sfmmu_dtlb_ld(vaddr, KCONTEXT, &tte);
699*0Sstevel@tonic-gate 	}
700*0Sstevel@tonic-gate }
701*0Sstevel@tonic-gate 
702*0Sstevel@tonic-gate void
703*0Sstevel@tonic-gate i_cpr_mapout(caddr_t vaddr, uint_t pages)
704*0Sstevel@tonic-gate {
705*0Sstevel@tonic-gate 	extern int curthreadremapped;
706*0Sstevel@tonic-gate 
707*0Sstevel@tonic-gate 	if (curthreadremapped && vaddr <= (caddr_t)curthread &&
708*0Sstevel@tonic-gate 	    (caddr_t)curthread < vaddr + pages * MMU_PAGESIZE)
709*0Sstevel@tonic-gate 		curthreadremapped = 0;
710*0Sstevel@tonic-gate 
711*0Sstevel@tonic-gate 	for (; pages--; vaddr += MMU_PAGESIZE)
712*0Sstevel@tonic-gate 		vtag_flushpage(vaddr, KCONTEXT);
713*0Sstevel@tonic-gate }
714*0Sstevel@tonic-gate 
715*0Sstevel@tonic-gate /*
716*0Sstevel@tonic-gate  * We're done using the mapping area; release virtual space
717*0Sstevel@tonic-gate  */
718*0Sstevel@tonic-gate void
719*0Sstevel@tonic-gate i_cpr_map_destroy(void)
720*0Sstevel@tonic-gate {
721*0Sstevel@tonic-gate 	vmem_free(heap_arena, cpr_vaddr, mmu_ptob(NHMENTS));
722*0Sstevel@tonic-gate 	cpr_vaddr = NULL;
723*0Sstevel@tonic-gate }
724*0Sstevel@tonic-gate 
725*0Sstevel@tonic-gate /* ARGSUSED */
726*0Sstevel@tonic-gate void
727*0Sstevel@tonic-gate i_cpr_handle_xc(int flag)
728*0Sstevel@tonic-gate {
729*0Sstevel@tonic-gate }
730*0Sstevel@tonic-gate 
731*0Sstevel@tonic-gate 
732*0Sstevel@tonic-gate /*
733*0Sstevel@tonic-gate  * This function takes care of pages which are not in kas or need to be
734*0Sstevel@tonic-gate  * taken care of in a special way.  For example, panicbuf pages are not
735*0Sstevel@tonic-gate  * in kas and their pages are allocated via prom_retain().
736*0Sstevel@tonic-gate  */
737*0Sstevel@tonic-gate pgcnt_t
738*0Sstevel@tonic-gate i_cpr_count_special_kpages(int mapflag, bitfunc_t bitfunc)
739*0Sstevel@tonic-gate {
740*0Sstevel@tonic-gate 	struct cpr_map_info *pri, *tail;
741*0Sstevel@tonic-gate 	pgcnt_t pages, total = 0;
742*0Sstevel@tonic-gate 	pfn_t pfn;
743*0Sstevel@tonic-gate 
744*0Sstevel@tonic-gate 	/*
745*0Sstevel@tonic-gate 	 * Save information about prom retained panicbuf pages
746*0Sstevel@tonic-gate 	 */
747*0Sstevel@tonic-gate 	if (bitfunc == cpr_setbit) {
748*0Sstevel@tonic-gate 		pri = &cpr_prom_retain[CPR_PANICBUF];
749*0Sstevel@tonic-gate 		pri->virt = (cpr_ptr)panicbuf;
750*0Sstevel@tonic-gate 		pri->phys = va_to_pa(panicbuf);
751*0Sstevel@tonic-gate 		pri->size = sizeof (panicbuf);
752*0Sstevel@tonic-gate 	}
753*0Sstevel@tonic-gate 
754*0Sstevel@tonic-gate 	/*
755*0Sstevel@tonic-gate 	 * Go through the prom_retain array to tag those pages.
756*0Sstevel@tonic-gate 	 */
757*0Sstevel@tonic-gate 	tail = &cpr_prom_retain[CPR_PROM_RETAIN_CNT];
758*0Sstevel@tonic-gate 	for (pri = cpr_prom_retain; pri < tail; pri++) {
759*0Sstevel@tonic-gate 		pages = mmu_btopr(pri->size);
760*0Sstevel@tonic-gate 		for (pfn = ADDR_TO_PN(pri->phys); pages--; pfn++) {
761*0Sstevel@tonic-gate 			if (pf_is_memory(pfn)) {
762*0Sstevel@tonic-gate 				if (bitfunc == cpr_setbit) {
763*0Sstevel@tonic-gate 					if ((*bitfunc)(pfn, mapflag) == 0)
764*0Sstevel@tonic-gate 						total++;
765*0Sstevel@tonic-gate 				} else
766*0Sstevel@tonic-gate 					total++;
767*0Sstevel@tonic-gate 			}
768*0Sstevel@tonic-gate 		}
769*0Sstevel@tonic-gate 	}
770*0Sstevel@tonic-gate 
771*0Sstevel@tonic-gate 	return (total);
772*0Sstevel@tonic-gate }
773*0Sstevel@tonic-gate 
774*0Sstevel@tonic-gate 
775*0Sstevel@tonic-gate /*
776*0Sstevel@tonic-gate  * Free up memory-related resources here.  We start by freeing buffers
777*0Sstevel@tonic-gate  * allocated during suspend initialization.  Also, free up the mapping
778*0Sstevel@tonic-gate  * resources allocated in cpr_init().
779*0Sstevel@tonic-gate  */
780*0Sstevel@tonic-gate void
781*0Sstevel@tonic-gate i_cpr_free_memory_resources(void)
782*0Sstevel@tonic-gate {
783*0Sstevel@tonic-gate 	(void) i_cpr_prom_pages(CPR_PROM_FREE);
784*0Sstevel@tonic-gate 	i_cpr_map_destroy();
785*0Sstevel@tonic-gate 	i_cpr_storage_free();
786*0Sstevel@tonic-gate }
787*0Sstevel@tonic-gate 
788*0Sstevel@tonic-gate 
789*0Sstevel@tonic-gate /*
790*0Sstevel@tonic-gate  * Derived from cpr_write_statefile().
791*0Sstevel@tonic-gate  * Save the sensitive pages to the storage area and do bookkeeping
792*0Sstevel@tonic-gate  * using the sensitive descriptors. Each descriptor will contain no more
793*0Sstevel@tonic-gate  * than CPR_MAXCONTIG amount of contiguous pages to match the max amount
794*0Sstevel@tonic-gate  * of pages that statefile gets written to disk at each write.
795*0Sstevel@tonic-gate  * XXX The CPR_MAXCONTIG can be changed to the size of the compression
796*0Sstevel@tonic-gate  * scratch area.
797*0Sstevel@tonic-gate  */
798*0Sstevel@tonic-gate static int
799*0Sstevel@tonic-gate i_cpr_save_to_storage(void)
800*0Sstevel@tonic-gate {
801*0Sstevel@tonic-gate 	sensitive_size_saved = 0;
802*0Sstevel@tonic-gate 	sensitive_pages_saved = 0;
803*0Sstevel@tonic-gate 	sensitive_write_ptr = i_cpr_storage_data_base;
804*0Sstevel@tonic-gate 	return (cpr_contig_pages(NULL, SAVE_TO_STORAGE));
805*0Sstevel@tonic-gate }
806*0Sstevel@tonic-gate 
807*0Sstevel@tonic-gate 
808*0Sstevel@tonic-gate /*
809*0Sstevel@tonic-gate  * This routine allocates space to save the sensitive kernel pages,
810*0Sstevel@tonic-gate  * i.e. kernel data nucleus, kvalloc and kvseg segments.
811*0Sstevel@tonic-gate  * It's assumed that those segments are the only areas that can be
812*0Sstevel@tonic-gate  * contaminated by memory allocations during statefile dumping.
813*0Sstevel@tonic-gate  * The space allocated here contains:
814*0Sstevel@tonic-gate  * 	A list of descriptors describing the saved sensitive pages.
815*0Sstevel@tonic-gate  * 	The storage area for saving the compressed sensitive kernel pages.
816*0Sstevel@tonic-gate  * Since storage pages are allocated from segkmem, they need to be
817*0Sstevel@tonic-gate  * excluded when saving.
818*0Sstevel@tonic-gate  */
819*0Sstevel@tonic-gate int
820*0Sstevel@tonic-gate i_cpr_save_sensitive_kpages(void)
821*0Sstevel@tonic-gate {
822*0Sstevel@tonic-gate 	static const char pages_fmt[] = "\n%s %s allocs\n"
823*0Sstevel@tonic-gate 	    "	spages %ld, vpages %ld, diff %ld\n";
824*0Sstevel@tonic-gate 	int retry_cnt;
825*0Sstevel@tonic-gate 	int error = 0;
826*0Sstevel@tonic-gate 	pgcnt_t pages, spages, vpages;
827*0Sstevel@tonic-gate 	caddr_t	addr;
828*0Sstevel@tonic-gate 	char *str;
829*0Sstevel@tonic-gate 
830*0Sstevel@tonic-gate 	/*
831*0Sstevel@tonic-gate 	 * Tag sensitive kpages. Allocate space for storage descriptors
832*0Sstevel@tonic-gate 	 * and storage data area based on the resulting bitmaps.
833*0Sstevel@tonic-gate 	 * Note: The storage space will be part of the sensitive
834*0Sstevel@tonic-gate 	 * segment, so we need to tag kpages here before the storage
835*0Sstevel@tonic-gate 	 * is actually allocated just so their space won't be accounted
836*0Sstevel@tonic-gate 	 * for. They will not be part of the statefile although those
837*0Sstevel@tonic-gate 	 * pages will be claimed by cprboot.
838*0Sstevel@tonic-gate 	 */
839*0Sstevel@tonic-gate 	cpr_clear_bitmaps();
840*0Sstevel@tonic-gate 
841*0Sstevel@tonic-gate 	spages = i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_setbit);
842*0Sstevel@tonic-gate 	vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);
843*0Sstevel@tonic-gate 	pages = spages - vpages;
844*0Sstevel@tonic-gate 
845*0Sstevel@tonic-gate 	str = "i_cpr_save_sensitive_kpages:";
846*0Sstevel@tonic-gate 	DEBUG7(errp(pages_fmt, "before", str, spages, vpages, pages));
847*0Sstevel@tonic-gate 
848*0Sstevel@tonic-gate 	/*
849*0Sstevel@tonic-gate 	 * Allocate space to save the clean sensitive kpages
850*0Sstevel@tonic-gate 	 */
851*0Sstevel@tonic-gate 	for (retry_cnt = 0; retry_cnt < MAX_STORAGE_ALLOC_RETRY; retry_cnt++) {
852*0Sstevel@tonic-gate 		/*
853*0Sstevel@tonic-gate 		 * Alloc on first pass or realloc if we are retrying because
854*0Sstevel@tonic-gate 		 * of insufficient storage for sensitive pages
855*0Sstevel@tonic-gate 		 */
856*0Sstevel@tonic-gate 		if (retry_cnt == 0 || error == ENOMEM) {
857*0Sstevel@tonic-gate 			if (i_cpr_storage_data_base) {
858*0Sstevel@tonic-gate 				kmem_free(i_cpr_storage_data_base,
859*0Sstevel@tonic-gate 				    mmu_ptob(i_cpr_storage_data_sz));
860*0Sstevel@tonic-gate 				i_cpr_storage_data_base = NULL;
861*0Sstevel@tonic-gate 				i_cpr_storage_data_sz = 0;
862*0Sstevel@tonic-gate 			}
863*0Sstevel@tonic-gate 			addr = i_cpr_storage_data_alloc(pages,
864*0Sstevel@tonic-gate 			    &i_cpr_storage_data_sz, retry_cnt);
865*0Sstevel@tonic-gate 			if (addr == NULL) {
866*0Sstevel@tonic-gate 				DEBUG7(errp(
867*0Sstevel@tonic-gate 				    "\n%s can't allocate data storage space!\n",
868*0Sstevel@tonic-gate 				    str));
869*0Sstevel@tonic-gate 				return (ENOMEM);
870*0Sstevel@tonic-gate 			}
871*0Sstevel@tonic-gate 			i_cpr_storage_data_base = addr;
872*0Sstevel@tonic-gate 			i_cpr_storage_data_end =
873*0Sstevel@tonic-gate 			    addr + mmu_ptob(i_cpr_storage_data_sz);
874*0Sstevel@tonic-gate 		}
875*0Sstevel@tonic-gate 
876*0Sstevel@tonic-gate 		/*
877*0Sstevel@tonic-gate 		 * Allocate on first pass, only realloc if retry is because of
878*0Sstevel@tonic-gate 		 * insufficient descriptors, but reset contents on each pass
879*0Sstevel@tonic-gate 		 * (desc_alloc resets contents as well)
880*0Sstevel@tonic-gate 		 */
881*0Sstevel@tonic-gate 		if (retry_cnt == 0 || error == -1) {
882*0Sstevel@tonic-gate 			error = i_cpr_storage_desc_alloc(
883*0Sstevel@tonic-gate 			    &i_cpr_storage_desc_base, &i_cpr_storage_desc_pgcnt,
884*0Sstevel@tonic-gate 			    &i_cpr_storage_desc_end, retry_cnt);
885*0Sstevel@tonic-gate 			if (error != 0)
886*0Sstevel@tonic-gate 				return (error);
887*0Sstevel@tonic-gate 		} else {
888*0Sstevel@tonic-gate 			i_cpr_storage_desc_init(i_cpr_storage_desc_base,
889*0Sstevel@tonic-gate 			    i_cpr_storage_desc_pgcnt, i_cpr_storage_desc_end);
890*0Sstevel@tonic-gate 		}
891*0Sstevel@tonic-gate 
892*0Sstevel@tonic-gate 		/*
893*0Sstevel@tonic-gate 		 * We are ready to save the sensitive kpages to storage.
894*0Sstevel@tonic-gate 		 * We cannot trust what's tagged in the bitmaps anymore
895*0Sstevel@tonic-gate 		 * after storage allocations.  Clear up the bitmaps and
896*0Sstevel@tonic-gate 		 * retag the sensitive kpages again.  The storage pages
897*0Sstevel@tonic-gate 		 * should be untagged.
898*0Sstevel@tonic-gate 		 */
899*0Sstevel@tonic-gate 		cpr_clear_bitmaps();
900*0Sstevel@tonic-gate 
901*0Sstevel@tonic-gate 		spages =
902*0Sstevel@tonic-gate 		    i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_setbit);
903*0Sstevel@tonic-gate 		vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);
904*0Sstevel@tonic-gate 
905*0Sstevel@tonic-gate 		DEBUG7(errp(pages_fmt, "after ", str,
906*0Sstevel@tonic-gate 		    spages, vpages, spages - vpages));
907*0Sstevel@tonic-gate 
908*0Sstevel@tonic-gate 		/*
909*0Sstevel@tonic-gate 		 * Returns 0 on success, -1 if too few descriptors, and
910*0Sstevel@tonic-gate 		 * ENOMEM if not enough space to save sensitive pages
911*0Sstevel@tonic-gate 		 */
912*0Sstevel@tonic-gate 		DEBUG1(errp("compressing pages to storage...\n"));
913*0Sstevel@tonic-gate 		error = i_cpr_save_to_storage();
914*0Sstevel@tonic-gate 		if (error == 0) {
915*0Sstevel@tonic-gate 			/* Saving to storage succeeded */
916*0Sstevel@tonic-gate 			DEBUG1(errp("compressed %d pages\n",
917*0Sstevel@tonic-gate 			    sensitive_pages_saved));
918*0Sstevel@tonic-gate 			break;
919*0Sstevel@tonic-gate 		} else if (error == -1)
920*0Sstevel@tonic-gate 			DEBUG1(errp("%s too few descriptors\n", str));
921*0Sstevel@tonic-gate 	}
922*0Sstevel@tonic-gate 	if (error == -1)
923*0Sstevel@tonic-gate 		error = ENOMEM;
924*0Sstevel@tonic-gate 	return (error);
925*0Sstevel@tonic-gate }
926*0Sstevel@tonic-gate 
927*0Sstevel@tonic-gate 
928*0Sstevel@tonic-gate /*
929*0Sstevel@tonic-gate  * Estimate how much memory we will need to save
930*0Sstevel@tonic-gate  * the sensitive pages with compression.
931*0Sstevel@tonic-gate  */
932*0Sstevel@tonic-gate static caddr_t
933*0Sstevel@tonic-gate i_cpr_storage_data_alloc(pgcnt_t pages, pgcnt_t *alloc_pages, int retry_cnt)
934*0Sstevel@tonic-gate {
935*0Sstevel@tonic-gate 	pgcnt_t alloc_pcnt, last_pcnt;
936*0Sstevel@tonic-gate 	caddr_t addr;
937*0Sstevel@tonic-gate 	char *str;
938*0Sstevel@tonic-gate 
939*0Sstevel@tonic-gate 	str = "i_cpr_storage_data_alloc:";
940*0Sstevel@tonic-gate 	if (retry_cnt == 0) {
941*0Sstevel@tonic-gate 		/*
942*0Sstevel@tonic-gate 		 * common compression ratio is about 3:1
943*0Sstevel@tonic-gate 		 * initial storage allocation is estimated at 40%
944*0Sstevel@tonic-gate 		 * to cover the majority of cases
945*0Sstevel@tonic-gate 		 */
946*0Sstevel@tonic-gate 		alloc_pcnt = INITIAL_ALLOC_PCNT;
947*0Sstevel@tonic-gate 		*alloc_pages = (pages * alloc_pcnt) / INTEGRAL;
948*0Sstevel@tonic-gate 		DEBUG7(errp("%s sensitive pages: %ld\n", str, pages));
949*0Sstevel@tonic-gate 		DEBUG7(errp("%s initial est pages: %ld, alloc %ld%%\n",
950*0Sstevel@tonic-gate 		    str, *alloc_pages, alloc_pcnt));
951*0Sstevel@tonic-gate 	} else {
952*0Sstevel@tonic-gate 		/*
953*0Sstevel@tonic-gate 		 * calculate the prior compression percentage (x100)
954*0Sstevel@tonic-gate 		 * from the last attempt to save sensitive pages
955*0Sstevel@tonic-gate 		 */
956*0Sstevel@tonic-gate 		ASSERT(sensitive_pages_saved != 0);
957*0Sstevel@tonic-gate 		last_pcnt = (mmu_btopr(sensitive_size_saved) * INTEGRAL) /
958*0Sstevel@tonic-gate 		    sensitive_pages_saved;
959*0Sstevel@tonic-gate 		DEBUG7(errp("%s last ratio %ld%%\n", str, last_pcnt));
960*0Sstevel@tonic-gate 
961*0Sstevel@tonic-gate 		/*
962*0Sstevel@tonic-gate 		 * new estimated storage size is based on
963*0Sstevel@tonic-gate 		 * the larger ratio + 5% for each retry:
964*0Sstevel@tonic-gate 		 * pages * (last + [5%, 10%])
965*0Sstevel@tonic-gate 		 */
966*0Sstevel@tonic-gate 		alloc_pcnt = MAX(last_pcnt, INITIAL_ALLOC_PCNT) +
967*0Sstevel@tonic-gate 		    (retry_cnt * 5);
968*0Sstevel@tonic-gate 		*alloc_pages = (pages * alloc_pcnt) / INTEGRAL;
969*0Sstevel@tonic-gate 		DEBUG7(errp("%s Retry est pages: %ld, alloc %ld%%\n",
970*0Sstevel@tonic-gate 		    str, *alloc_pages, alloc_pcnt));
971*0Sstevel@tonic-gate 	}
972*0Sstevel@tonic-gate 
973*0Sstevel@tonic-gate 	addr = kmem_alloc(mmu_ptob(*alloc_pages), KM_NOSLEEP);
974*0Sstevel@tonic-gate 	DEBUG7(errp("%s alloc %ld pages\n", str, *alloc_pages));
975*0Sstevel@tonic-gate 	return (addr);
976*0Sstevel@tonic-gate }
977*0Sstevel@tonic-gate 
978*0Sstevel@tonic-gate 
979*0Sstevel@tonic-gate void
980*0Sstevel@tonic-gate i_cpr_storage_free(void)
981*0Sstevel@tonic-gate {
982*0Sstevel@tonic-gate 	/* Free descriptors */
983*0Sstevel@tonic-gate 	if (i_cpr_storage_desc_base) {
984*0Sstevel@tonic-gate 		kmem_free(i_cpr_storage_desc_base,
985*0Sstevel@tonic-gate 		    mmu_ptob(i_cpr_storage_desc_pgcnt));
986*0Sstevel@tonic-gate 		i_cpr_storage_desc_base = NULL;
987*0Sstevel@tonic-gate 		i_cpr_storage_desc_pgcnt = 0;
988*0Sstevel@tonic-gate 	}
989*0Sstevel@tonic-gate 
990*0Sstevel@tonic-gate 
991*0Sstevel@tonic-gate 	/* Data storage */
992*0Sstevel@tonic-gate 	if (i_cpr_storage_data_base) {
993*0Sstevel@tonic-gate 		kmem_free(i_cpr_storage_data_base,
994*0Sstevel@tonic-gate 		    mmu_ptob(i_cpr_storage_data_sz));
995*0Sstevel@tonic-gate 		i_cpr_storage_data_base = NULL;
996*0Sstevel@tonic-gate 		i_cpr_storage_data_sz = 0;
997*0Sstevel@tonic-gate 	}
998*0Sstevel@tonic-gate }
999*0Sstevel@tonic-gate 
1000*0Sstevel@tonic-gate 
1001*0Sstevel@tonic-gate /*
1002*0Sstevel@tonic-gate  * This routine is derived from cpr_compress_and_write().
1003*0Sstevel@tonic-gate  * 1. Do bookkeeping in the descriptor for the contiguous sensitive chunk.
1004*0Sstevel@tonic-gate  * 2. Compress and save the clean sensitive pages into the storage area.
1005*0Sstevel@tonic-gate  */
1006*0Sstevel@tonic-gate int
1007*0Sstevel@tonic-gate i_cpr_compress_and_save(int chunks, pfn_t spfn, pgcnt_t pages)
1008*0Sstevel@tonic-gate {
1009*0Sstevel@tonic-gate 	extern char *cpr_compress_pages(cpd_t *, pgcnt_t, int);
1010*0Sstevel@tonic-gate 	extern caddr_t i_cpr_storage_data_end;
1011*0Sstevel@tonic-gate 	uint_t remaining, datalen;
1012*0Sstevel@tonic-gate 	uint32_t test_usum;
1013*0Sstevel@tonic-gate 	char *datap;
1014*0Sstevel@tonic-gate 	csd_t *descp;
1015*0Sstevel@tonic-gate 	cpd_t cpd;
1016*0Sstevel@tonic-gate 	int error;
1017*0Sstevel@tonic-gate 
1018*0Sstevel@tonic-gate 	/*
1019*0Sstevel@tonic-gate 	 * Fill next empty storage descriptor
1020*0Sstevel@tonic-gate 	 */
1021*0Sstevel@tonic-gate 	descp = i_cpr_storage_desc_base + chunks - 1;
1022*0Sstevel@tonic-gate 	if (descp >= i_cpr_storage_desc_end) {
1023*0Sstevel@tonic-gate 		DEBUG1(errp("ran out of descriptors, base 0x%p, chunks %d, "
1024*0Sstevel@tonic-gate 		    "end 0x%p, descp 0x%p\n", i_cpr_storage_desc_base, chunks,
1025*0Sstevel@tonic-gate 		    i_cpr_storage_desc_end, descp));
1026*0Sstevel@tonic-gate 		return (-1);
1027*0Sstevel@tonic-gate 	}
1028*0Sstevel@tonic-gate 	ASSERT(descp->csd_dirty_spfn == (uint_t)-1);
1029*0Sstevel@tonic-gate 	i_cpr_storage_desc_last_used = descp;
1030*0Sstevel@tonic-gate 
1031*0Sstevel@tonic-gate 	descp->csd_dirty_spfn = spfn;
1032*0Sstevel@tonic-gate 	descp->csd_dirty_npages = pages;
1033*0Sstevel@tonic-gate 
1034*0Sstevel@tonic-gate 	i_cpr_mapin(CPR->c_mapping_area, pages, spfn);
1035*0Sstevel@tonic-gate 
1036*0Sstevel@tonic-gate 	/*
1037*0Sstevel@tonic-gate 	 * try compressing pages and copy cpd fields
1038*0Sstevel@tonic-gate 	 * pfn is copied for debug use
1039*0Sstevel@tonic-gate 	 */
1040*0Sstevel@tonic-gate 	cpd.cpd_pfn = spfn;
1041*0Sstevel@tonic-gate 	datap = cpr_compress_pages(&cpd, pages, C_COMPRESSING);
1042*0Sstevel@tonic-gate 	datalen = cpd.cpd_length;
1043*0Sstevel@tonic-gate 	descp->csd_clean_compressed = (cpd.cpd_flag & CPD_COMPRESS);
1044*0Sstevel@tonic-gate #ifdef DEBUG
1045*0Sstevel@tonic-gate 	descp->csd_usum = cpd.cpd_usum;
1046*0Sstevel@tonic-gate 	descp->csd_csum = cpd.cpd_csum;
1047*0Sstevel@tonic-gate #endif
1048*0Sstevel@tonic-gate 
1049*0Sstevel@tonic-gate 	error = 0;
1050*0Sstevel@tonic-gate 
1051*0Sstevel@tonic-gate 	/*
1052*0Sstevel@tonic-gate 	 * Save the raw or compressed data to the storage area pointed to by
1053*0Sstevel@tonic-gate 	 * sensitive_write_ptr. Make sure the storage space is big enough to
1054*0Sstevel@tonic-gate 	 * hold the result. Otherwise roll back to increase the storage space.
1055*0Sstevel@tonic-gate 	 */
1056*0Sstevel@tonic-gate 	descp->csd_clean_sva = (cpr_ptr)sensitive_write_ptr;
1057*0Sstevel@tonic-gate 	descp->csd_clean_sz = datalen;
1058*0Sstevel@tonic-gate 	if ((sensitive_write_ptr + datalen) < i_cpr_storage_data_end) {
1059*0Sstevel@tonic-gate 		extern	void cprbcopy(void *, void *, size_t);
1060*0Sstevel@tonic-gate 
1061*0Sstevel@tonic-gate 		cprbcopy(datap, sensitive_write_ptr, datalen);
1062*0Sstevel@tonic-gate 		sensitive_size_saved += datalen;
1063*0Sstevel@tonic-gate 		sensitive_pages_saved += descp->csd_dirty_npages;
1064*0Sstevel@tonic-gate 		sensitive_write_ptr += datalen;
1065*0Sstevel@tonic-gate 	} else {
1066*0Sstevel@tonic-gate 		remaining = (i_cpr_storage_data_end - sensitive_write_ptr);
1067*0Sstevel@tonic-gate 		DEBUG1(errp("i_cpr_compress_and_save: The storage "
1068*0Sstevel@tonic-gate 		    "space is too small!\ngot %d, want %d\n\n",
1069*0Sstevel@tonic-gate 		    remaining, (remaining + datalen)));
1070*0Sstevel@tonic-gate #ifdef	DEBUG
1071*0Sstevel@tonic-gate 		/*
1072*0Sstevel@tonic-gate 		 * Check to see if the content of the sensitive pages that we
1073*0Sstevel@tonic-gate 		 * just copied have changed during this small time window.
1074*0Sstevel@tonic-gate 		 */
1075*0Sstevel@tonic-gate 		test_usum = checksum32(CPR->c_mapping_area, mmu_ptob(pages));
1076*0Sstevel@tonic-gate 		descp->csd_usum = cpd.cpd_usum;
1077*0Sstevel@tonic-gate 		if (test_usum != descp->csd_usum) {
1078*0Sstevel@tonic-gate 			DEBUG1(errp("\nWARNING: i_cpr_compress_and_save: "
1079*0Sstevel@tonic-gate 			    "Data in the range of pfn 0x%x to pfn "
1080*0Sstevel@tonic-gate 			    "0x%x has changed after they are saved "
1081*0Sstevel@tonic-gate 			    "into storage.", spfn, (spfn + pages - 1)));
1082*0Sstevel@tonic-gate 		}
1083*0Sstevel@tonic-gate #endif
1084*0Sstevel@tonic-gate 		error = ENOMEM;
1085*0Sstevel@tonic-gate 	}
1086*0Sstevel@tonic-gate 
1087*0Sstevel@tonic-gate 	i_cpr_mapout(CPR->c_mapping_area, pages);
1088*0Sstevel@tonic-gate 	return (error);
1089*0Sstevel@tonic-gate }
1090*0Sstevel@tonic-gate 
1091*0Sstevel@tonic-gate 
1092*0Sstevel@tonic-gate /*
1093*0Sstevel@tonic-gate  * This routine is derived from cpr_count_kpages().
1094*0Sstevel@tonic-gate  * It goes through kernel data nucleus and segkmem segments to select
1095*0Sstevel@tonic-gate  * pages in use and mark them in the corresponding bitmap.
1096*0Sstevel@tonic-gate  */
1097*0Sstevel@tonic-gate pgcnt_t
1098*0Sstevel@tonic-gate i_cpr_count_sensitive_kpages(int mapflag, bitfunc_t bitfunc)
1099*0Sstevel@tonic-gate {
1100*0Sstevel@tonic-gate 	pgcnt_t kdata_cnt = 0, segkmem_cnt = 0;
1101*0Sstevel@tonic-gate 	extern caddr_t e_moddata;
1102*0Sstevel@tonic-gate 	extern struct seg kvalloc;
1103*0Sstevel@tonic-gate 	extern struct seg kmem64;
1104*0Sstevel@tonic-gate 	size_t size;
1105*0Sstevel@tonic-gate 
1106*0Sstevel@tonic-gate 	/*
1107*0Sstevel@tonic-gate 	 * Kernel data nucleus pages
1108*0Sstevel@tonic-gate 	 */
1109*0Sstevel@tonic-gate 	size = e_moddata - s_data;
1110*0Sstevel@tonic-gate 	kdata_cnt += cpr_count_pages(s_data, size,
1111*0Sstevel@tonic-gate 	    mapflag, bitfunc, DBG_SHOWRANGE);
1112*0Sstevel@tonic-gate 
1113*0Sstevel@tonic-gate 	/*
1114*0Sstevel@tonic-gate 	 * kvseg and kvalloc pages
1115*0Sstevel@tonic-gate 	 */
1116*0Sstevel@tonic-gate 	segkmem_cnt += cpr_scan_kvseg(mapflag, bitfunc, &kvseg);
1117*0Sstevel@tonic-gate 	segkmem_cnt += cpr_count_pages(kvalloc.s_base, kvalloc.s_size,
1118*0Sstevel@tonic-gate 	    mapflag, bitfunc, DBG_SHOWRANGE);
1119*0Sstevel@tonic-gate 
1120*0Sstevel@tonic-gate 	/* segment to support kernel memory usage above 32-bit space (4GB) */
1121*0Sstevel@tonic-gate 	if (kmem64.s_base)
1122*0Sstevel@tonic-gate 		segkmem_cnt += cpr_count_pages(kmem64.s_base, kmem64.s_size,
1123*0Sstevel@tonic-gate 		    mapflag, bitfunc, DBG_SHOWRANGE);
1124*0Sstevel@tonic-gate 
1125*0Sstevel@tonic-gate 	DEBUG7(errp("\ni_cpr_count_sensitive_kpages:\n"
1126*0Sstevel@tonic-gate 	    "\tkdata_cnt %ld + segkmem_cnt %ld = %ld pages\n",
1127*0Sstevel@tonic-gate 	    kdata_cnt, segkmem_cnt, kdata_cnt + segkmem_cnt));
1128*0Sstevel@tonic-gate 
1129*0Sstevel@tonic-gate 	return (kdata_cnt + segkmem_cnt);
1130*0Sstevel@tonic-gate }
1131*0Sstevel@tonic-gate 
1132*0Sstevel@tonic-gate 
1133*0Sstevel@tonic-gate pgcnt_t
1134*0Sstevel@tonic-gate i_cpr_count_storage_pages(int mapflag, bitfunc_t bitfunc)
1135*0Sstevel@tonic-gate {
1136*0Sstevel@tonic-gate 	pgcnt_t count = 0;
1137*0Sstevel@tonic-gate 
1138*0Sstevel@tonic-gate 	if (i_cpr_storage_desc_base) {
1139*0Sstevel@tonic-gate 		count += cpr_count_pages((caddr_t)i_cpr_storage_desc_base,
1140*0Sstevel@tonic-gate 		    (size_t)mmu_ptob(i_cpr_storage_desc_pgcnt),
1141*0Sstevel@tonic-gate 		    mapflag, bitfunc, DBG_SHOWRANGE);
1142*0Sstevel@tonic-gate 	}
1143*0Sstevel@tonic-gate 	if (i_cpr_storage_data_base) {
1144*0Sstevel@tonic-gate 		count += cpr_count_pages(i_cpr_storage_data_base,
1145*0Sstevel@tonic-gate 		    (size_t)mmu_ptob(i_cpr_storage_data_sz),
1146*0Sstevel@tonic-gate 		    mapflag, bitfunc, DBG_SHOWRANGE);
1147*0Sstevel@tonic-gate 	}
1148*0Sstevel@tonic-gate 	return (count);
1149*0Sstevel@tonic-gate }
1150*0Sstevel@tonic-gate 
1151*0Sstevel@tonic-gate 
1152*0Sstevel@tonic-gate /*
1153*0Sstevel@tonic-gate  * Derived from cpr_write_statefile().
1154*0Sstevel@tonic-gate  * Allocate (or reallocate after exhausting the supply) descriptors for each
1155*0Sstevel@tonic-gate  * chunk of contiguous sensitive kpages.
1156*0Sstevel@tonic-gate  */
1157*0Sstevel@tonic-gate static int
1158*0Sstevel@tonic-gate i_cpr_storage_desc_alloc(csd_t **basepp, pgcnt_t *pgsp, csd_t **endpp,
1159*0Sstevel@tonic-gate     int retry)
1160*0Sstevel@tonic-gate {
1161*0Sstevel@tonic-gate 	pgcnt_t npages;
1162*0Sstevel@tonic-gate 	int chunks;
1163*0Sstevel@tonic-gate 	csd_t	*descp, *end;
1164*0Sstevel@tonic-gate 	size_t	len;
1165*0Sstevel@tonic-gate 	char *str = "i_cpr_storage_desc_alloc:";
1166*0Sstevel@tonic-gate 
1167*0Sstevel@tonic-gate 	/*
1168*0Sstevel@tonic-gate 	 * On initial allocation, add some extra to cover overhead caused
1169*0Sstevel@tonic-gate 	 * by the allocation for the storage area later.
1170*0Sstevel@tonic-gate 	 */
1171*0Sstevel@tonic-gate 	if (retry == 0) {
1172*0Sstevel@tonic-gate 		chunks = cpr_contig_pages(NULL, STORAGE_DESC_ALLOC) +
1173*0Sstevel@tonic-gate 		    EXTRA_DESCS;
1174*0Sstevel@tonic-gate 		npages = mmu_btopr(sizeof (**basepp) * (pgcnt_t)chunks);
1175*0Sstevel@tonic-gate 		DEBUG7(errp("%s chunks %d, ", str, chunks));
1176*0Sstevel@tonic-gate 	} else {
1177*0Sstevel@tonic-gate 		DEBUG7(errp("%s retry %d: ", str, retry));
1178*0Sstevel@tonic-gate 		npages = *pgsp + 1;
1179*0Sstevel@tonic-gate 	}
1180*0Sstevel@tonic-gate 	/* Free old descriptors, if any */
1181*0Sstevel@tonic-gate 	if (*basepp)
1182*0Sstevel@tonic-gate 		kmem_free((caddr_t)*basepp, mmu_ptob(*pgsp));
1183*0Sstevel@tonic-gate 
1184*0Sstevel@tonic-gate 	descp = *basepp = kmem_alloc(mmu_ptob(npages), KM_NOSLEEP);
1185*0Sstevel@tonic-gate 	if (descp == NULL) {
1186*0Sstevel@tonic-gate 		DEBUG7(errp("%s no space for descriptors!\n", str));
1187*0Sstevel@tonic-gate 		return (ENOMEM);
1188*0Sstevel@tonic-gate 	}
1189*0Sstevel@tonic-gate 
1190*0Sstevel@tonic-gate 	*pgsp = npages;
1191*0Sstevel@tonic-gate 	len = mmu_ptob(npages);
1192*0Sstevel@tonic-gate 	end = *endpp = descp + (len / (sizeof (**basepp)));
1193*0Sstevel@tonic-gate 	DEBUG7(errp("npages 0x%x, len 0x%x, items 0x%x\n\t*basepp "
1194*0Sstevel@tonic-gate 	    "%p, *endpp %p\n", npages, len, (len / (sizeof (**basepp))),
1195*0Sstevel@tonic-gate 	    *basepp, *endpp));
1196*0Sstevel@tonic-gate 	i_cpr_storage_desc_init(descp, npages, end);
1197*0Sstevel@tonic-gate 	return (0);
1198*0Sstevel@tonic-gate }
1199*0Sstevel@tonic-gate 
1200*0Sstevel@tonic-gate static void
1201*0Sstevel@tonic-gate i_cpr_storage_desc_init(csd_t *descp, pgcnt_t npages, csd_t *end)
1202*0Sstevel@tonic-gate {
1203*0Sstevel@tonic-gate 	size_t	len = mmu_ptob(npages);
1204*0Sstevel@tonic-gate 
1205*0Sstevel@tonic-gate 	/* Initialize the descriptors to something impossible. */
1206*0Sstevel@tonic-gate 	bzero(descp, len);
1207*0Sstevel@tonic-gate #ifdef	DEBUG
1208*0Sstevel@tonic-gate 	/*
1209*0Sstevel@tonic-gate 	 * This condition is tested by an ASSERT
1210*0Sstevel@tonic-gate 	 */
1211*0Sstevel@tonic-gate 	for (; descp < end; descp++)
1212*0Sstevel@tonic-gate 		descp->csd_dirty_spfn = (uint_t)-1;
1213*0Sstevel@tonic-gate #endif
1214*0Sstevel@tonic-gate }
1215*0Sstevel@tonic-gate 
1216*0Sstevel@tonic-gate int
1217*0Sstevel@tonic-gate i_cpr_dump_sensitive_kpages(vnode_t *vp)
1218*0Sstevel@tonic-gate {
1219*0Sstevel@tonic-gate 	int	error = 0;
1220*0Sstevel@tonic-gate 	uint_t	spin_cnt = 0;
1221*0Sstevel@tonic-gate 	csd_t	*descp;
1222*0Sstevel@tonic-gate 
1223*0Sstevel@tonic-gate 	/*
1224*0Sstevel@tonic-gate 	 * These following two variables need to be reinitialized
1225*0Sstevel@tonic-gate 	 * for each cpr cycle.
1226*0Sstevel@tonic-gate 	 */
1227*0Sstevel@tonic-gate 	i_cpr_sensitive_bytes_dumped = 0;
1228*0Sstevel@tonic-gate 	i_cpr_sensitive_pgs_dumped = 0;
1229*0Sstevel@tonic-gate 
1230*0Sstevel@tonic-gate 	if (i_cpr_storage_desc_base) {
1231*0Sstevel@tonic-gate 		for (descp = i_cpr_storage_desc_base;
1232*0Sstevel@tonic-gate 		    descp <= i_cpr_storage_desc_last_used; descp++) {
1233*0Sstevel@tonic-gate 			if (error = cpr_dump_sensitive(vp, descp))
1234*0Sstevel@tonic-gate 				return (error);
1235*0Sstevel@tonic-gate 			spin_cnt++;
1236*0Sstevel@tonic-gate 			if ((spin_cnt & 0x5F) == 1)
1237*0Sstevel@tonic-gate 				cpr_spinning_bar();
1238*0Sstevel@tonic-gate 		}
1239*0Sstevel@tonic-gate 		prom_printf(" \b");
1240*0Sstevel@tonic-gate 	}
1241*0Sstevel@tonic-gate 
1242*0Sstevel@tonic-gate 	DEBUG7(errp("\ni_cpr_dump_sensitive_kpages: dumped %d\n",
1243*0Sstevel@tonic-gate 	    i_cpr_sensitive_pgs_dumped));
1244*0Sstevel@tonic-gate 	return (0);
1245*0Sstevel@tonic-gate }
1246*0Sstevel@tonic-gate 
1247*0Sstevel@tonic-gate 
1248*0Sstevel@tonic-gate /*
1249*0Sstevel@tonic-gate  * 1. Fill the cpr page descriptor with the info of the dirty pages
1250*0Sstevel@tonic-gate  *    and
1251*0Sstevel@tonic-gate  *    write the descriptor out. It will be used at resume.
1252*0Sstevel@tonic-gate  * 2. Write the clean data in stead of the dirty data out.
1253*0Sstevel@tonic-gate  *    Note: to save space, the clean data is already compressed.
1254*0Sstevel@tonic-gate  */
1255*0Sstevel@tonic-gate static int
1256*0Sstevel@tonic-gate cpr_dump_sensitive(vnode_t *vp, csd_t *descp)
1257*0Sstevel@tonic-gate {
1258*0Sstevel@tonic-gate 	int error = 0;
1259*0Sstevel@tonic-gate 	caddr_t datap;
1260*0Sstevel@tonic-gate 	cpd_t cpd;	/* cpr page descriptor */
1261*0Sstevel@tonic-gate 	pfn_t	dirty_spfn;
1262*0Sstevel@tonic-gate 	pgcnt_t dirty_npages;
1263*0Sstevel@tonic-gate 	size_t clean_sz;
1264*0Sstevel@tonic-gate 	caddr_t	clean_sva;
1265*0Sstevel@tonic-gate 	int	clean_compressed;
1266*0Sstevel@tonic-gate 	extern uchar_t cpr_pagecopy[];
1267*0Sstevel@tonic-gate 
1268*0Sstevel@tonic-gate 	dirty_spfn = descp->csd_dirty_spfn;
1269*0Sstevel@tonic-gate 	dirty_npages = descp->csd_dirty_npages;
1270*0Sstevel@tonic-gate 	clean_sva = (caddr_t)descp->csd_clean_sva;
1271*0Sstevel@tonic-gate 	clean_sz = descp->csd_clean_sz;
1272*0Sstevel@tonic-gate 	clean_compressed = descp->csd_clean_compressed;
1273*0Sstevel@tonic-gate 
1274*0Sstevel@tonic-gate 	/* Fill cpr page descriptor. */
1275*0Sstevel@tonic-gate 	cpd.cpd_magic = (uint_t)CPR_PAGE_MAGIC;
1276*0Sstevel@tonic-gate 	cpd.cpd_pfn = dirty_spfn;
1277*0Sstevel@tonic-gate 	cpd.cpd_flag = 0;  /* must init to zero */
1278*0Sstevel@tonic-gate 	cpd.cpd_pages = dirty_npages;
1279*0Sstevel@tonic-gate 
1280*0Sstevel@tonic-gate #ifdef	DEBUG
1281*0Sstevel@tonic-gate 	if ((cpd.cpd_usum = descp->csd_usum) != 0)
1282*0Sstevel@tonic-gate 		cpd.cpd_flag |= CPD_USUM;
1283*0Sstevel@tonic-gate 	if ((cpd.cpd_csum = descp->csd_csum) != 0)
1284*0Sstevel@tonic-gate 		cpd.cpd_flag |= CPD_CSUM;
1285*0Sstevel@tonic-gate #endif
1286*0Sstevel@tonic-gate 
1287*0Sstevel@tonic-gate 	STAT->cs_dumped_statefsz += mmu_ptob(dirty_npages);
1288*0Sstevel@tonic-gate 
1289*0Sstevel@tonic-gate 	/*
1290*0Sstevel@tonic-gate 	 * The sensitive kpages are usually saved with compression
1291*0Sstevel@tonic-gate 	 * unless compression could not reduce the size of the data.
1292*0Sstevel@tonic-gate 	 * If user choose not to have the statefile compressed,
1293*0Sstevel@tonic-gate 	 * we need to decompress the data back before dumping it to disk.
1294*0Sstevel@tonic-gate 	 */
1295*0Sstevel@tonic-gate 	if (CPR->c_flags & C_COMPRESSING) {
1296*0Sstevel@tonic-gate 		cpd.cpd_length = clean_sz;
1297*0Sstevel@tonic-gate 		datap = clean_sva;
1298*0Sstevel@tonic-gate 		if (clean_compressed)
1299*0Sstevel@tonic-gate 			cpd.cpd_flag |= CPD_COMPRESS;
1300*0Sstevel@tonic-gate 	} else {
1301*0Sstevel@tonic-gate 		if (clean_compressed) {
1302*0Sstevel@tonic-gate 			cpd.cpd_length = decompress(clean_sva, cpr_pagecopy,
1303*0Sstevel@tonic-gate 			    clean_sz, mmu_ptob(dirty_npages));
1304*0Sstevel@tonic-gate 			datap = (caddr_t)cpr_pagecopy;
1305*0Sstevel@tonic-gate 			ASSERT(cpd.cpd_length == mmu_ptob(dirty_npages));
1306*0Sstevel@tonic-gate 		} else {
1307*0Sstevel@tonic-gate 			cpd.cpd_length = clean_sz;
1308*0Sstevel@tonic-gate 			datap = clean_sva;
1309*0Sstevel@tonic-gate 		}
1310*0Sstevel@tonic-gate 		cpd.cpd_csum = 0;
1311*0Sstevel@tonic-gate 	}
1312*0Sstevel@tonic-gate 
1313*0Sstevel@tonic-gate 	/* Write cpr page descriptor */
1314*0Sstevel@tonic-gate 	error = cpr_write(vp, (caddr_t)&cpd, sizeof (cpd));
1315*0Sstevel@tonic-gate 	if (error) {
1316*0Sstevel@tonic-gate 		DEBUG7(errp("descp: %x\n", descp));
1317*0Sstevel@tonic-gate #ifdef DEBUG
1318*0Sstevel@tonic-gate 		debug_enter("cpr_dump_sensitive: cpr_write() page "
1319*0Sstevel@tonic-gate 			"descriptor failed!\n");
1320*0Sstevel@tonic-gate #endif
1321*0Sstevel@tonic-gate 		return (error);
1322*0Sstevel@tonic-gate 	}
1323*0Sstevel@tonic-gate 
1324*0Sstevel@tonic-gate 	i_cpr_sensitive_bytes_dumped += sizeof (cpd_t);
1325*0Sstevel@tonic-gate 
1326*0Sstevel@tonic-gate 	/* Write page data */
1327*0Sstevel@tonic-gate 	error = cpr_write(vp, (caddr_t)datap, cpd.cpd_length);
1328*0Sstevel@tonic-gate 	if (error) {
1329*0Sstevel@tonic-gate 		DEBUG7(errp("error: %x\n", error));
1330*0Sstevel@tonic-gate 		DEBUG7(errp("descp: %x\n", descp));
1331*0Sstevel@tonic-gate 		DEBUG7(errp("cpr_write(%x, %x , %x)\n", vp, datap,
1332*0Sstevel@tonic-gate 			cpd.cpd_length));
1333*0Sstevel@tonic-gate #ifdef DEBUG
1334*0Sstevel@tonic-gate 		debug_enter("cpr_dump_sensitive: cpr_write() data failed!\n");
1335*0Sstevel@tonic-gate #endif
1336*0Sstevel@tonic-gate 		return (error);
1337*0Sstevel@tonic-gate 	}
1338*0Sstevel@tonic-gate 
1339*0Sstevel@tonic-gate 	i_cpr_sensitive_bytes_dumped += cpd.cpd_length;
1340*0Sstevel@tonic-gate 	i_cpr_sensitive_pgs_dumped += dirty_npages;
1341*0Sstevel@tonic-gate 
1342*0Sstevel@tonic-gate 	return (error);
1343*0Sstevel@tonic-gate }
1344*0Sstevel@tonic-gate 
1345*0Sstevel@tonic-gate 
1346*0Sstevel@tonic-gate /*
1347*0Sstevel@tonic-gate  * Sanity check to make sure that we have dumped right amount
1348*0Sstevel@tonic-gate  * of pages from different sources to statefile.
1349*0Sstevel@tonic-gate  */
1350*0Sstevel@tonic-gate int
1351*0Sstevel@tonic-gate i_cpr_check_pgs_dumped(uint_t pgs_expected, uint_t regular_pgs_dumped)
1352*0Sstevel@tonic-gate {
1353*0Sstevel@tonic-gate 	uint_t total_pgs_dumped;
1354*0Sstevel@tonic-gate 
1355*0Sstevel@tonic-gate 	total_pgs_dumped = regular_pgs_dumped + i_cpr_sensitive_pgs_dumped;
1356*0Sstevel@tonic-gate 
1357*0Sstevel@tonic-gate 	DEBUG7(errp("\ncheck_pgs: reg %d + sens %d = %d, expect %d\n\n",
1358*0Sstevel@tonic-gate 	    regular_pgs_dumped, i_cpr_sensitive_pgs_dumped,
1359*0Sstevel@tonic-gate 	    total_pgs_dumped, pgs_expected));
1360*0Sstevel@tonic-gate 
1361*0Sstevel@tonic-gate 	if (pgs_expected == total_pgs_dumped)
1362*0Sstevel@tonic-gate 		return (0);
1363*0Sstevel@tonic-gate 
1364*0Sstevel@tonic-gate 	return (EINVAL);
1365*0Sstevel@tonic-gate }
1366*0Sstevel@tonic-gate 
1367*0Sstevel@tonic-gate 
/*
 * Exit reusable statefile mode: read the cpr default file (cprinfo),
 * restore the prom nvram properties saved there, and invalidate the
 * on-disk copy so a stale statefile cannot be reused.
 * Returns 0 on success or an errno value.
 */
int
i_cpr_reusefini(void)
{
	struct vnode *vp;
	cdef_t *cdef;
	size_t size;
	char *bufp;
	int rc;

	if (cpr_reusable_mode)
		cpr_reusable_mode = 0;

	if (rc = cpr_open_deffile(FREAD|FWRITE, &vp)) {
		/* a read-only root is the common, user-fixable failure */
		if (rc == EROFS) {
			cpr_err(CE_CONT, "uadmin A_FREEZE AD_REUSEFINI "
			    "(uadmin %d %d)\nmust be done with / mounted "
			    "writeable.\n", A_FREEZE, AD_REUSEFINI);
		}
		return (rc);
	}

	cdef = kmem_alloc(sizeof (*cdef), KM_SLEEP);
	rc = cpr_rdwr(UIO_READ, vp, cdef, sizeof (*cdef));

	if (rc) {
		cpr_err(CE_WARN, "Failed reading %s, errno = %d",
		    cpr_default_path, rc);
	} else if (cdef->mini.magic != CPR_DEFAULT_MAGIC) {
		/*
		 * cpr_enumerate_promprops() fills in bufp/size;
		 * free the buffer after formatting the warning.
		 */
		cpr_err(CE_WARN, "bad magic number in %s, cannot restore "
		    "prom values for %s", cpr_default_path,
		    cpr_enumerate_promprops(&bufp, &size));
		kmem_free(bufp, size);
		rc = EINVAL;
	} else {
		/*
		 * clean up prom properties
		 */
		rc = cpr_update_nvram(cdef->props);
		if (rc == 0) {
			/*
			 * invalidate the disk copy and turn off reusable
			 */
			cdef->mini.magic = 0;
			cdef->mini.reusable = 0;
			if (rc = cpr_rdwr(UIO_WRITE, vp,
			    &cdef->mini, sizeof (cdef->mini))) {
				cpr_err(CE_WARN, "Failed writing %s, errno %d",
				    cpr_default_path, rc);
			}
		}
	}

	/* release the vnode and the cdef buffer on every path */
	(void) VOP_CLOSE(vp, FREAD|FWRITE, 1, (offset_t)0, CRED());
	VN_RELE(vp);
	kmem_free(cdef, sizeof (*cdef));

	return (rc);
}
1426*0Sstevel@tonic-gate 
1427*0Sstevel@tonic-gate 
1428*0Sstevel@tonic-gate int
1429*0Sstevel@tonic-gate i_cpr_reuseinit(void)
1430*0Sstevel@tonic-gate {
1431*0Sstevel@tonic-gate 	int rc = 0;
1432*0Sstevel@tonic-gate 
1433*0Sstevel@tonic-gate 	if (rc = cpr_default_setup(1))
1434*0Sstevel@tonic-gate 		return (rc);
1435*0Sstevel@tonic-gate 
1436*0Sstevel@tonic-gate 	/*
1437*0Sstevel@tonic-gate 	 * We need to validate default file
1438*0Sstevel@tonic-gate 	 */
1439*0Sstevel@tonic-gate 	rc = cpr_validate_definfo(1);
1440*0Sstevel@tonic-gate 	if (rc == 0)
1441*0Sstevel@tonic-gate 		cpr_reusable_mode = 1;
1442*0Sstevel@tonic-gate 	else if (rc == EROFS) {
1443*0Sstevel@tonic-gate 		cpr_err(CE_NOTE, "reuseinit must be performed "
1444*0Sstevel@tonic-gate 		    "while / is mounted writeable");
1445*0Sstevel@tonic-gate 	}
1446*0Sstevel@tonic-gate 
1447*0Sstevel@tonic-gate 	(void) cpr_default_setup(0);
1448*0Sstevel@tonic-gate 
1449*0Sstevel@tonic-gate 	return (rc);
1450*0Sstevel@tonic-gate }
1451*0Sstevel@tonic-gate 
1452*0Sstevel@tonic-gate 
1453*0Sstevel@tonic-gate int
1454*0Sstevel@tonic-gate i_cpr_check_cprinfo(void)
1455*0Sstevel@tonic-gate {
1456*0Sstevel@tonic-gate 	struct vnode *vp;
1457*0Sstevel@tonic-gate 	cmini_t mini;
1458*0Sstevel@tonic-gate 	int rc = 0;
1459*0Sstevel@tonic-gate 
1460*0Sstevel@tonic-gate 	if (rc = cpr_open_deffile(FREAD, &vp)) {
1461*0Sstevel@tonic-gate 		if (rc == ENOENT)
1462*0Sstevel@tonic-gate 			cpr_err(CE_NOTE, "cprinfo file does not "
1463*0Sstevel@tonic-gate 			    "exist.  You must run 'uadmin %d %d' "
1464*0Sstevel@tonic-gate 			    "command while / is mounted writeable,\n"
1465*0Sstevel@tonic-gate 			    "then reboot and run 'uadmin %d %d' "
1466*0Sstevel@tonic-gate 			    "to create a reusable statefile",
1467*0Sstevel@tonic-gate 			    A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
1468*0Sstevel@tonic-gate 		return (rc);
1469*0Sstevel@tonic-gate 	}
1470*0Sstevel@tonic-gate 
1471*0Sstevel@tonic-gate 	rc = cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini));
1472*0Sstevel@tonic-gate 	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED());
1473*0Sstevel@tonic-gate 	VN_RELE(vp);
1474*0Sstevel@tonic-gate 
1475*0Sstevel@tonic-gate 	if (rc) {
1476*0Sstevel@tonic-gate 		cpr_err(CE_WARN, "Failed reading %s, errno = %d",
1477*0Sstevel@tonic-gate 		    cpr_default_path, rc);
1478*0Sstevel@tonic-gate 	} else if (mini.magic != CPR_DEFAULT_MAGIC) {
1479*0Sstevel@tonic-gate 		cpr_err(CE_CONT, "bad magic number in cprinfo file.\n"
1480*0Sstevel@tonic-gate 		    "You must run 'uadmin %d %d' while / is mounted "
1481*0Sstevel@tonic-gate 		    "writeable, then reboot and run 'uadmin %d %d' "
1482*0Sstevel@tonic-gate 		    "to create a reusable statefile\n",
1483*0Sstevel@tonic-gate 		    A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
1484*0Sstevel@tonic-gate 		rc = EINVAL;
1485*0Sstevel@tonic-gate 	}
1486*0Sstevel@tonic-gate 
1487*0Sstevel@tonic-gate 	return (rc);
1488*0Sstevel@tonic-gate }
1489*0Sstevel@tonic-gate 
1490*0Sstevel@tonic-gate 
/*
 * Reusable statefile mode is supported on this (sun4u) platform;
 * always report true.
 */
int
i_cpr_reusable_supported(void)
{
	return (1);
}
1496*0Sstevel@tonic-gate 
1497*0Sstevel@tonic-gate 
/*
 * find prom phys pages and alloc space for a tmp copy
 *
 * Builds pphys_list (the pfns of phys pages believed to be owned by
 * the prom) and allocates ppage_buf to hold a copy of those pages.
 * Prom pages are derived as: installed phys pages with no page_t,
 * minus pages belonging to kernel segments, plus pages on the
 * prom_ppages vnode.  Returns 0, or ENOMEM if an allocation fails.
 */
static int
i_cpr_find_ppages(void)
{
	extern struct vnode prom_ppages;
	struct page *pp;
	struct memlist *pmem;
	pgcnt_t npages, pcnt, scnt, vcnt;
	pfn_t ppn, plast, *dst;
	int mapflag;

	cpr_clear_bitmaps();
	mapflag = REGULAR_BITMAP;

	/*
	 * there should be a page_t for each phys page used by the kernel;
	 * set a bit for each phys page not tracked by a page_t
	 */
	pcnt = 0;
	memlist_read_lock();
	for (pmem = phys_install; pmem; pmem = pmem->next) {
		npages = mmu_btop(pmem->size);
		ppn = mmu_btop(pmem->address);
		for (plast = ppn + npages; ppn < plast; ppn++) {
			if (page_numtopp_nolock(ppn))
				continue;
			(void) cpr_setbit(ppn, mapflag);
			pcnt++;
		}
	}
	memlist_read_unlock();

	/*
	 * clear bits for phys pages in each segment
	 */
	scnt = cpr_count_seg_pages(mapflag, cpr_clrbit);

	/*
	 * set bits for phys pages referenced by the prom_ppages vnode;
	 * these pages are mostly comprised of forthdebug words
	 */
	vcnt = 0;
	for (pp = prom_ppages.v_pages; pp; ) {
		/*
		 * count only when cpr_setbit() returns 0 — presumably
		 * that means the bit was not already set; verify
		 * against cpr_setbit()'s contract.
		 */
		if (cpr_setbit(pp->p_offset, mapflag) == 0)
			vcnt++;
		pp = pp->p_vpnext;
		/* v_pages is circular; stop after one full lap */
		if (pp == prom_ppages.v_pages)
			break;
	}

	/*
	 * total number of prom pages are:
	 * (non-page_t pages - seg pages + vnode pages)
	 */
	ppage_count = pcnt - scnt + vcnt;
	DEBUG1(errp("find_ppages: pcnt %ld - scnt %ld + vcnt %ld = %ld\n",
	    pcnt, scnt, vcnt, ppage_count));

	/*
	 * alloc array of pfn_t to store phys page list
	 */
	pphys_list_size = ppage_count * sizeof (pfn_t);
	pphys_list = kmem_alloc(pphys_list_size, KM_NOSLEEP);
	if (pphys_list == NULL) {
		cpr_err(CE_WARN, "cannot alloc pphys_list");
		return (ENOMEM);
	}

	/*
	 * phys pages referenced in the bitmap should be
	 * those used by the prom; scan bitmap and save
	 * a list of prom phys page numbers
	 */
	dst = pphys_list;
	memlist_read_lock();
	for (pmem = phys_install; pmem; pmem = pmem->next) {
		npages = mmu_btop(pmem->size);
		ppn = mmu_btop(pmem->address);
		for (plast = ppn + npages; ppn < plast; ppn++) {
			if (cpr_isset(ppn, mapflag)) {
				ASSERT(dst < (pphys_list + ppage_count));
				*dst++ = ppn;
			}
		}
	}
	memlist_read_unlock();

	/*
	 * allocate space to store prom pages
	 */
	ppage_buf = kmem_alloc(mmu_ptob(ppage_count), KM_NOSLEEP);
	if (ppage_buf == NULL) {
		/* undo the pfn list alloc so nothing is leaked */
		kmem_free(pphys_list, pphys_list_size);
		pphys_list = NULL;
		cpr_err(CE_WARN, "cannot alloc ppage_buf");
		return (ENOMEM);
	}

	return (0);
}
1600*0Sstevel@tonic-gate 
1601*0Sstevel@tonic-gate 
1602*0Sstevel@tonic-gate /*
1603*0Sstevel@tonic-gate  * save prom pages to kmem pages
1604*0Sstevel@tonic-gate  */
1605*0Sstevel@tonic-gate static void
1606*0Sstevel@tonic-gate i_cpr_save_ppages(void)
1607*0Sstevel@tonic-gate {
1608*0Sstevel@tonic-gate 	pfn_t *pphys, *plast;
1609*0Sstevel@tonic-gate 	caddr_t dst;
1610*0Sstevel@tonic-gate 
1611*0Sstevel@tonic-gate 	/*
1612*0Sstevel@tonic-gate 	 * map in each prom page and copy to a kmem page
1613*0Sstevel@tonic-gate 	 */
1614*0Sstevel@tonic-gate 	dst = ppage_buf;
1615*0Sstevel@tonic-gate 	plast = pphys_list + ppage_count;
1616*0Sstevel@tonic-gate 	for (pphys = pphys_list; pphys < plast; pphys++) {
1617*0Sstevel@tonic-gate 		i_cpr_mapin(cpr_vaddr, 1, *pphys);
1618*0Sstevel@tonic-gate 		bcopy(cpr_vaddr, dst, MMU_PAGESIZE);
1619*0Sstevel@tonic-gate 		i_cpr_mapout(cpr_vaddr, 1);
1620*0Sstevel@tonic-gate 		dst += MMU_PAGESIZE;
1621*0Sstevel@tonic-gate 	}
1622*0Sstevel@tonic-gate 
1623*0Sstevel@tonic-gate 	DEBUG1(errp("saved %d prom pages\n", ppage_count));
1624*0Sstevel@tonic-gate }
1625*0Sstevel@tonic-gate 
1626*0Sstevel@tonic-gate 
1627*0Sstevel@tonic-gate /*
1628*0Sstevel@tonic-gate  * restore prom pages from kmem pages
1629*0Sstevel@tonic-gate  */
1630*0Sstevel@tonic-gate static void
1631*0Sstevel@tonic-gate i_cpr_restore_ppages(void)
1632*0Sstevel@tonic-gate {
1633*0Sstevel@tonic-gate 	pfn_t *pphys, *plast;
1634*0Sstevel@tonic-gate 	caddr_t src;
1635*0Sstevel@tonic-gate 
1636*0Sstevel@tonic-gate 	dcache_flushall();
1637*0Sstevel@tonic-gate 
1638*0Sstevel@tonic-gate 	/*
1639*0Sstevel@tonic-gate 	 * map in each prom page and copy from a kmem page
1640*0Sstevel@tonic-gate 	 */
1641*0Sstevel@tonic-gate 	src = ppage_buf;
1642*0Sstevel@tonic-gate 	plast = pphys_list + ppage_count;
1643*0Sstevel@tonic-gate 	for (pphys = pphys_list; pphys < plast; pphys++) {
1644*0Sstevel@tonic-gate 		i_cpr_mapin(cpr_vaddr, 1, *pphys);
1645*0Sstevel@tonic-gate 		bcopy(src, cpr_vaddr, MMU_PAGESIZE);
1646*0Sstevel@tonic-gate 		i_cpr_mapout(cpr_vaddr, 1);
1647*0Sstevel@tonic-gate 		src += MMU_PAGESIZE;
1648*0Sstevel@tonic-gate 	}
1649*0Sstevel@tonic-gate 
1650*0Sstevel@tonic-gate 	dcache_flushall();
1651*0Sstevel@tonic-gate 
1652*0Sstevel@tonic-gate 	DEBUG1(errp("restored %d prom pages\n", ppage_count));
1653*0Sstevel@tonic-gate }
1654*0Sstevel@tonic-gate 
1655*0Sstevel@tonic-gate 
1656*0Sstevel@tonic-gate /*
1657*0Sstevel@tonic-gate  * save/restore prom pages or free related allocs
1658*0Sstevel@tonic-gate  */
1659*0Sstevel@tonic-gate int
1660*0Sstevel@tonic-gate i_cpr_prom_pages(int action)
1661*0Sstevel@tonic-gate {
1662*0Sstevel@tonic-gate 	int error;
1663*0Sstevel@tonic-gate 
1664*0Sstevel@tonic-gate 	if (action == CPR_PROM_SAVE) {
1665*0Sstevel@tonic-gate 		if (ppage_buf == NULL) {
1666*0Sstevel@tonic-gate 			ASSERT(pphys_list == NULL);
1667*0Sstevel@tonic-gate 			if (error = i_cpr_find_ppages())
1668*0Sstevel@tonic-gate 				return (error);
1669*0Sstevel@tonic-gate 			i_cpr_save_ppages();
1670*0Sstevel@tonic-gate 		}
1671*0Sstevel@tonic-gate 	} else if (action == CPR_PROM_RESTORE) {
1672*0Sstevel@tonic-gate 		i_cpr_restore_ppages();
1673*0Sstevel@tonic-gate 	} else if (action == CPR_PROM_FREE) {
1674*0Sstevel@tonic-gate 		if (pphys_list) {
1675*0Sstevel@tonic-gate 			ASSERT(pphys_list_size);
1676*0Sstevel@tonic-gate 			kmem_free(pphys_list, pphys_list_size);
1677*0Sstevel@tonic-gate 			pphys_list = NULL;
1678*0Sstevel@tonic-gate 			pphys_list_size = 0;
1679*0Sstevel@tonic-gate 		}
1680*0Sstevel@tonic-gate 		if (ppage_buf) {
1681*0Sstevel@tonic-gate 			ASSERT(ppage_count);
1682*0Sstevel@tonic-gate 			kmem_free(ppage_buf, mmu_ptob(ppage_count));
1683*0Sstevel@tonic-gate 			DEBUG1(errp("freed %d prom pages\n", ppage_count));
1684*0Sstevel@tonic-gate 			ppage_buf = NULL;
1685*0Sstevel@tonic-gate 			ppage_count = 0;
1686*0Sstevel@tonic-gate 		}
1687*0Sstevel@tonic-gate 	}
1688*0Sstevel@tonic-gate 	return (0);
1689*0Sstevel@tonic-gate }
1690*0Sstevel@tonic-gate 
1691*0Sstevel@tonic-gate 
/*
 * record tlb data for the nucleus, bigktsb's, and the cpr module;
 * this data is later used by cprboot to install dtlb/itlb entries.
 * when we jump into the cpr module during the resume phase, those
 * mappings are needed until switching to the kernel trap table.
 * to make the dtte/itte info available during resume, we need
 * the info recorded prior to saving sensitive pages, otherwise
 * all the data would appear as NULLs.
 */
static void
i_cpr_save_tlbinfo(void)
{
	cti_t cti;

	/*
	 * during resume - shortly after jumping into the cpr module,
	 * sfmmu_load_mmustate() will overwrite any dtlb entry at any
	 * index used for TSBs; skip is set so that any saved tte will
	 * target other tlb offsets and prevent being lost during
	 * resume.  now scan the dtlb and save locked entries,
	 * then add entries for the tmp stack / data page and the
	 * cpr thread structure.
	 */
	cti.dst = m_info.dtte;
	cti.tail = cti.dst + CPR_MAX_TLB;
	cti.reader = dtlb_rd_entry;
	cti.writer = NULL;
	/*
	 * NOTE(review): i_cpr_lnb presumably selects locked entries
	 * ("locked, no-bigktsb"?) — confirm against its definition.
	 */
	cti.filter = i_cpr_lnb;
	/* start the scan at the highest dtlb index for this cpu */
	cti.index = cpunodes[CPU->cpu_id].dtlb_size - 1;
	/* skip the dtlb indexes reserved for TSBs (see comment above) */
	cti.skip = (1 << utsb_dtlb_ttenum);
	cti.skip |= (1 << utsb4m_dtlb_ttenum);
	i_cpr_scan_tlb(&cti);
	i_cpr_make_tte(&cti, &i_cpr_data_page, datava);
	i_cpr_make_tte(&cti, curthread, datava);

	/*
	 * scan itlb and save locked entries; add an entry for
	 * the first text page of the cpr module; cprboot will
	 * jump to that page after restoring kernel pages.
	 */
	cti.dst = m_info.itte;
	cti.tail = cti.dst + CPR_MAX_TLB;
	cti.reader = itlb_rd_entry;
	/* cti.writer and cti.filter keep the values set above */
	cti.index = cpunodes[CPU->cpu_id].itlb_size - 1;
	cti.skip = 0;
	i_cpr_scan_tlb(&cti);
	i_cpr_make_tte(&cti, (void *)i_cpr_resume_setup, textva);
}
1740*0Sstevel@tonic-gate 
1741*0Sstevel@tonic-gate 
/*
 * Prepare machine-dependent info before the statefile dump: clear
 * m_info, then record the locked dtlb/itlb entries that cprboot
 * needs at resume.  The vnode argument is unused.  Always returns 0.
 */
/* ARGSUSED */
int
i_cpr_dump_setup(vnode_t *vp)
{
	/*
	 * zero out m_info and add info to dtte/itte arrays
	 */
	bzero(&m_info, sizeof (m_info));
	i_cpr_save_tlbinfo();
	return (0);
}
1753*0Sstevel@tonic-gate 
1754*0Sstevel@tonic-gate 
1755*0Sstevel@tonic-gate int
1756*0Sstevel@tonic-gate i_cpr_is_supported(void)
1757*0Sstevel@tonic-gate {
1758*0Sstevel@tonic-gate 	char es_prop[] = "energystar-v2";
1759*0Sstevel@tonic-gate 	dnode_t node;
1760*0Sstevel@tonic-gate 	int last;
1761*0Sstevel@tonic-gate 	extern int cpr_supported_override;
1762*0Sstevel@tonic-gate 	extern int cpr_platform_enable;
1763*0Sstevel@tonic-gate 
1764*0Sstevel@tonic-gate 	/*
1765*0Sstevel@tonic-gate 	 * The next statement tests if a specific platform has turned off
1766*0Sstevel@tonic-gate 	 * cpr support.
1767*0Sstevel@tonic-gate 	 */
1768*0Sstevel@tonic-gate 	if (cpr_supported_override)
1769*0Sstevel@tonic-gate 		return (0);
1770*0Sstevel@tonic-gate 
1771*0Sstevel@tonic-gate 	/*
1772*0Sstevel@tonic-gate 	 * Do not inspect energystar-v* property if a platform has
1773*0Sstevel@tonic-gate 	 * specifically turned on cpr support
1774*0Sstevel@tonic-gate 	 */
1775*0Sstevel@tonic-gate 	if (cpr_platform_enable)
1776*0Sstevel@tonic-gate 		return (1);
1777*0Sstevel@tonic-gate 
1778*0Sstevel@tonic-gate 	node = prom_rootnode();
1779*0Sstevel@tonic-gate 	if (prom_getproplen(node, es_prop) != -1)
1780*0Sstevel@tonic-gate 		return (1);
1781*0Sstevel@tonic-gate 	last = strlen(es_prop) - 1;
1782*0Sstevel@tonic-gate 	es_prop[last] = '3';
1783*0Sstevel@tonic-gate 	return (prom_getproplen(node, es_prop) != -1);
1784*0Sstevel@tonic-gate }
1785*0Sstevel@tonic-gate 
1786*0Sstevel@tonic-gate 
/*
 * the actual size of the statefile data isn't known until after all the
 * compressed pages are written; even the inode size doesn't reflect the
 * data size since there are usually many extra fs blocks.  for recording
 * the actual data size, the first sector of the statefile is copied to
 * a tmp buf, and the copy is later updated and flushed to disk.
 */
int
i_cpr_blockzero(char *base, char **bufpp, int *blkno, vnode_t *vp)
{
	extern int cpr_flush_write(vnode_t *);
	static char cpr_sector[DEV_BSIZE];
	cpr_ext bytes, *dst;

	/*
	 * this routine is called after cdd_t and csu_md_t are copied
	 * to cpr_buf; mini-hack alert: the save/update method creates
	 * a dependency on the combined struct size being >= one sector
	 * or DEV_BSIZE; since introduction in Sol2.7, csu_md_t size is
	 * over 1K bytes and will probably grow with any changes.
	 *
	 * copy when vp is NULL, flush when non-NULL
	 */
	if (vp == NULL) {
		ASSERT((*bufpp - base) >= DEV_BSIZE);
		bcopy(base, cpr_sector, sizeof (cpr_sector));
		return (0);
	} else {
		/*
		 * patch the final data size (blocks converted to bytes)
		 * into the cdd_filesize field of the saved sector, then
		 * rewrite that sector at the statefile offset
		 */
		bytes = dbtob(*blkno);
		dst = &((cdd_t *)cpr_sector)->cdd_filesize;
		bcopy(&bytes, dst, sizeof (bytes));
		bcopy(cpr_sector, base, sizeof (cpr_sector));
		*bufpp = base + sizeof (cpr_sector);
		*blkno = cpr_statefile_offset();
		DEBUG1(errp("statefile data size: %lld\n\n", bytes));
		return (cpr_flush_write(vp));
	}
}
1825*0Sstevel@tonic-gate 
1826*0Sstevel@tonic-gate 
/*
 * Allocate bitmaps according to the phys_install list.
 *
 * Fills CPR->c_bmda with one cbd_t per phys_install range plus a
 * zero-size trailing sentinel; each descriptor gets a regular and a
 * volatile bitmap carved from a single zalloc.  Returns 0, EFBIG if
 * there are too many ranges, or ENOMEM; on a partial-allocation
 * failure the caller (i_cpr_alloc_bitmaps) invokes
 * i_cpr_bitmap_cleanup() to free what was allocated.
 */
static int
i_cpr_bitmap_setup(void)
{
	struct memlist *pmem;
	cbd_t *dp, *tail;
	void *space;
	size_t size;

	/*
	 * The number of bitmap descriptors will be the count of
	 * phys_install ranges plus 1 for a trailing NULL struct.
	 */
	cpr_nbitmaps = 1;
	for (pmem = phys_install; pmem; pmem = pmem->next)
		cpr_nbitmaps++;

	if (cpr_nbitmaps > (CPR_MAX_BMDESC - 1)) {
		cpr_err(CE_WARN, "too many physical memory ranges %d, max %d",
		    cpr_nbitmaps, CPR_MAX_BMDESC - 1);
		return (EFBIG);
	}

	/* Alloc an array of bitmap descriptors. */
	dp = kmem_zalloc(cpr_nbitmaps * sizeof (*dp), KM_NOSLEEP);
	if (dp == NULL) {
		cpr_nbitmaps = 0;
		return (ENOMEM);
	}
	tail = dp + cpr_nbitmaps;

	CPR->c_bmda = dp;
	for (pmem = phys_install; pmem; pmem = pmem->next) {
		size = BITMAP_BYTES(pmem->size);
		/* one zalloc holds both the regular and volatile maps */
		space = kmem_zalloc(size * 2, KM_NOSLEEP);
		if (space == NULL)
			return (ENOMEM);
		ASSERT(dp < tail);
		dp->cbd_magic = CPR_BITMAP_MAGIC;
		dp->cbd_spfn = mmu_btop(pmem->address);
		dp->cbd_epfn = mmu_btop(pmem->address + pmem->size) - 1;
		dp->cbd_size = size;
		dp->cbd_reg_bitmap = (cpr_ptr)space;
		dp->cbd_vlt_bitmap = (cpr_ptr)((caddr_t)space + size);
		dp++;
	}

	/* set magic for the last descriptor */
	ASSERT(dp == (tail - 1));
	dp->cbd_magic = CPR_BITMAP_MAGIC;

	return (0);
}
1882*0Sstevel@tonic-gate 
1883*0Sstevel@tonic-gate 
1884*0Sstevel@tonic-gate void
1885*0Sstevel@tonic-gate i_cpr_bitmap_cleanup(void)
1886*0Sstevel@tonic-gate {
1887*0Sstevel@tonic-gate 	cbd_t *dp;
1888*0Sstevel@tonic-gate 
1889*0Sstevel@tonic-gate 	if (CPR->c_bmda == NULL)
1890*0Sstevel@tonic-gate 		return;
1891*0Sstevel@tonic-gate 	for (dp = CPR->c_bmda; dp->cbd_size; dp++)
1892*0Sstevel@tonic-gate 		kmem_free((void *)dp->cbd_reg_bitmap, dp->cbd_size * 2);
1893*0Sstevel@tonic-gate 	kmem_free(CPR->c_bmda, cpr_nbitmaps * sizeof (*CPR->c_bmda));
1894*0Sstevel@tonic-gate 	CPR->c_bmda = NULL;
1895*0Sstevel@tonic-gate 	cpr_nbitmaps = 0;
1896*0Sstevel@tonic-gate }
1897*0Sstevel@tonic-gate 
1898*0Sstevel@tonic-gate 
/*
 * A "regular" and "volatile" bitmap are created for each range of
 * physical memory.  The volatile maps are used to count and track pages
 * susceptible to heap corruption - caused by drivers that allocate mem
 * during VOP_DUMP(); the regular maps are used for all the other non-
 * susceptible pages.  Before writing the bitmaps to the statefile,
 * each bitmap pair gets merged to simplify handling within cprboot.
 */
int
i_cpr_alloc_bitmaps(void)
{
	int rc;

	memlist_read_lock();
	rc = i_cpr_bitmap_setup();
	memlist_read_unlock();

	/* undo any partial allocation on failure */
	if (rc != 0)
		i_cpr_bitmap_cleanup();

	return (rc);
}
1919