/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * i86pc Memory Scrubbing
 *
 * On detection of a correctable memory ECC error, the i86pc hardware
 * returns the corrected data to the requester and may re-write it
 * to memory (DRAM or NVRAM).  Machines which do not re-write this to
 * memory should add an NMI handler to correct and rewrite.
 *
 * Scrubbing thus reduces the likelihood that multiple transient errors
 * will occur in the same memory word, making uncorrectable errors due
 * to transients less likely.
 *
 * Thus is born the desire that every memory location be periodically
 * accessed.
 *
 * This file implements a memory scrubbing thread.  This scrubber
 * guarantees that all of physical memory is accessed periodically
 * (memscrub_period_sec -- 12 hours).
 *
 * It attempts to do this as unobtrusively as possible.  The thread
 * schedules itself to wake up at an interval such that if it reads
 * memscrub_span_pages (4MB) on each wakeup, it will read all of physical
 * memory in memscrub_period_sec (12 hours).
 *
 * The scrubber uses the REP LODS instruction, so it reads 4MB in
 * 0.15 secs (on a P5-200).  When it completes a span, if all the CPUs
 * are idle, it reads another span.  Typically it soaks up idle time
 * this way to reach its deadline early -- and sleeps until the next
 * period begins.
 *
 * Maximal Cost Estimate:  8GB @ xxMB/s = xxx seconds spent in 640 wakeups
 * that run for 0.15 seconds at intervals of 67 seconds.
 *
 * In practice, the scrubber finds enough idle time to finish in a few
 * minutes, and sleeps until its 12 hour deadline.
 *
 * The scrubber maintains a private copy of the phys_install memory list
 * to keep track of what memory should be scrubbed.
 *
 * The following parameters can be set via /etc/system (a sample
 * fragment follows this comment block):
 *
 * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (4MB)
 * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
 * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (0)
 * memscrub_delay_start_sec = (10 seconds)
 * disable_memscrub = (0)
 *
 * The scrubber will exit (or never be started) if it finds the variable
 * "disable_memscrub" set.
 *
 * MEMSCRUB_DFL_SPAN_PAGES is based on the guess that 0.15 sec
 * is a "good" amount of minimum time for the thread to run at a time.
 *
 * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
 * twice the frequency the hardware folk estimated would be necessary.
 *
 * MEMSCRUB_DFL_THREAD_PRI (0) is based on the assumption that nearly
 * any other use of the system should be higher priority than scrubbing.
 */
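
/*
 * For illustration only: a hypothetical /etc/system fragment, using
 * made-up values rather than recommendations, might look like:
 *
 *	set memscrub_span_pages = 2048
 *	set memscrub_period_sec = 21600
 *	set disable_memscrub = 0
 */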

#include <sys/types.h>
#include <sys/systm.h>		/* timeout, types, t_lock */
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>	/* MIN */
#include <sys/memlist.h>	/* memlist */
#include <sys/kmem.h>		/* KMEM_NOSLEEP */
#include <sys/cpuvar.h>		/* ncpus_online */
#include <sys/debug.h>		/* ASSERTs */
#include <sys/vmem.h>
#include <sys/mman.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/hat_i86.h>

static caddr_t	memscrub_window;
static void	*memscrub_pte;

/*
 * Global Data:
 */
/*
 * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
 */
#define	MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */

/*
 * start only if at least MEMSCRUB_MIN_PAGES in system
 */
#define	MEMSCRUB_MIN_PAGES	((32 * 1024 * 1024) / PAGESIZE)

/*
 * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
 */
#define	MEMSCRUB_DFL_SPAN_PAGES	((4 * 1024 * 1024) / PAGESIZE)

/*
 * almost anything is higher priority than scrubbing
 */
#define	MEMSCRUB_DFL_THREAD_PRI	0

/*
 * we can patch these defaults in /etc/system if necessary
 */
uint_t disable_memscrub = 0;
pgcnt_t memscrub_min_pages = MEMSCRUB_MIN_PAGES;
pgcnt_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
time_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
time_t memscrub_delay_start_sec = 10;

/*
 * Static Routines
 */
static void memscrubber(void);
static int system_is_idle(void);
static int memscrub_add_span(uint64_t, uint64_t);

/*
 * Static Data
 */
static struct memlist *memscrub_memlist;
static uint_t memscrub_phys_pages;

static kcondvar_t memscrub_cv;
static kmutex_t memscrub_lock;
/*
 * memscrub_lock protects memscrub_memlist
 */
uint_t memscrub_scans_done;

uint_t memscrub_done_early;
uint_t memscrub_early_sec;

uint_t memscrub_done_late;
time_t memscrub_late_sec;

/*
 * create memscrub_memlist from phys_install list
 * initialize locks, set memscrub_phys_pages.
 */
void
memscrub_init()
{
	struct memlist *src;

	if (physmem < memscrub_min_pages)
		return;

	if (!kpm_enable) {
		memscrub_window = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
		memscrub_pte = hat_mempte_setup(memscrub_window);
	}

	/*
	 * copy phys_install to memscrub_memlist
	 */
	for (src = phys_install; src; src = src->next) {
		if (memscrub_add_span(src->address, src->size)) {
			cmn_err(CE_WARN,
			    "Memory scrubber failed to initialize\n");
			return;
		}
	}

	mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);

	/*
	 * create memscrubber thread
	 */
	(void) thread_create(NULL, 0, (void (*)())memscrubber, NULL, 0, &p0,
	    TS_RUN, memscrub_thread_pri);
}

#ifdef MEMSCRUB_DEBUG
void
memscrub_printmemlist(char *title, struct memlist *listp)
{
	struct memlist *list;

	cmn_err(CE_CONT, "%s:\n", title);

	for (list = listp; list; list = list->next) {
		cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
		    list->address, list->size);
	}
}
#endif /* MEMSCRUB_DEBUG */

/* ARGSUSED */
void
memscrub_wakeup(void *c)
{
	/*
	 * grab mutex to guarantee that our wakeup call
	 * arrives after we go to sleep -- so we can't sleep forever.
	 */
	mutex_enter(&memscrub_lock);
	cv_signal(&memscrub_cv);
	mutex_exit(&memscrub_lock);
}

/*
 * this calculation doesn't account for the time that the actual scan
 * consumes -- so we'd fall slightly behind schedule with this
 * interval_sec.  but the idle loop optimization below usually makes us
 * come in way ahead of schedule.
 */
static int
compute_interval_sec()
{
	if (memscrub_phys_pages <= memscrub_span_pages)
		return (memscrub_period_sec);
	else
		return (memscrub_period_sec /
		    (memscrub_phys_pages / memscrub_span_pages));
}
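
/*
 * Worked example of the computation above (assumed figures, using the
 * defaults in this file): an 8GB machine has 0x200000 4K pages, i.e.
 * 2048 4MB spans, so interval_sec = 43200 / 2048 = ~21 seconds
 * between wakeups.
 */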

void
memscrubber()
{
	time_t deadline;
	uint64_t mlp_last_addr;
	uint64_t mlp_next_addr;
	int reached_end = 1;
	time_t interval_sec = 0;
	struct memlist *mlp;

	extern void scan_memory(caddr_t, size_t);

	if (memscrub_memlist == NULL) {
		cmn_err(CE_WARN, "memscrub_memlist not initialized.");
		goto memscrub_exit;
	}

	mlp = memscrub_memlist;
	mlp_next_addr = mlp->address;
	mlp_last_addr = mlp->address + mlp->size;

	deadline = gethrestime_sec() + memscrub_delay_start_sec;

	for (;;) {
		if (disable_memscrub)
			break;

		mutex_enter(&memscrub_lock);

		/*
		 * did we just reach the end of memory?
		 */
		if (reached_end) {
			time_t now = gethrestime_sec();

			if (now >= deadline) {
				memscrub_done_late++;
				memscrub_late_sec += (now - deadline);
				/*
				 * past deadline, start right away
				 */
				interval_sec = 0;

				deadline = now + memscrub_period_sec;
			} else {
				/*
				 * we finished ahead of schedule.
				 * wait till the previous deadline before restarting.
				 */
				interval_sec = deadline - now;
				memscrub_done_early++;
				memscrub_early_sec += interval_sec;
				deadline += memscrub_period_sec;
			}
		} else {
			interval_sec = compute_interval_sec();
		}

		/*
		 * hit the snooze bar
		 */
		(void) timeout(memscrub_wakeup, NULL, interval_sec * hz);

		/*
		 * go to sleep
		 */
		cv_wait(&memscrub_cv, &memscrub_lock);

		mutex_exit(&memscrub_lock);

		do {
			pgcnt_t pages = memscrub_span_pages;
			uint64_t address = mlp_next_addr;

			if (disable_memscrub)
				break;

			mutex_enter(&memscrub_lock);

			/*
			 * Make sure we don't try to scan beyond the end of
			 * the current memlist.  If we would, then resize
			 * our scan target for this iteration, and prepare
			 * to read the next memlist entry on the next
			 * iteration.
			 */
			reached_end = 0;
			if (address + mmu_ptob(pages) >= mlp_last_addr) {
				pages = mmu_btop(mlp_last_addr - address);
				mlp = mlp->next;
				if (mlp == NULL) {
					reached_end = 1;
					mlp = memscrub_memlist;
				}
				mlp_next_addr = mlp->address;
				mlp_last_addr = mlp->address + mlp->size;
			} else {
				mlp_next_addr += mmu_ptob(pages);
			}

			mutex_exit(&memscrub_lock);

			while (pages--) {
				pfn_t pfn = btop(address);

				/*
				 * Without segkpm, the memscrubber cannot
				 * be allowed to migrate across CPUs, as
				 * the CPU-specific mapping of
				 * memscrub_window would be incorrect.
				 * With segkpm, switching CPUs is legal, but
				 * inefficient.  We don't use
				 * kpreempt_disable as it might hold a
				 * higher priority thread (eg, RT) too long
				 * off CPU.
				 */
				thread_affinity_set(curthread, CPU_CURRENT);
				if (kpm_enable)
					memscrub_window = hat_kpm_pfn2va(pfn);
				else
					hat_mempte_remap(pfn, memscrub_window,
					    memscrub_pte,
					    PROT_READ, HAT_LOAD_NOCONSIST);

				scan_memory(memscrub_window, PAGESIZE);

				thread_affinity_clear(curthread);
				address += MMU_PAGESIZE;
			}

			memscrub_scans_done++;
		} while (!reached_end && system_is_idle());
	}

memscrub_exit:

	cmn_err(CE_NOTE, "memory scrubber exiting.");

	cv_destroy(&memscrub_cv);

	thread_exit();
}


/*
 * return 1 if we're MP and all the other CPUs are idle
 */
static int
system_is_idle()
{
	int cpu_id;
	int found = 0;

	if (1 == ncpus_online)
		return (0);

	for (cpu_id = 0; cpu_id < NCPU; ++cpu_id) {
		if (!cpu[cpu_id])
			continue;

		found++;

		if (cpu[cpu_id]->cpu_thread != cpu[cpu_id]->cpu_idle_thread) {
			if (CPU->cpu_id == cpu_id &&
			    CPU->cpu_disp->disp_nrunnable == 0)
				continue;
			return (0);
		}

		if (found == ncpus)
			break;
	}
	return (1);
}

/*
 * add a span to the memscrub list
 */
static int
memscrub_add_span(uint64_t start, uint64_t bytes)
{
	struct memlist *dst;
	struct memlist *prev, *next;
	uint64_t end = start + bytes - 1;
	int retval = 0;

	mutex_enter(&memscrub_lock);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
	cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
	    " size: 0x%llx\n", start, bytes);
#endif /* MEMSCRUB_DEBUG */

	/*
	 * Scan through the list to find the proper place to install it.
	 */
	prev = NULL;
	next = memscrub_memlist;
	while (next) {
		uint64_t ns = next->address;
		uint64_t ne = next->address + next->size - 1;

		/*
		 * If this span overlaps with an existing span, then
		 * something has gone horribly wrong with the phys_install
		 * list.  In fact, I'm surprised we made it this far.
		 */
		if ((start >= ns && start <= ne) || (end >= ns && end <= ne) ||
		    (start < ns && end > ne))
			panic("memscrub found overlapping memory ranges "
			    "(0x%p-0x%p) and (0x%p-0x%p)",
			    (void *)(uintptr_t)start, (void *)(uintptr_t)end,
			    (void *)(uintptr_t)ns, (void *)(uintptr_t)ne);

		/*
		 * New span can be appended to an existing one.
		 */
		if (start == ne + 1) {
			next->size += bytes;
			goto add_done;
		}

		/*
		 * New span can be prepended to an existing one.
		 */
		if (end + 1 == ns) {
			next->size += bytes;
			next->address = start;
			goto add_done;
		}

		/*
		 * If the next span has a higher start address than the new
		 * one, then we have found the right spot for our
		 * insertion.
		 */
		if (ns > start)
			break;

		prev = next;
		next = next->next;
	}

	/*
	 * allocate a new struct memlist
	 */
	dst = kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
	if (dst == NULL) {
		retval = -1;
		goto add_done;
	}
	dst->address = start;
	dst->size = bytes;
	dst->prev = prev;
	dst->next = next;

	if (prev)
		prev->next = dst;
	else
		memscrub_memlist = dst;

	if (next)
		next->prev = dst;

add_done:

	if (retval != -1)
		memscrub_phys_pages += mmu_btop(bytes);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
#endif /* MEMSCRUB_DEBUG */

	mutex_exit(&memscrub_lock);
	return (retval);
}