10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
53446Smrj * Common Development and Distribution License (the "License").
63446Smrj * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
211414Scindi
220Sstevel@tonic-gate /*
23*11474SJonathan.Adams@Sun.COM * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
240Sstevel@tonic-gate * Use is subject to license terms.
250Sstevel@tonic-gate */
260Sstevel@tonic-gate
270Sstevel@tonic-gate /*
280Sstevel@tonic-gate * i86pc Memory Scrubbing
290Sstevel@tonic-gate *
300Sstevel@tonic-gate * On detection of a correctable memory ECC error, the i86pc hardware
310Sstevel@tonic-gate * returns the corrected data to the requester and may re-write it
320Sstevel@tonic-gate * to memory (DRAM or NVRAM). Machines which do not re-write this to
330Sstevel@tonic-gate * memory should add an NMI handler to correct and rewrite.
340Sstevel@tonic-gate *
 * Scrubbing thus reduces the likelihood that multiple transient errors
360Sstevel@tonic-gate * will occur in the same memory word, making uncorrectable errors due
370Sstevel@tonic-gate * to transients less likely.
380Sstevel@tonic-gate *
390Sstevel@tonic-gate * Thus is born the desire that every memory location be periodically
400Sstevel@tonic-gate * accessed.
410Sstevel@tonic-gate *
420Sstevel@tonic-gate * This file implements a memory scrubbing thread. This scrubber
430Sstevel@tonic-gate * guarantees that all of physical memory is accessed periodically
440Sstevel@tonic-gate * (memscrub_period_sec -- 12 hours).
450Sstevel@tonic-gate *
460Sstevel@tonic-gate * It attempts to do this as unobtrusively as possible. The thread
470Sstevel@tonic-gate * schedules itself to wake up at an interval such that if it reads
480Sstevel@tonic-gate * memscrub_span_pages (4MB) on each wakeup, it will read all of physical
 * memory in memscrub_period_sec (12 hours).
500Sstevel@tonic-gate *
510Sstevel@tonic-gate * The scrubber uses the REP LODS so it reads 4MB in 0.15 secs (on P5-200).
520Sstevel@tonic-gate * When it completes a span, if all the CPUs are idle, it reads another span.
530Sstevel@tonic-gate * Typically it soaks up idle time this way to reach its deadline early
540Sstevel@tonic-gate * -- and sleeps until the next period begins.
550Sstevel@tonic-gate *
560Sstevel@tonic-gate * Maximal Cost Estimate: 8GB @ xxMB/s = xxx seconds spent in 640 wakeups
570Sstevel@tonic-gate * that run for 0.15 seconds at intervals of 67 seconds.
580Sstevel@tonic-gate *
590Sstevel@tonic-gate * In practice, the scrubber finds enough idle time to finish in a few
600Sstevel@tonic-gate * minutes, and sleeps until its 12 hour deadline.
610Sstevel@tonic-gate *
620Sstevel@tonic-gate * The scrubber maintains a private copy of the phys_install memory list
630Sstevel@tonic-gate * to keep track of what memory should be scrubbed.
640Sstevel@tonic-gate *
650Sstevel@tonic-gate * The following parameters can be set via /etc/system
660Sstevel@tonic-gate *
670Sstevel@tonic-gate * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (4MB)
680Sstevel@tonic-gate * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
690Sstevel@tonic-gate * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (0)
700Sstevel@tonic-gate * memscrub_delay_start_sec = (10 seconds)
710Sstevel@tonic-gate * disable_memscrub = (0)
720Sstevel@tonic-gate *
730Sstevel@tonic-gate * the scrubber will exit (or never be started) if it finds the variable
740Sstevel@tonic-gate * "disable_memscrub" set.
750Sstevel@tonic-gate *
760Sstevel@tonic-gate * MEMSCRUB_DFL_SPAN_PAGES is based on the guess that 0.15 sec
770Sstevel@tonic-gate * is a "good" amount of minimum time for the thread to run at a time.
780Sstevel@tonic-gate *
790Sstevel@tonic-gate * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
800Sstevel@tonic-gate * twice the frequency the hardware folk estimated would be necessary.
810Sstevel@tonic-gate *
820Sstevel@tonic-gate * MEMSCRUB_DFL_THREAD_PRI (0) is based on the assumption that nearly
830Sstevel@tonic-gate * any other use of the system should be higher priority than scrubbing.
840Sstevel@tonic-gate */
850Sstevel@tonic-gate
860Sstevel@tonic-gate #include <sys/types.h>
870Sstevel@tonic-gate #include <sys/systm.h> /* timeout, types, t_lock */
880Sstevel@tonic-gate #include <sys/cmn_err.h>
890Sstevel@tonic-gate #include <sys/sysmacros.h> /* MIN */
900Sstevel@tonic-gate #include <sys/memlist.h> /* memlist */
910Sstevel@tonic-gate #include <sys/kmem.h> /* KMEM_NOSLEEP */
920Sstevel@tonic-gate #include <sys/cpuvar.h> /* ncpus_online */
930Sstevel@tonic-gate #include <sys/debug.h> /* ASSERTs */
940Sstevel@tonic-gate #include <sys/vmem.h>
950Sstevel@tonic-gate #include <sys/mman.h>
960Sstevel@tonic-gate #include <vm/seg_kmem.h>
970Sstevel@tonic-gate #include <vm/seg_kpm.h>
980Sstevel@tonic-gate #include <vm/hat_i86.h>
995295Srandyf #include <sys/callb.h> /* CPR callback */
1000Sstevel@tonic-gate
/*
 * VA window (and private PTE) used to map each physical page for
 * scanning when segkpm is not enabled; with segkpm the window is
 * simply pointed at the kpm mapping of the page.
 */
static caddr_t memscrub_window;
static hat_mempte_t memscrub_pte;

/*
 * Global Data:
 */
/*
 * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
 */
#define	MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */

/*
 * start only if at least MEMSCRUB_MIN_PAGES in system
 */
#define	MEMSCRUB_MIN_PAGES	((32 * 1024 * 1024) / PAGESIZE)

/*
 * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
 */
#define	MEMSCRUB_DFL_SPAN_PAGES	((4 * 1024 * 1024) / PAGESIZE)

/*
 * almost anything is higher priority than scrubbing
 */
#define	MEMSCRUB_DFL_THREAD_PRI	0

/*
 * we can patch these defaults in /etc/system if necessary
 */
uint_t disable_memscrub = 0;			/* exit the scrubber thread */
static uint_t disable_memscrub_quietly = 0;	/* exit without a console note */
pgcnt_t memscrub_min_pages = MEMSCRUB_MIN_PAGES;
pgcnt_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
time_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
time_t memscrub_delay_start_sec = 10;	/* delay before the first scan pass */

/*
 * Static Routines
 */
static void memscrubber(void);
static int system_is_idle(void);
static int memscrub_add_span(uint64_t, uint64_t);

/*
 * Static Data
 */
/* private copy of the phys_install list; total pages it covers */
static struct memlist *memscrub_memlist;
static uint_t memscrub_phys_pages;

static kcondvar_t memscrub_cv;
static kmutex_t memscrub_lock;

/*
 * memscrub_lock protects memscrub_memlist (and is the mutex the
 * scrubber thread sleeps on with memscrub_cv).
 */
/* statistics, updated only by the scrubber thread */
uint_t memscrub_scans_done;	/* spans scanned */

uint_t memscrub_done_early;	/* passes finished before their deadline */
uint_t memscrub_early_sec;	/* total seconds finished early */

uint_t memscrub_done_late;	/* passes finished after their deadline */
time_t memscrub_late_sec;	/* total seconds finished late */
1640Sstevel@tonic-gate
1650Sstevel@tonic-gate /*
1660Sstevel@tonic-gate * create memscrub_memlist from phys_install list
1670Sstevel@tonic-gate * initialize locks, set memscrub_phys_pages.
1680Sstevel@tonic-gate */
1690Sstevel@tonic-gate void
memscrub_init()1700Sstevel@tonic-gate memscrub_init()
1710Sstevel@tonic-gate {
1720Sstevel@tonic-gate struct memlist *src;
1730Sstevel@tonic-gate
1740Sstevel@tonic-gate if (physmem < memscrub_min_pages)
1750Sstevel@tonic-gate return;
1760Sstevel@tonic-gate
1770Sstevel@tonic-gate if (!kpm_enable) {
1780Sstevel@tonic-gate memscrub_window = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
1790Sstevel@tonic-gate memscrub_pte = hat_mempte_setup(memscrub_window);
1800Sstevel@tonic-gate }
1810Sstevel@tonic-gate
1820Sstevel@tonic-gate /*
1830Sstevel@tonic-gate * copy phys_install to memscrub_memlist
1840Sstevel@tonic-gate */
185*11474SJonathan.Adams@Sun.COM for (src = phys_install; src; src = src->ml_next) {
186*11474SJonathan.Adams@Sun.COM if (memscrub_add_span(src->ml_address, src->ml_size)) {
1870Sstevel@tonic-gate cmn_err(CE_WARN,
1887532SSean.Ye@Sun.COM "Software memory scrubber failed to initialize\n");
1890Sstevel@tonic-gate return;
1900Sstevel@tonic-gate }
1910Sstevel@tonic-gate }
1920Sstevel@tonic-gate
1930Sstevel@tonic-gate mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
1940Sstevel@tonic-gate cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);
1950Sstevel@tonic-gate
1960Sstevel@tonic-gate /*
1970Sstevel@tonic-gate * create memscrubber thread
1980Sstevel@tonic-gate */
1990Sstevel@tonic-gate (void) thread_create(NULL, 0, (void (*)())memscrubber, NULL, 0, &p0,
2000Sstevel@tonic-gate TS_RUN, memscrub_thread_pri);
2010Sstevel@tonic-gate }
2020Sstevel@tonic-gate
/*
 * Function to cause the software memscrubber to exit quietly if the
 * platform support has located a hardware scrubber and enabled it.
 * The scrubber thread polls this flag (without the lock) at the top
 * of each pass and before each span, and exits without logging a
 * console message when it is set.
 */
void
memscrub_disable(void)
{
	disable_memscrub_quietly = 1;
}
2121414Scindi
2130Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG
2147532SSean.Ye@Sun.COM static void
memscrub_printmemlist(char * title,struct memlist * listp)2150Sstevel@tonic-gate memscrub_printmemlist(char *title, struct memlist *listp)
2160Sstevel@tonic-gate {
2170Sstevel@tonic-gate struct memlist *list;
2180Sstevel@tonic-gate
2190Sstevel@tonic-gate cmn_err(CE_CONT, "%s:\n", title);
2200Sstevel@tonic-gate
2210Sstevel@tonic-gate for (list = listp; list; list = list->next) {
2220Sstevel@tonic-gate cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
2230Sstevel@tonic-gate list->address, list->size);
2240Sstevel@tonic-gate }
2250Sstevel@tonic-gate }
2260Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */
2270Sstevel@tonic-gate
/* ARGSUSED */
/*
 * timeout() callback that wakes the scrubber thread sleeping in
 * cv_wait() on memscrub_cv.
 */
static void
memscrub_wakeup(void *c)
{
	/*
	 * grab mutex to guarantee that our wakeup call
	 * arrives after we go to sleep -- so we can't sleep forever.
	 */
	mutex_enter(&memscrub_lock);
	cv_signal(&memscrub_cv);
	mutex_exit(&memscrub_lock);
}
2400Sstevel@tonic-gate
2410Sstevel@tonic-gate /*
2420Sstevel@tonic-gate * this calculation doesn't account for the time that the actual scan
2430Sstevel@tonic-gate * consumes -- so we'd fall slightly behind schedule with this
2440Sstevel@tonic-gate * interval_sec. but the idle loop optimization below usually makes us
2450Sstevel@tonic-gate * come in way ahead of schedule.
2460Sstevel@tonic-gate */
2470Sstevel@tonic-gate static int
compute_interval_sec()2480Sstevel@tonic-gate compute_interval_sec()
2490Sstevel@tonic-gate {
2500Sstevel@tonic-gate if (memscrub_phys_pages <= memscrub_span_pages)
2510Sstevel@tonic-gate return (memscrub_period_sec);
2520Sstevel@tonic-gate else
2530Sstevel@tonic-gate return (memscrub_period_sec/
2545295Srandyf (memscrub_phys_pages/memscrub_span_pages));
2550Sstevel@tonic-gate }
2560Sstevel@tonic-gate
/*
 * Body of the memory scrubbing thread (created by memscrub_init()).
 *
 * Outer loop: compute a wakeup interval, arm a timeout() that will
 * signal memscrub_cv, and sleep (CPR-safe) until woken.  Inner loop:
 * scan memscrub_span_pages worth of memory starting at the current
 * position in memscrub_memlist; keep scanning additional spans while
 * the rest of the system is idle, until the whole list has been
 * covered once (reached_end).  The thread exits when disable_memscrub
 * or disable_memscrub_quietly is set.
 */
static void
memscrubber()
{
	time_t deadline;		/* wall-clock time current pass is due */
	uint64_t mlp_last_addr;		/* end of the current memlist entry */
	uint64_t mlp_next_addr;		/* next address to scan */
	int reached_end = 1;		/* start as if a pass just finished */
	time_t interval_sec = 0;
	struct memlist *mlp;

	extern void scan_memory(caddr_t, size_t);
	callb_cpr_t cprinfo;

	/*
	 * notify CPR of our existence
	 */
	CALLB_CPR_INIT(&cprinfo, &memscrub_lock, callb_generic_cpr, "memscrub");

	if (memscrub_memlist == NULL) {
		cmn_err(CE_WARN, "memscrub_memlist not initialized.");
		goto memscrub_exit;
	}

	mlp = memscrub_memlist;
	mlp_next_addr = mlp->ml_address;
	mlp_last_addr = mlp->ml_address + mlp->ml_size;

	deadline = gethrestime_sec() + memscrub_delay_start_sec;

	for (;;) {
		if (disable_memscrub || disable_memscrub_quietly)
			break;

		mutex_enter(&memscrub_lock);

		/*
		 * did we just reach the end of memory?
		 */
		if (reached_end) {
			time_t now = gethrestime_sec();

			if (now >= deadline) {
				memscrub_done_late++;
				memscrub_late_sec += (now - deadline);
				/*
				 * past deadline, start right away
				 */
				interval_sec = 0;

				deadline = now + memscrub_period_sec;
			} else {
				/*
				 * we finished ahead of schedule.
				 * wait till previous deadline before re-start.
				 */
				interval_sec = deadline - now;
				memscrub_done_early++;
				memscrub_early_sec += interval_sec;
				deadline += memscrub_period_sec;
			}
		} else {
			interval_sec = compute_interval_sec();
		}

		/*
		 * it is safe from our standpoint for CPR to
		 * suspend the system
		 */
		CALLB_CPR_SAFE_BEGIN(&cprinfo);

		/*
		 * hit the snooze bar
		 */
		(void) timeout(memscrub_wakeup, NULL, interval_sec * hz);

		/*
		 * go to sleep
		 */
		cv_wait(&memscrub_cv, &memscrub_lock);

		/* we need to goto work */
		CALLB_CPR_SAFE_END(&cprinfo, &memscrub_lock);

		mutex_exit(&memscrub_lock);

		do {
			pgcnt_t pages = memscrub_span_pages;
			uint64_t address = mlp_next_addr;

			if (disable_memscrub || disable_memscrub_quietly)
				break;

			mutex_enter(&memscrub_lock);

			/*
			 * Make sure we don't try to scan beyond the end of
			 * the current memlist.  If we would, then resize
			 * our scan target for this iteration, and prepare
			 * to read the next memlist entry on the next
			 * iteration.
			 */
			reached_end = 0;
			if (address + mmu_ptob(pages) >= mlp_last_addr) {
				pages = mmu_btop(mlp_last_addr - address);
				mlp = mlp->ml_next;
				if (mlp == NULL) {
					/* wrapped: the pass is complete */
					reached_end = 1;
					mlp = memscrub_memlist;
				}
				mlp_next_addr = mlp->ml_address;
				mlp_last_addr = mlp->ml_address + mlp->ml_size;
			} else {
				mlp_next_addr += mmu_ptob(pages);
			}

			mutex_exit(&memscrub_lock);

			while (pages--) {
				pfn_t pfn = btop(address);

				/*
				 * Without segkpm, the memscrubber cannot
				 * be allowed to migrate across CPUs, as
				 * the CPU-specific mapping of
				 * memscrub_window would be incorrect.
				 * With segkpm, switching CPUs is legal, but
				 * inefficient.  We don't use
				 * kpreempt_disable as it might hold a
				 * higher priority thread (eg, RT) too long
				 * off CPU.
				 */
				thread_affinity_set(curthread, CPU_CURRENT);
				if (kpm_enable)
					memscrub_window = hat_kpm_pfn2va(pfn);
				else
					hat_mempte_remap(pfn, memscrub_window,
					    memscrub_pte,
					    PROT_READ, HAT_LOAD_NOCONSIST);

				scan_memory(memscrub_window, PAGESIZE);

				thread_affinity_clear(curthread);
				address += MMU_PAGESIZE;
			}

			memscrub_scans_done++;
		} while (!reached_end && system_is_idle());
	}

memscrub_exit:

	if (!disable_memscrub_quietly)
		cmn_err(CE_NOTE, "Software memory scrubber exiting.");
	/*
	 * We are about to bail, but don't have the memscrub_lock,
	 * and it is needed for CALLB_CPR_EXIT.
	 */
	mutex_enter(&memscrub_lock);
	CALLB_CPR_EXIT(&cprinfo);

	cv_destroy(&memscrub_cv);

	thread_exit();
}
4210Sstevel@tonic-gate
4220Sstevel@tonic-gate
4230Sstevel@tonic-gate /*
4240Sstevel@tonic-gate * return 1 if we're MP and all the other CPUs are idle
4250Sstevel@tonic-gate */
4260Sstevel@tonic-gate static int
system_is_idle()4270Sstevel@tonic-gate system_is_idle()
4280Sstevel@tonic-gate {
4290Sstevel@tonic-gate int cpu_id;
4300Sstevel@tonic-gate int found = 0;
4310Sstevel@tonic-gate
4320Sstevel@tonic-gate if (1 == ncpus_online)
4330Sstevel@tonic-gate return (0);
4340Sstevel@tonic-gate
4350Sstevel@tonic-gate for (cpu_id = 0; cpu_id < NCPU; ++cpu_id) {
4360Sstevel@tonic-gate if (!cpu[cpu_id])
4370Sstevel@tonic-gate continue;
4380Sstevel@tonic-gate
4390Sstevel@tonic-gate found++;
4400Sstevel@tonic-gate
4410Sstevel@tonic-gate if (cpu[cpu_id]->cpu_thread != cpu[cpu_id]->cpu_idle_thread) {
4420Sstevel@tonic-gate if (CPU->cpu_id == cpu_id &&
4430Sstevel@tonic-gate CPU->cpu_disp->disp_nrunnable == 0)
4440Sstevel@tonic-gate continue;
4450Sstevel@tonic-gate return (0);
4460Sstevel@tonic-gate }
4470Sstevel@tonic-gate
4480Sstevel@tonic-gate if (found == ncpus)
4490Sstevel@tonic-gate break;
4500Sstevel@tonic-gate }
4510Sstevel@tonic-gate return (1);
4520Sstevel@tonic-gate }
4530Sstevel@tonic-gate
/*
 * add a span to the memscrub list
 *
 * The list is kept sorted by start address.  The new span
 * [start, start + bytes) is merged into an adjacent existing entry
 * when it exactly abuts one; otherwise a new entry is allocated and
 * linked in place.  Panics if the span overlaps an existing entry.
 *
 * Returns 0 on success, -1 if a needed list entry could not be
 * allocated.  On success memscrub_phys_pages is grown by the span's
 * page count.
 *
 * NOTE(review): after an append-merge the extended entry could now
 * abut its successor, but no coalescing pass is made -- presumably
 * acceptable since phys_install spans arrive in order; confirm if
 * this is ever called with out-of-order spans.
 */
static int
memscrub_add_span(uint64_t start, uint64_t bytes)
{
	struct memlist *dst;
	struct memlist *prev, *next;
	uint64_t end = start + bytes - 1;	/* inclusive end address */
	int retval = 0;

	mutex_enter(&memscrub_lock);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
	cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
	    " size: 0x%llx\n", start, bytes);
#endif /* MEMSCRUB_DEBUG */

	/*
	 * Scan through the list to find the proper place to install it.
	 */
	prev = NULL;
	next = memscrub_memlist;
	while (next) {
		uint64_t ns = next->ml_address;
		uint64_t ne = next->ml_address + next->ml_size - 1;

		/*
		 * If this span overlaps with an existing span, then
		 * something has gone horribly wrong with the phys_install
		 * list.  In fact, I'm surprised we made it this far.
		 */
		if ((start >= ns && start <= ne) || (end >= ns && end <= ne) ||
		    (start < ns && end > ne))
			panic("memscrub found overlapping memory ranges "
			    "(0x%p-0x%p) and (0x%p-0x%p)",
			    (void *)(uintptr_t)start, (void *)(uintptr_t)end,
			    (void *)(uintptr_t)ns, (void *)(uintptr_t)ne);

		/*
		 * New span can be appended to an existing one.
		 */
		if (start == ne + 1) {
			next->ml_size += bytes;
			goto add_done;
		}

		/*
		 * New span can be prepended to an existing one.
		 */
		if (end + 1 == ns) {
			next->ml_size += bytes;
			next->ml_address = start;
			goto add_done;
		}

		/*
		 * If the next span has a higher start address than the new
		 * one, then we have found the right spot for our
		 * insertion.
		 */
		if (ns > start)
			break;

		prev = next;
		next = next->ml_next;
	}

	/*
	 * allocate a new struct memlist
	 */
	dst = kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
	if (dst == NULL) {
		retval = -1;
		goto add_done;
	}
	dst->ml_address = start;
	dst->ml_size = bytes;
	dst->ml_prev = prev;
	dst->ml_next = next;

	/* link in between prev and next (or at the head) */
	if (prev)
		prev->ml_next = dst;
	else
		memscrub_memlist = dst;

	if (next)
		next->ml_prev = dst;

add_done:

	if (retval != -1)
		memscrub_phys_pages += mmu_btop(bytes);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
#endif /* MEMSCRUB_DEBUG */

	mutex_exit(&memscrub_lock);
	return (retval);
}
558