/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * sun4u Memory Scrubbing
 *
 * On detection of a correctable memory ECC error, the sun4u kernel
 * returns the corrected data to the requester and re-writes it
 * to memory (DRAM).  So if the correctable error was transient,
 * the error has effectively been cleaned (scrubbed) from memory.
 *
 * Scrubbing thus reduces the likelihood that multiple transient errors
 * will occur in the same memory word, making uncorrectable errors due
 * to transients less likely.
 *
 * Thus is born the desire that every memory location be periodically
 * accessed.
 *
 * This file implements a memory scrubbing thread.  This scrubber
 * guarantees that all of physical memory is accessed periodically
 * (memscrub_period_sec -- 12 hours).
 *
 * It attempts to do this as unobtrusively as possible.  The thread
 * schedules itself to wake up at an interval such that if it reads
 * memscrub_span_pages (32MB) on each wakeup, it will read all of physical
 * memory in memscrub_period_sec (12 hours).
 *
 * The scrubber uses the block load and prefetch hardware to read memory
 * @ 1300MB/s, so it reads spans of 32MB in 0.025 seconds.  Unlike the
 * original sun4d scrubber the sun4u scrubber does not read ahead if the
 * system is idle because we can read memory very efficiently.
 *
 * The scrubber maintains a private copy of the phys_install memory list
 * to keep track of what memory should be scrubbed.
 *
 * The global routines memscrub_add_span() and memscrub_delete_span() are
 * used to add and delete from this list.  If hotplug memory is later
 * supported these two routines can be used to notify the scrubber of
 * memory configuration changes.
 *
 * The following parameters can be set via /etc/system
 *
 *	memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (32MB)
 *	memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
 *	memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI)
 *	memscrub_delay_start_sec = (5 minutes)
 *	memscrub_verbose = (0)
 *	memscrub_override_ticks = (1 tick)
 *	disable_memscrub = (0)
 *	pause_memscrub = (0)
 *	read_all_memscrub = (0)
 *
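 * For example, a hypothetical tuning that halves the scan period,
 * doubles the span to 64MB (8192 8K pages) and turns on logging could
 * look like this in /etc/system (the values are illustrative only):
 *
 *	set memscrub_period_sec = 21600
 *	set memscrub_span_pages = 8192
 *	set memscrub_verbose = 1
 *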
 * The scrubber will print NOTICE messages of what it is doing if
 * "memscrub_verbose" is set.
 *
 * If the scrubber's sleep time calculation drops to zero ticks,
 * memscrub_override_ticks will be used as the sleep time instead.  The
 * sleep time should only drop to zero on a system with over 131.84
 * terabytes of memory, or where the default scrubber parameters have
 * been adjusted.  For example, reducing memscrub_span_pages or
 * memscrub_period_sec causes the sleep time to drop to zero with less
 * memory.  Note that since the sleep time is calculated in clock ticks,
 * using hires clock ticks allows for more memory before the sleep time
 * becomes zero.
 *
 * The scrubber will exit (or never be started) if it finds the variable
 * "disable_memscrub" set.
 *
 * The scrubber will pause (not read memory) when "pause_memscrub"
 * is set.  It will check the state of pause_memscrub at each wakeup
 * period.  The scrubber will not make up for lost time.  If you
 * pause the scrubber for a prolonged period of time you can use
 * the "read_all_memscrub" switch (see below) to catch up.  In addition,
 * pause_memscrub is used internally by the post memory DR callbacks.
 * It is set for the small period of time during which the callbacks
 * are executing.  This ensures "memscrub_lock" will be released,
 * allowing the callbacks to finish.
 *
 * The scrubber will read all memory if "read_all_memscrub" is set.
 * The normal span read will also occur during the wakeup.
 *
 * MEMSCRUB_MIN_PAGES (32MB) is the minimum amount of memory a system
 * must have before we'll start the scrubber.
 *
 * MEMSCRUB_DFL_SPAN_PAGES (32MB) is based on the guess that 0.025 sec
 * is a "good" amount of minimum time for the thread to run at a time.
 *
 * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
 * twice the frequency the hardware folk estimated would be necessary.
 *
 * MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI) is based on the assumption
 * that the scrubber should get its fair share of time (since each
 * scan is short).  At a priority of 0 the scrubber will be starved.
 */

#include <sys/systm.h>		/* timeout, types, t_lock */
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>	/* MIN */
#include <sys/memlist.h>	/* memlist */
#include <sys/mem_config.h>	/* memory add/delete */
#include <sys/kmem.h>		/* KMEM_NOSLEEP */
#include <sys/cpuvar.h>		/* ncpus_online */
#include <sys/debug.h>		/* ASSERTs */
#include <sys/machsystm.h>	/* lddphys */
#include <sys/cpu_module.h>	/* vtag_flushpage */
#include <sys/kstat.h>
#include <sys/atomic.h>		/* atomic_add_32 */

#include <vm/hat.h>
#include <vm/seg_kmem.h>
#include <vm/hat_sfmmu.h>	/* XXX FIXME - delete */

#include <sys/time.h>
#include <sys/callb.h>		/* CPR callback */
#include <sys/ontrap.h>

/*
 * Should really have paddr_t defined, but it is broken.  Use
 * ms_paddr_t in the meantime to make the code cleaner
 */
typedef uint64_t ms_paddr_t;

/*
 * Global Routines:
 */
int memscrub_add_span(pfn_t pfn, pgcnt_t pages);
int memscrub_delete_span(pfn_t pfn, pgcnt_t pages);
int memscrub_init(void);
void memscrub_induced_error(void);

/*
 * Global Data:
 */

/*
 * scrub if we have at least this many pages
 */
#define	MEMSCRUB_MIN_PAGES	(32 * 1024 * 1024 / PAGESIZE)

/*
 * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
 */
#define	MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */

/*
 * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
 */
#define	MEMSCRUB_DFL_SPAN_PAGES	((32 * 1024 * 1024) / PAGESIZE)

/*
 * almost anything is higher priority than scrubbing
 */
#define	MEMSCRUB_DFL_THREAD_PRI	MINCLSYSPRI

/*
 * size used when scanning memory
 */
#define	MEMSCRUB_BLOCK_SIZE		256
#define	MEMSCRUB_BLOCK_SIZE_SHIFT	8	/* log2(MEMSCRUB_BLOCK_SIZE) */
#define	MEMSCRUB_BLOCKS_PER_PAGE	(PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)

#define	MEMSCRUB_BPP4M		(MMU_PAGESIZE4M >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP512K	(MMU_PAGESIZE512K >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP64K		(MMU_PAGESIZE64K >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP		(MMU_PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)
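
/*
 * For reference (illustrative figures, assuming the usual sun4u 8K base
 * page and 64K/512K/4M large pages): MEMSCRUB_BLOCKS_PER_PAGE works out
 * to 8192 / 256 = 32 blocks, and MEMSCRUB_BPP64K, MEMSCRUB_BPP512K and
 * MEMSCRUB_BPP4M to 256, 2048 and 16384 blocks respectively.
 */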

/*
 * This message indicates that we have exceeded the limitations of
 * the memscrubber.  See the comments above regarding what would
 * cause the sleep time to become zero.  In DEBUG mode, this message
 * is logged on the console and in the messages file.  In non-DEBUG
 * mode, it is only logged in the messages file.
 */
#ifdef DEBUG
#define	MEMSCRUB_OVERRIDE_MSG	"Memory scrubber sleep time is zero " \
	"seconds, consuming entire CPU."
#else
#define	MEMSCRUB_OVERRIDE_MSG	"!Memory scrubber sleep time is zero " \
	"seconds, consuming entire CPU."
#endif /* DEBUG */

/*
 * we can patch these defaults in /etc/system if necessary
 */
uint_t disable_memscrub = 0;
uint_t pause_memscrub = 0;
uint_t read_all_memscrub = 0;
uint_t memscrub_verbose = 0;
uint_t memscrub_all_idle = 0;
uint_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
uint_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
uint_t memscrub_delay_start_sec = 5 * 60;
uint_t memscrub_override_ticks = 1;

/*
 * Static Routines
 */
static void memscrubber(void);
static void memscrub_cleanup(void);
static int memscrub_add_span_gen(pfn_t, pgcnt_t, struct memlist **, uint_t *);
static int memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp);
static void memscrub_scan(uint_t blks, ms_paddr_t src);

/*
 * Static Data
 */

static struct memlist *memscrub_memlist;
static uint_t memscrub_phys_pages;

static kcondvar_t memscrub_cv;
static kmutex_t memscrub_lock;
/*
 * memscrub_lock protects memscrub_memlist, interval_ticks, cprinfo, ...
 */
static void memscrub_init_mem_config(void);
static void memscrub_uninit_mem_config(void);

/*
 * Linked list of memscrub-aware spans that contain retired pages.
 * Currently enabled only on sun4u USIII-based platforms.
 */
typedef struct memscrub_page_retire_span {
	ms_paddr_t				address;
	struct memscrub_page_retire_span	*next;
} memscrub_page_retire_span_t;

static memscrub_page_retire_span_t *memscrub_page_retire_span_list = NULL;

static void memscrub_page_retire_span_add(ms_paddr_t);
static void memscrub_page_retire_span_delete(ms_paddr_t);
static int memscrub_page_retire_span_search(ms_paddr_t);
static void memscrub_page_retire_span_list_update(void);

/*
 * add_to_page_retire_list: Set by the cpu_async_log_err() routine
 * (via memscrub_induced_error()) when a CE/UE occurs on a retired
 * page because of a memscrub read.  Cleared by memscrub after it has
 * updated the global page retire span list.  Piggybacks on the
 * protection of memscrub_lock, which is held while it is set and cleared.
 * Note: when cpu_async_log_err() calls memscrub_induced_error(), it runs
 * in softint context, fired on the cpu on which the memscrub thread is
 * currently running.  The memscrub thread has its affinity set during
 * memscrub_read(), hence migration to a new cpu is not expected.
 */
static int add_to_page_retire_list = 0;

/*
 * Keep track of some interesting statistics
 */
static struct memscrub_kstats {
	kstat_named_t	done_early;	/* ahead of schedule */
	kstat_named_t	early_sec;	/* by cumulative num secs */
	kstat_named_t	done_late;	/* behind schedule */
	kstat_named_t	late_sec;	/* by cumulative num secs */
	kstat_named_t	interval_ticks;	/* num ticks between intervals */
	kstat_named_t	force_run;	/* forced to run, non-timeout */
	kstat_named_t	errors_found;	/* num errors found by memscrub */
} memscrub_counts = {
	{ "done_early",		KSTAT_DATA_UINT32 },
	{ "early_sec",		KSTAT_DATA_UINT32 },
	{ "done_late",		KSTAT_DATA_UINT32 },
	{ "late_sec",		KSTAT_DATA_UINT32 },
	{ "interval_ticks",	KSTAT_DATA_UINT32 },
	{ "force_run",		KSTAT_DATA_UINT32 },
	{ "errors_found",	KSTAT_DATA_UINT32 },
};

#define	MEMSCRUB_STAT_INC(stat)		memscrub_counts.stat.value.ui32++
#define	MEMSCRUB_STAT_SET(stat, val)	memscrub_counts.stat.value.ui32 = (val)
#define	MEMSCRUB_STAT_NINC(stat, val)	memscrub_counts.stat.value.ui32 += (val)

static struct kstat *memscrub_ksp = (struct kstat *)NULL;

static timeout_id_t memscrub_tid = 0;	/* keep track of timeout id */

/*
 * create memscrub_memlist from phys_install list
 * initialize locks, set memscrub_phys_pages.
 */
int
memscrub_init(void)
{
	struct memlist *src;

	/*
	 * only startup the scrubber if we have a minimum
	 * number of pages
	 */
	if (physinstalled >= MEMSCRUB_MIN_PAGES) {

		/*
		 * initialize locks
		 */
		mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);

		/*
		 * copy phys_install to memscrub_memlist
		 */
		for (src = phys_install; src; src = src->ml_next) {
			if (memscrub_add_span(
			    (pfn_t)(src->ml_address >> PAGESHIFT),
			    (pgcnt_t)(src->ml_size >> PAGESHIFT))) {
				memscrub_cleanup();
				return (-1);
			}
		}

		/*
		 * initialize kstats
		 */
		memscrub_ksp = kstat_create("unix", 0, "memscrub_kstat",
		    "misc", KSTAT_TYPE_NAMED,
		    sizeof (memscrub_counts) / sizeof (kstat_named_t),
		    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);

		if (memscrub_ksp) {
			memscrub_ksp->ks_data = (void *)&memscrub_counts;
			kstat_install(memscrub_ksp);
		} else {
			cmn_err(CE_NOTE, "Memscrubber cannot create kstats\n");
		}

		/*
		 * create memscrubber thread
		 */
		(void) thread_create(NULL, 0, (void (*)())memscrubber,
		    NULL, 0, &p0, TS_RUN, memscrub_thread_pri);

		/*
		 * We don't want callbacks changing the list
		 * if there is no thread running.  We do not
		 * attempt to deal with stopping/starting scrubbing
		 * on memory size changes.
		 */
		memscrub_init_mem_config();
	}

	return (0);
}

static void
memscrub_cleanup(void)
{
	memscrub_uninit_mem_config();
	while (memscrub_memlist) {
		(void) memscrub_delete_span(
		    (pfn_t)(memscrub_memlist->ml_address >> PAGESHIFT),
		    (pgcnt_t)(memscrub_memlist->ml_size >> PAGESHIFT));
	}
	if (memscrub_ksp)
		kstat_delete(memscrub_ksp);
	cv_destroy(&memscrub_cv);
	mutex_destroy(&memscrub_lock);
}

#ifdef MEMSCRUB_DEBUG
static void
memscrub_printmemlist(char *title, struct memlist *listp)
{
	struct memlist *list;

	cmn_err(CE_CONT, "%s:\n", title);

	for (list = listp; list; list = list->ml_next) {
		cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
		    list->ml_address, list->ml_size);
	}
}
#endif /* MEMSCRUB_DEBUG */

/* ARGSUSED */
static void
memscrub_wakeup(void *c)
{
	/*
	 * grab mutex to guarantee that our wakeup call
	 * arrives after we go to sleep -- so we can't sleep forever.
	 */
	mutex_enter(&memscrub_lock);
	cv_signal(&memscrub_cv);
	mutex_exit(&memscrub_lock);
}

/*
 * provide an interface external to the memscrubber
 * which will force the memscrub thread to run vs.
 * waiting for the timeout, if one is set
 */
void
memscrub_run(void)
{
	MEMSCRUB_STAT_INC(force_run);
	if (memscrub_tid) {
		(void) untimeout(memscrub_tid);
		memscrub_wakeup((void *)NULL);
	}
}

/*
 * this calculation doesn't account for the time
 * that the actual scan consumes -- so we'd fall
 * slightly behind schedule with this interval.
 * It's very small.
 */

static uint_t
compute_interval_ticks(void)
{
	/*
	 * We use msp_safe and mpp_safe below to ensure that nobody
	 * sets memscrub_span_pages or memscrub_phys_pages
	 * to 0 on us mid-calculation.
	 */
	static uint_t msp_safe, mpp_safe;
	static uint_t interval_ticks, period_ticks;
	msp_safe = memscrub_span_pages;
	mpp_safe = memscrub_phys_pages;

	period_ticks = memscrub_period_sec * hz;
	interval_ticks = period_ticks;

	ASSERT(mutex_owned(&memscrub_lock));

	if ((msp_safe != 0) && (mpp_safe != 0)) {
		if (memscrub_phys_pages <= msp_safe) {
			interval_ticks = period_ticks;
		} else {
			interval_ticks = (period_ticks /
			    (mpp_safe / msp_safe));
		}
	}
	return (interval_ticks);
}
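
/*
 * Worked example of the arithmetic above (illustrative only, assuming
 * hz = 100, 8K pages and the default 32MB span / 12 hour period):
 * a 32GB system has 4,194,304 pages, so mpp_safe / msp_safe = 1024 and
 * interval_ticks = (43200 * 100) / 1024 = 4218 ticks, i.e. one 32MB span
 * is read roughly every 42 seconds.  The quotient only reaches zero once
 * mpp_safe / msp_safe exceeds 4,320,000, i.e. at roughly 131.84 terabytes
 * of physical memory, matching the figure quoted in the block comment at
 * the top of this file.
 */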

void
memscrubber(void)
{
	ms_paddr_t address, addr;
	time_t deadline;
	pgcnt_t pages;
	uint_t reached_end = 1;
	uint_t paused_message = 0;
	uint_t interval_ticks = 0;
	uint_t sleep_warn_printed = 0;
	callb_cpr_t cprinfo;

	/*
	 * notify CPR of our existence
	 */
	CALLB_CPR_INIT(&cprinfo, &memscrub_lock, callb_generic_cpr, "memscrub");

	mutex_enter(&memscrub_lock);

	if (memscrub_memlist == NULL) {
		cmn_err(CE_WARN, "memscrub_memlist not initialized.");
		goto memscrub_exit;
	}

	address = memscrub_memlist->ml_address;

	deadline = gethrestime_sec() + memscrub_delay_start_sec;

	for (;;) {
		if (disable_memscrub)
			break;

		/*
		 * compute interval_ticks
		 */
		interval_ticks = compute_interval_ticks();

		/*
		 * If the calculated sleep time is zero, and pause_memscrub
		 * has been set, make sure we sleep so that another thread
		 * can acquire memscrub_lock.
		 */
		if (interval_ticks == 0 && pause_memscrub) {
			interval_ticks = hz;
		}

		/*
		 * And as a fail safe, under normal non-paused operation, do
		 * not allow the sleep time to be zero.
		 */
		if (interval_ticks == 0) {
			interval_ticks = memscrub_override_ticks;
			if (!sleep_warn_printed) {
				cmn_err(CE_NOTE, MEMSCRUB_OVERRIDE_MSG);
				sleep_warn_printed = 1;
			}
		}

		MEMSCRUB_STAT_SET(interval_ticks, interval_ticks);

		/*
		 * Did we just reach the end of memory? If we are at the
		 * end of memory, delay end of memory processing until
		 * pause_memscrub is not set.
		 */
		if (reached_end && !pause_memscrub) {
			time_t now = gethrestime_sec();

			if (now >= deadline) {
				MEMSCRUB_STAT_INC(done_late);
				MEMSCRUB_STAT_NINC(late_sec, now - deadline);
				/*
				 * past deadline, start right away
				 */
				interval_ticks = 0;

				deadline = now + memscrub_period_sec;
			} else {
				/*
				 * we finished ahead of schedule.
				 * wait till previous deadline before re-start.
				 */
				interval_ticks = (deadline - now) * hz;
				MEMSCRUB_STAT_INC(done_early);
				MEMSCRUB_STAT_NINC(early_sec, deadline - now);
				deadline += memscrub_period_sec;
			}
			reached_end = 0;
			sleep_warn_printed = 0;
		}

		if (interval_ticks != 0) {
			/*
			 * it is safe from our standpoint for CPR to
			 * suspend the system
			 */
			CALLB_CPR_SAFE_BEGIN(&cprinfo);

			/*
			 * hit the snooze bar
			 */
			memscrub_tid = timeout(memscrub_wakeup, NULL,
			    interval_ticks);

			/*
			 * go to sleep
			 */
			cv_wait(&memscrub_cv, &memscrub_lock);

			/*
			 * at this point, no timeout should be set
			 */
			memscrub_tid = 0;

			/*
			 * we need to go to work and will be modifying
			 * our internal state and mapping/unmapping
			 * TTEs
			 */
			CALLB_CPR_SAFE_END(&cprinfo, &memscrub_lock);
		}


		if (memscrub_phys_pages == 0) {
			cmn_err(CE_WARN, "Memory scrubber has 0 pages to read");
			goto memscrub_exit;
		}

		if (!pause_memscrub) {
			if (paused_message) {
				paused_message = 0;
				if (memscrub_verbose)
					cmn_err(CE_NOTE, "Memory scrubber "
					    "resuming");
			}

			if (read_all_memscrub) {
				if (memscrub_verbose)
					cmn_err(CE_NOTE, "Memory scrubber "
					    "reading all memory per request");

				addr = memscrub_memlist->ml_address;
				reached_end = 0;
				while (!reached_end) {
					if (disable_memscrub)
						break;
					pages = memscrub_phys_pages;
					reached_end = memscrub_verify_span(
					    &addr, &pages);
					memscrub_scan(pages *
					    MEMSCRUB_BLOCKS_PER_PAGE, addr);
					addr += ((uint64_t)pages * PAGESIZE);
				}
				read_all_memscrub = 0;
			}

			/*
			 * read 1 span
			 */
			pages = memscrub_span_pages;

			if (disable_memscrub)
				break;

			/*
			 * determine physical address range
			 */
			reached_end = memscrub_verify_span(&address,
			    &pages);

			memscrub_scan(pages * MEMSCRUB_BLOCKS_PER_PAGE,
			    address);

			address += ((uint64_t)pages * PAGESIZE);
		}

		if (pause_memscrub && !paused_message) {
			paused_message = 1;
			if (memscrub_verbose)
				cmn_err(CE_NOTE, "Memory scrubber paused");
		}
	}

memscrub_exit:
	cmn_err(CE_NOTE, "Memory scrubber exiting");
	CALLB_CPR_EXIT(&cprinfo);
	memscrub_cleanup();
	thread_exit();
	/* NOTREACHED */
}

/*
 * condition address and size
 * such that they span legal physical addresses.
 *
 * when appropriate, address will be rounded up to start of next
 * struct memlist, and pages will be rounded down to the end of the
 * memlist size.
 *
 * returns 1 if reached end of list, else returns 0.
 */
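/*
 * Purely illustrative example of that conditioning: with spans
 * [0, 1GB) and [4GB, 8GB) on the list, a request starting at 3GB is
 * rounded up to *addrp = 4GB; a 64MB request starting 16MB below the
 * end of the second span has *pagesp clipped to cover only the
 * remaining 16MB and, because that reaches the end of the last span
 * on the list, the routine returns 1.
 */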
static int
memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp)
{
	struct memlist *mlp;
	ms_paddr_t address = *addrp;
	uint64_t bytes = (uint64_t)*pagesp * PAGESIZE;
	uint64_t bytes_remaining;
	int reached_end = 0;

	ASSERT(mutex_owned(&memscrub_lock));

	/*
	 * find memlist struct that contains addrp
	 * assumes memlist is sorted by ascending address.
	 */
	for (mlp = memscrub_memlist; mlp != NULL; mlp = mlp->ml_next) {
		/*
		 * if before this chunk, round up to beginning
		 */
		if (address < mlp->ml_address) {
			address = mlp->ml_address;
			break;
		}
		/*
		 * if before end of chunk, then we found it
		 */
		if (address < (mlp->ml_address + mlp->ml_size))
			break;

		/* else go to next struct memlist */
	}
	/*
	 * if we hit end of list, start at beginning
	 */
	if (mlp == NULL) {
		mlp = memscrub_memlist;
		address = mlp->ml_address;
	}

	/*
	 * now we have legal address, and its mlp, condition bytes
	 */
	bytes_remaining = (mlp->ml_address + mlp->ml_size) - address;

	if (bytes > bytes_remaining)
		bytes = bytes_remaining;

	/*
	 * will this span take us to end of list?
	 */
	if ((mlp->ml_next == NULL) &&
	    ((mlp->ml_address + mlp->ml_size) == (address + bytes)))
		reached_end = 1;

	/* return values */
	*addrp = address;
	*pagesp = bytes / PAGESIZE;

	return (reached_end);
}

/*
 * add a span to the memscrub list
 * add to memscrub_phys_pages
 */
int
memscrub_add_span(pfn_t pfn, pgcnt_t pages)
{
#ifdef MEMSCRUB_DEBUG
	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
#endif /* MEMSCRUB_DEBUG */

	int retval;

	mutex_enter(&memscrub_lock);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
	cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
	    " size: 0x%llx\n", address, bytes);
#endif /* MEMSCRUB_DEBUG */

	retval = memscrub_add_span_gen(pfn, pages, &memscrub_memlist,
	    &memscrub_phys_pages);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
#endif /* MEMSCRUB_DEBUG */

	mutex_exit(&memscrub_lock);

	return (retval);
}

static int
memscrub_add_span_gen(
	pfn_t pfn,
	pgcnt_t pages,
	struct memlist **list,
	uint_t *npgs)
{
	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
	struct memlist *dst;
	struct memlist *prev, *next;
	int retval = 0;

	/*
	 * allocate a new struct memlist
	 */

	dst = (struct memlist *)
	    kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);

	if (dst == NULL) {
		retval = -1;
		goto add_done;
	}

	dst->ml_address = address;
	dst->ml_size = bytes;

	/*
	 * first insert
	 */
	if (*list == NULL) {
		dst->ml_prev = NULL;
		dst->ml_next = NULL;
		*list = dst;

		goto add_done;
	}

	/*
	 * insert into sorted list
	 */
	for (prev = NULL, next = *list;
	    next != NULL;
	    prev = next, next = next->ml_next) {
		if (address > (next->ml_address + next->ml_size))
			continue;

		/*
		 * else insert here
		 */

		/*
		 * prepend to next
		 */
		if ((address + bytes) == next->ml_address) {
			kmem_free(dst, sizeof (struct memlist));

			next->ml_address = address;
			next->ml_size += bytes;

			goto add_done;
		}

		/*
		 * append to next
		 */
		if (address == (next->ml_address + next->ml_size)) {
			kmem_free(dst, sizeof (struct memlist));

			if (next->ml_next) {
				/*
				 * don't overlap with next->ml_next
				 */
				if ((address + bytes) >
				    next->ml_next->ml_address) {
					retval = -1;
					goto add_done;
				}
				/*
				 * concatenate next and next->ml_next
				 */
				if ((address + bytes) ==
				    next->ml_next->ml_address) {
					struct memlist *mlp = next->ml_next;

					if (next == *list)
						*list = next->ml_next;

					mlp->ml_address = next->ml_address;
					mlp->ml_size += next->ml_size;
					mlp->ml_size += bytes;

					if (next->ml_prev)
						next->ml_prev->ml_next = mlp;
					mlp->ml_prev = next->ml_prev;

					kmem_free(next,
					    sizeof (struct memlist));
					goto add_done;
				}
			}

			next->ml_size += bytes;

			goto add_done;
		}

		/* don't overlap with next */
		if ((address + bytes) > next->ml_address) {
			retval = -1;
			kmem_free(dst, sizeof (struct memlist));
			goto add_done;
		}

		/*
		 * insert before next
		 */
		dst->ml_prev = prev;
		dst->ml_next = next;
		next->ml_prev = dst;
		if (prev == NULL) {
			*list = dst;
		} else {
			prev->ml_next = dst;
		}
		goto add_done;
	}	/* end for */

	/*
	 * end of list, prev is valid and next is NULL
	 */
	prev->ml_next = dst;
	dst->ml_prev = prev;
	dst->ml_next = NULL;

add_done:

	if (retval != -1)
		*npgs += pages;

	return (retval);
}

/*
 * delete a span from the memscrub list
 * subtract from memscrub_phys_pages
 */
int
memscrub_delete_span(pfn_t pfn, pgcnt_t pages)
{
	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
	struct memlist *dst, *next;
	int retval = 0;

	mutex_enter(&memscrub_lock);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist Before", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
	cmn_err(CE_CONT, "memscrub_delete_span: 0x%llx 0x%llx\n",
	    address, bytes);
#endif /* MEMSCRUB_DEBUG */

	/*
	 * find struct memlist containing page
	 */
	for (next = memscrub_memlist; next != NULL; next = next->ml_next) {
		if ((address >= next->ml_address) &&
		    (address < next->ml_address + next->ml_size))
			break;
	}

	/*
	 * if start address not in list
	 */
	if (next == NULL) {
		retval = -1;
		goto delete_done;
	}

	/*
	 * error if size goes off end of this struct memlist
	 */
	if (address + bytes > next->ml_address + next->ml_size) {
		retval = -1;
		goto delete_done;
	}

	/*
	 * pages at beginning of struct memlist
	 */
	if (address == next->ml_address) {
		/*
		 * if start & size match, delete from list
		 */
		if (bytes == next->ml_size) {
			if (next == memscrub_memlist)
				memscrub_memlist = next->ml_next;
			if (next->ml_prev != NULL)
				next->ml_prev->ml_next = next->ml_next;
			if (next->ml_next != NULL)
				next->ml_next->ml_prev = next->ml_prev;

			kmem_free(next, sizeof (struct memlist));
		} else {
			/*
			 * increment start address by bytes
			 */
			next->ml_address += bytes;
			next->ml_size -= bytes;
		}
		goto delete_done;
	}

	/*
	 * pages at end of struct memlist
	 */
	if (address + bytes == next->ml_address + next->ml_size) {
		/*
		 * decrement size by bytes
		 */
		next->ml_size -= bytes;
		goto delete_done;
	}

	/*
	 * delete a span in the middle of the struct memlist
	 */
	{
		/*
		 * create a new struct memlist
		 */
		dst = (struct memlist *)
		    kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);

		if (dst == NULL) {
			retval = -1;
			goto delete_done;
		}

		/*
		 * existing struct memlist gets address
		 * and size up to pfn
		 */
		dst->ml_address = address + bytes;
		dst->ml_size =
		    (next->ml_address + next->ml_size) - dst->ml_address;
		next->ml_size = address - next->ml_address;

		/*
		 * new struct memlist gets address starting
		 * after pfn, until end
		 */

		/*
		 * link in new memlist after old
		 */
		dst->ml_next = next->ml_next;
		dst->ml_prev = next;

		if (next->ml_next != NULL)
			next->ml_next->ml_prev = dst;
		next->ml_next = dst;
	}

delete_done:
	if (retval != -1) {
		memscrub_phys_pages -= pages;
		if (memscrub_phys_pages == 0)
			disable_memscrub = 1;
	}

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist After", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
#endif /* MEMSCRUB_DEBUG */

	mutex_exit(&memscrub_lock);
	return (retval);
}

static void
memscrub_scan(uint_t blks, ms_paddr_t src)
{
	uint_t psz, bpp, pgsread;
	pfn_t pfn;
	ms_paddr_t pa;
	caddr_t va;
	on_trap_data_t otd;
	int scan_mmu_pagesize = 0;
	int retired_pages = 0;

	extern void memscrub_read(caddr_t src, uint_t blks);

	ASSERT(mutex_owned(&memscrub_lock));

	pgsread = 0;
	pa = src;

	if (memscrub_page_retire_span_list != NULL) {
		if (memscrub_page_retire_span_search(src)) {
			/* retired pages in current span */
			scan_mmu_pagesize = 1;
		}
	}

#ifdef MEMSCRUB_DEBUG
	cmn_err(CE_NOTE, "scan_mmu_pagesize = %d\n", scan_mmu_pagesize);
#endif /* MEMSCRUB_DEBUG */

	while (blks != 0) {
		/* Ensure the PA is properly aligned */
		if (((pa & MMU_PAGEMASK4M) == pa) &&
		    (blks >= MEMSCRUB_BPP4M)) {
			psz = MMU_PAGESIZE4M;
			bpp = MEMSCRUB_BPP4M;
		} else if (((pa & MMU_PAGEMASK512K) == pa) &&
		    (blks >= MEMSCRUB_BPP512K)) {
			psz = MMU_PAGESIZE512K;
			bpp = MEMSCRUB_BPP512K;
		} else if (((pa & MMU_PAGEMASK64K) == pa) &&
		    (blks >= MEMSCRUB_BPP64K)) {
			psz = MMU_PAGESIZE64K;
			bpp = MEMSCRUB_BPP64K;
		} else if ((pa & MMU_PAGEMASK) == pa) {
			psz = MMU_PAGESIZE;
			bpp = MEMSCRUB_BPP;
		} else {
			if (memscrub_verbose) {
				cmn_err(CE_NOTE, "Memory scrubber ignoring "
				    "non-page aligned block starting at 0x%"
				    PRIx64, src);
			}
			return;
		}
		if (blks < bpp) bpp = blks;

#ifdef MEMSCRUB_DEBUG
		cmn_err(CE_NOTE, "Going to run psz=%x, "
		    "bpp=%x pa=%llx\n", psz, bpp, pa);
#endif /* MEMSCRUB_DEBUG */

		/*
		 * MEMSCRUBBASE is a 4MB aligned page in the
		 * kernel so that we can quickly map the PA
		 * to a VA for the block loads performed in
		 * memscrub_read.
		 */
		pfn = mmu_btop(pa);
		va = (caddr_t)MEMSCRUBBASE;
		hat_devload(kas.a_hat, va, psz, pfn, PROT_READ,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);

		/*
		 * Can't allow the memscrubber to migrate across CPUs as
		 * we need to know whether CEEN is enabled for the current
		 * CPU to enable us to scrub the memory.  Don't use
		 * kpreempt_disable as the time we take to scan a span (even
		 * without cpu_check_ce having to manually cpu_check_block)
		 * is too long to hold a higher priority thread (eg, RT)
		 * off cpu.
		 */
		thread_affinity_set(curthread, CPU_CURRENT);

		/*
		 * Protect read scrub from async faults.  For now, we simply
		 * maintain a count of such faults caught.
		 */

		if (!on_trap(&otd, OT_DATA_EC) && !scan_mmu_pagesize) {
			memscrub_read(va, bpp);
			/*
			 * Check if CEs require logging
			 */
			cpu_check_ce(SCRUBBER_CEEN_CHECK,
			    (uint64_t)pa, va, psz);
			no_trap();
			thread_affinity_clear(curthread);
		} else {
			no_trap();
			thread_affinity_clear(curthread);

			/*
			 * Got an async error..
			 * Try rescanning it at MMU_PAGESIZE
			 * granularity if we were trying to
			 * read at a larger page size.
			 * This is to ensure we continue to
			 * scan the rest of the span.
			 * Also scan at MMU_PAGESIZE granularity to avoid
			 * reading the memory of retired pages when
			 * scan_mmu_pagesize is set.
			 */
11532895Svb70745 if (psz > MMU_PAGESIZE || scan_mmu_pagesize) {
115411873SVijay.Balakrishna@Sun.COM caddr_t vaddr = va;
115511873SVijay.Balakrishna@Sun.COM ms_paddr_t paddr = pa;
115611873SVijay.Balakrishna@Sun.COM int tmp = 0;
115711873SVijay.Balakrishna@Sun.COM for (; tmp < bpp; tmp += MEMSCRUB_BPP) {
115811873SVijay.Balakrishna@Sun.COM /* Don't scrub retired pages */
115911873SVijay.Balakrishna@Sun.COM if (page_retire_check(paddr, NULL)
116011873SVijay.Balakrishna@Sun.COM == 0) {
116111873SVijay.Balakrishna@Sun.COM vaddr += MMU_PAGESIZE;
116211873SVijay.Balakrishna@Sun.COM paddr += MMU_PAGESIZE;
116311873SVijay.Balakrishna@Sun.COM retired_pages++;
116411873SVijay.Balakrishna@Sun.COM continue;
116511873SVijay.Balakrishna@Sun.COM }
116611873SVijay.Balakrishna@Sun.COM thread_affinity_set(curthread,
116711873SVijay.Balakrishna@Sun.COM CPU_CURRENT);
116811873SVijay.Balakrishna@Sun.COM if (!on_trap(&otd, OT_DATA_EC)) {
116911873SVijay.Balakrishna@Sun.COM memscrub_read(vaddr,
117011873SVijay.Balakrishna@Sun.COM MEMSCRUB_BPP);
117111873SVijay.Balakrishna@Sun.COM cpu_check_ce(
117211873SVijay.Balakrishna@Sun.COM SCRUBBER_CEEN_CHECK,
117311873SVijay.Balakrishna@Sun.COM (uint64_t)paddr, vaddr,
117411873SVijay.Balakrishna@Sun.COM MMU_PAGESIZE);
117511873SVijay.Balakrishna@Sun.COM no_trap();
117611873SVijay.Balakrishna@Sun.COM } else {
117711873SVijay.Balakrishna@Sun.COM no_trap();
117811873SVijay.Balakrishna@Sun.COM MEMSCRUB_STAT_INC(errors_found);
117911873SVijay.Balakrishna@Sun.COM }
118011873SVijay.Balakrishna@Sun.COM thread_affinity_clear(curthread);
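				/* Advance to the next page in the span. */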
11812895Svb70745 vaddr += MMU_PAGESIZE;
11822895Svb70745 paddr += MMU_PAGESIZE;
11832895Svb70745 }
11840Sstevel@tonic-gate }
11850Sstevel@tonic-gate }
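		/* Tear down the temporary mapping used to scan this span. */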
11860Sstevel@tonic-gate hat_unload(kas.a_hat, va, psz, HAT_UNLOAD_UNLOCK);
11870Sstevel@tonic-gate
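		/* Account for the blocks just scanned and advance to the next chunk. */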
11880Sstevel@tonic-gate blks -= bpp;
11890Sstevel@tonic-gate pa += psz;
11900Sstevel@tonic-gate pgsread++;
11910Sstevel@tonic-gate }
11922895Svb70745
11932895Svb70745 /*
11942895Svb70745 * If we just finished scrubbing this span MMU_PAGESIZE at a time and
11952895Svb70745 * found no retired pages, delete the span from the global list.
11962895Svb70745 */
11972895Svb70745 if (scan_mmu_pagesize && retired_pages == 0)
11982895Svb70745 memscrub_page_retire_span_delete(src);
11992895Svb70745
12002895Svb70745 /*
12012895Svb70745 * We encountered a CE/UE on a retired page while reading the current
12022895Svb70745 * span. Add the span to the global list so we avoid reading it again.
12032895Svb70745 */
12042895Svb70745 if (add_to_page_retire_list) {
12052895Svb70745 if (!memscrub_page_retire_span_search(src))
12062895Svb70745 memscrub_page_retire_span_add(src);
12072895Svb70745 add_to_page_retire_list = 0;
12082895Svb70745 }
12092895Svb70745
12100Sstevel@tonic-gate if (memscrub_verbose) {
12110Sstevel@tonic-gate cmn_err(CE_NOTE, "Memory scrubber read 0x%x pages starting "
12120Sstevel@tonic-gate "at 0x%" PRIx64, pgsread, src);
12130Sstevel@tonic-gate }
12140Sstevel@tonic-gate }
12150Sstevel@tonic-gate
12160Sstevel@tonic-gate /*
12172895Svb70745 * Called by cpu_async_log_err() when memscrub read causes
12182895Svb70745 * CE/UE on a retired page.
12192895Svb70745 */
12202895Svb70745 void
12212895Svb70745 memscrub_induced_error(void)
12222895Svb70745 {
12232895Svb70745 add_to_page_retire_list = 1;
12242895Svb70745 }
12252895Svb70745
122611873SVijay.Balakrishna@Sun.COM /*
122711873SVijay.Balakrishna@Sun.COM * Called by page_retire() when toxic pages cannot be retired
122811873SVijay.Balakrishna@Sun.COM * immediately and are scheduled for retirement. The memscrubber
122911873SVijay.Balakrishna@Sun.COM * stops scrubbing them to avoid further CE/UEs.
123011873SVijay.Balakrishna@Sun.COM */
123111873SVijay.Balakrishna@Sun.COM void
123211873SVijay.Balakrishna@Sun.COM memscrub_notify(ms_paddr_t pa)
123311873SVijay.Balakrishna@Sun.COM {
123411873SVijay.Balakrishna@Sun.COM mutex_enter(&memscrub_lock);
123511873SVijay.Balakrishna@Sun.COM if (!memscrub_page_retire_span_search(pa))
123611873SVijay.Balakrishna@Sun.COM memscrub_page_retire_span_add(pa);
123711873SVijay.Balakrishna@Sun.COM mutex_exit(&memscrub_lock);
123811873SVijay.Balakrishna@Sun.COM }
12392895Svb70745
12402895Svb70745 /*
124111873SVijay.Balakrishna@Sun.COM * Called by memscrub_scan() and memscrub_notify().
12422895Svb70745 * pa: physical address of span with CE/UE, add to global list.
12432895Svb70745 */
12442895Svb70745 static void
12452895Svb70745 memscrub_page_retire_span_add(ms_paddr_t pa)
12462895Svb70745 {
12472895Svb70745 memscrub_page_retire_span_t *new_span;
12482895Svb70745
12492895Svb70745 new_span = (memscrub_page_retire_span_t *)
12502895Svb70745 kmem_zalloc(sizeof (memscrub_page_retire_span_t), KM_NOSLEEP);
12512895Svb70745
12522895Svb70745 if (new_span == NULL) {
12532895Svb70745 #ifdef MEMSCRUB_DEBUG
12542895Svb70745 cmn_err(CE_NOTE, "failed to allocate new span - span with"
12552895Svb70745 " retired page(s) not tracked.\n");
12562895Svb70745 #endif /* MEMSCRUB_DEBUG */
12572895Svb70745 return;
12582895Svb70745 }
12592895Svb70745
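	/* Link the new entry at the head of the global list. */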
12602895Svb70745 new_span->address = pa;
12612895Svb70745 new_span->next = memscrub_page_retire_span_list;
12622895Svb70745 memscrub_page_retire_span_list = new_span;
12632895Svb70745 }
12642895Svb70745
12652895Svb70745 /*
12662895Svb70745 * Called by memscrub_scan().
12672895Svb70745 * pa: physical address of span to be removed from global list.
12682895Svb70745 */
12692895Svb70745 static void
12702895Svb70745 memscrub_page_retire_span_delete(ms_paddr_t pa)
12712895Svb70745 {
12722895Svb70745 memscrub_page_retire_span_t *prev_span, *next_span;
12732895Svb70745
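	/*
	 * The list is assumed to be non-empty here; handle the case
	 * where the span to delete is at the head of the list first.
	 */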
12742895Svb70745 prev_span = memscrub_page_retire_span_list;
12752895Svb70745 next_span = memscrub_page_retire_span_list->next;
12762895Svb70745
12772895Svb70745 if (pa == prev_span->address) {
12782895Svb70745 memscrub_page_retire_span_list = next_span;
12792895Svb70745 kmem_free(prev_span, sizeof (memscrub_page_retire_span_t));
12802895Svb70745 return;
12812895Svb70745 }
12822895Svb70745
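	/* Otherwise walk the rest of the list looking for the span. */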
12832895Svb70745 while (next_span) {
12842895Svb70745 if (pa == next_span->address) {
12852895Svb70745 prev_span->next = next_span->next;
12862895Svb70745 kmem_free(next_span,
12872895Svb70745 sizeof (memscrub_page_retire_span_t));
12882895Svb70745 return;
12892895Svb70745 }
12902895Svb70745 prev_span = next_span;
12912895Svb70745 next_span = next_span->next;
12922895Svb70745 }
12932895Svb70745 }
12942895Svb70745
12952895Svb70745 /*
129611873SVijay.Balakrishna@Sun.COM * Called by memscrub_scan() and memscrub_notify().
12972895Svb70745 * pa: physical address of span to be searched in global list.
12982895Svb70745 */
12992895Svb70745 static int
13002895Svb70745 memscrub_page_retire_span_search(ms_paddr_t pa)
13012895Svb70745 {
13022895Svb70745 memscrub_page_retire_span_t *next_span = memscrub_page_retire_span_list;
13032895Svb70745
13042895Svb70745 while (next_span) {
13052895Svb70745 if (pa == next_span->address)
13062895Svb70745 return (1);
13072895Svb70745 next_span = next_span->next;
13082895Svb70745 }
13092895Svb70745 return (0);
13102895Svb70745 }
13112895Svb70745
13122895Svb70745 /*
13132895Svb70745 * Called from new_memscrub() as a result of a memory delete.
13142895Svb70745 * Uses page_numtopp_nolock() to determine whether a span's PA is still valid.
13152895Svb70745 */
13162895Svb70745 static void
13172895Svb70745 memscrub_page_retire_span_list_update(void)
13182895Svb70745 {
13192895Svb70745 memscrub_page_retire_span_t *prev, *cur, *next;
13202895Svb70745
13212895Svb70745 if (memscrub_page_retire_span_list == NULL)
13222895Svb70745 return;
13232895Svb70745
13242895Svb70745 prev = cur = memscrub_page_retire_span_list;
13252895Svb70745 next = cur->next;
13262895Svb70745
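	/*
	 * Drop any span whose physical address no longer maps to a
	 * page structure, i.e. the memory behind it has been deleted.
	 */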
13272895Svb70745 while (cur) {
13282895Svb70745 if (page_numtopp_nolock(mmu_btop(cur->address)) == NULL) {
13292895Svb70745 if (cur == memscrub_page_retire_span_list) {
13302895Svb70745 memscrub_page_retire_span_list = next;
13312895Svb70745 kmem_free(cur,
13322895Svb70745 sizeof (memscrub_page_retire_span_t));
13332895Svb70745 prev = cur = memscrub_page_retire_span_list;
13342895Svb70745 } else {
13352895Svb70745 prev->next = cur->next;
13362895Svb70745 kmem_free(cur,
13372895Svb70745 sizeof (memscrub_page_retire_span_t));
13382895Svb70745 cur = next;
13392895Svb70745 }
13402895Svb70745 } else {
13412895Svb70745 prev = cur;
13422895Svb70745 cur = next;
13432895Svb70745 }
13442895Svb70745 if (cur != NULL)
13452895Svb70745 next = cur->next;
13462895Svb70745 }
13472895Svb70745 }
13482895Svb70745
13492895Svb70745 /*
13500Sstevel@tonic-gate * The memory add/delete callback mechanism does not pass in the
13510Sstevel@tonic-gate * page ranges. The phys_install list has been updated though, so
13520Sstevel@tonic-gate * create a new scrub list from it.
13530Sstevel@tonic-gate */
13540Sstevel@tonic-gate
13550Sstevel@tonic-gate static int
13562895Svb70745 new_memscrub(int update_page_retire_list)
13570Sstevel@tonic-gate {
13580Sstevel@tonic-gate struct memlist *src, *list, *old_list;
13590Sstevel@tonic-gate uint_t npgs;
13600Sstevel@tonic-gate
13610Sstevel@tonic-gate /*
13620Sstevel@tonic-gate * copy phys_install to memscrub_memlist
13630Sstevel@tonic-gate */
13640Sstevel@tonic-gate list = NULL;
13650Sstevel@tonic-gate npgs = 0;
13660Sstevel@tonic-gate memlist_read_lock();
136711474SJonathan.Adams@Sun.COM for (src = phys_install; src; src = src->ml_next) {
136811474SJonathan.Adams@Sun.COM if (memscrub_add_span_gen((pfn_t)(src->ml_address >> PAGESHIFT),
136911474SJonathan.Adams@Sun.COM (pgcnt_t)(src->ml_size >> PAGESHIFT), &list, &npgs)) {
13700Sstevel@tonic-gate memlist_read_unlock();
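			/* Failed to add the span; free the partially built list. */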
13710Sstevel@tonic-gate while (list) {
13720Sstevel@tonic-gate struct memlist *el;
13730Sstevel@tonic-gate
13740Sstevel@tonic-gate el = list;
137511474SJonathan.Adams@Sun.COM list = list->ml_next;
13760Sstevel@tonic-gate kmem_free(el, sizeof (struct memlist));
13770Sstevel@tonic-gate }
13780Sstevel@tonic-gate return (-1);
13790Sstevel@tonic-gate }
13800Sstevel@tonic-gate }
13810Sstevel@tonic-gate memlist_read_unlock();
13820Sstevel@tonic-gate
13830Sstevel@tonic-gate mutex_enter(&memscrub_lock);
13840Sstevel@tonic-gate memscrub_phys_pages = npgs;
13850Sstevel@tonic-gate old_list = memscrub_memlist;
13860Sstevel@tonic-gate memscrub_memlist = list;
13872895Svb70745
13882895Svb70745 if (update_page_retire_list)
13892895Svb70745 memscrub_page_retire_span_list_update();
13902895Svb70745
13910Sstevel@tonic-gate mutex_exit(&memscrub_lock);
13920Sstevel@tonic-gate
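	/* Free the old scrub list now that the new one is installed. */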
13930Sstevel@tonic-gate while (old_list) {
13940Sstevel@tonic-gate struct memlist *el;
13950Sstevel@tonic-gate
13960Sstevel@tonic-gate el = old_list;
139711474SJonathan.Adams@Sun.COM old_list = old_list->ml_next;
13980Sstevel@tonic-gate kmem_free(el, sizeof (struct memlist));
13990Sstevel@tonic-gate }
14002895Svb70745
14010Sstevel@tonic-gate return (0);
14020Sstevel@tonic-gate }
14030Sstevel@tonic-gate
14040Sstevel@tonic-gate /*ARGSUSED*/
14050Sstevel@tonic-gate static void
14060Sstevel@tonic-gate memscrub_mem_config_post_add(
14070Sstevel@tonic-gate void *arg,
14080Sstevel@tonic-gate pgcnt_t delta_pages)
14090Sstevel@tonic-gate {
14100Sstevel@tonic-gate /*
14110Sstevel@tonic-gate * We increment pause_memscrub before entering new_memscrub(). This
14120Sstevel@tonic-gate * will force the memscrubber to sleep, allowing the DR callback
14130Sstevel@tonic-gate * thread to acquire memscrub_lock in new_memscrub(). The use of
14140Sstevel@tonic-gate * atomic_add_32() allows concurrent memory DR operations to use the
14150Sstevel@tonic-gate * callbacks safely.
14160Sstevel@tonic-gate */
14170Sstevel@tonic-gate atomic_add_32(&pause_memscrub, 1);
14180Sstevel@tonic-gate ASSERT(pause_memscrub != 0);
14190Sstevel@tonic-gate
14200Sstevel@tonic-gate /*
14210Sstevel@tonic-gate * Failure is harmless ("don't care"): it just means we will not scrub the new memory.
14220Sstevel@tonic-gate */
14232895Svb70745 (void) new_memscrub(0); /* retain page retire list */
14240Sstevel@tonic-gate
14250Sstevel@tonic-gate /* Restore the pause setting. */
14260Sstevel@tonic-gate atomic_add_32(&pause_memscrub, -1);
14270Sstevel@tonic-gate }
14280Sstevel@tonic-gate
14290Sstevel@tonic-gate /*ARGSUSED*/
14300Sstevel@tonic-gate static int
14310Sstevel@tonic-gate memscrub_mem_config_pre_del(
14320Sstevel@tonic-gate void *arg,
14330Sstevel@tonic-gate pgcnt_t delta_pages)
14340Sstevel@tonic-gate {
14350Sstevel@tonic-gate /* Nothing to do. */
14360Sstevel@tonic-gate return (0);
14370Sstevel@tonic-gate }
14380Sstevel@tonic-gate
14390Sstevel@tonic-gate /*ARGSUSED*/
14400Sstevel@tonic-gate static void
14410Sstevel@tonic-gate memscrub_mem_config_post_del(
14420Sstevel@tonic-gate void *arg,
14430Sstevel@tonic-gate pgcnt_t delta_pages,
14440Sstevel@tonic-gate int cancelled)
14450Sstevel@tonic-gate {
14460Sstevel@tonic-gate /*
14470Sstevel@tonic-gate * We increment pause_memscrub before entering new_memscrub(). This
14480Sstevel@tonic-gate * will force the memscrubber to sleep, allowing the DR callback
14490Sstevel@tonic-gate * thread to acquire memscrub_lock in new_memscrub(). The use of
14500Sstevel@tonic-gate * atomic_add_32() allows concurrent memory DR operations to use the
14510Sstevel@tonic-gate * callbacks safely.
14520Sstevel@tonic-gate */
14530Sstevel@tonic-gate atomic_add_32(&pause_memscrub, 1);
14540Sstevel@tonic-gate ASSERT(pause_memscrub != 0);
14550Sstevel@tonic-gate
14560Sstevel@tonic-gate /*
14570Sstevel@tonic-gate * Must stop scrubbing deleted memory as it may be disconnected.
14580Sstevel@tonic-gate */
14592895Svb70745 if (new_memscrub(1)) { /* update page retire list */
14600Sstevel@tonic-gate disable_memscrub = 1;
14610Sstevel@tonic-gate }
14620Sstevel@tonic-gate
14630Sstevel@tonic-gate /* Restore the pause setting. */
14640Sstevel@tonic-gate atomic_add_32(&pause_memscrub, -1);
14650Sstevel@tonic-gate }
14660Sstevel@tonic-gate
14670Sstevel@tonic-gate static kphysm_setup_vector_t memscrub_mem_config_vec = {
14680Sstevel@tonic-gate KPHYSM_SETUP_VECTOR_VERSION,
14690Sstevel@tonic-gate memscrub_mem_config_post_add,
14700Sstevel@tonic-gate memscrub_mem_config_pre_del,
14710Sstevel@tonic-gate memscrub_mem_config_post_del,
14720Sstevel@tonic-gate };
14730Sstevel@tonic-gate
14740Sstevel@tonic-gate static void
14750Sstevel@tonic-gate memscrub_init_mem_config()
14760Sstevel@tonic-gate {
14770Sstevel@tonic-gate int ret;
14780Sstevel@tonic-gate
14790Sstevel@tonic-gate ret = kphysm_setup_func_register(&memscrub_mem_config_vec,
14800Sstevel@tonic-gate (void *)NULL);
14810Sstevel@tonic-gate ASSERT(ret == 0);
14820Sstevel@tonic-gate }
14830Sstevel@tonic-gate
14840Sstevel@tonic-gate static void
14850Sstevel@tonic-gate memscrub_uninit_mem_config()
14860Sstevel@tonic-gate {
14870Sstevel@tonic-gate /* This call is OK if the register call was not done. */
14880Sstevel@tonic-gate kphysm_setup_func_unregister(&memscrub_mem_config_vec, (void *)NULL);
14890Sstevel@tonic-gate }
1490