/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * sun4u Memory Scrubbing
 *
 * On detection of a correctable memory ECC error, the sun4u kernel
 * returns the corrected data to the requester and re-writes it
 * to memory (DRAM).  So if the correctable error was transient,
 * the error has effectively been cleaned (scrubbed) from memory.
 *
 * Scrubbing thus reduces the likelihood that multiple transient errors
 * will occur in the same memory word, making uncorrectable errors due
 * to transients less likely.
 *
 * Thus is born the desire that every memory location be periodically
 * accessed.
 *
 * This file implements a memory scrubbing thread.  This scrubber
 * guarantees that all of physical memory is accessed periodically
 * (memscrub_period_sec -- 12 hours).
 *
 * It attempts to do this as unobtrusively as possible.  The thread
 * schedules itself to wake up at an interval such that if it reads
 * memscrub_span_pages (32MB) on each wakeup, it will read all of physical
 * memory in memscrub_period_sec (12 hours).
 *
 * The scrubber uses the block load and prefetch hardware to read memory
 * @ 1300MB/s, so it reads spans of 32MB in 0.025 seconds.  Unlike the
 * original sun4d scrubber the sun4u scrubber does not read ahead if the
 * system is idle because we can read memory very efficiently.
 *
 * The scrubber maintains a private copy of the phys_install memory list
 * to keep track of what memory should be scrubbed.
 *
 * The global routines memscrub_add_span() and memscrub_delete_span() are
 * used to add and delete from this list.  If hotplug memory is later
 * supported these two routines can be used to notify the scrubber of
 * memory configuration changes.
 *
 * The following parameters can be set via /etc/system
 *
 *	memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (32MB)
 *	memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
 *	memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI)
 *	memscrub_delay_start_sec = (5 minutes)
 *	memscrub_verbose = (0)
 *	memscrub_override_ticks = (1 tick)
 *	disable_memscrub = (0)
 *	pause_memscrub = (0)
 *	read_all_memscrub = (0)
 *
 * The scrubber will print NOTICE messages of what it is doing if
 * "memscrub_verbose" is set.
 *
 * If the scrubber's sleep time calculation drops to zero ticks,
 * memscrub_override_ticks will be used as the sleep time instead.  The
 * sleep time should only drop to zero on a system with over 131.84
 * terabytes of memory, or where the default scrubber parameters have
 * been adjusted.  For example, reducing memscrub_span_pages or
 * memscrub_period_sec causes the sleep time to drop to zero with less
 * memory.  Note that since the sleep time is calculated in clock ticks,
 * using hires clock ticks allows for more memory before the sleep time
 * becomes zero.
 *
 * The scrubber will exit (or never be started) if it finds the variable
 * "disable_memscrub" set.
 *
 * The scrubber will pause (not read memory) when "pause_memscrub"
 * is set.  It will check the state of pause_memscrub at each wakeup
 * period.  The scrubber will not make up for lost time.  If you
 * pause the scrubber for a prolonged period of time you can use
 * the "read_all_memscrub" switch (see below) to catch up.  In addition,
 * pause_memscrub is used internally by the post memory DR callbacks.
 * It is set for the small period of time during which the callbacks
 * are executing.  This ensures "memscrub_lock" will be released,
 * allowing the callbacks to finish.
 *
 * The scrubber will read all memory if "read_all_memscrub" is set.
 * The normal span read will also occur during the wakeup.
 *
 * MEMSCRUB_MIN_PAGES (32MB) is the minimum amount of memory a system
 * must have before we'll start the scrubber.
 *
 * MEMSCRUB_DFL_SPAN_PAGES (32MB) is based on the guess that 0.025 sec
 * is a "good" amount of minimum time for the thread to run at a time.
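 *
 * (At the ~1300MB/s block-load rate quoted above, a 32MB span takes
 * 32/1300 ~= 0.025 seconds, which is where that figure comes from.)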
 *
 * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
 * twice the frequency the hardware folk estimated would be necessary.
 *
 * MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI) is based on the assumption
 * that the scrubber should get its fair share of time (since it
 * is short).  At a priority of 0 the scrubber will be starved.
 */

#include <sys/systm.h>		/* timeout, types, t_lock */
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>	/* MIN */
#include <sys/memlist.h>	/* memlist */
#include <sys/mem_config.h>	/* memory add/delete */
#include <sys/kmem.h>		/* KMEM_NOSLEEP */
#include <sys/cpuvar.h>		/* ncpus_online */
#include <sys/debug.h>		/* ASSERTs */
#include <sys/machsystm.h>	/* lddphys */
#include <sys/cpu_module.h>	/* vtag_flushpage */
#include <sys/kstat.h>
#include <sys/atomic.h>		/* atomic_add_32 */

#include <vm/hat.h>
#include <vm/seg_kmem.h>
#include <vm/hat_sfmmu.h>	/* XXX FIXME - delete */

#include <sys/time.h>
#include <sys/callb.h>		/* CPR callback */
#include <sys/ontrap.h>

/*
 * Should really have paddr_t defined, but it is broken.
 * Use ms_paddr_t in the meantime to make the code cleaner.
 */
typedef uint64_t ms_paddr_t;

/*
 * Global Routines:
 */
int memscrub_add_span(pfn_t pfn, pgcnt_t pages);
int memscrub_delete_span(pfn_t pfn, pgcnt_t pages);
int memscrub_init(void);
void memscrub_induced_error(void);

/*
 * Global Data:
 */

/*
 * scrub if we have at least this many pages
 */
#define	MEMSCRUB_MIN_PAGES	(32 * 1024 * 1024 / PAGESIZE)

/*
 * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
 */
#define	MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */

/*
 * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
 */
#define	MEMSCRUB_DFL_SPAN_PAGES	((32 * 1024 * 1024) / PAGESIZE)

/*
 * almost anything is higher priority than scrubbing
 */
#define	MEMSCRUB_DFL_THREAD_PRI	MINCLSYSPRI

/*
 * size used when scanning memory
 */
#define	MEMSCRUB_BLOCK_SIZE		256
#define	MEMSCRUB_BLOCK_SIZE_SHIFT	8  /* log2(MEMSCRUB_BLOCK_SIZE) */
#define	MEMSCRUB_BLOCKS_PER_PAGE	(PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)

#define	MEMSCRUB_BPP4M		(MMU_PAGESIZE4M >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP512K	(MMU_PAGESIZE512K >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP64K		(MMU_PAGESIZE64K >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP		(MMU_PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)

/*
 * This message indicates that we have exceeded the limitations of
 * the memscrubber.  See the comments above regarding what would
 * cause the sleep time to become zero.  In DEBUG mode, this message
 * is logged on the console and in the messages file.  In non-DEBUG
 * mode, it is only logged in the messages file.
 */
#ifdef DEBUG
#define	MEMSCRUB_OVERRIDE_MSG	"Memory scrubber sleep time is zero " \
	"seconds, consuming entire CPU."
#else
#define	MEMSCRUB_OVERRIDE_MSG	"!Memory scrubber sleep time is zero " \
	"seconds, consuming entire CPU."
#endif /* DEBUG */

/*
 * we can patch these defaults in /etc/system if necessary
 */
uint_t disable_memscrub = 0;
uint_t pause_memscrub = 0;
uint_t read_all_memscrub = 0;
uint_t memscrub_verbose = 0;
uint_t memscrub_all_idle = 0;
uint_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
uint_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
uint_t memscrub_delay_start_sec = 5 * 60;
uint_t memscrub_override_ticks = 1;

/*
 * Static Routines
 */
static void memscrubber(void);
static void memscrub_cleanup(void);
static int memscrub_add_span_gen(pfn_t, pgcnt_t, struct memlist **, uint_t *);
static int memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp);
static void memscrub_scan(uint_t blks, ms_paddr_t src);

/*
 * Static Data
 */

static struct memlist *memscrub_memlist;
static uint_t memscrub_phys_pages;

static kcondvar_t memscrub_cv;
static kmutex_t memscrub_lock;
/*
 * memscrub_lock protects memscrub_memlist, interval_ticks, cprinfo, ...
 */
static void memscrub_init_mem_config(void);
static void memscrub_uninit_mem_config(void);

/*
 * Linked list of memscrub aware spans having retired pages.
 * Currently enabled only on sun4u USIII-based platforms.
 */
typedef struct memscrub_page_retire_span {
	ms_paddr_t	address;
	struct memscrub_page_retire_span *next;
} memscrub_page_retire_span_t;

static memscrub_page_retire_span_t *memscrub_page_retire_span_list = NULL;

static void memscrub_page_retire_span_add(ms_paddr_t);
static void memscrub_page_retire_span_delete(ms_paddr_t);
static int memscrub_page_retire_span_search(ms_paddr_t);
static void memscrub_page_retire_span_list_update(void);

/*
 * add_to_page_retire_list: Set by the cpu_async_log_err() routine
 * by calling memscrub_induced_error() when a CE/UE occurs on a retired
 * page due to a memscrub read.  Cleared by memscrub after updating the
 * global page retire span list.  Piggybacks on the protection of
 * memscrub_lock, which is held during set and clear.
 * Note: When cpu_async_log_err() calls memscrub_induced_error(), it is
 * running in softint context, fired on the CPU on which the memscrub
 * thread is currently running.
 * The memscrub thread has affinity set during memscrub_read(), hence
 * migration to a new CPU is not expected.
 */
static int add_to_page_retire_list = 0;

/*
 * Keep track of some interesting statistics
 */
static struct memscrub_kstats {
	kstat_named_t	done_early;	/* ahead of schedule */
	kstat_named_t	early_sec;	/* by cumulative num secs */
	kstat_named_t	done_late;	/* behind schedule */
	kstat_named_t	late_sec;	/* by cumulative num secs */
	kstat_named_t	interval_ticks;	/* num ticks between intervals */
	kstat_named_t	force_run;	/* forced to run, non-timeout */
	kstat_named_t	errors_found;	/* num errors found by memscrub */
} memscrub_counts = {
	{ "done_early",		KSTAT_DATA_UINT32 },
	{ "early_sec",		KSTAT_DATA_UINT32 },
	{ "done_late",		KSTAT_DATA_UINT32 },
	{ "late_sec",		KSTAT_DATA_UINT32 },
	{ "interval_ticks",	KSTAT_DATA_UINT32 },
	{ "force_run",		KSTAT_DATA_UINT32 },
	{ "errors_found",	KSTAT_DATA_UINT32 },
};

#define	MEMSCRUB_STAT_INC(stat)		memscrub_counts.stat.value.ui32++
#define	MEMSCRUB_STAT_SET(stat, val)	memscrub_counts.stat.value.ui32 = (val)
#define	MEMSCRUB_STAT_NINC(stat, val)	memscrub_counts.stat.value.ui32 += (val)

static struct kstat *memscrub_ksp = (struct kstat *)NULL;

static timeout_id_t memscrub_tid = 0;	/* keep track of timeout id */

/*
 * create memscrub_memlist from phys_install list
 * initialize locks, set memscrub_phys_pages.
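 *
 * Returns 0 on success (including the case where the system has fewer
 * than MEMSCRUB_MIN_PAGES pages, in which case the scrubber is simply
 * not started), or -1 if copying phys_install fails.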
 */
int
memscrub_init(void)
{
	struct memlist *src;

	/*
	 * only startup the scrubber if we have a minimum
	 * number of pages
	 */
	if (physinstalled >= MEMSCRUB_MIN_PAGES) {

		/*
		 * initialize locks
		 */
		mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);

		/*
		 * copy phys_install to memscrub_memlist
		 */
		for (src = phys_install; src; src = src->ml_next) {
			if (memscrub_add_span(
			    (pfn_t)(src->ml_address >> PAGESHIFT),
			    (pgcnt_t)(src->ml_size >> PAGESHIFT))) {
				memscrub_cleanup();
				return (-1);
			}
		}

		/*
		 * initialize kstats
		 */
		memscrub_ksp = kstat_create("unix", 0, "memscrub_kstat",
		    "misc", KSTAT_TYPE_NAMED,
		    sizeof (memscrub_counts) / sizeof (kstat_named_t),
		    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);

		if (memscrub_ksp) {
			memscrub_ksp->ks_data = (void *)&memscrub_counts;
			kstat_install(memscrub_ksp);
		} else {
			cmn_err(CE_NOTE, "Memscrubber cannot create kstats\n");
		}

		/*
		 * create memscrubber thread
		 */
		(void) thread_create(NULL, 0, (void (*)())memscrubber,
		    NULL, 0, &p0, TS_RUN, memscrub_thread_pri);

		/*
		 * We don't want call backs changing the list
		 * if there is no thread running.  We do not
		 * attempt to deal with stopping/starting scrubbing
		 * on memory size changes.
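		 * The callbacks registered here rebuild the scrub list
		 * from phys_install after a DR operation completes (see
		 * the new_memscrub() machinery at the bottom of this
		 * file).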
		 */
		memscrub_init_mem_config();
	}

	return (0);
}

static void
memscrub_cleanup(void)
{
	memscrub_uninit_mem_config();
	while (memscrub_memlist) {
		(void) memscrub_delete_span(
		    (pfn_t)(memscrub_memlist->ml_address >> PAGESHIFT),
		    (pgcnt_t)(memscrub_memlist->ml_size >> PAGESHIFT));
	}
	if (memscrub_ksp)
		kstat_delete(memscrub_ksp);
	cv_destroy(&memscrub_cv);
	mutex_destroy(&memscrub_lock);
}

#ifdef MEMSCRUB_DEBUG
static void
memscrub_printmemlist(char *title, struct memlist *listp)
{
	struct memlist *list;

	cmn_err(CE_CONT, "%s:\n", title);

	for (list = listp; list; list = list->ml_next) {
		cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
		    list->ml_address, list->ml_size);
	}
}
#endif /* MEMSCRUB_DEBUG */

/* ARGSUSED */
static void
memscrub_wakeup(void *c)
{
	/*
	 * grab mutex to guarantee that our wakeup call
	 * arrives after we go to sleep -- so we can't sleep forever.
	 */
	mutex_enter(&memscrub_lock);
	cv_signal(&memscrub_cv);
	mutex_exit(&memscrub_lock);
}

/*
 * provide an interface external to the memscrubber
 * which will force the memscrub thread to run vs.
 * waiting for the timeout, if one is set
 */
void
memscrub_run(void)
{
	MEMSCRUB_STAT_INC(force_run);
	if (memscrub_tid) {
		(void) untimeout(memscrub_tid);
		memscrub_wakeup((void *)NULL);
	}
}

/*
 * this calculation doesn't account for the time
 * that the actual scan consumes -- so we'd fall
 * slightly behind schedule with this interval.
 * It's very small.
 */

static uint_t
compute_interval_ticks(void)
{
	/*
	 * We use msp_safe and mpp_safe below to ensure somebody
	 * doesn't set memscrub_span_pages or memscrub_phys_pages
	 * to 0 on us.
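	 *
	 * For example (illustrative, assuming hz = 100): with the
	 * default 12 hour period, period_ticks = 12*60*60*100 =
	 * 4,320,000.  A system with 1024 spans worth of memory (32GB
	 * at 32MB per span) sleeps 4,320,000 / 1024 = 4218 ticks,
	 * about 42 seconds, between spans.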
	 */
	static uint_t msp_safe, mpp_safe;
	static uint_t interval_ticks, period_ticks;
	msp_safe = memscrub_span_pages;
	mpp_safe = memscrub_phys_pages;

	period_ticks = memscrub_period_sec * hz;
	interval_ticks = period_ticks;

	ASSERT(mutex_owned(&memscrub_lock));

	if ((msp_safe != 0) && (mpp_safe != 0)) {
		if (memscrub_phys_pages <= msp_safe) {
			interval_ticks = period_ticks;
		} else {
			interval_ticks = (period_ticks /
			    (mpp_safe / msp_safe));
		}
	}
	return (interval_ticks);
}

void
memscrubber(void)
{
	ms_paddr_t address, addr;
	time_t deadline;
	pgcnt_t pages;
	uint_t reached_end = 1;
	uint_t paused_message = 0;
	uint_t interval_ticks = 0;
	uint_t sleep_warn_printed = 0;
	callb_cpr_t cprinfo;

	/*
	 * notify CPR of our existence
	 */
	CALLB_CPR_INIT(&cprinfo, &memscrub_lock, callb_generic_cpr, "memscrub");

	mutex_enter(&memscrub_lock);

	if (memscrub_memlist == NULL) {
		cmn_err(CE_WARN, "memscrub_memlist not initialized.");
		goto memscrub_exit;
	}

	address = memscrub_memlist->ml_address;

	deadline = gethrestime_sec() + memscrub_delay_start_sec;

	for (;;) {
		if (disable_memscrub)
			break;

		/*
		 * compute interval_ticks
		 */
		interval_ticks = compute_interval_ticks();

		/*
		 * If the calculated sleep time is zero, and pause_memscrub
		 * has been set, make sure we sleep so that another thread
		 * can acquire memscrub_lock.
		 */
		if (interval_ticks == 0 && pause_memscrub) {
			interval_ticks = hz;
		}

		/*
		 * And as a fail safe, under normal non-paused operation, do
		 * not allow the sleep time to be zero.
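		 * (interval_ticks only reaches zero once phys_pages /
		 * span_pages exceeds period_ticks -- with the defaults
		 * and hz = 100 that is 4,320,000 spans of 32MB, the
		 * ~131.84TB mentioned at the top of this file.)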
		 */
		if (interval_ticks == 0) {
			interval_ticks = memscrub_override_ticks;
			if (!sleep_warn_printed) {
				cmn_err(CE_NOTE, MEMSCRUB_OVERRIDE_MSG);
				sleep_warn_printed = 1;
			}
		}

		MEMSCRUB_STAT_SET(interval_ticks, interval_ticks);

		/*
		 * Did we just reach the end of memory?  If we are at the
		 * end of memory, delay end of memory processing until
		 * pause_memscrub is not set.
		 */
		if (reached_end && !pause_memscrub) {
			time_t now = gethrestime_sec();

			if (now >= deadline) {
				MEMSCRUB_STAT_INC(done_late);
				MEMSCRUB_STAT_NINC(late_sec, now - deadline);
				/*
				 * past deadline, start right away
				 */
				interval_ticks = 0;

				deadline = now + memscrub_period_sec;
			} else {
				/*
				 * we finished ahead of schedule.
				 * wait till previous deadline before re-start.
				 */
				interval_ticks = (deadline - now) * hz;
				MEMSCRUB_STAT_INC(done_early);
				MEMSCRUB_STAT_NINC(early_sec, deadline - now);
				deadline += memscrub_period_sec;
			}
			reached_end = 0;
			sleep_warn_printed = 0;
		}

		if (interval_ticks != 0) {
			/*
			 * it is safe from our standpoint for CPR to
			 * suspend the system
			 */
			CALLB_CPR_SAFE_BEGIN(&cprinfo);

			/*
			 * hit the snooze bar
			 */
			memscrub_tid = timeout(memscrub_wakeup, NULL,
			    interval_ticks);

			/*
			 * go to sleep
			 */
			cv_wait(&memscrub_cv, &memscrub_lock);

			/*
			 * at this point, no timeout should be set
			 */
			memscrub_tid = 0;

			/*
			 * we need to go to work and will be modifying
			 * our internal state and mapping/unmapping
			 * TTEs
			 */
			CALLB_CPR_SAFE_END(&cprinfo, &memscrub_lock);
		}


		if (memscrub_phys_pages == 0) {
			cmn_err(CE_WARN, "Memory scrubber has 0 pages to read");
			goto memscrub_exit;
		}

		if (!pause_memscrub) {
			if (paused_message) {
				paused_message = 0;
				if (memscrub_verbose)
					cmn_err(CE_NOTE, "Memory scrubber "
					    "resuming");
			}

			if (read_all_memscrub) {
				if (memscrub_verbose)
					cmn_err(CE_NOTE, "Memory scrubber "
					    "reading all memory per request");

				addr = memscrub_memlist->ml_address;
				reached_end = 0;
				while (!reached_end) {
					if (disable_memscrub)
						break;
					pages = memscrub_phys_pages;
					reached_end = memscrub_verify_span(
					    &addr, &pages);
					memscrub_scan(pages *
					    MEMSCRUB_BLOCKS_PER_PAGE, addr);
					addr += ((uint64_t)pages * PAGESIZE);
				}
				read_all_memscrub = 0;
			}

			/*
			 * read 1 span
			 */
			pages = memscrub_span_pages;

			if (disable_memscrub)
				break;

			/*
			 * determine physical address range
			 */
			reached_end = memscrub_verify_span(&address,
			    &pages);

			memscrub_scan(pages * MEMSCRUB_BLOCKS_PER_PAGE,
			    address);

			address += ((uint64_t)pages * PAGESIZE);
		}

		if (pause_memscrub && !paused_message) {
			paused_message = 1;
			if (memscrub_verbose)
				cmn_err(CE_NOTE, "Memory scrubber paused");
		}
	}

memscrub_exit:
	cmn_err(CE_NOTE, "Memory scrubber exiting");
	CALLB_CPR_EXIT(&cprinfo);
	memscrub_cleanup();
	thread_exit();
	/* NOTREACHED */
}

/*
 * condition address and size
 * such that they span legal physical addresses.
 *
 * when appropriate, address will be rounded up to start of next
 * struct memlist, and pages will be rounded down to the end of the
 * memlist size.
 *
 * returns 1 if reached end of list, else returns 0.
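 *
 * For example (illustrative): if *addrp falls in a hole between two
 * memlist entries, it is rounded up to the start of the next entry and
 * *pagesp is clipped so the span stays within that entry.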
 */
static int
memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp)
{
	struct memlist *mlp;
	ms_paddr_t address = *addrp;
	uint64_t bytes = (uint64_t)*pagesp * PAGESIZE;
	uint64_t bytes_remaining;
	int reached_end = 0;

	ASSERT(mutex_owned(&memscrub_lock));

	/*
	 * find memlist struct that contains addrp
	 * assumes memlist is sorted by ascending address.
	 */
	for (mlp = memscrub_memlist; mlp != NULL; mlp = mlp->ml_next) {
		/*
		 * if before this chunk, round up to beginning
		 */
		if (address < mlp->ml_address) {
			address = mlp->ml_address;
			break;
		}
		/*
		 * if before end of chunk, then we found it
		 */
		if (address < (mlp->ml_address + mlp->ml_size))
			break;

		/* else go to next struct memlist */
	}
	/*
	 * if we hit end of list, start at beginning
	 */
	if (mlp == NULL) {
		mlp = memscrub_memlist;
		address = mlp->ml_address;
	}

	/*
	 * now we have legal address, and its mlp, condition bytes
	 */
	bytes_remaining = (mlp->ml_address + mlp->ml_size) - address;

	if (bytes > bytes_remaining)
		bytes = bytes_remaining;

	/*
	 * will this span take us to end of list?
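	 * (i.e. is this the last memlist entry, with the span ending
	 * exactly at its last byte?)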
	 */
	if ((mlp->ml_next == NULL) &&
	    ((mlp->ml_address + mlp->ml_size) == (address + bytes)))
		reached_end = 1;

	/* return values */
	*addrp = address;
	*pagesp = bytes / PAGESIZE;

	return (reached_end);
}

/*
 * add a span to the memscrub list
 * add to memscrub_phys_pages
 */
int
memscrub_add_span(pfn_t pfn, pgcnt_t pages)
{
#ifdef MEMSCRUB_DEBUG
	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
#endif /* MEMSCRUB_DEBUG */

	int retval;

	mutex_enter(&memscrub_lock);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
	cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
	    " size: 0x%llx\n", address, bytes);
#endif /* MEMSCRUB_DEBUG */

	retval = memscrub_add_span_gen(pfn, pages, &memscrub_memlist,
	    &memscrub_phys_pages);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
#endif /* MEMSCRUB_DEBUG */

	mutex_exit(&memscrub_lock);

	return (retval);
}

static int
memscrub_add_span_gen(
	pfn_t pfn,
	pgcnt_t pages,
	struct memlist **list,
	uint_t *npgs)
{
	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
	struct memlist *dst;
	struct memlist *prev, *next;
	int retval = 0;

	/*
	 * allocate a new struct memlist
	 */

	dst = (struct memlist *)
	    kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);

	if (dst == NULL) {
		retval = -1;
		goto add_done;
	}

	dst->ml_address = address;
	dst->ml_size = bytes;

	/*
	 * first insert
	 */
	if (*list == NULL) {
		dst->ml_prev = NULL;
		dst->ml_next = NULL;
		*list = dst;

		goto add_done;
	}

	/*
	 * insert into sorted list
	 */
	for (prev = NULL, next = *list;
	    next != NULL;
	    prev = next, next = next->ml_next) {
		if (address > (next->ml_address + next->ml_size))
			continue;

		/*
		 * else insert here
		 */

		/*
		 * prepend to next
		 */
		if ((address + bytes) == next->ml_address) {
			kmem_free(dst, sizeof (struct memlist));

			next->ml_address = address;
			next->ml_size += bytes;

			goto add_done;
		}

		/*
		 * append to next
		 */
		if (address == (next->ml_address + next->ml_size)) {
			kmem_free(dst, sizeof (struct memlist));

			if (next->ml_next) {
				/*
				 * don't overlap with next->ml_next
				 */
				if ((address + bytes) >
				    next->ml_next->ml_address) {
					retval = -1;
					goto add_done;
				}
				/*
				 * concatenate next and next->ml_next
				 */
				if ((address + bytes) ==
				    next->ml_next->ml_address) {
					struct memlist *mlp = next->ml_next;

					if (next == *list)
						*list = next->ml_next;

					mlp->ml_address = next->ml_address;
					mlp->ml_size += next->ml_size;
					mlp->ml_size += bytes;

					if (next->ml_prev)
						next->ml_prev->ml_next = mlp;
					mlp->ml_prev = next->ml_prev;

					kmem_free(next,
					    sizeof (struct memlist));
					goto add_done;
				}
			}

			next->ml_size += bytes;

			goto add_done;
		}

		/* don't overlap with next */
		if ((address + bytes) > next->ml_address) {
			retval = -1;
			kmem_free(dst, sizeof (struct memlist));
			goto add_done;
		}

		/*
		 * insert before next
		 */
		dst->ml_prev = prev;
		dst->ml_next = next;
		next->ml_prev = dst;
		if (prev == NULL) {
			*list = dst;
		} else {
			prev->ml_next = dst;
		}
		goto add_done;
	}	/* end for */

	/*
	 * end of list, prev is valid and next is NULL
	 */
	prev->ml_next = dst;
	dst->ml_prev = prev;
	dst->ml_next = NULL;

add_done:

	if (retval != -1)
		*npgs += pages;

	return (retval);
}

/*
 * delete a span from the memscrub list
 * subtract from memscrub_phys_pages
 */
int
memscrub_delete_span(pfn_t pfn, pgcnt_t pages)
{
	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
	struct memlist *dst, *next;
	int retval = 0;

	mutex_enter(&memscrub_lock);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist Before", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
	cmn_err(CE_CONT, "memscrub_delete_span: 0x%llx 0x%llx\n",
	    address, bytes);
#endif /* MEMSCRUB_DEBUG */

	/*
	 * find struct memlist containing page
	 */
	for (next = memscrub_memlist; next != NULL; next = next->ml_next) {
		if ((address >= next->ml_address) &&
		    (address < next->ml_address + next->ml_size))
			break;
	}

	/*
	 * if start address not in list
	 */
	if (next == NULL) {
		retval = -1;
		goto delete_done;
	}

	/*
	 * error if size goes off end of this struct memlist
	 */
	if (address + bytes > next->ml_address + next->ml_size) {
		retval = -1;
		goto delete_done;
	}

	/*
	 * pages at beginning of struct memlist
	 */
	if (address == next->ml_address) {
		/*
		 * if start & size match, delete from list
		 */
		if (bytes == next->ml_size) {
			if (next == memscrub_memlist)
				memscrub_memlist = next->ml_next;
			if (next->ml_prev != NULL)
				next->ml_prev->ml_next = next->ml_next;
			if (next->ml_next != NULL)
				next->ml_next->ml_prev = next->ml_prev;

			kmem_free(next, sizeof (struct memlist));
		} else {
			/*
			 * increment start address by bytes
			 */
			next->ml_address += bytes;
			next->ml_size -= bytes;
		}
		goto delete_done;
	}

	/*
	 * pages at end of struct memlist
	 */
	if (address + bytes == next->ml_address + next->ml_size) {
		/*
		 * decrement size by bytes
		 */
		next->ml_size -= bytes;
		goto delete_done;
	}

	/*
	 * delete a span in the middle of the struct memlist
	 */
	{
		/*
		 * create a new struct memlist
		 */
		dst = (struct memlist *)
		    kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);

		if (dst == NULL) {
			retval = -1;
			goto delete_done;
		}

		/*
		 * existing struct memlist gets address
		 * and size up to pfn
		 */
		dst->ml_address = address + bytes;
		dst->ml_size =
		    (next->ml_address + next->ml_size) - dst->ml_address;
		next->ml_size = address - next->ml_address;

		/*
		 * new struct memlist gets address starting
		 * after pfn, until end
		 */

		/*
		 * link in new memlist after old
		 */
		dst->ml_next = next->ml_next;
		dst->ml_prev = next;

		if (next->ml_next != NULL)
			next->ml_next->ml_prev = dst;
		next->ml_next = dst;
	}

delete_done:
	if (retval != -1) {
		memscrub_phys_pages -= pages;
		if (memscrub_phys_pages == 0)
			disable_memscrub = 1;
	}

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist After", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
#endif /* MEMSCRUB_DEBUG */

	mutex_exit(&memscrub_lock);
	return (retval);
}

static void
memscrub_scan(uint_t blks, ms_paddr_t src)
{
	uint_t psz, bpp, pgsread;
	pfn_t pfn;
	ms_paddr_t pa;
	caddr_t va;
	on_trap_data_t otd;
	int scan_mmu_pagesize = 0;
	int retired_pages = 0;

	extern void memscrub_read(caddr_t src, uint_t blks);

	ASSERT(mutex_owned(&memscrub_lock));

	pgsread = 0;
	pa = src;

	if (memscrub_page_retire_span_list != NULL) {
		if (memscrub_page_retire_span_search(src)) {
			/* retired pages in current span */
			scan_mmu_pagesize = 1;
		}
	}

#ifdef MEMSCRUB_DEBUG
	cmn_err(CE_NOTE, "scan_mmu_pagesize = %d\n", scan_mmu_pagesize);
#endif /* MEMSCRUB_DEBUG */

	while (blks != 0) {
		/* Ensure the PA is properly aligned */
		if (((pa & MMU_PAGEMASK4M) == pa) &&
		    (blks >= MEMSCRUB_BPP4M)) {
			psz = MMU_PAGESIZE4M;
			bpp = MEMSCRUB_BPP4M;
		} else if (((pa & MMU_PAGEMASK512K) == pa) &&
		    (blks >= MEMSCRUB_BPP512K)) {
			psz = MMU_PAGESIZE512K;
			bpp = MEMSCRUB_BPP512K;
		} else if (((pa & MMU_PAGEMASK64K) == pa) &&
		    (blks >= MEMSCRUB_BPP64K)) {
			psz = MMU_PAGESIZE64K;
			bpp = MEMSCRUB_BPP64K;
		} else if ((pa & MMU_PAGEMASK) == pa) {
			psz = MMU_PAGESIZE;
			bpp = MEMSCRUB_BPP;
		} else {
			if (memscrub_verbose) {
				cmn_err(CE_NOTE, "Memory scrubber ignoring "
				    "non-page aligned block starting at 0x%"
				    PRIx64, src);
			}
			return;
		}
		if (blks < bpp)
			bpp = blks;

#ifdef MEMSCRUB_DEBUG
		cmn_err(CE_NOTE, "Going to run psz=%x, "
		    "bpp=%x pa=%llx\n", psz, bpp, pa);
#endif /* MEMSCRUB_DEBUG */

		/*
		 * MEMSCRUBBASE is a 4MB aligned page in the
		 * kernel so that we can quickly map the PA
		 * to a VA for the block loads performed in
		 * memscrub_read.
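		 * The mapping is created with HAT_LOAD_LOCK below and
		 * torn down with HAT_UNLOAD_UNLOCK once the chunk has
		 * been read.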
		 */
		pfn = mmu_btop(pa);
		va = (caddr_t)MEMSCRUBBASE;
		hat_devload(kas.a_hat, va, psz, pfn, PROT_READ,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);

		/*
		 * Can't allow the memscrubber to migrate across CPUs as
		 * we need to know whether CEEN is enabled for the current
		 * CPU to enable us to scrub the memory.  Don't use
		 * kpreempt_disable as the time we take to scan a span (even
		 * without cpu_check_ce having to manually cpu_check_block)
		 * is too long to hold a higher priority thread (eg, RT)
		 * off cpu.
		 */
		thread_affinity_set(curthread, CPU_CURRENT);

		/*
		 * Protect read scrub from async faults.  For now, we simply
		 * maintain a count of such faults caught.
		 */

		if (!scan_mmu_pagesize && !on_trap(&otd, OT_DATA_EC)) {
			memscrub_read(va, bpp);
			/*
			 * Check if CEs require logging
			 */
			cpu_check_ce(SCRUBBER_CEEN_CHECK,
			    (uint64_t)pa, va, psz);
			no_trap();
			thread_affinity_clear(curthread);
		} else {
			no_trap();
			thread_affinity_clear(curthread);

			/*
			 * Got an async error..
			 * Try rescanning it at MMU_PAGESIZE
			 * granularity if we were trying to
			 * read at a larger page size.
			 * This is to ensure we continue to
			 * scan the rest of the span.
			 * Alternatively, we scan at MMU_PAGESIZE
			 * granularity to avoid reading the memory of
			 * retired pages, when scan_mmu_pagesize is set.
			 */
			if (psz > MMU_PAGESIZE || scan_mmu_pagesize) {
				caddr_t vaddr = va;
				ms_paddr_t paddr = pa;
				int tmp = 0;
				for (; tmp < bpp; tmp += MEMSCRUB_BPP) {
					/* Don't scrub retired pages */
					if (page_retire_check(paddr, NULL)
					    == 0) {
						vaddr += MMU_PAGESIZE;
						paddr += MMU_PAGESIZE;
						retired_pages++;
						continue;
					}
					thread_affinity_set(curthread,
					    CPU_CURRENT);
					if (!on_trap(&otd, OT_DATA_EC)) {
						memscrub_read(vaddr,
						    MEMSCRUB_BPP);
						cpu_check_ce(
						    SCRUBBER_CEEN_CHECK,
						    (uint64_t)paddr, vaddr,
						    MMU_PAGESIZE);
						no_trap();
					} else {
						no_trap();
						MEMSCRUB_STAT_INC(errors_found);
					}
					thread_affinity_clear(curthread);
					vaddr += MMU_PAGESIZE;
					paddr += MMU_PAGESIZE;
				}
			}
		}
		hat_unload(kas.a_hat, va, psz, HAT_UNLOAD_UNLOCK);

		blks -= bpp;
		pa += psz;
		pgsread++;
	}

	/*
	 * If we just finished scrubbing MMU_PAGESIZE at a time but found
	 * no retired pages, delete the span from the global list.
	 */
	if (scan_mmu_pagesize && retired_pages == 0)
		memscrub_page_retire_span_delete(src);

	/*
	 * Encountered a CE/UE on a retired page during the memscrub read
	 * of the current span.  Add the span to the global list so we
	 * avoid reading it further.
	 */
	if (add_to_page_retire_list) {
		if (!memscrub_page_retire_span_search(src))
			memscrub_page_retire_span_add(src);
		add_to_page_retire_list = 0;
	}

	if (memscrub_verbose) {
		cmn_err(CE_NOTE, "Memory scrubber read 0x%x pages starting "
		    "at 0x%" PRIx64, pgsread, src);
	}
}

/*
 * Called by cpu_async_log_err() when a memscrub read causes a
 * CE/UE on a retired page.
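 * The flag set here is examined and cleared by memscrub_scan() under
 * the protection of memscrub_lock (see add_to_page_retire_list above).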
/*
 * Called by cpu_async_log_err() when a memscrub read causes a
 * CE/UE on a retired page.
 */
void
memscrub_induced_error(void)
{
	add_to_page_retire_list = 1;
}

/*
 * Called by page_retire() when toxic pages cannot be retired
 * immediately and are scheduled for retirement. The memscrubber stops
 * scrubbing them to avoid further CE/UEs.
 */
void
memscrub_notify(ms_paddr_t pa)
{
	mutex_enter(&memscrub_lock);
	if (!memscrub_page_retire_span_search(pa))
		memscrub_page_retire_span_add(pa);
	mutex_exit(&memscrub_lock);
}

/*
 * Called by memscrub_scan() and memscrub_notify().
 * pa: physical address of the span with the CE/UE; add it to the
 * global list.
 */
static void
memscrub_page_retire_span_add(ms_paddr_t pa)
{
	memscrub_page_retire_span_t *new_span;

	new_span = (memscrub_page_retire_span_t *)
	    kmem_zalloc(sizeof (memscrub_page_retire_span_t), KM_NOSLEEP);

	if (new_span == NULL) {
#ifdef MEMSCRUB_DEBUG
		cmn_err(CE_NOTE, "failed to allocate new span - span with"
		    " retired page(s) not tracked.\n");
#endif /* MEMSCRUB_DEBUG */
		return;
	}

	new_span->address = pa;
	new_span->next = memscrub_page_retire_span_list;
	memscrub_page_retire_span_list = new_span;
}

/*
 * Called by memscrub_scan().
 * pa: physical address of the span to be removed from the global list.
 */
static void
memscrub_page_retire_span_delete(ms_paddr_t pa)
{
	memscrub_page_retire_span_t *prev_span, *next_span;

	prev_span = memscrub_page_retire_span_list;
	next_span = memscrub_page_retire_span_list->next;

	if (pa == prev_span->address) {
		memscrub_page_retire_span_list = next_span;
		kmem_free(prev_span, sizeof (memscrub_page_retire_span_t));
		return;
	}

	while (next_span) {
		if (pa == next_span->address) {
			prev_span->next = next_span->next;
			kmem_free(next_span,
			    sizeof (memscrub_page_retire_span_t));
			return;
		}
		prev_span = next_span;
		next_span = next_span->next;
	}
}
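/*
 * Note: memscrub_page_retire_span_delete() dereferences the list head
 * without a NULL check, so it assumes a non-empty list. Its only caller,
 * memscrub_scan(), presumably invokes it after the span was found on the
 * list (scan_mmu_pagesize set), which makes that assumption safe.
 */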
/*
 * Called by memscrub_scan() and memscrub_notify().
 * pa: physical address of the span to be searched for in the global list.
 */
static int
memscrub_page_retire_span_search(ms_paddr_t pa)
{
	memscrub_page_retire_span_t *next_span =
	    memscrub_page_retire_span_list;

	while (next_span) {
		if (pa == next_span->address)
			return (1);
		next_span = next_span->next;
	}
	return (0);
}

/*
 * Called from new_memscrub() as a result of a memory delete.
 * Uses page_numtopp_nolock() to determine whether we still have a
 * valid PA.
 */
static void
memscrub_page_retire_span_list_update(void)
{
	memscrub_page_retire_span_t *prev, *cur, *next;

	if (memscrub_page_retire_span_list == NULL)
		return;

	prev = cur = memscrub_page_retire_span_list;
	next = cur->next;

	while (cur) {
		if (page_numtopp_nolock(mmu_btop(cur->address)) == NULL) {
			if (cur == memscrub_page_retire_span_list) {
				memscrub_page_retire_span_list = next;
				kmem_free(cur,
				    sizeof (memscrub_page_retire_span_t));
				prev = cur = memscrub_page_retire_span_list;
			} else {
				prev->next = cur->next;
				kmem_free(cur,
				    sizeof (memscrub_page_retire_span_t));
				cur = next;
			}
		} else {
			prev = cur;
			cur = next;
		}
		if (cur != NULL)
			next = cur->next;
	}
}
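#ifdef MEMSCRUB_DEBUG
/*
 * Debug-only sketch (hypothetical; not part of the original driver):
 * dump the retired-page span list. Assumes the caller holds
 * memscrub_lock, as new_memscrub() does around the list update above.
 */
static void
memscrub_page_retire_span_dump(void)
{
	memscrub_page_retire_span_t *span;

	for (span = memscrub_page_retire_span_list; span != NULL;
	    span = span->next) {
		cmn_err(CE_NOTE, "retired-page span at 0x%" PRIx64,
		    (uint64_t)span->address);
	}
}
#endif /* MEMSCRUB_DEBUG */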
/*
 * The memory add/delete callback mechanism does not pass in the
 * page ranges. The phys_install list has been updated though, so
 * create a new scrub list from it.
 */

static int
new_memscrub(int update_page_retire_list)
{
	struct memlist *src, *list, *old_list;
	uint_t npgs;

	/*
	 * copy phys_install to memscrub_memlist
	 */
	list = NULL;
	npgs = 0;
	memlist_read_lock();
	for (src = phys_install; src; src = src->ml_next) {
		if (memscrub_add_span_gen((pfn_t)(src->ml_address >> PAGESHIFT),
		    (pgcnt_t)(src->ml_size >> PAGESHIFT), &list, &npgs)) {
			memlist_read_unlock();
			while (list) {
				struct memlist *el;

				el = list;
				list = list->ml_next;
				kmem_free(el, sizeof (struct memlist));
			}
			return (-1);
		}
	}
	memlist_read_unlock();

	mutex_enter(&memscrub_lock);
	memscrub_phys_pages = npgs;
	old_list = memscrub_memlist;
	memscrub_memlist = list;

	if (update_page_retire_list)
		memscrub_page_retire_span_list_update();

	mutex_exit(&memscrub_lock);

	while (old_list) {
		struct memlist *el;

		el = old_list;
		old_list = old_list->ml_next;
		kmem_free(el, sizeof (struct memlist));
	}

	return (0);
}

/*ARGSUSED*/
static void
memscrub_mem_config_post_add(
	void *arg,
	pgcnt_t delta_pages)
{
	/*
	 * We increment pause_memscrub before entering new_memscrub(). This
	 * will force the memscrubber to sleep, allowing the DR callback
	 * thread to acquire memscrub_lock in new_memscrub(). The use of
	 * atomic_add_32() allows concurrent memory DR operations to use the
	 * callbacks safely.
	 */
	atomic_add_32(&pause_memscrub, 1);
	ASSERT(pause_memscrub != 0);

	/*
	 * "Don't care" if we are not scrubbing new memory.
	 */
	(void) new_memscrub(0);		/* retain page retire list */

	/* Restore the pause setting. */
	atomic_add_32(&pause_memscrub, -1);
}
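/*
 * Nothing is torn down before a delete: the scrub list is rebuilt only
 * after the delete completes (memscrub_mem_config_post_del() below),
 * presumably because reads of memory that is still installed remain
 * safe until it is actually disconnected.
 */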
/*ARGSUSED*/
static int
memscrub_mem_config_pre_del(
	void *arg,
	pgcnt_t delta_pages)
{
	/* Nothing to do. */
	return (0);
}

/*ARGSUSED*/
static void
memscrub_mem_config_post_del(
	void *arg,
	pgcnt_t delta_pages,
	int cancelled)
{
	/*
	 * We increment pause_memscrub before entering new_memscrub(). This
	 * will force the memscrubber to sleep, allowing the DR callback
	 * thread to acquire memscrub_lock in new_memscrub(). The use of
	 * atomic_add_32() allows concurrent memory DR operations to use the
	 * callbacks safely.
	 */
	atomic_add_32(&pause_memscrub, 1);
	ASSERT(pause_memscrub != 0);

	/*
	 * Must stop scrubbing deleted memory as it may be disconnected.
	 */
	if (new_memscrub(1)) {		/* update page retire list */
		disable_memscrub = 1;
	}

	/* Restore the pause setting. */
	atomic_add_32(&pause_memscrub, -1);
}

static kphysm_setup_vector_t memscrub_mem_config_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	memscrub_mem_config_post_add,
	memscrub_mem_config_pre_del,
	memscrub_mem_config_post_del,
};

static void
memscrub_init_mem_config()
{
	int ret;

	ret = kphysm_setup_func_register(&memscrub_mem_config_vec,
	    (void *)NULL);
	ASSERT(ret == 0);
}

static void
memscrub_uninit_mem_config()
{
	/* This call is OK if the register call was not done. */
	kphysm_setup_func_unregister(&memscrub_mem_config_vec, (void *)NULL);
}