/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2018, Joyent, Inc.
 * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
 * Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
 * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
27eda14cbcSMatt Macy */ 28eda14cbcSMatt Macy 29eda14cbcSMatt Macy #include <sys/spa.h> 30eda14cbcSMatt Macy #include <sys/zio.h> 31eda14cbcSMatt Macy #include <sys/spa_impl.h> 32eda14cbcSMatt Macy #include <sys/zio_compress.h> 33eda14cbcSMatt Macy #include <sys/zio_checksum.h> 34eda14cbcSMatt Macy #include <sys/zfs_context.h> 35eda14cbcSMatt Macy #include <sys/arc.h> 36eda14cbcSMatt Macy #include <sys/zfs_refcount.h> 37eda14cbcSMatt Macy #include <sys/vdev.h> 38eda14cbcSMatt Macy #include <sys/vdev_trim.h> 39eda14cbcSMatt Macy #include <sys/vdev_impl.h> 40eda14cbcSMatt Macy #include <sys/dsl_pool.h> 41eda14cbcSMatt Macy #include <sys/multilist.h> 42eda14cbcSMatt Macy #include <sys/abd.h> 43eda14cbcSMatt Macy #include <sys/zil.h> 44eda14cbcSMatt Macy #include <sys/fm/fs/zfs.h> 45eda14cbcSMatt Macy #include <sys/shrinker.h> 46eda14cbcSMatt Macy #include <sys/vmsystm.h> 47eda14cbcSMatt Macy #include <sys/zpl.h> 48eda14cbcSMatt Macy #include <linux/page_compat.h> 497877fdebSMatt Macy #include <linux/notifier.h> 507877fdebSMatt Macy #include <linux/memory.h> 51ce4dcb97SMartin Matuska #include <linux/version.h> 52eda14cbcSMatt Macy #include <sys/callb.h> 53eda14cbcSMatt Macy #include <sys/kstat.h> 54eda14cbcSMatt Macy #include <sys/zthr.h> 55eda14cbcSMatt Macy #include <zfs_fletcher.h> 56eda14cbcSMatt Macy #include <sys/arc_impl.h> 57eda14cbcSMatt Macy #include <sys/trace_zfs.h> 58eda14cbcSMatt Macy #include <sys/aggsum.h> 59eda14cbcSMatt Macy 60eda14cbcSMatt Macy /* 61eda14cbcSMatt Macy * This is a limit on how many pages the ARC shrinker makes available for 62eda14cbcSMatt Macy * eviction in response to one page allocation attempt. Note that in 63eda14cbcSMatt Macy * practice, the kernel's shrinker can ask us to evict up to about 4x this 64eda14cbcSMatt Macy * for one allocation attempt. 
65eda14cbcSMatt Macy * 66*dd215568SMartin Matuska * For example a value of 10,000 (in practice, 160MB per allocation attempt 67eda14cbcSMatt Macy * with 4K pages) limits the amount of time spent attempting to reclaim ARC 68eda14cbcSMatt Macy * memory to less than 100ms per allocation attempt, even with a small 69eda14cbcSMatt Macy * average compressed block size of ~8KB. 70eda14cbcSMatt Macy * 71eda14cbcSMatt Macy * See also the comment in arc_shrinker_count(). 72eda14cbcSMatt Macy * Set to 0 to disable limit. 73eda14cbcSMatt Macy */ 74*dd215568SMartin Matuska static int zfs_arc_shrinker_limit = 0; 75ce4dcb97SMartin Matuska 76ce4dcb97SMartin Matuska /* 77ce4dcb97SMartin Matuska * Relative cost of ARC eviction, AKA number of seeks needed to restore evicted 78ce4dcb97SMartin Matuska * page. Bigger values make ARC more precious and evictions smaller comparing 79ce4dcb97SMartin Matuska * to other kernel subsystems. Value of 4 means parity with page cache, 80ce4dcb97SMartin Matuska * according to my reading of kernel's do_shrink_slab() and other code. 81ce4dcb97SMartin Matuska */ 82ce4dcb97SMartin Matuska static int zfs_arc_shrinker_seeks = DEFAULT_SEEKS; 83eda14cbcSMatt Macy 847877fdebSMatt Macy #ifdef CONFIG_MEMORY_HOTPLUG 857877fdebSMatt Macy static struct notifier_block arc_hotplug_callback_mem_nb; 867877fdebSMatt Macy #endif 87eda14cbcSMatt Macy 88eda14cbcSMatt Macy /* 89eda14cbcSMatt Macy * Return a default max arc size based on the amount of physical memory. 906c1e79dfSMartin Matuska * This may be overridden by tuning the zfs_arc_max module parameter. 
91eda14cbcSMatt Macy */ 92eda14cbcSMatt Macy uint64_t 93eda14cbcSMatt Macy arc_default_max(uint64_t min, uint64_t allmem) 94eda14cbcSMatt Macy { 956c1e79dfSMartin Matuska uint64_t size; 966c1e79dfSMartin Matuska 976c1e79dfSMartin Matuska if (allmem >= 1 << 30) 986c1e79dfSMartin Matuska size = allmem - (1 << 30); 996c1e79dfSMartin Matuska else 1006c1e79dfSMartin Matuska size = min; 1016c1e79dfSMartin Matuska return (MAX(allmem * 5 / 8, size)); 102eda14cbcSMatt Macy } 103eda14cbcSMatt Macy 104eda14cbcSMatt Macy /* 105eda14cbcSMatt Macy * Return maximum amount of memory that we could possibly use. Reduced 106eda14cbcSMatt Macy * to half of all memory in user space which is primarily used for testing. 107eda14cbcSMatt Macy */ 108eda14cbcSMatt Macy uint64_t 109eda14cbcSMatt Macy arc_all_memory(void) 110eda14cbcSMatt Macy { 111eda14cbcSMatt Macy #ifdef CONFIG_HIGHMEM 112eda14cbcSMatt Macy return (ptob(zfs_totalram_pages - zfs_totalhigh_pages)); 113eda14cbcSMatt Macy #else 114eda14cbcSMatt Macy return (ptob(zfs_totalram_pages)); 115eda14cbcSMatt Macy #endif /* CONFIG_HIGHMEM */ 116eda14cbcSMatt Macy } 117eda14cbcSMatt Macy 118eda14cbcSMatt Macy /* 119eda14cbcSMatt Macy * Return the amount of memory that is considered free. In user space 120eda14cbcSMatt Macy * which is primarily used for testing we pretend that free memory ranges 121eda14cbcSMatt Macy * from 0-20% of all memory. 
122eda14cbcSMatt Macy */ 123eda14cbcSMatt Macy uint64_t 124eda14cbcSMatt Macy arc_free_memory(void) 125eda14cbcSMatt Macy { 126eda14cbcSMatt Macy #ifdef CONFIG_HIGHMEM 127eda14cbcSMatt Macy struct sysinfo si; 128eda14cbcSMatt Macy si_meminfo(&si); 129eda14cbcSMatt Macy return (ptob(si.freeram - si.freehigh)); 130eda14cbcSMatt Macy #else 131eda14cbcSMatt Macy return (ptob(nr_free_pages() + 132eac7052fSMatt Macy nr_inactive_file_pages())); 133eda14cbcSMatt Macy #endif /* CONFIG_HIGHMEM */ 134eda14cbcSMatt Macy } 135eda14cbcSMatt Macy 136eda14cbcSMatt Macy /* 137eda14cbcSMatt Macy * Return the amount of memory that can be consumed before reclaim will be 138eda14cbcSMatt Macy * needed. Positive if there is sufficient free memory, negative indicates 139eda14cbcSMatt Macy * the amount of memory that needs to be freed up. 140eda14cbcSMatt Macy */ 141eda14cbcSMatt Macy int64_t 142eda14cbcSMatt Macy arc_available_memory(void) 143eda14cbcSMatt Macy { 144eda14cbcSMatt Macy return (arc_free_memory() - arc_sys_free); 145eda14cbcSMatt Macy } 146eda14cbcSMatt Macy 147eda14cbcSMatt Macy static uint64_t 148eda14cbcSMatt Macy arc_evictable_memory(void) 149eda14cbcSMatt Macy { 1500d8fe237SMartin Matuska int64_t asize = aggsum_value(&arc_sums.arcstat_size); 151eda14cbcSMatt Macy uint64_t arc_clean = 152eda14cbcSMatt Macy zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_DATA]) + 153eda14cbcSMatt Macy zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA]) + 154eda14cbcSMatt Macy zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_DATA]) + 155eda14cbcSMatt Macy zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_METADATA]); 156eda14cbcSMatt Macy uint64_t arc_dirty = MAX((int64_t)asize - (int64_t)arc_clean, 0); 157eda14cbcSMatt Macy 158eda14cbcSMatt Macy /* 159eda14cbcSMatt Macy * Scale reported evictable memory in proportion to page cache, cap 160eda14cbcSMatt Macy * at specified min/max. 
161eda14cbcSMatt Macy */ 162eda14cbcSMatt Macy uint64_t min = (ptob(nr_file_pages()) / 100) * zfs_arc_pc_percent; 163eda14cbcSMatt Macy min = MAX(arc_c_min, MIN(arc_c_max, min)); 164eda14cbcSMatt Macy 165eda14cbcSMatt Macy if (arc_dirty >= min) 166eda14cbcSMatt Macy return (arc_clean); 167eda14cbcSMatt Macy 168eda14cbcSMatt Macy return (MAX((int64_t)asize - (int64_t)min, 0)); 169eda14cbcSMatt Macy } 170eda14cbcSMatt Macy 171eda14cbcSMatt Macy /* 172eda14cbcSMatt Macy * The _count() function returns the number of free-able objects. 173eda14cbcSMatt Macy * The _scan() function returns the number of objects that were freed. 174eda14cbcSMatt Macy */ 175eda14cbcSMatt Macy static unsigned long 176eda14cbcSMatt Macy arc_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) 177eda14cbcSMatt Macy { 178eda14cbcSMatt Macy /* 179ce4dcb97SMartin Matuska * The kernel's shrinker code may not understand how many pages the 180eda14cbcSMatt Macy * ARC's callback actually frees, so it may ask the ARC to shrink a 181eda14cbcSMatt Macy * lot for one page allocation. This is problematic because it may 182eda14cbcSMatt Macy * take a long time, thus delaying the page allocation, and because 183eda14cbcSMatt Macy * it may force the ARC to unnecessarily shrink very small. 184eda14cbcSMatt Macy * 185eda14cbcSMatt Macy * Therefore, we limit the amount of data that we say is evictable, 186eda14cbcSMatt Macy * which limits the amount that the shrinker will ask us to evict for 187eda14cbcSMatt Macy * one page allocation attempt. 188eda14cbcSMatt Macy * 189eda14cbcSMatt Macy * In practice, we may be asked to shrink 4x the limit to satisfy one 190eda14cbcSMatt Macy * page allocation, before the kernel's shrinker code gives up on us. 191eda14cbcSMatt Macy * When that happens, we rely on the kernel code to find the pages 192eda14cbcSMatt Macy * that we freed before invoking the OOM killer. 
This happens in 193eda14cbcSMatt Macy * __alloc_pages_slowpath(), which retries and finds the pages we 194eda14cbcSMatt Macy * freed when it calls get_page_from_freelist(). 195eda14cbcSMatt Macy * 196eda14cbcSMatt Macy * See also the comment above zfs_arc_shrinker_limit. 197eda14cbcSMatt Macy */ 198ce4dcb97SMartin Matuska int64_t can_free = btop(arc_evictable_memory()); 199e2df9bb4SMartin Matuska if (current_is_kswapd() && zfs_arc_shrinker_limit) 200e2df9bb4SMartin Matuska can_free = MIN(can_free, zfs_arc_shrinker_limit); 201e2df9bb4SMartin Matuska return (can_free); 202eda14cbcSMatt Macy } 203eda14cbcSMatt Macy 204eda14cbcSMatt Macy static unsigned long 205eda14cbcSMatt Macy arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) 206eda14cbcSMatt Macy { 207eda14cbcSMatt Macy /* The arc is considered warm once reclaim has occurred */ 208eda14cbcSMatt Macy if (unlikely(arc_warm == B_FALSE)) 209eda14cbcSMatt Macy arc_warm = B_TRUE; 210eda14cbcSMatt Macy 211eda14cbcSMatt Macy /* 212eda14cbcSMatt Macy * We are experiencing memory pressure which the arc_evict_zthr was 213ce4dcb97SMartin Matuska * unable to keep up with. Set arc_no_grow to briefly pause ARC 214eda14cbcSMatt Macy * growth to avoid compounding the memory pressure. 215eda14cbcSMatt Macy */ 216eda14cbcSMatt Macy arc_no_grow = B_TRUE; 217eda14cbcSMatt Macy 218eda14cbcSMatt Macy /* 219ce4dcb97SMartin Matuska * Evict the requested number of pages by reducing arc_c and waiting 220ce4dcb97SMartin Matuska * for the requested amount of data to be evicted. To avoid deadlock 221ce4dcb97SMartin Matuska * do not wait for eviction if we may be called from ZFS itself (see 222ce4dcb97SMartin Matuska * kmem_flags_convert() removing __GFP_FS). It may cause excessive 223ce4dcb97SMartin Matuska * eviction later if many evictions are accumulated, but just skipping 224ce4dcb97SMartin Matuska * the eviction is not good either if most of memory is used by ARC. 
225ce4dcb97SMartin Matuska */ 226ce4dcb97SMartin Matuska uint64_t to_free = arc_reduce_target_size(ptob(sc->nr_to_scan)); 227ce4dcb97SMartin Matuska if (sc->gfp_mask & __GFP_FS) 228ce4dcb97SMartin Matuska arc_wait_for_eviction(to_free, B_FALSE, B_FALSE); 229ce4dcb97SMartin Matuska if (current->reclaim_state != NULL) 230ce4dcb97SMartin Matuska #ifdef HAVE_RECLAIM_STATE_RECLAIMED 231ce4dcb97SMartin Matuska current->reclaim_state->reclaimed += btop(to_free); 232ce4dcb97SMartin Matuska #else 233ce4dcb97SMartin Matuska current->reclaim_state->reclaimed_slab += btop(to_free); 234ce4dcb97SMartin Matuska #endif 235ce4dcb97SMartin Matuska 236ce4dcb97SMartin Matuska /* 237eda14cbcSMatt Macy * When direct reclaim is observed it usually indicates a rapid 238eda14cbcSMatt Macy * increase in memory pressure. This occurs because the kswapd 239eda14cbcSMatt Macy * threads were unable to asynchronously keep enough free memory 240eda14cbcSMatt Macy * available. 241eda14cbcSMatt Macy */ 242eda14cbcSMatt Macy if (current_is_kswapd()) { 243eda14cbcSMatt Macy ARCSTAT_BUMP(arcstat_memory_indirect_count); 244eda14cbcSMatt Macy } else { 245eda14cbcSMatt Macy ARCSTAT_BUMP(arcstat_memory_direct_count); 246eda14cbcSMatt Macy } 247eda14cbcSMatt Macy 248ce4dcb97SMartin Matuska return (btop(to_free)); 249eda14cbcSMatt Macy } 250eda14cbcSMatt Macy 251b356da80SMartin Matuska static struct shrinker *arc_shrinker = NULL; 252eda14cbcSMatt Macy 253eda14cbcSMatt Macy int 254eda14cbcSMatt Macy arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg) 255eda14cbcSMatt Macy { 256eda14cbcSMatt Macy uint64_t free_memory = arc_free_memory(); 257eda14cbcSMatt Macy 258eda14cbcSMatt Macy if (free_memory > arc_all_memory() * arc_lotsfree_percent / 100) 259eda14cbcSMatt Macy return (0); 260eda14cbcSMatt Macy 261eda14cbcSMatt Macy if (txg > spa->spa_lowmem_last_txg) { 262eda14cbcSMatt Macy spa->spa_lowmem_last_txg = txg; 263eda14cbcSMatt Macy spa->spa_lowmem_page_load = 0; 264eda14cbcSMatt Macy } 
265eda14cbcSMatt Macy /* 266eda14cbcSMatt Macy * If we are in pageout, we know that memory is already tight, 267eda14cbcSMatt Macy * the arc is already going to be evicting, so we just want to 268eda14cbcSMatt Macy * continue to let page writes occur as quickly as possible. 269eda14cbcSMatt Macy */ 270eda14cbcSMatt Macy if (current_is_kswapd()) { 271eda14cbcSMatt Macy if (spa->spa_lowmem_page_load > 272eda14cbcSMatt Macy MAX(arc_sys_free / 4, free_memory) / 4) { 273eda14cbcSMatt Macy DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim); 274eda14cbcSMatt Macy return (SET_ERROR(ERESTART)); 275eda14cbcSMatt Macy } 276eda14cbcSMatt Macy /* Note: reserve is inflated, so we deflate */ 277eda14cbcSMatt Macy atomic_add_64(&spa->spa_lowmem_page_load, reserve / 8); 278eda14cbcSMatt Macy return (0); 279eda14cbcSMatt Macy } else if (spa->spa_lowmem_page_load > 0 && arc_reclaim_needed()) { 280eda14cbcSMatt Macy /* memory is low, delay before restarting */ 281eda14cbcSMatt Macy ARCSTAT_INCR(arcstat_memory_throttle_count, 1); 282eda14cbcSMatt Macy DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim); 283eda14cbcSMatt Macy return (SET_ERROR(EAGAIN)); 284eda14cbcSMatt Macy } 285eda14cbcSMatt Macy spa->spa_lowmem_page_load = 0; 286eda14cbcSMatt Macy return (0); 287eda14cbcSMatt Macy } 288eda14cbcSMatt Macy 2897877fdebSMatt Macy static void 2907877fdebSMatt Macy arc_set_sys_free(uint64_t allmem) 291eda14cbcSMatt Macy { 292eda14cbcSMatt Macy /* 293eda14cbcSMatt Macy * The ARC tries to keep at least this much memory available for the 294eda14cbcSMatt Macy * system. This gives the ARC time to shrink in response to memory 295eda14cbcSMatt Macy * pressure, before running completely out of memory and invoking the 296eda14cbcSMatt Macy * direct-reclaim ARC shrinker. 
297eda14cbcSMatt Macy * 298eda14cbcSMatt Macy * This should be more than twice high_wmark_pages(), so that 299eda14cbcSMatt Macy * arc_wait_for_eviction() will wait until at least the 300eda14cbcSMatt Macy * high_wmark_pages() are free (see arc_evict_state_impl()). 301eda14cbcSMatt Macy * 302ce4dcb97SMartin Matuska * Note: If concurrent allocations consume these pages, there may 303eda14cbcSMatt Macy * still be insufficient free pages, and the OOM killer takes action. 304eda14cbcSMatt Macy * 305eda14cbcSMatt Macy * By setting arc_sys_free large enough, and having 306eda14cbcSMatt Macy * arc_wait_for_eviction() wait until there is at least arc_sys_free/2 307eda14cbcSMatt Macy * free memory, it is much less likely that concurrent allocations can 308eda14cbcSMatt Macy * consume all the memory that was evicted before checking for 309eda14cbcSMatt Macy * OOM. 310eda14cbcSMatt Macy * 311eda14cbcSMatt Macy * It's hard to iterate the zones from a linux kernel module, which 312eda14cbcSMatt Macy * makes it difficult to determine the watermark dynamically. Instead 313eda14cbcSMatt Macy * we compute the maximum high watermark for this system, based 314ce4dcb97SMartin Matuska * on the amount of memory, using the same method as the kernel uses 315ce4dcb97SMartin Matuska * to calculate its internal `min_free_kbytes` variable. See 316ce4dcb97SMartin Matuska * torvalds/linux@ee8eb9a5fe86 for the change in the upper clamp value 317ce4dcb97SMartin Matuska * from 64M to 256M. 318eda14cbcSMatt Macy */ 319eda14cbcSMatt Macy 320eda14cbcSMatt Macy /* 321eda14cbcSMatt Macy * Base wmark_low is 4 * the square root of Kbytes of RAM. 322eda14cbcSMatt Macy */ 323ce4dcb97SMartin Matuska long wmark = int_sqrt(allmem / 1024 * 16) * 1024; 324eda14cbcSMatt Macy 325eda14cbcSMatt Macy /* 326ce4dcb97SMartin Matuska * Clamp to between 128K and 256/64MB. 
327eda14cbcSMatt Macy */ 328eda14cbcSMatt Macy wmark = MAX(wmark, 128 * 1024); 329ce4dcb97SMartin Matuska #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0) 330ce4dcb97SMartin Matuska wmark = MIN(wmark, 256 * 1024 * 1024); 331ce4dcb97SMartin Matuska #else 332eda14cbcSMatt Macy wmark = MIN(wmark, 64 * 1024 * 1024); 333ce4dcb97SMartin Matuska #endif 334eda14cbcSMatt Macy 335eda14cbcSMatt Macy /* 336eda14cbcSMatt Macy * watermark_boost can increase the wmark by up to 150%. 337eda14cbcSMatt Macy */ 338eda14cbcSMatt Macy wmark += wmark * 150 / 100; 339eda14cbcSMatt Macy 340eda14cbcSMatt Macy /* 341eda14cbcSMatt Macy * arc_sys_free needs to be more than 2x the watermark, because 342eda14cbcSMatt Macy * arc_wait_for_eviction() waits for half of arc_sys_free. Bump this up 343eda14cbcSMatt Macy * to 3x to ensure we're above it. 344eda14cbcSMatt Macy */ 345eda14cbcSMatt Macy arc_sys_free = wmark * 3 + allmem / 32; 346eda14cbcSMatt Macy } 347eda14cbcSMatt Macy 348eda14cbcSMatt Macy void 3497877fdebSMatt Macy arc_lowmem_init(void) 3507877fdebSMatt Macy { 3517877fdebSMatt Macy uint64_t allmem = arc_all_memory(); 3527877fdebSMatt Macy 3537877fdebSMatt Macy /* 3547877fdebSMatt Macy * Register a shrinker to support synchronous (direct) memory 3557877fdebSMatt Macy * reclaim from the arc. This is done to prevent kswapd from 3567877fdebSMatt Macy * swapping out pages when it is preferable to shrink the arc. 
3577877fdebSMatt Macy */ 358b356da80SMartin Matuska arc_shrinker = spl_register_shrinker("zfs-arc-shrinker", 359ce4dcb97SMartin Matuska arc_shrinker_count, arc_shrinker_scan, zfs_arc_shrinker_seeks); 360b356da80SMartin Matuska VERIFY(arc_shrinker); 361b356da80SMartin Matuska 3627877fdebSMatt Macy arc_set_sys_free(allmem); 3637877fdebSMatt Macy } 3647877fdebSMatt Macy 3657877fdebSMatt Macy void 366eda14cbcSMatt Macy arc_lowmem_fini(void) 367eda14cbcSMatt Macy { 368b356da80SMartin Matuska spl_unregister_shrinker(arc_shrinker); 369b356da80SMartin Matuska arc_shrinker = NULL; 370eda14cbcSMatt Macy } 371eda14cbcSMatt Macy 372eda14cbcSMatt Macy int 373dbd5678dSMartin Matuska param_set_arc_u64(const char *buf, zfs_kernel_param_t *kp) 374eda14cbcSMatt Macy { 375eda14cbcSMatt Macy int error; 376eda14cbcSMatt Macy 377dbd5678dSMartin Matuska error = spl_param_set_u64(buf, kp); 378eda14cbcSMatt Macy if (error < 0) 379eda14cbcSMatt Macy return (SET_ERROR(error)); 380eda14cbcSMatt Macy 381eda14cbcSMatt Macy arc_tuning_update(B_TRUE); 382eda14cbcSMatt Macy 383eda14cbcSMatt Macy return (0); 384eda14cbcSMatt Macy } 385eda14cbcSMatt Macy 386eda14cbcSMatt Macy int 3872faf504dSMartin Matuska param_set_arc_min(const char *buf, zfs_kernel_param_t *kp) 3882faf504dSMartin Matuska { 389dbd5678dSMartin Matuska return (param_set_arc_u64(buf, kp)); 3902faf504dSMartin Matuska } 3912faf504dSMartin Matuska 3922faf504dSMartin Matuska int 3932faf504dSMartin Matuska param_set_arc_max(const char *buf, zfs_kernel_param_t *kp) 3942faf504dSMartin Matuska { 395dbd5678dSMartin Matuska return (param_set_arc_u64(buf, kp)); 3962faf504dSMartin Matuska } 3972faf504dSMartin Matuska 3982faf504dSMartin Matuska int 399eda14cbcSMatt Macy param_set_arc_int(const char *buf, zfs_kernel_param_t *kp) 400eda14cbcSMatt Macy { 401eda14cbcSMatt Macy int error; 402eda14cbcSMatt Macy 403eda14cbcSMatt Macy error = param_set_int(buf, kp); 404eda14cbcSMatt Macy if (error < 0) 405eda14cbcSMatt Macy return (SET_ERROR(error)); 
406eda14cbcSMatt Macy 407eda14cbcSMatt Macy arc_tuning_update(B_TRUE); 408eda14cbcSMatt Macy 409eda14cbcSMatt Macy return (0); 410eda14cbcSMatt Macy } 4117877fdebSMatt Macy 4127877fdebSMatt Macy #ifdef CONFIG_MEMORY_HOTPLUG 4137877fdebSMatt Macy static int 4147877fdebSMatt Macy arc_hotplug_callback(struct notifier_block *self, unsigned long action, 4157877fdebSMatt Macy void *arg) 4167877fdebSMatt Macy { 417e92ffd9bSMartin Matuska (void) self, (void) arg; 4187877fdebSMatt Macy uint64_t allmem = arc_all_memory(); 4197877fdebSMatt Macy if (action != MEM_ONLINE) 4207877fdebSMatt Macy return (NOTIFY_OK); 4217877fdebSMatt Macy 4227877fdebSMatt Macy arc_set_limits(allmem); 4237877fdebSMatt Macy 4247877fdebSMatt Macy #ifdef __LP64__ 4257877fdebSMatt Macy if (zfs_dirty_data_max_max == 0) 4267877fdebSMatt Macy zfs_dirty_data_max_max = MIN(4ULL * 1024 * 1024 * 1024, 4277877fdebSMatt Macy allmem * zfs_dirty_data_max_max_percent / 100); 4287877fdebSMatt Macy #else 4297877fdebSMatt Macy if (zfs_dirty_data_max_max == 0) 4307877fdebSMatt Macy zfs_dirty_data_max_max = MIN(1ULL * 1024 * 1024 * 1024, 4317877fdebSMatt Macy allmem * zfs_dirty_data_max_max_percent / 100); 4327877fdebSMatt Macy #endif 4337877fdebSMatt Macy 4347877fdebSMatt Macy arc_set_sys_free(allmem); 4357877fdebSMatt Macy return (NOTIFY_OK); 4367877fdebSMatt Macy } 4377877fdebSMatt Macy #endif 4387877fdebSMatt Macy 4397877fdebSMatt Macy void 4407877fdebSMatt Macy arc_register_hotplug(void) 4417877fdebSMatt Macy { 4427877fdebSMatt Macy #ifdef CONFIG_MEMORY_HOTPLUG 4437877fdebSMatt Macy arc_hotplug_callback_mem_nb.notifier_call = arc_hotplug_callback; 4447877fdebSMatt Macy /* There is no significance to the value 100 */ 4457877fdebSMatt Macy arc_hotplug_callback_mem_nb.priority = 100; 4467877fdebSMatt Macy register_memory_notifier(&arc_hotplug_callback_mem_nb); 4477877fdebSMatt Macy #endif 4487877fdebSMatt Macy } 4497877fdebSMatt Macy 4507877fdebSMatt Macy void 4517877fdebSMatt Macy arc_unregister_hotplug(void) 
4527877fdebSMatt Macy { 4537877fdebSMatt Macy #ifdef CONFIG_MEMORY_HOTPLUG 4547877fdebSMatt Macy unregister_memory_notifier(&arc_hotplug_callback_mem_nb); 4557877fdebSMatt Macy #endif 4567877fdebSMatt Macy } 457eda14cbcSMatt Macy 458eda14cbcSMatt Macy ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW, 459eda14cbcSMatt Macy "Limit on number of pages that ARC shrinker can reclaim at once"); 460ce4dcb97SMartin Matuska ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_seeks, INT, ZMOD_RD, 461ce4dcb97SMartin Matuska "Relative cost of ARC eviction vs other kernel subsystems"); 462