/*	$NetBSD: uvm_pgflcache.c,v 1.6 2020/10/18 18:31:31 chs Exp $	*/

/*-
 * Copyright (c) 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_pgflcache.c: page freelist cache.
 *
 * This implements a tiny per-CPU cache of pages that sits between the main
 * page allocator and the freelists.  By allocating and freeing pages in
 * batches, it reduces freelist contention by an order of magnitude.
 *
 * The cache can be paused & resumed at runtime so that UVM_HOTPLUG,
 * uvm_pglistalloc() and uvm_page_redim() can have a consistent view of the
 * world.  On systems with one CPU per physical package (e.g. a uniprocessor)
 * the cache is not enabled.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pgflcache.c,v 1.6 2020/10/18 18:31:31 chs Exp $");

#include "opt_uvm.h"
#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sched.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/xcall.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pglist.h>
#include <uvm/uvm_pgflcache.h>

/* There is no point doing any of this on a uniprocessor. */
#ifdef MULTIPROCESSOR

/*
 * MAXPGS - maximum pages per color, per bucket.
 * FILLPGS - number of pages to allocate at once, per color, per bucket.
 *
 * Why the chosen values:
 *
 * (1) In 2019, an average Intel system has 4kB pages and 8x L2 cache
 * colors.  We make the assumption that most of the time allocation activity
 * will be centered around one UVM freelist, so most of the time there will
 * be no more than 224kB worth of cached pages per-CPU.  That's tiny, but
 * enough to hugely reduce contention on the freelist locks, and give us a
 * small pool of pages which if we're very lucky may have some L1/L2 cache
 * locality, and do so without subtracting too much from the L2/L3 cache
 * benefits of having per-package free lists in the page allocator.
 *
 * (2) With the chosen values on _LP64, the data structure for each color
 * takes up a single cache line (64 bytes), giving this very low overhead
 * even in the "miss" case.
 *
 * (3) We don't want to cause too much pressure by hiding away memory that
 * could otherwise be put to good use.
 */
#define	MAXPGS		7
#define	FILLPGS		6

/* Variable size, according to # colors. */
struct pgflcache {
	struct pccolor {
		intptr_t		count;
		struct vm_page		*pages[MAXPGS];
	} color[1];
};
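/*
 * Worked arithmetic for rationales (1) and (2) above, as an illustrative
 * aside: with 4kB pages, 8 colors and MAXPGS (7) pages per color, one
 * freelist caches at most 7 * 8 * 4kB = 224kB per CPU.  On _LP64,
 * sizeof(struct pccolor) is 8 (intptr_t) + 7 * 8 (page pointers) = 64
 * bytes, i.e. a single cache line.  The compile-time check below merely
 * documents that expectation.
 */
#ifdef _LP64
__CTASSERT(sizeof(struct pccolor) == 64);
#endif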
static kmutex_t		uvm_pgflcache_lock;
static int		uvm_pgflcache_sem;

/*
 * uvm_pgflcache_fill: fill specified freelist/color from global list
 *
 * => must be called at IPL_VM
 * => must be called with given bucket lock held
 * => must only fill from the correct bucket for this CPU
 */

void
uvm_pgflcache_fill(struct uvm_cpu *ucpu, int fl, int b, int c)
{
	struct pgflbucket *pgb;
	struct pgflcache *pc;
	struct pccolor *pcc;
	struct pgflist *head;
	struct vm_page *pg;
	int count;

	KASSERT(mutex_owned(&uvm_freelist_locks[b].lock));
	KASSERT(ucpu->pgflbucket == b);

	/* If caching is off, then bail out. */
	if (__predict_false((pc = ucpu->pgflcache[fl]) == NULL)) {
		return;
	}

	/* Fill only to the limit. */
	pcc = &pc->color[c];
	pgb = uvm.page_free[fl].pgfl_buckets[b];
	head = &pgb->pgb_colors[c];
	if (pcc->count >= FILLPGS) {
		return;
	}

	/* Pull pages from the bucket until it's empty, or we are full. */
	count = pcc->count;
	pg = LIST_FIRST(head);
	while (__predict_true(pg != NULL && count < FILLPGS)) {
		KASSERT(pg->flags & PG_FREE);
		KASSERT(uvm_page_get_bucket(pg) == b);
		pcc->pages[count++] = pg;
		pg = LIST_NEXT(pg, pageq.list);
	}

	/* Violate LIST abstraction to remove all pages at once. */
	head->lh_first = pg;
	if (__predict_true(pg != NULL)) {
		pg->pageq.list.le_prev = &head->lh_first;
	}
	pgb->pgb_nfree -= (count - pcc->count);
	CPU_COUNT(CPU_COUNT_FREEPAGES, -(count - pcc->count));
	pcc->count = count;
}

/*
 * uvm_pgflcache_spill: spill specified freelist/color to global list
 *
 * => must be called at IPL_VM
 * => mark __noinline so we don't pull it into uvm_pgflcache_free()
 */

static void __noinline
uvm_pgflcache_spill(struct uvm_cpu *ucpu, int fl, int c)
{
	struct pgflbucket *pgb;
	struct pgfreelist *pgfl;
	struct pgflcache *pc;
	struct pccolor *pcc;
	struct pgflist *head;
	kmutex_t *lock;
	int b, adj;

	pc = ucpu->pgflcache[fl];
	pcc = &pc->color[c];
	pgfl = &uvm.page_free[fl];
	b = ucpu->pgflbucket;
	pgb = pgfl->pgfl_buckets[b];
	head = &pgb->pgb_colors[c];
	lock = &uvm_freelist_locks[b].lock;

	mutex_spin_enter(lock);
	for (adj = pcc->count; pcc->count != 0;) {
		pcc->count--;
		KASSERT(pcc->pages[pcc->count] != NULL);
		KASSERT(pcc->pages[pcc->count]->flags & PG_FREE);
		LIST_INSERT_HEAD(head, pcc->pages[pcc->count], pageq.list);
	}
	pgb->pgb_nfree += adj;
	CPU_COUNT(CPU_COUNT_FREEPAGES, adj);
	mutex_spin_exit(lock);
}
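/*
 * Illustrative sketch of how a caller such as the page allocator might
 * combine the routines in this file; the function name below is
 * hypothetical and the real fast path lives in uvm_page.c.  Both calls
 * must be made at IPL_VM, and b must be this CPU's bucket
 * (ucpu->pgflbucket):
 *
 *	struct vm_page *
 *	example_pgfl_alloc(struct uvm_cpu *ucpu, int fl, int b, int c)
 *	{
 *		struct vm_page *pg;
 *
 *		// Fast path: per-CPU cache, no freelist lock taken.
 *		if ((pg = uvm_pgflcache_alloc(ucpu, fl, c)) != NULL)
 *			return pg;
 *
 *		// Miss: take the bucket lock once, refill in batch, retry.
 *		mutex_spin_enter(&uvm_freelist_locks[b].lock);
 *		uvm_pgflcache_fill(ucpu, fl, b, c);
 *		mutex_spin_exit(&uvm_freelist_locks[b].lock);
 *		return uvm_pgflcache_alloc(ucpu, fl, c);
 *	}
 */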
/*
 * uvm_pgflcache_alloc: try to allocate a cached page.
 *
 * => must be called at IPL_VM
 * => allocate only from the given freelist and given page color
 */

struct vm_page *
uvm_pgflcache_alloc(struct uvm_cpu *ucpu, int fl, int c)
{
	struct pgflcache *pc;
	struct pccolor *pcc;
	struct vm_page *pg;

	/* If caching is off, then bail out. */
	if (__predict_false((pc = ucpu->pgflcache[fl]) == NULL)) {
		return NULL;
	}

	/* Very simple: if we have a page then return it. */
	pcc = &pc->color[c];
	if (__predict_false(pcc->count == 0)) {
		return NULL;
	}
	pg = pcc->pages[--(pcc->count)];
	KASSERT(pg != NULL);
	KASSERT(pg->flags == PG_FREE);
	KASSERT(uvm_page_get_freelist(pg) == fl);
	KASSERT(uvm_page_get_bucket(pg) == ucpu->pgflbucket);
	pg->flags = PG_BUSY | PG_CLEAN | PG_FAKE;
	return pg;
}

/*
 * uvm_pgflcache_free: cache a page, if possible.
 *
 * => must be called at IPL_VM
 * => must only send pages for the correct bucket for this CPU
 */

bool
uvm_pgflcache_free(struct uvm_cpu *ucpu, struct vm_page *pg)
{
	struct pgflcache *pc;
	struct pccolor *pcc;
	int fl, c;

	KASSERT(uvm_page_get_bucket(pg) == ucpu->pgflbucket);

	/* If caching is off, then bail out. */
	fl = uvm_page_get_freelist(pg);
	if (__predict_false((pc = ucpu->pgflcache[fl]) == NULL)) {
		return false;
	}

	/* If the array is full spill it first, then add page to array. */
	c = VM_PGCOLOR(pg);
	pcc = &pc->color[c];
	KASSERT((pg->flags & PG_FREE) == 0);
	if (__predict_false(pcc->count == MAXPGS)) {
		uvm_pgflcache_spill(ucpu, fl, c);
	}
	pg->flags = PG_FREE;
	pcc->pages[pcc->count] = pg;
	pcc->count++;
	return true;
}

/*
 * uvm_pgflcache_init_cpu: allocate and initialize per-CPU data structures
 * for the free page cache.  Don't set anything in motion - that's taken
 * care of by uvm_pgflcache_resume().
 */

static void
uvm_pgflcache_init_cpu(struct cpu_info *ci)
{
	struct uvm_cpu *ucpu;
	size_t sz;

	ucpu = ci->ci_data.cpu_uvm;
	KASSERT(ucpu->pgflcachemem == NULL);
	KASSERT(ucpu->pgflcache[0] == NULL);

	sz = offsetof(struct pgflcache, color[uvmexp.ncolors]);
	ucpu->pgflcachememsz =
	    (roundup2(sz * VM_NFREELIST, coherency_unit) + coherency_unit - 1);
	ucpu->pgflcachemem = kmem_zalloc(ucpu->pgflcachememsz, KM_SLEEP);
}

/*
 * uvm_pgflcache_fini_cpu: dump all cached pages back to global free list
 * and shut down caching on the CPU.  Called on each CPU in the system via
 * xcall.
 */

static void
uvm_pgflcache_fini_cpu(void *arg1 __unused, void *arg2 __unused)
{
	struct uvm_cpu *ucpu;
	int fl, color, s;

	ucpu = curcpu()->ci_data.cpu_uvm;
	for (fl = 0; fl < VM_NFREELIST; fl++) {
		s = splvm();
		for (color = 0; color < uvmexp.ncolors; color++) {
			uvm_pgflcache_spill(ucpu, fl, color);
		}
		ucpu->pgflcache[fl] = NULL;
		splx(s);
	}
}
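/*
 * Illustrative note on the pause/resume pair below: callers that need a
 * consistent view of the global free lists (UVM_HOTPLUG, uvm_pglistalloc(),
 * uvm_page_redim(), per the file comment) bracket their work, e.g. a
 * hypothetical caller:
 *
 *	uvm_pgflcache_pause();	// drain and disable all per-CPU caches
 *	... inspect or re-dimension the free lists ...
 *	uvm_pgflcache_resume();	// re-enable caching
 *
 * uvm_pgflcache_sem counts the nesting: only the first pause drains the
 * caches, and only the final matching resume re-enables them.
 */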
/*
 * uvm_pgflcache_pause: pause operation of the caches
 */

void
uvm_pgflcache_pause(void)
{
	uint64_t where;

	/* First one in starts draining.  Everyone else waits. */
	mutex_enter(&uvm_pgflcache_lock);
	if (uvm_pgflcache_sem++ == 0) {
		where = xc_broadcast(XC_HIGHPRI, uvm_pgflcache_fini_cpu,
		    (void *)1, NULL);
		xc_wait(where);
	}
	mutex_exit(&uvm_pgflcache_lock);
}

/*
 * uvm_pgflcache_resume: resume operation of the caches
 */

void
uvm_pgflcache_resume(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct uvm_cpu *ucpu;
	uintptr_t addr;
	size_t sz;
	int fl;

	/* Last guy out takes care of business. */
	mutex_enter(&uvm_pgflcache_lock);
	KASSERT(uvm_pgflcache_sem > 0);
	if (uvm_pgflcache_sem-- > 1) {
		mutex_exit(&uvm_pgflcache_lock);
		return;
	}

	/*
	 * Make sure dependent data structure updates are remotely visible.
	 * Essentially this functions as a global memory barrier.
	 */
	xc_barrier(XC_HIGHPRI);

	/*
	 * Then set all of the pointers in place on each CPU.  As soon as
	 * each pointer is set, caching is operational in that dimension.
	 */
	sz = offsetof(struct pgflcache, color[uvmexp.ncolors]);
	for (CPU_INFO_FOREACH(cii, ci)) {
		ucpu = ci->ci_data.cpu_uvm;
		addr = roundup2((uintptr_t)ucpu->pgflcachemem, coherency_unit);
		for (fl = 0; fl < VM_NFREELIST; fl++) {
			ucpu->pgflcache[fl] = (struct pgflcache *)addr;
			addr += sz;
		}
	}
	mutex_exit(&uvm_pgflcache_lock);
}

/*
 * uvm_pgflcache_start: start operation of the cache.
 *
 * => called once only, when init(8) is about to be started
 */

void
uvm_pgflcache_start(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(uvm_pgflcache_sem > 0);

	/*
	 * There's not much point doing this if every CPU has its own
	 * bucket (and that includes the uniprocessor case).
	 */
	if (ncpu == uvm.bucketcount) {
		return;
	}

	/* Create data structures for each CPU. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		uvm_pgflcache_init_cpu(ci);
	}

	/* Kick it into action. */
	uvm_pgflcache_resume();
}

/*
 * uvm_pgflcache_init: set up data structures for the free page cache.
 */

void
uvm_pgflcache_init(void)
{

	uvm_pgflcache_sem = 1;
	mutex_init(&uvm_pgflcache_lock, MUTEX_DEFAULT, IPL_NONE);
}

#else	/* MULTIPROCESSOR */

struct vm_page *
uvm_pgflcache_alloc(struct uvm_cpu *ucpu, int fl, int c)
{

	return NULL;
}

bool
uvm_pgflcache_free(struct uvm_cpu *ucpu, struct vm_page *pg)
{

	return false;
}

void
uvm_pgflcache_fill(struct uvm_cpu *ucpu, int fl, int b, int c)
{

}

void
uvm_pgflcache_pause(void)
{

}

void
uvm_pgflcache_resume(void)
{

}

void
uvm_pgflcache_start(void)
{

}

void
uvm_pgflcache_init(void)
{

}

#endif	/* MULTIPROCESSOR */