/*	$NetBSD: uvm_pgflcache.c,v 1.5 2020/06/14 21:41:42 ad Exp $	*/

/*-
 * Copyright (c) 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_pgflcache.c: page freelist cache.
 *
 * This implements a tiny per-CPU cache of pages that sits between the main
 * page allocator and the freelists.  By allocating and freeing pages in
 * batch, it reduces freelist contention by an order of magnitude.
 *
 * The cache can be paused & resumed at runtime so that UVM_HOTPLUG,
 * uvm_pglistalloc() and uvm_page_redim() can have a consistent view of the
 * world.  On a system with one CPU per physical package (e.g. a
 * uniprocessor) the cache is not enabled.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pgflcache.c,v 1.5 2020/06/14 21:41:42 ad Exp $");

#include "opt_uvm.h"
#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sched.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/xcall.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pglist.h>
#include <uvm/uvm_pgflcache.h>

/* There is no point doing any of this on a uniprocessor. */
#ifdef MULTIPROCESSOR

/*
 * MAXPGS - maximum pages per color, per bucket.
 * FILLPGS - number of pages to allocate at once, per color, per bucket.
 *
 * Why the chosen values:
 *
 * (1) In 2019, an average Intel system has 4kB pages and 8x L2 cache
 * colors.  We make the assumption that most of the time allocation activity
 * will be centered around one UVM freelist, so most of the time there will
 * be no more than 224kB worth of cached pages per-CPU.  That's tiny, but
 * enough to hugely reduce contention on the freelist locks, and give us a
 * small pool of pages which if we're very lucky may have some L1/L2 cache
 * locality, and do so without subtracting too much from the L2/L3 cache
 * benefits of having per-package free lists in the page allocator.
 *
 * (2) With the chosen values on _LP64, the data structure for each color
 * takes up a single cache line (64 bytes), giving it very low overhead
 * even in the "miss" case.
 *
 * (3) We don't want to cause too much pressure by hiding away memory that
 * could otherwise be put to good use.
 */
#define	MAXPGS		7
#define	FILLPGS		6

/* Variable size, according to # colors. */
struct pgflcache {
	struct pccolor {
		intptr_t		count;
		struct vm_page		*pages[MAXPGS];
	} color[1];
};
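/*
 * To make the arithmetic above concrete, a sketch assuming _LP64 with
 * 8 byte pointers, 4kB pages and 8 colors (the real values are machine
 * dependent):
 *
 *	sizeof(struct pccolor)	= sizeof(intptr_t) + MAXPGS * sizeof(void *)
 *				= 8 + 7 * 8 = 64 bytes, i.e. one cache line
 *
 *	max cached per CPU,	= MAXPGS * uvmexp.ncolors * PAGE_SIZE
 *	per freelist		= 7 * 8 * 4096 bytes = 224kB
 *
 * A hypothetical compile-time check of the cache line claim could read:
 *
 *	#ifdef _LP64
 *	CTASSERT(sizeof(struct pccolor) == 64);
 *	#endif
 */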
static kmutex_t		uvm_pgflcache_lock;
static int		uvm_pgflcache_sem;

/*
 * uvm_pgflcache_fill: fill specified freelist/color from global list
 *
 * => must be called at IPL_VM
 * => must be called with given bucket lock held
 * => must only fill from the correct bucket for this CPU
 */

void
uvm_pgflcache_fill(struct uvm_cpu *ucpu, int fl, int b, int c)
{
	struct pgflbucket *pgb;
	struct pgflcache *pc;
	struct pccolor *pcc;
	struct pgflist *head;
	struct vm_page *pg;
	int count;

	KASSERT(mutex_owned(&uvm_freelist_locks[b].lock));
	KASSERT(ucpu->pgflbucket == b);

	/* If caching is off, then bail out. */
	if (__predict_false((pc = ucpu->pgflcache[fl]) == NULL)) {
		return;
	}

	/* Fill only to the limit. */
	pcc = &pc->color[c];
	pgb = uvm.page_free[fl].pgfl_buckets[b];
	head = &pgb->pgb_colors[c];
	if (pcc->count >= FILLPGS) {
		return;
	}

	/* Pull pages from the bucket until it's empty, or we are full. */
	count = pcc->count;
	pg = LIST_FIRST(head);
	while (__predict_true(pg != NULL && count < FILLPGS)) {
		KASSERT(pg->flags & PG_FREE);
		KASSERT(uvm_page_get_bucket(pg) == b);
		pcc->pages[count++] = pg;
		pg = LIST_NEXT(pg, pageq.list);
	}

	/* Violate LIST abstraction to remove all pages at once. */
	head->lh_first = pg;
	if (__predict_true(pg != NULL)) {
		pg->pageq.list.le_prev = &head->lh_first;
	}
	pgb->pgb_nfree -= (count - pcc->count);
	pcc->count = count;
}

/*
 * uvm_pgflcache_spill: spill specified freelist/color to global list
 *
 * => must be called at IPL_VM
 * => mark __noinline so we don't pull it into uvm_pgflcache_free()
 */

static void __noinline
uvm_pgflcache_spill(struct uvm_cpu *ucpu, int fl, int c)
{
	struct pgflbucket *pgb;
	struct pgfreelist *pgfl;
	struct pgflcache *pc;
	struct pccolor *pcc;
	struct pgflist *head;
	kmutex_t *lock;
	int b, adj;

	pc = ucpu->pgflcache[fl];
	pcc = &pc->color[c];
	pgfl = &uvm.page_free[fl];
	b = ucpu->pgflbucket;
	pgb = pgfl->pgfl_buckets[b];
	head = &pgb->pgb_colors[c];
	lock = &uvm_freelist_locks[b].lock;

	mutex_spin_enter(lock);
	for (adj = pcc->count; pcc->count != 0;) {
		pcc->count--;
		KASSERT(pcc->pages[pcc->count] != NULL);
		KASSERT(pcc->pages[pcc->count]->flags & PG_FREE);
		LIST_INSERT_HEAD(head, pcc->pages[pcc->count], pageq.list);
	}
	pgb->pgb_nfree += adj;
	mutex_spin_exit(lock);
}
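/*
 * How the routines in this file are meant to fit together: a hypothetical
 * caller in the page allocator's hot path (names and locking details are
 * illustrative only, with fl, b and c already chosen) would try the per-CPU
 * cache first and touch the global bucket, under its lock, only to refill
 * on a miss:
 *
 *	s = splvm();
 *	ucpu = curcpu()->ci_data.cpu_uvm;
 *	b = ucpu->pgflbucket;
 *	pg = uvm_pgflcache_alloc(ucpu, fl, c);
 *	if (pg == NULL) {
 *		mutex_spin_enter(&uvm_freelist_locks[b].lock);
 *		uvm_pgflcache_fill(ucpu, fl, b, c);
 *		mutex_spin_exit(&uvm_freelist_locks[b].lock);
 *		pg = uvm_pgflcache_alloc(ucpu, fl, c);
 *	}
 *	splx(s);
 *
 * Freeing is the mirror image: uvm_pgflcache_free() stashes the page in the
 * per-color array and takes the bucket lock only when the array is full and
 * must be spilled.
 */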
/*
 * uvm_pgflcache_alloc: try to allocate a cached page.
 *
 * => must be called at IPL_VM
 * => allocate only from the given freelist and given page color
 */

struct vm_page *
uvm_pgflcache_alloc(struct uvm_cpu *ucpu, int fl, int c)
{
	struct pgflcache *pc;
	struct pccolor *pcc;
	struct vm_page *pg;

	/* If caching is off, then bail out. */
	if (__predict_false((pc = ucpu->pgflcache[fl]) == NULL)) {
		return NULL;
	}

	/* Very simple: if we have a page then return it. */
	pcc = &pc->color[c];
	if (__predict_false(pcc->count == 0)) {
		return NULL;
	}
	pg = pcc->pages[--(pcc->count)];
	KASSERT(pg != NULL);
	KASSERT(pg->flags == PG_FREE);
	KASSERT(uvm_page_get_freelist(pg) == fl);
	KASSERT(uvm_page_get_bucket(pg) == ucpu->pgflbucket);
	pg->flags = PG_BUSY | PG_CLEAN | PG_FAKE;
	return pg;
}

/*
 * uvm_pgflcache_free: cache a page, if possible.
 *
 * => must be called at IPL_VM
 * => must only send pages for the correct bucket for this CPU
 */

bool
uvm_pgflcache_free(struct uvm_cpu *ucpu, struct vm_page *pg)
{
	struct pgflcache *pc;
	struct pccolor *pcc;
	int fl, c;

	KASSERT(uvm_page_get_bucket(pg) == ucpu->pgflbucket);

	/* If caching is off, then bail out. */
	fl = uvm_page_get_freelist(pg);
	if (__predict_false((pc = ucpu->pgflcache[fl]) == NULL)) {
		return false;
	}

	/* If the array is full spill it first, then add page to array. */
	c = VM_PGCOLOR(pg);
	pcc = &pc->color[c];
	KASSERT((pg->flags & PG_FREE) == 0);
	if (__predict_false(pcc->count == MAXPGS)) {
		uvm_pgflcache_spill(ucpu, fl, c);
	}
	pg->flags = PG_FREE;
	pcc->pages[pcc->count] = pg;
	pcc->count++;
	return true;
}

/*
 * uvm_pgflcache_init_cpu: allocate and initialize per-CPU data structures
 * for the free page cache.  Don't set anything in motion - that's taken
 * care of by uvm_pgflcache_resume().
 */

static void
uvm_pgflcache_init_cpu(struct cpu_info *ci)
{
	struct uvm_cpu *ucpu;
	size_t sz;

	ucpu = ci->ci_data.cpu_uvm;
	KASSERT(ucpu->pgflcachemem == NULL);
	KASSERT(ucpu->pgflcache[0] == NULL);

	sz = offsetof(struct pgflcache, color[uvmexp.ncolors]);
	ucpu->pgflcachememsz =
	    (roundup2(sz * VM_NFREELIST, coherency_unit) + coherency_unit - 1);
	ucpu->pgflcachemem = kmem_zalloc(ucpu->pgflcachememsz, KM_SLEEP);
}

/*
 * uvm_pgflcache_fini_cpu: dump all cached pages back to global free list
 * and shut down caching on the CPU.  Called on each CPU in the system via
 * xcall.
 */

static void
uvm_pgflcache_fini_cpu(void *arg1 __unused, void *arg2 __unused)
{
	struct uvm_cpu *ucpu;
	int fl, color, s;

	ucpu = curcpu()->ci_data.cpu_uvm;
	for (fl = 0; fl < VM_NFREELIST; fl++) {
		s = splvm();
		for (color = 0; color < uvmexp.ncolors; color++) {
			uvm_pgflcache_spill(ucpu, fl, color);
		}
		ucpu->pgflcache[fl] = NULL;
		splx(s);
	}
}

/*
 * uvm_pgflcache_pause: pause operation of the caches
 */

void
uvm_pgflcache_pause(void)
{
	uint64_t where;

	/* First one in starts draining.  Everyone else waits. */
	mutex_enter(&uvm_pgflcache_lock);
	if (uvm_pgflcache_sem++ == 0) {
		where = xc_broadcast(XC_HIGHPRI, uvm_pgflcache_fini_cpu,
		    (void *)1, NULL);
		xc_wait(where);
	}
	mutex_exit(&uvm_pgflcache_lock);
}
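/*
 * A hypothetical user of the pause/resume pair (illustrative only): code
 * that needs the global freelists to hold every free page, such as
 * uvm_pglistalloc() scanning for a contiguous range, would bracket its work
 * like this:
 *
 *	uvm_pgflcache_pause();
 *	... examine or rearrange the global freelists ...
 *	uvm_pgflcache_resume();
 *
 * uvm_pgflcache_sem counts the pausers, so such sections may overlap:
 * caching stays off until the last of them calls uvm_pgflcache_resume().
 */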
/*
 * uvm_pgflcache_resume: resume operation of the caches
 */

void
uvm_pgflcache_resume(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct uvm_cpu *ucpu;
	uintptr_t addr;
	size_t sz;
	int fl;

	/* Last guy out takes care of business. */
	mutex_enter(&uvm_pgflcache_lock);
	KASSERT(uvm_pgflcache_sem > 0);
	if (uvm_pgflcache_sem-- > 1) {
		mutex_exit(&uvm_pgflcache_lock);
		return;
	}

	/*
	 * Make sure dependent data structure updates are remotely visible.
	 * Essentially this functions as a global memory barrier.
	 */
	xc_barrier(XC_HIGHPRI);

	/*
	 * Then set all of the pointers in place on each CPU.  As soon as
	 * each pointer is set, caching is operational in that dimension.
	 */
	sz = offsetof(struct pgflcache, color[uvmexp.ncolors]);
	for (CPU_INFO_FOREACH(cii, ci)) {
		ucpu = ci->ci_data.cpu_uvm;
		addr = roundup2((uintptr_t)ucpu->pgflcachemem, coherency_unit);
		for (fl = 0; fl < VM_NFREELIST; fl++) {
			ucpu->pgflcache[fl] = (struct pgflcache *)addr;
			addr += sz;
		}
	}
	mutex_exit(&uvm_pgflcache_lock);
}

/*
 * uvm_pgflcache_start: start operation of the cache.
 *
 * => called once only, when init(8) is about to be started
 */

void
uvm_pgflcache_start(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(uvm_pgflcache_sem > 0);

	/*
	 * There's not much point doing this if every CPU has its own
	 * bucket (and that includes the uniprocessor case).
	 */
	if (ncpu == uvm.bucketcount) {
		return;
	}

	/* Create data structures for each CPU. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		uvm_pgflcache_init_cpu(ci);
	}

	/* Kick it into action. */
	uvm_pgflcache_resume();
}

/*
 * uvm_pgflcache_init: set up data structures for the free page cache.
 */

void
uvm_pgflcache_init(void)
{

	uvm_pgflcache_sem = 1;
	mutex_init(&uvm_pgflcache_lock, MUTEX_DEFAULT, IPL_NONE);
}

#else	/* MULTIPROCESSOR */

struct vm_page *
uvm_pgflcache_alloc(struct uvm_cpu *ucpu, int fl, int c)
{

	return NULL;
}

bool
uvm_pgflcache_free(struct uvm_cpu *ucpu, struct vm_page *pg)
{

	return false;
}

void
uvm_pgflcache_fill(struct uvm_cpu *ucpu, int fl, int b, int c)
{

}

void
uvm_pgflcache_pause(void)
{

}

void
uvm_pgflcache_resume(void)
{

}

void
uvm_pgflcache_start(void)
{

}

void
uvm_pgflcache_init(void)
{

}

#endif	/* MULTIPROCESSOR */