/* $NetBSD: subr_percpu.c,v 1.24 2020/02/07 11:55:22 thorpej Exp $ */

/*-
 * Copyright (c)2007,2008 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * per-cpu storage.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_percpu.c,v 1.24 2020/02/07 11:55:22 thorpej Exp $");

#include <sys/param.h>
#include <sys/cpu.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/rwlock.h>
#include <sys/vmem.h>
#include <sys/xcall.h>

#define PERCPU_QUANTUM_SIZE (ALIGNBYTES + 1)
#define PERCPU_QCACHE_MAX   0
#define PERCPU_IMPORT_SIZE  2048

struct percpu {
        unsigned pc_offset;
        size_t pc_size;
        percpu_callback_t pc_dtor;
        void *pc_cookie;
};

static krwlock_t percpu_swap_lock __cacheline_aligned;
static vmem_t *percpu_offset_arena __read_mostly;
static struct {
        kmutex_t lock;
        unsigned int nextoff;
} percpu_allocation __cacheline_aligned;

static percpu_cpu_t *
cpu_percpu(struct cpu_info *ci)
{

        return &ci->ci_data.cpu_percpu;
}

static unsigned int
percpu_offset(percpu_t *pc)
{
        const unsigned int off = pc->pc_offset;

        KASSERT(off < percpu_allocation.nextoff);
        return off;
}

/*
 * percpu_cpu_swap: crosscall handler for percpu_cpu_enlarge
 */
__noubsan
static void
percpu_cpu_swap(void *p1, void *p2)
{
        struct cpu_info * const ci = p1;
        percpu_cpu_t * const newpcc = p2;
        percpu_cpu_t * const pcc = cpu_percpu(ci);

        KASSERT(ci == curcpu() || !mp_online);

        /*
         * swap *pcc and *newpcc unless anyone has beaten us.
         */
        rw_enter(&percpu_swap_lock, RW_WRITER);
        if (newpcc->pcc_size > pcc->pcc_size) {
                percpu_cpu_t tmp;
                int s;

                tmp = *pcc;

                /*
                 * block interrupts so that we don't lose their modifications.
                 */

                s = splhigh();

                /*
                 * copy data to new storage.
                 */

                memcpy(newpcc->pcc_data, pcc->pcc_data, pcc->pcc_size);

                /*
                 * this assignment needs to be atomic for percpu_getptr_remote.
                 */

                pcc->pcc_data = newpcc->pcc_data;

                splx(s);

                pcc->pcc_size = newpcc->pcc_size;
                *newpcc = tmp;
        }
        rw_exit(&percpu_swap_lock);
}

/*
 * percpu_cpu_enlarge: ensure that the percpu_cpu_t of each cpu has enough space
 */

static void
percpu_cpu_enlarge(size_t size)
{
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;

        for (CPU_INFO_FOREACH(cii, ci)) {
                percpu_cpu_t pcc;

                pcc.pcc_data = kmem_alloc(size, KM_SLEEP); /* XXX cacheline */
                pcc.pcc_size = size;
                if (!mp_online) {
                        percpu_cpu_swap(ci, &pcc);
                } else {
                        uint64_t where;

                        where = xc_unicast(0, percpu_cpu_swap, ci, &pcc, ci);
                        xc_wait(where);
                }
                KASSERT(pcc.pcc_size <= size);
                if (pcc.pcc_data != NULL) {
                        kmem_free(pcc.pcc_data, pcc.pcc_size);
                }
        }
}

/*
 * percpu_backend_alloc: vmem import callback for percpu_offset_arena
 */

static int
percpu_backend_alloc(vmem_t *dummy, vmem_size_t size, vmem_size_t *resultsize,
    vm_flag_t vmflags, vmem_addr_t *addrp)
{
        unsigned int offset;
        unsigned int nextoff;

        ASSERT_SLEEPABLE();
        KASSERT(dummy == NULL);

        if ((vmflags & VM_NOSLEEP) != 0)
                return ENOMEM;

        size = roundup(size, PERCPU_IMPORT_SIZE);
        mutex_enter(&percpu_allocation.lock);
        offset = percpu_allocation.nextoff;
        percpu_allocation.nextoff = nextoff = percpu_allocation.nextoff + size;
        mutex_exit(&percpu_allocation.lock);

        percpu_cpu_enlarge(nextoff);

        *resultsize = size;
        *addrp = (vmem_addr_t)offset;
        return 0;
}

static void
percpu_zero_cb(void *vp, void *vp2, struct cpu_info *ci)
{
        size_t sz = (uintptr_t)vp2;

        memset(vp, 0, sz);
}

/*
 * percpu_zero: initialize percpu storage with zeros.
 */

static void
percpu_zero(percpu_t *pc, size_t sz)
{

        percpu_foreach(pc, percpu_zero_cb, (void *)(uintptr_t)sz);
}

/*
 * percpu_init: subsystem initialization
 */

void
percpu_init(void)
{

        ASSERT_SLEEPABLE();
        rw_init(&percpu_swap_lock);
        mutex_init(&percpu_allocation.lock, MUTEX_DEFAULT, IPL_NONE);
        percpu_allocation.nextoff = PERCPU_QUANTUM_SIZE;

        percpu_offset_arena = vmem_xcreate("percpu", 0, 0, PERCPU_QUANTUM_SIZE,
            percpu_backend_alloc, NULL, NULL, PERCPU_QCACHE_MAX, VM_SLEEP,
            IPL_NONE);
}

/*
 * percpu_init_cpu: cpu initialization
 *
 * => should be called before the cpu appears on the list for CPU_INFO_FOREACH.
 */

void
percpu_init_cpu(struct cpu_info *ci)
{
        percpu_cpu_t * const pcc = cpu_percpu(ci);
        size_t size = percpu_allocation.nextoff; /* XXX racy */

        ASSERT_SLEEPABLE();
        pcc->pcc_size = size;
        if (size) {
                pcc->pcc_data = kmem_zalloc(pcc->pcc_size, KM_SLEEP);
        }
}

/*
 * percpu_alloc: allocate percpu storage
 *
 * => called in thread context.
 * => considered an expensive and rare operation.
 * => allocated storage is initialized with zeros.
 */

percpu_t *
percpu_alloc(size_t size)
{

        return percpu_create(size, NULL, NULL, NULL);
}

/*
 * percpu_create: allocate percpu storage and associate ctor/dtor with it
 *
 * => called in thread context.
 * => considered an expensive and rare operation.
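 * => typical ctor-based usage would be as follows (an illustrative sketch
 *    only; the "foostat" type and its ctor are hypothetical, not part of
 *    this file):
 *
 *	struct foostat {
 *		u_int fs_cpuid;
 *		uint64_t fs_ops;
 *	};
 *
 *	static void
 *	foostat_ctor(void *p, void *cookie, struct cpu_info *ci)
 *	{
 *		struct foostat *fs = p;
 *
 *		fs->fs_cpuid = cpu_index(ci);
 *	}
 *
 *	pc = percpu_create(sizeof(struct foostat), foostat_ctor, NULL, NULL);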
 * => allocated storage is initialized by ctor, or with zeros if ctor is null
 * => percpu_free will call dtor first, if dtor is nonnull
 * => ctor or dtor may sleep, even on allocation
 */

percpu_t *
percpu_create(size_t size, percpu_callback_t ctor, percpu_callback_t dtor,
    void *cookie)
{
        vmem_addr_t offset;
        percpu_t *pc;

        ASSERT_SLEEPABLE();
        (void)vmem_alloc(percpu_offset_arena, size, VM_SLEEP | VM_BESTFIT,
            &offset);

        pc = kmem_alloc(sizeof(*pc), KM_SLEEP);
        pc->pc_offset = offset;
        pc->pc_size = size;
        pc->pc_dtor = dtor;
        pc->pc_cookie = cookie;

        if (ctor) {
                CPU_INFO_ITERATOR cii;
                struct cpu_info *ci;
                void *buf;

                buf = kmem_alloc(size, KM_SLEEP);
                for (CPU_INFO_FOREACH(cii, ci)) {
                        memset(buf, 0, size);
                        (*ctor)(buf, cookie, ci);
                        percpu_traverse_enter();
                        memcpy(percpu_getptr_remote(pc, ci), buf, size);
                        percpu_traverse_exit();
                }
                explicit_memset(buf, 0, size);
                kmem_free(buf, size);
        } else {
                percpu_zero(pc, size);
        }

        return pc;
}

/*
 * percpu_free: free percpu storage
 *
 * => called in thread context.
 * => considered an expensive and rare operation.
 */

void
percpu_free(percpu_t *pc, size_t size)
{

        ASSERT_SLEEPABLE();
        KASSERT(size == pc->pc_size);

        if (pc->pc_dtor) {
                CPU_INFO_ITERATOR cii;
                struct cpu_info *ci;
                void *buf;

                buf = kmem_alloc(size, KM_SLEEP);
                for (CPU_INFO_FOREACH(cii, ci)) {
                        percpu_traverse_enter();
                        memcpy(buf, percpu_getptr_remote(pc, ci), size);
                        explicit_memset(percpu_getptr_remote(pc, ci), 0, size);
                        percpu_traverse_exit();
                        (*pc->pc_dtor)(buf, pc->pc_cookie, ci);
                }
                explicit_memset(buf, 0, size);
                kmem_free(buf, size);
        }

        vmem_free(percpu_offset_arena, (vmem_addr_t)percpu_offset(pc), size);
        kmem_free(pc, sizeof(*pc));
}

/*
 * percpu_getref:
 *
 * => safe to be used in either thread or interrupt context
 * => disables preemption; must be bracketed with a percpu_putref()
 */

void *
percpu_getref(percpu_t *pc)
{

        kpreempt_disable();
        return percpu_getptr_remote(pc, curcpu());
}

/*
 * percpu_putref:
 *
 * => drops the preemption-disabled count after caller is done with per-cpu
 *    data
 */

void
percpu_putref(percpu_t *pc)
{

        kpreempt_enable();
}

/*
 * percpu_traverse_enter, percpu_traverse_exit, percpu_getptr_remote:
 * helpers to access a remote cpu's percpu data.
 *
 * => called in thread context.
 * => percpu_traverse_enter can block low-priority xcalls.
 * => typical usage would be:
 *
 *	sum = 0;
 *	percpu_traverse_enter();
 *	for (CPU_INFO_FOREACH(cii, ci)) {
 *		unsigned int *p = percpu_getptr_remote(pc, ci);
 *		sum += *p;
 *	}
 *	percpu_traverse_exit();
 */

void
percpu_traverse_enter(void)
{

        ASSERT_SLEEPABLE();
        rw_enter(&percpu_swap_lock, RW_READER);
}

void
percpu_traverse_exit(void)
{

        rw_exit(&percpu_swap_lock);
}

void *
percpu_getptr_remote(percpu_t *pc, struct cpu_info *ci)
{

        return &((char *)cpu_percpu(ci)->pcc_data)[percpu_offset(pc)];
}

/*
 * percpu_foreach: call the specified callback function for each cpu.
 *
 * => must be called from thread context.
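 * => typical usage would be as follows (an illustrative sketch only; the
 *    callback name and the uint64_t payload are hypothetical, not part of
 *    this file):
 *
 *	static void
 *	foocnt_sum_cb(void *p, void *arg, struct cpu_info *ci)
 *	{
 *		uint64_t *sum = arg;
 *
 *		*sum += *(uint64_t *)p;
 *	}
 *
 *	uint64_t sum = 0;
 *	percpu_foreach(pc, foocnt_sum_cb, &sum);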
 * => callback executes on **current** CPU (or, really, arbitrary CPU,
 *    in case of preemption)
 * => caller should not rely on the cpu iteration order.
 * => the callback function should be minimal because it is executed while
 *    holding a global lock, which can block low-priority xcalls.
 *    e.g. it's illegal for a callback function to sleep for memory allocation.
 */
void
percpu_foreach(percpu_t *pc, percpu_callback_t cb, void *arg)
{
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;

        percpu_traverse_enter();
        for (CPU_INFO_FOREACH(cii, ci)) {
                (*cb)(percpu_getptr_remote(pc, ci), arg, ci);
        }
        percpu_traverse_exit();
}

struct percpu_xcall_ctx {
        percpu_callback_t ctx_cb;
        void *ctx_arg;
};

static void
percpu_xcfunc(void * const v1, void * const v2)
{
        percpu_t * const pc = v1;
        struct percpu_xcall_ctx * const ctx = v2;

        (*ctx->ctx_cb)(percpu_getref(pc), ctx->ctx_arg, curcpu());
        percpu_putref(pc);
}

/*
 * percpu_foreach_xcall: call the specified callback function for each
 * cpu.  This version uses an xcall to run the callback on each cpu.
 *
 * => must be called from thread context.
 * => callback executes on **remote** CPU in soft-interrupt context
 *    (at the specified soft interrupt priority).
 * => caller should not rely on the cpu iteration order.
 * => the callback function should be minimal because it may be
 *    executed in soft-interrupt context.  e.g. it's illegal for
 *    a callback function to sleep for memory allocation.
 */
void
percpu_foreach_xcall(percpu_t *pc, u_int xcflags, percpu_callback_t cb,
    void *arg)
{
        struct percpu_xcall_ctx ctx = {
                .ctx_cb = cb,
                .ctx_arg = arg,
        };
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;

        for (CPU_INFO_FOREACH(cii, ci)) {
                xc_wait(xc_unicast(xcflags, percpu_xcfunc, pc, &ctx, ci));
        }
}
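
/*
 * Illustrative sketch of the common fast path (not part of this file; the
 * "foocnt" percpu_t is hypothetical and assumed to have been allocated
 * with percpu_alloc(sizeof(uint64_t))): bump the current cpu's counter
 * through percpu_getref/percpu_putref, with no global lock involved.
 *
 *	uint64_t *p;
 *
 *	p = percpu_getref(foocnt);
 *	(*p)++;
 *	percpu_putref(foocnt);
 */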