/*	$NetBSD: subr_percpu.c,v 1.13 2011/05/13 22:16:44 rmind Exp $	*/

/*-
 * Copyright (c)2007,2008 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * per-cpu storage.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_percpu.c,v 1.13 2011/05/13 22:16:44 rmind Exp $");

#include <sys/param.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/rwlock.h>
#include <sys/vmem.h>
#include <sys/xcall.h>

#include <uvm/uvm_extern.h>

#define	PERCPU_QUANTUM_SIZE	(ALIGNBYTES + 1)
#define	PERCPU_QCACHE_MAX	0
#define	PERCPU_IMPORT_SIZE	2048

#if defined(DIAGNOSTIC)
#define	MAGIC			0x50435055	/* "PCPU" */
#define	percpu_encrypt(pc)	((pc) ^ MAGIC)
#define	percpu_decrypt(pc)	((pc) ^ MAGIC)
#else /* defined(DIAGNOSTIC) */
#define	percpu_encrypt(pc)	(pc)
#define	percpu_decrypt(pc)	(pc)
#endif /* defined(DIAGNOSTIC) */

static krwlock_t	percpu_swap_lock	__cacheline_aligned;
static kmutex_t		percpu_allocation_lock	__cacheline_aligned;
static vmem_t *		percpu_offset_arena	__cacheline_aligned;
static unsigned int	percpu_nextoff		__cacheline_aligned;

static percpu_cpu_t *
cpu_percpu(struct cpu_info *ci)
{

	return &ci->ci_data.cpu_percpu;
}

static unsigned int
percpu_offset(percpu_t *pc)
{
	const unsigned int off = percpu_decrypt((uintptr_t)pc);

	KASSERT(off < percpu_nextoff);
	return off;
}

/*
 * percpu_cpu_swap: crosscall handler for percpu_cpu_enlarge
 */

static void
percpu_cpu_swap(void *p1, void *p2)
{
	struct cpu_info * const ci = p1;
	percpu_cpu_t * const newpcc = p2;
	percpu_cpu_t * const pcc = cpu_percpu(ci);

	KASSERT(ci == curcpu() || !mp_online);

	/*
	 * swap *pcc and *newpcc unless someone has beaten us to it.
	 */
	rw_enter(&percpu_swap_lock, RW_WRITER);
	if (newpcc->pcc_size > pcc->pcc_size) {
		percpu_cpu_t tmp;
		int s;

		tmp = *pcc;

		/*
		 * block interrupts so that we don't lose their modifications.
		 */

		s = splhigh();

		/*
		 * copy data to new storage.
		 */

		memcpy(newpcc->pcc_data, pcc->pcc_data, pcc->pcc_size);

		/*
		 * this assignment needs to be atomic for percpu_getptr_remote.
		 */

		pcc->pcc_data = newpcc->pcc_data;

		splx(s);

		pcc->pcc_size = newpcc->pcc_size;
		*newpcc = tmp;
	}
	rw_exit(&percpu_swap_lock);
}

/*
 * percpu_cpu_enlarge: ensure that each cpu's percpu_cpu_t has enough space
 */

static void
percpu_cpu_enlarge(size_t size)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		percpu_cpu_t pcc;

		pcc.pcc_data = kmem_alloc(size, KM_SLEEP); /* XXX cacheline */
		pcc.pcc_size = size;
		if (!mp_online) {
			percpu_cpu_swap(ci, &pcc);
		} else {
			uint64_t where;

			where = xc_unicast(0, percpu_cpu_swap, ci, &pcc, ci);
			xc_wait(where);
		}
		/*
		 * pcc now holds either this cpu's old, smaller storage or,
		 * if the cpu already had enough room and the swap was
		 * skipped, our unused new buffer; free whichever came back.
		 */
		KASSERT(pcc.pcc_size <= size);
		if (pcc.pcc_data != NULL) {
			kmem_free(pcc.pcc_data, pcc.pcc_size);
		}
	}
}

/*
 * percpu_backend_alloc: vmem import callback for percpu_offset_arena
 */

static vmem_addr_t
percpu_backend_alloc(vmem_t *dummy, vmem_size_t size, vmem_size_t *resultsize,
    vm_flag_t vmflags)
{
	unsigned int offset;
	unsigned int nextoff;

	ASSERT_SLEEPABLE();
	KASSERT(dummy == NULL);

	if ((vmflags & VM_NOSLEEP) != 0)
		return VMEM_ADDR_NULL;

	size = roundup(size, PERCPU_IMPORT_SIZE);
	mutex_enter(&percpu_allocation_lock);
	offset = percpu_nextoff;
	percpu_nextoff = nextoff = percpu_nextoff + size;
	mutex_exit(&percpu_allocation_lock);

	percpu_cpu_enlarge(nextoff);

	*resultsize = size;
	return (vmem_addr_t)offset;
}

static void
percpu_zero_cb(void *vp, void *vp2, struct cpu_info *ci)
{
	size_t sz = (uintptr_t)vp2;

	memset(vp, 0, sz);
}

/*
 * percpu_zero: initialize percpu storage with zeros.
 */

static void
percpu_zero(percpu_t *pc, size_t sz)
{

	percpu_foreach(pc, percpu_zero_cb, (void *)(uintptr_t)sz);
}

/*
 * percpu_init: subsystem initialization
 */

void
percpu_init(void)
{

	ASSERT_SLEEPABLE();
	rw_init(&percpu_swap_lock);
	mutex_init(&percpu_allocation_lock, MUTEX_DEFAULT, IPL_NONE);
	percpu_nextoff = PERCPU_QUANTUM_SIZE;

	percpu_offset_arena = vmem_create("percpu", 0, 0, PERCPU_QUANTUM_SIZE,
	    percpu_backend_alloc, NULL, NULL, PERCPU_QCACHE_MAX, VM_SLEEP,
	    IPL_NONE);
}

/*
 * percpu_init_cpu: cpu initialization
 *
 * => should be called before the cpu appears on the list for CPU_INFO_FOREACH.
 */

void
percpu_init_cpu(struct cpu_info *ci)
{
	percpu_cpu_t * const pcc = cpu_percpu(ci);
	size_t size = percpu_nextoff; /* XXX racy */

	ASSERT_SLEEPABLE();
	pcc->pcc_size = size;
	if (size) {
		pcc->pcc_data = kmem_zalloc(pcc->pcc_size, KM_SLEEP);
	}
}

/*
 * percpu_alloc: allocate percpu storage
 *
 * => called in thread context.
 * => considered an expensive and rare operation.
 * => allocated storage is initialized with zeros.
 */

percpu_t *
percpu_alloc(size_t size)
{
	unsigned int offset;
	percpu_t *pc;

	ASSERT_SLEEPABLE();
	offset = vmem_alloc(percpu_offset_arena, size, VM_SLEEP | VM_BESTFIT);
	pc = (percpu_t *)percpu_encrypt((uintptr_t)offset);
	percpu_zero(pc, size);
	return pc;
}

/*
 * percpu_free: free percpu storage
 *
 * => called in thread context.
 * => considered an expensive and rare operation.
 */

void
percpu_free(percpu_t *pc, size_t size)
{

	ASSERT_SLEEPABLE();
	vmem_free(percpu_offset_arena, (vmem_addr_t)percpu_offset(pc), size);
}

/*
 * percpu_getref:
 *
 * => safe to be used in either thread or interrupt context
 * => disables preemption; must be bracketed with a percpu_putref()
 */

void *
percpu_getref(percpu_t *pc)
{

	KPREEMPT_DISABLE(curlwp);
	return percpu_getptr_remote(pc, curcpu());
}

/*
 * percpu_putref:
 *
 * => drops the preemption-disabled count after the caller is done with the
 *    per-cpu data
 */

void
percpu_putref(percpu_t *pc)
{

	KPREEMPT_ENABLE(curlwp);
}

/*
 * percpu_traverse_enter, percpu_traverse_exit, percpu_getptr_remote:
 * helpers to access a remote cpu's percpu data.
 *
 * => called in thread context.
 * => percpu_traverse_enter can block low-priority xcalls.
 * => typical usage would be:
 *
 *	sum = 0;
 *	percpu_traverse_enter();
 *	for (CPU_INFO_FOREACH(cii, ci)) {
 *		unsigned int *p = percpu_getptr_remote(pc, ci);
 *		sum += *p;
 *	}
 *	percpu_traverse_exit();
 */

void
percpu_traverse_enter(void)
{

	ASSERT_SLEEPABLE();
	rw_enter(&percpu_swap_lock, RW_READER);
}

void
percpu_traverse_exit(void)
{

	rw_exit(&percpu_swap_lock);
}

void *
percpu_getptr_remote(percpu_t *pc, struct cpu_info *ci)
{

	return &((char *)cpu_percpu(ci)->pcc_data)[percpu_offset(pc)];
}

/*
 * percpu_foreach: call the specified callback function for each cpu.
 *
 * => called in thread context.
 * => caller should not rely on the cpu iteration order.
 * => the callback function should be minimal because it is executed while
 *    holding a global lock, which can block low-priority xcalls.
 *    e.g. it's illegal for a callback function to sleep for memory allocation.
 */
void
percpu_foreach(percpu_t *pc, percpu_callback_t cb, void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	percpu_traverse_enter();
	for (CPU_INFO_FOREACH(cii, ci)) {
		(*cb)(percpu_getptr_remote(pc, ci), arg, ci);
	}
	percpu_traverse_exit();
}
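
/*
 * Illustrative sketch, not part of this file's implementation: a minimal
 * per-cpu event counter built on the interfaces above, showing the intended
 * percpu_alloc / percpu_getref / percpu_putref / percpu_foreach usage
 * pattern.  The names example_evcnt, example_evcnt_init, example_evcnt_bump,
 * example_evcnt_sum_cb and example_evcnt_total are hypothetical, and the
 * code is compiled out.
 */
#if 0
static percpu_t *example_evcnt;	/* one uint64_t counter slot per cpu */

static void
example_evcnt_init(void)
{

	/* percpu_alloc may sleep and returns zero-initialized storage */
	example_evcnt = percpu_alloc(sizeof(uint64_t));
}

static void
example_evcnt_bump(void)
{
	uint64_t *p;

	/* percpu_getref disables preemption; pair it with percpu_putref */
	p = percpu_getref(example_evcnt);
	(*p)++;
	percpu_putref(example_evcnt);
}

static void
example_evcnt_sum_cb(void *p, void *arg, struct cpu_info *ci)
{

	/* runs for each cpu while percpu_swap_lock is held; must not sleep */
	*(uint64_t *)arg += *(uint64_t *)p;
}

static uint64_t
example_evcnt_total(void)
{
	uint64_t sum = 0;

	percpu_foreach(example_evcnt, example_evcnt_sum_cb, &sum);
	return sum;
}
#endif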