/*	$NetBSD: subr_percpu.c,v 1.7 2008/04/28 15:36:01 ad Exp $	*/

/*-
 * Copyright (c)2007,2008 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * per-cpu storage.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_percpu.c,v 1.7 2008/04/28 15:36:01 ad Exp $");

#include <sys/param.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/rwlock.h>
#include <sys/vmem.h>
#include <sys/xcall.h>

#include <uvm/uvm_extern.h>

static krwlock_t	percpu_swap_lock;
static kmutex_t		percpu_allocation_lock;
static vmem_t		*percpu_offset_arena;
static unsigned int	percpu_nextoff;

#define	PERCPU_QUANTUM_SIZE	(ALIGNBYTES + 1)
#define	PERCPU_QCACHE_MAX	0
#define	PERCPU_IMPORT_SIZE	2048

static percpu_cpu_t *
cpu_percpu(struct cpu_info *ci)
{

	return &ci->ci_data.cpu_percpu;
}

static unsigned int
percpu_offset(percpu_t *pc)
{

	return (uintptr_t)pc;
}

/*
 * percpu_cpu_swap: crosscall handler for percpu_cpu_enlarge
 */

static void
percpu_cpu_swap(void *p1, void *p2)
{
	struct cpu_info * const ci = p1;
	percpu_cpu_t * const newpcc = p2;
	percpu_cpu_t * const pcc = cpu_percpu(ci);

	/*
	 * swap *pcc and *newpcc unless someone has beaten us to it.
	 */

	rw_enter(&percpu_swap_lock, RW_WRITER);
	if (newpcc->pcc_size > pcc->pcc_size) {
		percpu_cpu_t tmp;
		int s;

		tmp = *pcc;

		/*
		 * block interrupts so that we don't lose their modifications.
		 */

		s = splhigh();

		/*
		 * copy data to new storage.
		 */

		memcpy(newpcc->pcc_data, pcc->pcc_data, pcc->pcc_size);

		/*
		 * this assignment needs to be atomic for percpu_getptr_remote.
		 */

		pcc->pcc_data = newpcc->pcc_data;

		splx(s);

		pcc->pcc_size = newpcc->pcc_size;
		*newpcc = tmp;
	}
	rw_exit(&percpu_swap_lock);
}

/*
 * percpu_cpu_enlarge: ensure that the percpu_cpu_t of each cpu has enough space
 */

static void
percpu_cpu_enlarge(size_t size)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		percpu_cpu_t pcc;

		pcc.pcc_data = kmem_alloc(size, KM_SLEEP); /* XXX cacheline */
		pcc.pcc_size = size;
		if (!mp_online) {
			percpu_cpu_swap(ci, &pcc);
		} else {
			uint64_t where;

			uvm_lwp_hold(curlwp); /* don't swap out pcc */
			where = xc_unicast(0, percpu_cpu_swap, ci, &pcc, ci);
			xc_wait(where);
			uvm_lwp_rele(curlwp);
		}
		KASSERT(pcc.pcc_size < size);
		if (pcc.pcc_data != NULL) {
			kmem_free(pcc.pcc_data, pcc.pcc_size);
		}
	}
}

/*
 * percpu_backend_alloc: vmem import callback for percpu_offset_arena
 */

static vmem_addr_t
percpu_backend_alloc(vmem_t *dummy, vmem_size_t size, vmem_size_t *resultsize,
    vm_flag_t vmflags)
{
	unsigned int offset;
	unsigned int nextoff;

	ASSERT_SLEEPABLE();
	KASSERT(dummy == NULL);

	if ((vmflags & VM_NOSLEEP) != 0)
		return VMEM_ADDR_NULL;

	size = roundup(size, PERCPU_IMPORT_SIZE);
	mutex_enter(&percpu_allocation_lock);
	offset = percpu_nextoff;
	percpu_nextoff = nextoff = percpu_nextoff + size;
	mutex_exit(&percpu_allocation_lock);

	percpu_cpu_enlarge(nextoff);

	*resultsize = size;
	return (vmem_addr_t)offset;
}

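/*
 * Illustrative trace of the import path (the numbers below are made up for
 * the example): suppose percpu_nextoff stands at 2048 and the arena has no
 * free space left when a small percpu_alloc() arrives.  vmem then calls
 * percpu_backend_alloc, which rounds the request up to PERCPU_IMPORT_SIZE
 * (2048), hands offset 2048 back to vmem, and advances percpu_nextoff to
 * 4096.  percpu_cpu_enlarge(4096) runs percpu_cpu_swap on each cpu, copying
 * the old 2048-byte pcc_data into a fresh 4096-byte buffer and freeing the
 * old one.  Subsequent small allocations are carved out of the imported
 * 2048-byte span by vmem without growing the per-cpu areas again.
 */
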
static void
percpu_zero_cb(void *vp, void *vp2, struct cpu_info *ci)
{
	size_t sz = (uintptr_t)vp2;

	memset(vp, 0, sz);
}

/*
 * percpu_zero: initialize percpu storage with zero.
 */

static void
percpu_zero(percpu_t *pc, size_t sz)
{

	percpu_foreach(pc, percpu_zero_cb, (void *)(uintptr_t)sz);
}

/*
 * percpu_init: subsystem initialization
 */

void
percpu_init(void)
{

	ASSERT_SLEEPABLE();
	rw_init(&percpu_swap_lock);
	mutex_init(&percpu_allocation_lock, MUTEX_DEFAULT, IPL_NONE);

	percpu_offset_arena = vmem_create("percpu", 0, 0, PERCPU_QUANTUM_SIZE,
	    percpu_backend_alloc, NULL, NULL, PERCPU_QCACHE_MAX, VM_SLEEP,
	    IPL_NONE);
}

/*
 * percpu_init_cpu: cpu initialization
 *
 * => should be called before the cpu appears on the list for CPU_INFO_FOREACH.
 */

void
percpu_init_cpu(struct cpu_info *ci)
{
	percpu_cpu_t * const pcc = cpu_percpu(ci);
	size_t size = percpu_nextoff; /* XXX racy */

	ASSERT_SLEEPABLE();
	pcc->pcc_size = size;
	if (size) {
		pcc->pcc_data = kmem_zalloc(pcc->pcc_size, KM_SLEEP);
	}
}

/*
 * percpu_alloc: allocate percpu storage
 *
 * => called in thread context.
 * => considered an expensive and rare operation.
 * => allocated storage is initialized with zeros.
 */

percpu_t *
percpu_alloc(size_t size)
{
	unsigned int offset;
	percpu_t *pc;

	ASSERT_SLEEPABLE();
	offset = vmem_alloc(percpu_offset_arena, size, VM_SLEEP | VM_BESTFIT);
	pc = (percpu_t *)(uintptr_t)offset;
	percpu_zero(pc, size);
	return pc;
}

/*
 * percpu_free: free percpu storage
 *
 * => called in thread context.
 * => considered an expensive and rare operation.
 */

void
percpu_free(percpu_t *pc, size_t size)
{

	ASSERT_SLEEPABLE();
	vmem_free(percpu_offset_arena, (vmem_addr_t)percpu_offset(pc), size);
}

/*
 * percpu_getref:
 *
 * => safe to be used in either thread or interrupt context
 * => disables preemption; must be bracketed with a percpu_putref()
 */

void *
percpu_getref(percpu_t *pc)
{

	KPREEMPT_DISABLE(curlwp);
	return percpu_getptr_remote(pc, curcpu());
}

/*
 * percpu_putref:
 *
 * => drops the preemption-disabled count after the caller is done with
 *    per-cpu data
 */

void
percpu_putref(percpu_t *pc)
{

	KPREEMPT_ENABLE(curlwp);
}

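/*
 * Illustrative sketch (hypothetical code, not part of this file): a typical
 * consumer allocates room for one counter and bumps the current cpu's
 * instance under percpu_getref/percpu_putref, so preemption stays disabled
 * while the pointer is in use.  The names foo_softc, sc_count_percpu,
 * foo_attach and foo_count_event are made up for the example.
 *
 *	struct foo_softc {
 *		percpu_t *sc_count_percpu;
 *	};
 *
 *	void
 *	foo_attach(struct foo_softc *sc)
 *	{
 *
 *		sc->sc_count_percpu = percpu_alloc(sizeof(unsigned int));
 *	}
 *
 *	void
 *	foo_count_event(struct foo_softc *sc)
 *	{
 *		unsigned int *p;
 *
 *		p = percpu_getref(sc->sc_count_percpu);
 *		(*p)++;
 *		percpu_putref(sc->sc_count_percpu);
 *	}
 */
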
/*
 * percpu_traverse_enter, percpu_traverse_exit, percpu_getptr_remote:
 * helpers to access a remote cpu's percpu data.
 *
 * => called in thread context.
 * => percpu_traverse_enter can block low-priority xcalls.
 * => typical usage would be:
 *
 *	sum = 0;
 *	percpu_traverse_enter();
 *	for (CPU_INFO_FOREACH(cii, ci)) {
 *		unsigned int *p = percpu_getptr_remote(pc, ci);
 *		sum += *p;
 *	}
 *	percpu_traverse_exit();
 */

void
percpu_traverse_enter(void)
{

	ASSERT_SLEEPABLE();
	rw_enter(&percpu_swap_lock, RW_READER);
}

void
percpu_traverse_exit(void)
{

	rw_exit(&percpu_swap_lock);
}

void *
percpu_getptr_remote(percpu_t *pc, struct cpu_info *ci)
{

	return &((char *)cpu_percpu(ci)->pcc_data)[percpu_offset(pc)];
}

/*
 * percpu_foreach: call the specified callback function for each cpu.
 *
 * => called in thread context.
 * => caller should not rely on the cpu iteration order.
 * => the callback function should be minimal because it is executed while
 *    holding a global lock, which can block low-priority xcalls.
 *    e.g. it's illegal for a callback function to sleep for memory allocation.
 */
void
percpu_foreach(percpu_t *pc, percpu_callback_t cb, void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	percpu_traverse_enter();
	for (CPU_INFO_FOREACH(cii, ci)) {
		(*cb)(percpu_getptr_remote(pc, ci), arg, ci);
	}
	percpu_traverse_exit();
}

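/*
 * Illustrative sketch (hypothetical code, not part of this file): totalling
 * the made-up per-cpu counter from the earlier sketch with percpu_foreach
 * instead of an explicit percpu_traverse_enter/exit loop.  The callback only
 * adds into the accumulator passed via arg, so it stays minimal and never
 * sleeps, as required by the comment above percpu_foreach.
 *
 *	static void
 *	foo_count_cb(void *p, void *arg, struct cpu_info *ci)
 *	{
 *		uint64_t *sump = arg;
 *
 *		*sump += *(unsigned int *)p;
 *	}
 *
 *	uint64_t
 *	foo_count_total(struct foo_softc *sc)
 *	{
 *		uint64_t sum = 0;
 *
 *		percpu_foreach(sc->sc_count_percpu, foo_count_cb, &sum);
 *		return sum;
 *	}
 */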