/*	$NetBSD: scheduler.c,v 1.20 2010/09/07 07:59:48 pooka Exp $	*/

/*
 * Copyright (c) 2010 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.20 2010/09/07 07:59:48 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/systm.h>

#include <rump/rumpuser.h>

#include "rump_private.h"

static struct cpu_info rump_cpus[MAXCPUS];
static struct rumpcpu {
	/* needed in fastpath */
	struct cpu_info *rcpu_ci;
	void *rcpu_prevlwp;

	/* needed in slowpath */
	struct rumpuser_mtx *rcpu_mtx;
	struct rumpuser_cv *rcpu_cv;
	int rcpu_wanted;

	/* offset 20 (P=4) or 36 (P=8) here */

	/*
	 * Some stats.  Not really that necessary, but we should
	 * have room.  Note that these overflow quite fast, so need
	 * to be collected often.
	 */
	unsigned int rcpu_fastpath;
	unsigned int rcpu_slowpath;
	unsigned int rcpu_migrated;

	/* offset 32 (P=4) or 48 (P=8) */

	int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
} rcpu_storage[MAXCPUS];
struct cpu_info *rump_cpu = &rump_cpus[0];
int ncpu;

#define RCPULWP_BUSY	((void *)-1)
#define RCPULWP_WANTED	((void *)-2)

static struct rumpuser_mtx *lwp0mtx;
static struct rumpuser_cv *lwp0cv;
static unsigned nextcpu;

static bool lwp0isbusy = false;
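
/*
 * The zero-length rcpu_align[] member above pads each rcpu_storage[]
 * slot out to a multiple of CACHE_LINE_SIZE, so that the fastpath
 * members of one virtual CPU do not share a cache line with those of
 * the next one.  A compile-time check along these lines would make
 * that intent explicit (sketch only; assumes CTASSERT() is usable in
 * this environment):
 *
 *	CTASSERT((sizeof(struct rumpcpu) % CACHE_LINE_SIZE) == 0);
 */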

/*
 * Keep some stats.
 *
 * Keeping track of them is not really critical for speed, unless the
 * stats happen to be on a different cache line (CACHE_LINE_SIZE is
 * really just a coarse estimate), so default to the performant case
 * (i.e. no stats).
 */
#ifdef RUMPSCHED_STATS
#define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++;
#define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++;
#define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++;
#else
#define SCHED_FASTPATH(rcpu)
#define SCHED_SLOWPATH(rcpu)
#define SCHED_MIGRATED(rcpu)
#endif

struct cpu_info *
cpu_lookup(u_int index)
{

	return &rump_cpus[index];
}

static inline struct rumpcpu *
getnextcpu(void)
{
	unsigned newcpu;

	newcpu = atomic_inc_uint_nv(&nextcpu);
	if (__predict_false(newcpu > UINT_MAX/2))
		atomic_and_uint(&nextcpu, 0);
	newcpu = newcpu % ncpu;

	return &rcpu_storage[newcpu];
}

/* this could/should be mi_attach_cpu? */
void
rump_cpus_bootstrap(int num)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	if (num > MAXCPUS) {
		aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) available\n",
		    num, MAXCPUS);
		num = MAXCPUS;
	}

	for (i = 0; i < num; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		ci->ci_index = i;
	}

	/* attach first cpu for bootstrap */
	rump_cpu_attach(&rump_cpus[0]);
	ncpu = 1;
}

void
rump_scheduler_init(int numcpu)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	rumpuser_mutex_init(&lwp0mtx);
	rumpuser_cv_init(&lwp0cv);
	for (i = 0; i < numcpu; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		rcpu->rcpu_ci = ci;
		ci->ci_schedstate.spc_mutex =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
		ci->ci_schedstate.spc_flags = SPCF_RUNNING;
		rcpu->rcpu_wanted = 0;
		rumpuser_cv_init(&rcpu->rcpu_cv);
		rumpuser_mutex_init(&rcpu->rcpu_mtx);
	}
}

/*
 * condvar ops using scheduler lock as the rumpuser interlock.
 */
void
rump_schedlock_cv_wait(struct rumpuser_cv *cv)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
}

int
rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
	    ts->tv_sec, ts->tv_nsec);
}

static void
lwp0busy(void)
{

	/* busy lwp0 */
	KASSERT(curlwp == NULL || curlwp->l_cpu == NULL);
	rumpuser_mutex_enter_nowrap(lwp0mtx);
	while (lwp0isbusy)
		rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
	lwp0isbusy = true;
	rumpuser_mutex_exit(lwp0mtx);
}

static void
lwp0rele(void)
{

	rumpuser_mutex_enter_nowrap(lwp0mtx);
	KASSERT(lwp0isbusy == true);
	lwp0isbusy = false;
	rumpuser_cv_signal(lwp0cv);
	rumpuser_mutex_exit(lwp0mtx);
}
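
/*
 * The usual bracketing for a host thread entering the rump kernel
 * looks roughly like the sketch below.  do_kernel_work() is only a
 * stand-in for whatever code assumes it is running on a rump virtual
 * CPU (i.e. may use curlwp/curcpu()); it is not a real function.
 *
 *	rump_schedule();	(bind curlwp to a free virtual CPU)
 *	do_kernel_work();
 *	rump_unschedule();	(release the CPU, see below)
 */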

void
rump_schedule()
{
	struct lwp *l;

	/*
	 * If there is no dedicated lwp, allocate a temp one and
	 * set it to be free'd upon unschedule().  Use lwp0 context
	 * for reserving the necessary resources.  Don't optimize
	 * for this case -- anyone who cares about performance will
	 * start a real thread.
	 */
	if (__predict_true((l = rumpuser_get_curlwp()) != NULL)) {
		rump_schedule_cpu(l);
		LWP_CACHE_CREDS(l, l->l_proc);
	} else {
		lwp0busy();

		/* schedule cpu and use lwp0 */
		rump_schedule_cpu(&lwp0);
		rumpuser_set_curlwp(&lwp0);

		/* allocate thread, switch to it, and release lwp0 */
		l = rump__lwproc_allockernlwp();
		rump_lwproc_switch(l);
		lwp0rele();

		/*
		 * mark new thread dead-on-unschedule.  this
		 * means that we'll be running with l_refcnt == 0.
		 * relax, it's fine.
		 */
		rump_lwproc_releaselwp();
	}
}

void
rump_schedule_cpu(struct lwp *l)
{

	rump_schedule_cpu_interlock(l, NULL);
}

/*
 * Schedule a CPU.  This optimizes for the case where we schedule
 * the same thread often, and we have nCPU >= nFrequently-Running-Thread
 * (where CPU is virtual rump cpu, not host CPU).
 */
void
rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	void *old;
	bool domigrate;
	bool bound = l->l_pflag & LP_BOUND;

	/*
	 * First, try fastpath: if we were the previous user of the
	 * CPU, everything is in order cachewise and we can just
	 * proceed to use it.
	 *
	 * If we are a different thread (i.e. CAS fails), we must go
	 * through a memory barrier to ensure we get a truthful
	 * view of the world.
	 */

	KASSERT(l->l_target_cpu != NULL);
	rcpu = &rcpu_storage[l->l_target_cpu-&rump_cpus[0]];
	if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
		if (__predict_true(interlock == rcpu->rcpu_mtx))
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
		SCHED_FASTPATH(rcpu);
		/* jones, you're the man */
		goto fastlane;
	}

	/*
	 * Else, it's the slowpath for us.  First, determine if we
	 * can migrate.
	 */
	if (ncpu == 1)
		domigrate = false;
	else
		domigrate = true;

	/* Take lock.  This acts as a load barrier too. */
	if (__predict_true(interlock != rcpu->rcpu_mtx))
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);

	for (;;) {
		SCHED_SLOWPATH(rcpu);
		old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);

		/* CPU is free? */
		if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
			if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
			    RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
				break;
			}
		}

		/*
		 * Do we want to migrate once?
		 * This may need a slightly better algorithm, or we
		 * might cache pingpong eternally for non-frequent
		 * threads.
		 */
		if (domigrate && !bound) {
			domigrate = false;
			SCHED_MIGRATED(rcpu);
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
			rcpu = getnextcpu();
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
			continue;
		}

		/* Want CPU, wait until it's released and retry */
		rcpu->rcpu_wanted++;
		rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
		rcpu->rcpu_wanted--;
	}
	rumpuser_mutex_exit(rcpu->rcpu_mtx);

 fastlane:
	l->l_cpu = l->l_target_cpu = rcpu->rcpu_ci;
	l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
	l->l_ncsw++;
}
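
/*
 * To summarize the rcpu_prevlwp handoff used above and in
 * rump_unschedule_cpu1() below: the field holds either the last lwp
 * to run on the CPU (CPU free, cache presumably still warm for that
 * lwp), RCPULWP_BUSY (CPU taken) or RCPULWP_WANTED (CPU taken and
 * somebody is sleeping on rcpu_cv).  Roughly:
 *
 *	acquire, fastpath (same lwp as last time):
 *		atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l
 *	acquire, slowpath (under rcpu_mtx):
 *		swap in RCPULWP_WANTED; if the old value was neither
 *		BUSY nor WANTED, CAS WANTED -> BUSY and go; otherwise
 *		migrate once or sleep on rcpu_cv and retry
 *	release:
 *		membar_exit();
 *		old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);
 *		if (old == RCPULWP_WANTED)
 *			broadcast rcpu_cv under rcpu_mtx
 */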

void
rump_unschedule()
{
	struct lwp *l;

	l = rumpuser_get_curlwp();
	KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
	rump_unschedule_cpu(l);
	l->l_mutex = NULL;

	/*
	 * Check special conditions:
	 *  1) do we need to free the lwp which just unscheduled?
	 *     (locking order: lwp0, cpu)
	 *  2) do we want to clear curlwp for the current host thread
	 */
	if (__predict_false(l->l_flag & LW_WEXIT)) {
		lwp0busy();

		/* Now that we have lwp0, we can schedule a CPU again */
		rump_schedule_cpu(l);

		/* switch to lwp0.  this frees the old thread */
		KASSERT(l->l_flag & LW_WEXIT);
		rump_lwproc_switch(&lwp0);

		/* release lwp0 */
		rump_unschedule_cpu(&lwp0);
		lwp0.l_mutex = NULL;
		lwp0.l_pflag &= ~LP_RUNNING;
		lwp0rele();
		rumpuser_set_curlwp(NULL);

	} else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) {
		rumpuser_set_curlwp(NULL);
		l->l_flag &= ~LW_RUMP_CLEAR;
	}
}

void
rump_unschedule_cpu(struct lwp *l)
{

	rump_unschedule_cpu_interlock(l, NULL);
}

void
rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
{

	if ((l->l_pflag & LP_INTR) == 0)
		rump_softint_run(l->l_cpu);
	rump_unschedule_cpu1(l, interlock);
}

void
rump_unschedule_cpu1(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;

	ci = l->l_cpu;
	l->l_cpu = NULL;
	rcpu = &rcpu_storage[ci-&rump_cpus[0]];

	KASSERT(rcpu->rcpu_ci == ci);

	/*
	 * Make sure all stores are seen before the CPU release.  This
	 * is relevant only in the non-fastpath scheduling case, but
	 * we don't know here if that's going to happen, so need to
	 * expect the worst.
	 */
	membar_exit();

	/* Release the CPU. */
	old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);

	/* No waiters?  No problems.  We're outta here. */
	if (old == RCPULWP_BUSY) {
		/* Was the scheduler interlock requested? */
		if (__predict_false(interlock == rcpu->rcpu_mtx))
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
		return;
	}

	KASSERT(old == RCPULWP_WANTED);

	/*
	 * Ok, things weren't so snappy.
	 *
	 * Snailpath: take lock and signal anyone waiting for this CPU.
	 */

	rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	if (rcpu->rcpu_wanted)
		rumpuser_cv_broadcast(rcpu->rcpu_cv);

	if (__predict_true(interlock != rcpu->rcpu_mtx))
		rumpuser_mutex_exit(rcpu->rcpu_mtx);
}

/* Give up and retake CPU (perhaps a different one) */
void
yield()
{
	struct lwp *l = curlwp;
	int nlocks;

	KERNEL_UNLOCK_ALL(l, &nlocks);
	rump_unschedule_cpu(l);
	rump_schedule_cpu(l);
	KERNEL_LOCK(nlocks, l);
}

void
preempt()
{

	yield();
}

bool
kpreempt(uintptr_t where)
{

	return false;
}

/*
 * There is no kernel thread preemption in rump currently.  But call
 * the implementing macros anyway in case they grow some side-effects
 * down the road.
 */
void
kpreempt_disable(void)
{

	KPREEMPT_DISABLE(curlwp);
}

void
kpreempt_enable(void)
{

	KPREEMPT_ENABLE(curlwp);
}

void
suspendsched(void)
{

	/*
	 * Could wait until everyone is out and block further entries,
	 * but skip that for now.
	 */
}

void
sched_nice(struct proc *p, int level)
{

	/* nothing to do for now */
}