/*	$NetBSD: scheduler.c,v 1.25 2011/01/28 16:58:28 pooka Exp $	*/

/*
 * Copyright (c) 2010 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.25 2011/01/28 16:58:28 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/systm.h>

#include <rump/rumpuser.h>

#include "rump_private.h"

static struct cpu_info rump_cpus[MAXCPUS];
static struct rumpcpu {
	/* needed in fastpath */
	struct cpu_info *rcpu_ci;
	void *rcpu_prevlwp;

	/* needed in slowpath */
	struct rumpuser_mtx *rcpu_mtx;
	struct rumpuser_cv *rcpu_cv;
	int rcpu_wanted;

	/* offset 20 (P=4) or 36 (P=8) here */

	/*
	 * Some stats.  Not really that necessary, but we should
	 * have room.  Note that these overflow quite fast, so need
	 * to be collected often.
	 */
	unsigned int rcpu_fastpath;
	unsigned int rcpu_slowpath;
	unsigned int rcpu_migrated;

	/* offset 32 (P=4) or 48 (P=8) */

	int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
} rcpu_storage[MAXCPUS];
struct cpu_info *rump_cpu = &rump_cpus[0];
int ncpu;

#define RCPULWP_BUSY	((void *)-1)
#define RCPULWP_WANTED	((void *)-2)
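
/*
 * In short, rcpu_prevlwp doubles as the CPU handoff word for the
 * scheduler below.  It holds either the lwp which last ran on the
 * virtual CPU (the CPU is free and that lwp may re-take it with a
 * lock-free CAS on the fastpath), RCPULWP_BUSY (the CPU is taken and
 * no slowpath acquirer has announced interest), or RCPULWP_WANTED
 * (a slowpath acquirer has announced interest and may be sleeping on
 * rcpu_cv).  A thread releasing the CPU swaps its own lwp pointer
 * back in; if it finds RCPULWP_WANTED, it takes rcpu_mtx and wakes
 * any waiters on rcpu_cv.
 */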

static struct rumpuser_mtx *lwp0mtx;
static struct rumpuser_cv *lwp0cv;
static unsigned nextcpu;

kmutex_t unruntime_lock; /* unruntime lwp lock.  practically unused */

static bool lwp0isbusy = false;

/*
 * Keep some stats.
 *
 * Keeping track of them is not really critical for speed, unless
 * stats happen to be on a different cache line (CACHE_LINE_SIZE is
 * really just a coarse estimate), so default for the performant case
 * (i.e. no stats).
 */
#ifdef RUMPSCHED_STATS
#define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++;
#define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++;
#define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++;
#else
#define SCHED_FASTPATH(rcpu)
#define SCHED_SLOWPATH(rcpu)
#define SCHED_MIGRATED(rcpu)
#endif

struct cpu_info *
cpu_lookup(u_int index)
{

	return &rump_cpus[index];
}

static inline struct rumpcpu *
getnextcpu(void)
{
	unsigned newcpu;

	newcpu = atomic_inc_uint_nv(&nextcpu);
	if (__predict_false(ncpu > UINT_MAX/2))
		atomic_and_uint(&nextcpu, 0);
	newcpu = newcpu % ncpu;

	return &rcpu_storage[newcpu];
}

/* this could/should be mi_attach_cpu? */
void
rump_cpus_bootstrap(int *nump)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int num = *nump;
	int i;

	if (num > MAXCPUS) {
		aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) "
		    "available (adjusted)\n", num, MAXCPUS);
		num = MAXCPUS;
	}

	for (i = 0; i < num; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		ci->ci_index = i;
	}

	/* attach first cpu for bootstrap */
	rump_cpu_attach(&rump_cpus[0]);
	ncpu = 1;
	*nump = num;
}

void
rump_scheduler_init(int numcpu)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	rumpuser_mutex_init(&lwp0mtx);
	rumpuser_cv_init(&lwp0cv);
	for (i = 0; i < numcpu; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		rcpu->rcpu_ci = ci;
		ci->ci_schedstate.spc_mutex =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
		ci->ci_schedstate.spc_flags = SPCF_RUNNING;
		rcpu->rcpu_wanted = 0;
		rumpuser_cv_init(&rcpu->rcpu_cv);
		rumpuser_mutex_init(&rcpu->rcpu_mtx);
	}

	mutex_init(&unruntime_lock, MUTEX_DEFAULT, IPL_NONE);
}

/*
 * condvar ops using scheduler lock as the rumpuser interlock.
 */
void
rump_schedlock_cv_wait(struct rumpuser_cv *cv)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
}

int
rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
	    ts->tv_sec, ts->tv_nsec);
}

static void
lwp0busy(void)
{

	/* busy lwp0 */
	KASSERT(curlwp == NULL || curlwp->l_stat != LSONPROC);
	rumpuser_mutex_enter_nowrap(lwp0mtx);
	while (lwp0isbusy)
		rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
	lwp0isbusy = true;
	rumpuser_mutex_exit(lwp0mtx);
}

static void
lwp0rele(void)
{

	rumpuser_mutex_enter_nowrap(lwp0mtx);
	KASSERT(lwp0isbusy == true);
	lwp0isbusy = false;
	rumpuser_cv_signal(lwp0cv);
	rumpuser_mutex_exit(lwp0mtx);
}
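
/*
 * A minimal caller-side sketch of the entry points below
 * (do_kernel_stuff() is a made-up placeholder): a host thread
 * brackets rump kernel code with rump_schedule()/rump_unschedule(),
 * which bind and release a virtual CPU for the duration:
 *
 *	rump_schedule();	(bind a virtual CPU to this host thread)
 *	do_kernel_stuff();	(hypothetical kernel-side work)
 *	rump_unschedule();	(release the virtual CPU)
 */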

void
rump_schedule()
{
	struct lwp *l;

	/*
	 * If there is no dedicated lwp, allocate a temp one and
	 * set it to be free'd upon unschedule().  Use lwp0 context
	 * for reserving the necessary resources.  Don't optimize
	 * for this case -- anyone who cares about performance will
	 * start a real thread.
	 */
	if (__predict_true((l = rumpuser_get_curlwp()) != NULL)) {
		rump_schedule_cpu(l);
		LWP_CACHE_CREDS(l, l->l_proc);
	} else {
		lwp0busy();

		/* schedule cpu and use lwp0 */
		rump_schedule_cpu(&lwp0);
		rumpuser_set_curlwp(&lwp0);

		/* allocate thread, switch to it, and release lwp0 */
		l = rump__lwproc_alloclwp(initproc);
		rump_lwproc_switch(l);
		lwp0rele();

		/*
		 * mark new thread dead-on-unschedule.  this
		 * means that we'll be running with l_refcnt == 0.
		 * relax, it's fine.
		 */
		rump_lwproc_releaselwp();
	}
}

void
rump_schedule_cpu(struct lwp *l)
{

	rump_schedule_cpu_interlock(l, NULL);
}

/*
 * Schedule a CPU.  This optimizes for the case where we schedule
 * the same thread often, and we have nCPU >= nFrequently-Running-Thread
 * (where CPU is virtual rump cpu, not host CPU).
 */
void
rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	void *old;
	bool domigrate;
	bool bound = l->l_pflag & LP_BOUND;

	l->l_stat = LSRUN;

	/*
	 * First, try fastpath: if we were the previous user of the
	 * CPU, everything is in order cachewise and we can just
	 * proceed to use it.
	 *
	 * If we are a different thread (i.e. CAS fails), we must go
	 * through a memory barrier to ensure we get a truthful
	 * view of the world.
	 */

	KASSERT(l->l_target_cpu != NULL);
	rcpu = &rcpu_storage[l->l_target_cpu-&rump_cpus[0]];
	if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
		if (__predict_true(interlock == rcpu->rcpu_mtx))
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
		SCHED_FASTPATH(rcpu);
		/* jones, you're the man */
		goto fastlane;
	}

	/*
	 * Else, it's the slowpath for us.  First, determine if we
	 * can migrate.
	 */
	if (ncpu == 1)
		domigrate = false;
	else
		domigrate = true;

	/* Take lock.  This acts as a load barrier too. */
	if (__predict_true(interlock != rcpu->rcpu_mtx))
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);

	for (;;) {
		SCHED_SLOWPATH(rcpu);
		old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);

		/* CPU is free? */
		if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
			if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
			    RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
				break;
			}
		}

		/*
		 * Do we want to migrate once?
		 * This may need a slightly better algorithm, or we
		 * might cache pingpong eternally for non-frequent
		 * threads.
		 */
		if (domigrate && !bound) {
			domigrate = false;
			SCHED_MIGRATED(rcpu);
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
			rcpu = getnextcpu();
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
			continue;
		}

		/* Want CPU, wait until it's released and retry */
		rcpu->rcpu_wanted++;
		rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
		rcpu->rcpu_wanted--;
	}
	rumpuser_mutex_exit(rcpu->rcpu_mtx);

 fastlane:
	l->l_cpu = l->l_target_cpu = rcpu->rcpu_ci;
	l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
	l->l_ncsw++;
	l->l_stat = LSONPROC;

	rcpu->rcpu_ci->ci_curlwp = l;
}

void
rump_unschedule()
{
	struct lwp *l = rumpuser_get_curlwp();
#ifdef DIAGNOSTIC
	int nlock;

	KERNEL_UNLOCK_ALL(l, &nlock);
	KASSERT(nlock == 0);
#endif

	KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
	rump_unschedule_cpu(l);
	l->l_mutex = &unruntime_lock;
	l->l_stat = LSSTOP;

	/*
	 * Check special conditions:
	 * 1) do we need to free the lwp which just unscheduled?
	 *    (locking order: lwp0, cpu)
	 * 2) do we want to clear curlwp for the current host thread
	 */
	if (__predict_false(l->l_flag & LW_WEXIT)) {
		lwp0busy();

		/* Now that we have lwp0, we can schedule a CPU again */
		rump_schedule_cpu(l);

		/* switch to lwp0.  this frees the old thread */
		KASSERT(l->l_flag & LW_WEXIT);
		rump_lwproc_switch(&lwp0);

		/* release lwp0 */
		rump_unschedule_cpu(&lwp0);
		lwp0.l_mutex = &unruntime_lock;
		lwp0.l_pflag &= ~LP_RUNNING;
		lwp0rele();
		rumpuser_set_curlwp(NULL);

	} else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) {
		rumpuser_set_curlwp(NULL);
		l->l_flag &= ~LW_RUMP_CLEAR;
	}
}

void
rump_unschedule_cpu(struct lwp *l)
{

	rump_unschedule_cpu_interlock(l, NULL);
}
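
/*
 * A note on the interlock parameter accepted by the *_interlock()
 * variants: passing the virtual CPU's rcpu_mtx asks for the scheduler
 * mutex to be carried across the operation.  rump_unschedule_cpu1()
 * below then returns with rcpu_mtx held, while
 * rump_schedule_cpu_interlock() above treats the mutex as already
 * held by the caller and releases it in the course of scheduling.
 * This is what lets rump_schedlock_cv_wait() use the scheduler lock
 * as the rumpuser condition variable interlock.
 */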

void
rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
{

	if ((l->l_pflag & LP_INTR) == 0)
		rump_softint_run(l->l_cpu);
	rump_unschedule_cpu1(l, interlock);
}

void
rump_unschedule_cpu1(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;

	ci = l->l_cpu;
	ci->ci_curlwp = NULL;
	rcpu = &rcpu_storage[ci-&rump_cpus[0]];

	KASSERT(rcpu->rcpu_ci == ci);

	/*
	 * Make sure all stores are seen before the CPU release.  This
	 * is relevant only in the non-fastpath scheduling case, but
	 * we don't know here if that's going to happen, so need to
	 * expect the worst.
	 */
	membar_exit();

	/* Release the CPU. */
	old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);

	/* No waiters?  No problems.  We're outta here. */
	if (old == RCPULWP_BUSY) {
		/* Was the scheduler interlock requested? */
		if (__predict_false(interlock == rcpu->rcpu_mtx))
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
		return;
	}

	KASSERT(old == RCPULWP_WANTED);

	/*
	 * Ok, things weren't so snappy.
	 *
	 * Snailpath: take lock and signal anyone waiting for this CPU.
	 */

	rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	if (rcpu->rcpu_wanted)
		rumpuser_cv_broadcast(rcpu->rcpu_cv);

	if (__predict_true(interlock != rcpu->rcpu_mtx))
		rumpuser_mutex_exit(rcpu->rcpu_mtx);
}

/* Give up and retake CPU (perhaps a different one) */
void
yield()
{
	struct lwp *l = curlwp;
	int nlocks;

	KERNEL_UNLOCK_ALL(l, &nlocks);
	rump_unschedule_cpu(l);
	rump_schedule_cpu(l);
	KERNEL_LOCK(nlocks, l);
}

void
preempt()
{

	yield();
}

bool
kpreempt(uintptr_t where)
{

	return false;
}

/*
 * There is no kernel thread preemption in rump currently.  But call
 * the implementing macros anyway in case they grow some side-effects
 * down the road.
 */
void
kpreempt_disable(void)
{

	KPREEMPT_DISABLE(curlwp);
}

void
kpreempt_enable(void)
{

	KPREEMPT_ENABLE(curlwp);
}

void
suspendsched(void)
{

	/*
	 * Could wait until everyone is out and block further entries,
	 * but skip that for now.
	 */
}

void
sched_nice(struct proc *p, int level)
{

	/* nothing to do for now */
}