/*	$NetBSD: scheduler.c,v 1.44 2016/02/19 18:38:37 pooka Exp $	*/

/*
 * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.44 2016/02/19 18:38:37 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/systm.h>

#include <rump-sys/kern.h>

#include <rump/rumpuser.h>

static struct rumpcpu {
        /* needed in fastpath */
        struct cpu_info *rcpu_ci;
        void *rcpu_prevlwp;

        /* needed in slowpath */
        struct rumpuser_mtx *rcpu_mtx;
        struct rumpuser_cv *rcpu_cv;
        int rcpu_wanted;

        /* offset 20 (P=4) or 36 (P=8) here */

        /*
         * Some stats.  Not really that necessary, but we should
         * have room.  Note that these overflow quite fast, so need
         * to be collected often.
         */
        unsigned int rcpu_fastpath;
        unsigned int rcpu_slowpath;
        unsigned int rcpu_migrated;

        /* offset 32 (P=4) or 48 (P=8) */

        int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
} rcpu_storage[MAXCPUS];

static inline struct rumpcpu *
cpuinfo_to_rumpcpu(struct cpu_info *ci)
{

        return &rcpu_storage[cpu_index(ci)];
}

struct cpu_info rump_bootcpu;
kcpuset_t *kcpuset_attached = NULL;
kcpuset_t *kcpuset_running = NULL;
int ncpu, ncpuonline;

kmutex_t cpu_lock;

#define RCPULWP_BUSY	((void *)-1)
#define RCPULWP_WANTED	((void *)-2)

static struct rumpuser_mtx *lwp0mtx;
static struct rumpuser_cv *lwp0cv;
static unsigned nextcpu;

kmutex_t unruntime_lock; /* unruntime lwp lock.  practically unused */

static bool lwp0isbusy = false;
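
/*
 * Scheduling protocol in a nutshell (see rump_schedule_cpu_interlock()
 * and rump_unschedule_cpu1() below): the rcpu_prevlwp slot of each
 * virtual CPU doubles as the lock word.  It holds either the lwp which
 * last ran on that CPU (CPU free, cache presumably still warm for that
 * lwp), RCPULWP_BUSY (CPU currently in use), or RCPULWP_WANTED (CPU in
 * use and at least one thread is waiting for it on rcpu_cv).
 */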

/*
 * Keep some stats.
 *
 * Keeping track of them is not really critical for speed, unless
 * stats happen to be on a different cache line (CACHE_LINE_SIZE is
 * really just a coarse estimate), so default for the performant case
 * (i.e. no stats).
 */
#ifdef RUMPSCHED_STATS
#define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++;
#define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++;
#define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++;
#else
#define SCHED_FASTPATH(rcpu)
#define SCHED_SLOWPATH(rcpu)
#define SCHED_MIGRATED(rcpu)
#endif

struct cpu_info *
cpu_lookup(u_int index)
{

        return rcpu_storage[index].rcpu_ci;
}

static inline struct rumpcpu *
getnextcpu(void)
{
        unsigned newcpu;

        newcpu = atomic_inc_uint_nv(&nextcpu);
        if (__predict_false(ncpu > UINT_MAX/2))
                atomic_and_uint(&nextcpu, 0);
        newcpu = newcpu % ncpu;

        return &rcpu_storage[newcpu];
}

/* this could/should be mi_attach_cpu? */
void
rump_cpus_bootstrap(int *nump)
{
        int num = *nump;

        if (num > MAXCPUS) {
                aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) "
                    "available (adjusted)\n", num, MAXCPUS);
                num = MAXCPUS;
        }

        mutex_init(&cpu_lock, MUTEX_DEFAULT, IPL_NONE);

        kcpuset_create(&kcpuset_attached, true);
        kcpuset_create(&kcpuset_running, true);

        /* attach first cpu for bootstrap */
        rump_cpu_attach(&rump_bootcpu);
        ncpu = 1;
        *nump = num;
}

void
rump_scheduler_init(int numcpu)
{
        struct rumpcpu *rcpu;
        struct cpu_info *ci;
        int i;

        rumpuser_mutex_init(&lwp0mtx, RUMPUSER_MTX_SPIN);
        rumpuser_cv_init(&lwp0cv);
        for (i = 0; i < numcpu; i++) {
                if (i == 0) {
                        ci = &rump_bootcpu;
                } else {
                        ci = kmem_zalloc(sizeof(*ci), KM_SLEEP);
                        ci->ci_index = i;
                }

                rcpu = &rcpu_storage[i];
                rcpu->rcpu_ci = ci;
                rcpu->rcpu_wanted = 0;
                rumpuser_cv_init(&rcpu->rcpu_cv);
                rumpuser_mutex_init(&rcpu->rcpu_mtx, RUMPUSER_MTX_SPIN);

                ci->ci_schedstate.spc_mutex =
                    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
                ci->ci_schedstate.spc_flags = SPCF_RUNNING;
        }

        mutex_init(&unruntime_lock, MUTEX_DEFAULT, IPL_SCHED);
}

/*
 * condvar ops using scheduler lock as the rumpuser interlock.
 */
void
rump_schedlock_cv_wait(struct rumpuser_cv *cv)
{
        struct lwp *l = curlwp;
        struct rumpcpu *rcpu = cpuinfo_to_rumpcpu(l->l_cpu);

        /* mutex will be taken and released in cpu schedule/unschedule */
        rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
}

int
rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
{
        struct lwp *l = curlwp;
        struct rumpcpu *rcpu = cpuinfo_to_rumpcpu(l->l_cpu);

        /* mutex will be taken and released in cpu schedule/unschedule */
        return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
            ts->tv_sec, ts->tv_nsec);
}

static void
lwp0busy(void)
{

        /* busy lwp0 */
        KASSERT(curlwp == NULL || curlwp->l_stat != LSONPROC);
        rumpuser_mutex_enter_nowrap(lwp0mtx);
        while (lwp0isbusy)
                rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
        lwp0isbusy = true;
        rumpuser_mutex_exit(lwp0mtx);
}

static void
lwp0rele(void)
{

        rumpuser_mutex_enter_nowrap(lwp0mtx);
        KASSERT(lwp0isbusy == true);
        lwp0isbusy = false;
        rumpuser_cv_signal(lwp0cv);
        rumpuser_mutex_exit(lwp0mtx);
}
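
/*
 * The expected calling pattern for the functions below is, roughly,
 * that a host thread entering the rump kernel brackets its operation:
 *
 *	rump_schedule();
 *	... run code which requires curlwp and a virtual CPU ...
 *	rump_unschedule();
 *
 * i.e. rump_schedule() guarantees a valid lwp context and a virtual
 * CPU for the duration, and rump_unschedule() gives them back.
 */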

/*
 * rump_schedule: ensure that the calling host thread has a valid lwp context,
 * i.e. ensure that curlwp != NULL.  Also, ensure that there is
 * a 1:1 mapping between the lwp and the rump kernel cpu.
 */
void
rump_schedule()
{
        struct lwp *l;

        /*
         * If there is no dedicated lwp, allocate a temp one and
         * set it to be free'd upon unschedule().  Use lwp0 context
         * for reserving the necessary resources.  Don't optimize
         * for this case -- anyone who cares about performance will
         * start a real thread.
         */
        if (__predict_true((l = curlwp) != NULL)) {
                rump_schedule_cpu(l);
                LWP_CACHE_CREDS(l, l->l_proc);
        } else {
                lwp0busy();

                /* schedule cpu and use lwp0 */
                rump_schedule_cpu(&lwp0);
                rump_lwproc_curlwp_set(&lwp0);

                /* allocate thread, switch to it, and release lwp0 */
                l = rump__lwproc_alloclwp(initproc);
                rump_lwproc_switch(l);
                lwp0rele();

                /*
                 * mark new thread dead-on-unschedule.  this
                 * means that we'll be running with l_refcnt == 0.
                 * relax, it's fine.
                 */
                rump_lwproc_releaselwp();
        }
}

void
rump_schedule_cpu(struct lwp *l)
{

        rump_schedule_cpu_interlock(l, NULL);
}

/*
 * Schedule a CPU.  This optimizes for the case where we schedule
 * the same thread often, and we have nCPU >= nFrequently-Running-Thread
 * (where CPU is virtual rump cpu, not host CPU).
 */
void
rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
{
        struct rumpcpu *rcpu;
        struct cpu_info *ci;
        void *old;
        bool domigrate;
        bool bound = l->l_pflag & LP_BOUND;

        l->l_stat = LSRUN;

        /*
         * First, try fastpath: if we were the previous user of the
         * CPU, everything is in order cachewise and we can just
         * proceed to use it.
         *
         * If we are a different thread (i.e. CAS fails), we must go
         * through a memory barrier to ensure we get a truthful
         * view of the world.
         */

        KASSERT(l->l_target_cpu != NULL);
        rcpu = cpuinfo_to_rumpcpu(l->l_target_cpu);
        if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
                if (interlock == rcpu->rcpu_mtx)
                        rumpuser_mutex_exit(rcpu->rcpu_mtx);
                SCHED_FASTPATH(rcpu);
                /* jones, you're the man */
                goto fastlane;
        }

        /*
         * Else, it's the slowpath for us.  First, determine if we
         * can migrate.
         */
        if (ncpu == 1)
                domigrate = false;
        else
                domigrate = true;

        /* Take lock.  This acts as a load barrier too. */
        if (interlock != rcpu->rcpu_mtx)
                rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);

        for (;;) {
                SCHED_SLOWPATH(rcpu);
                old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);

                /* CPU is free? */
                if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
                        if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
                            RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
                                break;
                        }
                }
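
                /*
                 * Getting here means the CPU was busy or we lost a race:
                 * the swap above both read the owner and advertised that
                 * the CPU is wanted, and the CAS from WANTED back to BUSY
                 * claims the CPU only if nobody else touched the slot in
                 * between.  Next, either migrate once or wait for the
                 * current owner to release the CPU.
                 */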

                /*
                 * Do we want to migrate once?
                 * This may need a slightly better algorithm, or we
                 * might cache pingpong eternally for non-frequent
                 * threads.
                 */
                if (domigrate && !bound) {
                        domigrate = false;
                        SCHED_MIGRATED(rcpu);
                        rumpuser_mutex_exit(rcpu->rcpu_mtx);
                        rcpu = getnextcpu();
                        rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
                        continue;
                }

                /* Want CPU, wait until it's released and retry */
                rcpu->rcpu_wanted++;
                rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
                rcpu->rcpu_wanted--;
        }
        rumpuser_mutex_exit(rcpu->rcpu_mtx);

 fastlane:
        ci = rcpu->rcpu_ci;
        l->l_cpu = l->l_target_cpu = ci;
        l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
        l->l_ncsw++;
        l->l_stat = LSONPROC;

        /*
         * No interrupts, so ci_curlwp === cpu_onproc.
         * Okay, we could make an attempt to not set cpu_onproc
         * in the case that an interrupt is scheduled immediately
         * after a user proc, but leave that for later.
         */
        ci->ci_curlwp = ci->ci_data.cpu_onproc = l;
}

void
rump_unschedule()
{
        struct lwp *l = curlwp;
#ifdef DIAGNOSTIC
        int nlock;

        KERNEL_UNLOCK_ALL(l, &nlock);
        KASSERT(nlock == 0);
#endif

        KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
        rump_unschedule_cpu(l);
        l->l_mutex = &unruntime_lock;
        l->l_stat = LSSTOP;

        /*
         * Check special conditions:
         *  1) do we need to free the lwp which just unscheduled?
         *     (locking order: lwp0, cpu)
         *  2) do we want to clear curlwp for the current host thread
         */
        if (__predict_false(l->l_flag & LW_WEXIT)) {
                lwp0busy();

                /* Now that we have lwp0, we can schedule a CPU again */
                rump_schedule_cpu(l);

                /* switch to lwp0.  this frees the old thread */
                KASSERT(l->l_flag & LW_WEXIT);
                rump_lwproc_switch(&lwp0);

                /* release lwp0 */
                rump_unschedule_cpu(&lwp0);
                lwp0.l_mutex = &unruntime_lock;
                lwp0.l_pflag &= ~LP_RUNNING;
                lwp0rele();
                rump_lwproc_curlwp_clear(&lwp0);

        } else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) {
                rump_lwproc_curlwp_clear(l);
                l->l_flag &= ~LW_RUMP_CLEAR;
        }
}

void
rump_unschedule_cpu(struct lwp *l)
{

        rump_unschedule_cpu_interlock(l, NULL);
}

void
rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
{

        if ((l->l_pflag & LP_INTR) == 0)
                rump_softint_run(l->l_cpu);
        rump_unschedule_cpu1(l, interlock);
}

void
rump_unschedule_cpu1(struct lwp *l, void *interlock)
{
        struct rumpcpu *rcpu;
        struct cpu_info *ci;
        void *old;

        ci = l->l_cpu;
        ci->ci_curlwp = ci->ci_data.cpu_onproc = NULL;
        rcpu = cpuinfo_to_rumpcpu(ci);

        KASSERT(rcpu->rcpu_ci == ci);

        /*
         * Make sure all stores are seen before the CPU release.  This
         * is relevant only in the non-fastpath scheduling case, but
         * we don't know here if that's going to happen, so need to
         * expect the worst.
         *
         * If the scheduler interlock was requested by the caller, we
         * need to obtain it before we release the CPU.  Otherwise, we
         * risk a race condition where another thread is scheduled
         * onto the rump kernel CPU before our current thread can
         * grab the interlock.
         */
        if (interlock == rcpu->rcpu_mtx)
                rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
        else
                membar_exit();
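
        /*
         * Releasing the CPU means publishing this lwp as the previous
         * user of the CPU; that is the value the fastpath in
         * rump_schedule_cpu_interlock() compares against.  The value
         * swapped out below tells us whether anybody announced interest
         * in the CPU while we were running on it.
         */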

        /* Release the CPU. */
        old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);

        /* No waiters?  No problems.  We're outta here. */
        if (old == RCPULWP_BUSY) {
                return;
        }

        KASSERT(old == RCPULWP_WANTED);

        /*
         * Ok, things weren't so snappy.
         *
         * Snailpath: take lock and signal anyone waiting for this CPU.
         */

        if (interlock != rcpu->rcpu_mtx)
                rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
        if (rcpu->rcpu_wanted)
                rumpuser_cv_broadcast(rcpu->rcpu_cv);
        if (interlock != rcpu->rcpu_mtx)
                rumpuser_mutex_exit(rcpu->rcpu_mtx);
}

/* Give up and retake CPU (perhaps a different one) */
void
yield()
{
        struct lwp *l = curlwp;
        int nlocks;

        KERNEL_UNLOCK_ALL(l, &nlocks);
        rump_unschedule_cpu(l);
        rump_schedule_cpu(l);
        KERNEL_LOCK(nlocks, l);
}

void
preempt()
{

        yield();
}

bool
kpreempt(uintptr_t where)
{

        return false;
}

/*
 * There is no kernel thread preemption in rump currently.  But call
 * the implementing macros anyway in case they grow some side-effects
 * down the road.
 */
void
kpreempt_disable(void)
{

        KPREEMPT_DISABLE(curlwp);
}

void
kpreempt_enable(void)
{

        KPREEMPT_ENABLE(curlwp);
}

bool
kpreempt_disabled(void)
{
#if 0
        const lwp_t *l = curlwp;

        return l->l_nopreempt != 0 || l->l_stat == LSZOMB ||
            (l->l_flag & LW_IDLE) != 0 || cpu_kpreempt_disabled();
#endif
        /* XXX: emulate cpu_kpreempt_disabled() */
        return true;
}

void
suspendsched(void)
{

        /*
         * Could wait until everyone is out and block further entries,
         * but skip that for now.
         */
}

void
sched_nice(struct proc *p, int level)
{

        /* nothing to do for now */
}

void
sched_enqueue(struct lwp *l, bool swtch)
{

        if (swtch)
                panic("sched_enqueue with switcheroo");
        rump_thread_allow(l);
}

void
sched_dequeue(struct lwp *l)
{

        panic("sched_dequeue not implemented");
}