/*	$NetBSD: scheduler.c,v 1.52 2020/11/01 20:58:38 christos Exp $	*/

/*
 * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.52 2020/11/01 20:58:38 christos Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/systm.h>

#include <rump-sys/kern.h>

#include <rump/rumpuser.h>

static struct rumpcpu {
	/* needed in fastpath */
	struct cpu_info *rcpu_ci;
	void *rcpu_prevlwp;

	/* needed in slowpath */
	struct rumpuser_mtx *rcpu_mtx;
	struct rumpuser_cv *rcpu_cv;
	int rcpu_wanted;

	/* offset 20 (P=4) or 36 (P=8) here */

	/*
	 * Some stats.  Not really that necessary, but we should
	 * have room.  Note that these overflow quite fast, so they need
	 * to be collected often.
	 */
	unsigned int rcpu_fastpath;
	unsigned int rcpu_slowpath;
	unsigned int rcpu_migrated;

	/* offset 32 (P=4) or 48 (P=8) */

	int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
} rcpu_storage[MAXCPUS];

static inline struct rumpcpu *
cpuinfo_to_rumpcpu(struct cpu_info *ci)
{

	return &rcpu_storage[cpu_index(ci)];
}

struct cpu_info rump_bootcpu;

/* special rcpu_prevlwp values used while a CPU is busy or wanted */
#define RCPULWP_BUSY	((void *)-1)
#define RCPULWP_WANTED	((void *)-2)

static struct rumpuser_mtx *lwp0mtx;
static struct rumpuser_cv *lwp0cv;
static unsigned nextcpu;

kmutex_t unruntime_lock; /* unruntime lwp lock.  practically unused */

static bool lwp0isbusy = false;

/*
 * Keep some stats.
 *
 * Keeping track of them is not really critical for speed, unless
 * the stats happen to be on a different cache line (CACHE_LINE_SIZE is
 * really just a coarse estimate), so default to the performant case
 * (i.e. no stats).
 */
#ifdef RUMPSCHED_STATS
#define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++;
#define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++;
#define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++;
#else
#define SCHED_FASTPATH(rcpu)
#define SCHED_SLOWPATH(rcpu)
#define SCHED_MIGRATED(rcpu)
#endif
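
/*
 * Illustrative sketch (not part of the original file): with
 * RUMPSCHED_STATS enabled, the per-CPU counters above could be sampled
 * and reset from a debugging hook roughly as follows.  The function
 * name is hypothetical.
 */
#if 0
static void
rumpsched_dumpstats(void)
{
	struct rumpcpu *rcpu;
	int i;

	for (i = 0; i < ncpu; i++) {
		rcpu = &rcpu_storage[i];
		printf("cpu%d: fastpath %u, slowpath %u, migrated %u\n",
		    i, rcpu->rcpu_fastpath, rcpu->rcpu_slowpath,
		    rcpu->rcpu_migrated);
		rcpu->rcpu_fastpath = rcpu->rcpu_slowpath =
		    rcpu->rcpu_migrated = 0;
	}
}
#endif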

struct cpu_info *
cpu_lookup(u_int index)
{

	return rcpu_storage[index].rcpu_ci;
}

static inline struct rumpcpu *
getnextcpu(void)
{
	unsigned newcpu;

	newcpu = atomic_inc_uint_nv(&nextcpu);
	/* reset the counter well before it wraps around */
	if (__predict_false(nextcpu > UINT_MAX/2))
		atomic_and_uint(&nextcpu, 0);
	newcpu = newcpu % ncpu;

	return &rcpu_storage[newcpu];
}

/* this could/should be mi_attach_cpu? */
void
rump_cpus_bootstrap(int *nump)
{
	int num = *nump;

	if (num > MAXCPUS) {
		aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) "
		    "available (adjusted)\n", num, MAXCPUS);
		num = MAXCPUS;
	}

	cpu_setmodel("rumpcore (virtual)");

	mi_cpu_init();

	/* attach first cpu for bootstrap */
	rump_cpu_attach(&rump_bootcpu);
	ncpu = 1;
	*nump = num;
}

void
rump_scheduler_init(int numcpu)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	rumpuser_mutex_init(&lwp0mtx, RUMPUSER_MTX_SPIN);
	rumpuser_cv_init(&lwp0cv);
	for (i = 0; i < numcpu; i++) {
		if (i == 0) {
			ci = &rump_bootcpu;
		} else {
			ci = kmem_zalloc(sizeof(*ci), KM_SLEEP);
			ci->ci_index = i;
		}

		rcpu = &rcpu_storage[i];
		rcpu->rcpu_ci = ci;
		rcpu->rcpu_wanted = 0;
		rumpuser_cv_init(&rcpu->rcpu_cv);
		rumpuser_mutex_init(&rcpu->rcpu_mtx, RUMPUSER_MTX_SPIN);

		ci->ci_schedstate.spc_mutex =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
		ci->ci_schedstate.spc_flags = SPCF_RUNNING;
	}

	mutex_init(&unruntime_lock, MUTEX_DEFAULT, IPL_SCHED);
}

void
rump_schedlock_cv_signal(struct cpu_info *ci, struct rumpuser_cv *cv)
{
	struct rumpcpu *rcpu = cpuinfo_to_rumpcpu(ci);

	rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	rumpuser_cv_signal(cv);
	rumpuser_mutex_exit(rcpu->rcpu_mtx);
}

/*
 * condvar ops using scheduler lock as the rumpuser interlock.
 */
void
rump_schedlock_cv_wait(struct rumpuser_cv *cv)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = cpuinfo_to_rumpcpu(l->l_cpu);

	/* mutex will be taken and released in cpu schedule/unschedule */
	rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
}

int
rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = cpuinfo_to_rumpcpu(l->l_cpu);

	/* mutex will be taken and released in cpu schedule/unschedule */
	return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
	    ts->tv_sec, ts->tv_nsec);
}

static void
lwp0busy(void)
{

	/* busy lwp0 */
	KASSERT(curlwp == NULL || curlwp->l_stat != LSONPROC);
	rumpuser_mutex_enter_nowrap(lwp0mtx);
	while (lwp0isbusy)
		rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
	lwp0isbusy = true;
	rumpuser_mutex_exit(lwp0mtx);
}

static void
lwp0rele(void)
{

	rumpuser_mutex_enter_nowrap(lwp0mtx);
	KASSERT(lwp0isbusy == true);
	lwp0isbusy = false;
	rumpuser_cv_signal(lwp0cv);
	rumpuser_mutex_exit(lwp0mtx);
}
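
/*
 * Illustrative sketch (not part of the original file): the rump kernel
 * bootstrap (rump_init()) is expected to call the two init routines
 * above roughly in the order below.  The function name and the CPU
 * count are hypothetical; normally the count comes from the host
 * configuration.
 */
#if 0
static void
example_bootstrap(void)
{
	int numcpu = 2;			/* hypothetical CPU count */

	rump_cpus_bootstrap(&numcpu);	/* clamps to MAXCPUS, attaches boot CPU */
	rump_scheduler_init(numcpu);	/* sets up per-CPU locks, cvs and run state */
}
#endif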

/*
 * rump_schedule: ensure that the calling host thread has a valid lwp context,
 * i.e. ensure that curlwp != NULL.  Also, ensure that there is a
 * 1:1 mapping between the lwp and a rump kernel cpu.
 */
void
rump_schedule()
{
	struct lwp *l;

	/*
	 * If there is no dedicated lwp, allocate a temp one and
	 * set it to be free'd upon unschedule().  Use lwp0 context
	 * for reserving the necessary resources.  Don't optimize
	 * for this case -- anyone who cares about performance will
	 * start a real thread.
	 */
	if (__predict_true((l = curlwp) != NULL)) {
		rump_schedule_cpu(l);
		LWP_CACHE_CREDS(l, l->l_proc);
	} else {
		lwp0busy();

		/* schedule cpu and use lwp0 */
		rump_schedule_cpu(&lwp0);
		rump_lwproc_curlwp_set(&lwp0);

		/* allocate thread, switch to it, and release lwp0 */
		l = rump__lwproc_alloclwp(initproc);
		rump_lwproc_switch(l);
		lwp0rele();

		/*
		 * mark new thread dead-on-unschedule.  this
		 * means that we'll be running with l_refcnt == 0.
		 * relax, it's fine.
		 */
		rump_lwproc_releaselwp();
	}
}

void
rump_schedule_cpu(struct lwp *l)
{

	rump_schedule_cpu_interlock(l, NULL);
}

/*
 * Schedule a CPU.  This optimizes for the case where we schedule
 * the same thread often, and we have nCPU >= nFrequently-Running-Thread
 * (where CPU is a virtual rump cpu, not a host CPU).
 */
void
rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;
	bool domigrate;
	bool bound = l->l_pflag & LP_BOUND;

	l->l_stat = LSRUN;

	/*
	 * First, try the fastpath: if we were the previous user of the
	 * CPU, everything is in order cachewise and we can just
	 * proceed to use it.
	 *
	 * If we are a different thread (i.e. the CAS fails), we must go
	 * through a memory barrier to ensure we get a truthful
	 * view of the world.
	 */

	KASSERT(l->l_target_cpu != NULL);
	rcpu = cpuinfo_to_rumpcpu(l->l_target_cpu);
	if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
		if (interlock == rcpu->rcpu_mtx)
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
		SCHED_FASTPATH(rcpu);
		/* jones, you're the man */
		goto fastlane;
	}

	/*
	 * Else, it's the slowpath for us.  First, determine if we
	 * can migrate.
	 */
	if (ncpu == 1)
		domigrate = false;
	else
		domigrate = true;

	/* Take lock.  This acts as a load barrier too. */
	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);

	for (;;) {
		SCHED_SLOWPATH(rcpu);
		old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);

		/* CPU is free? */
		if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
			if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
			    RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
				break;
			}
		}

		/*
		 * Do we want to migrate once?
		 * This may need a slightly better algorithm, or we
		 * might cache pingpong eternally for non-frequent
		 * threads.
		 */
		if (domigrate && !bound) {
			domigrate = false;
			SCHED_MIGRATED(rcpu);
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
			rcpu = getnextcpu();
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
			continue;
		}

		/* Want CPU, wait until it's released and retry */
		rcpu->rcpu_wanted++;
		rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
		rcpu->rcpu_wanted--;
	}
	rumpuser_mutex_exit(rcpu->rcpu_mtx);

 fastlane:
	ci = rcpu->rcpu_ci;
	l->l_cpu = l->l_target_cpu = ci;
	l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
	l->l_ncsw++;
	l->l_stat = LSONPROC;

	/*
	 * No interrupts, so ci_curlwp == ci_onproc.
	 * Okay, we could make an attempt to not set ci_onproc
	 * in the case that an interrupt is scheduled immediately
	 * after a user proc, but leave that for later.
	 */
	ci->ci_curlwp = ci->ci_onproc = l;
}
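
/*
 * Illustrative sketch (not part of the original file): the common way
 * to use this scheduler from a host thread is to bracket rump kernel
 * activity with rump_schedule()/rump_unschedule().  A host thread with
 * no lwp bound to it gets a temporary one, as described above.  The
 * function name below is hypothetical.
 */
#if 0
static void
example_kernel_entry(void)
{

	rump_schedule();	/* curlwp may have been NULL: temp lwp used */
	/* ... code requiring a kernel context and a rump CPU ... */
	rump_unschedule();	/* temp lwp, if any, is torn down here */
}
#endif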

void
rump_unschedule()
{
	struct lwp *l = curlwp;
#ifdef DIAGNOSTIC
	int nlock;

	KERNEL_UNLOCK_ALL(l, &nlock);
	KASSERT(nlock == 0);
#endif

	KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
	rump_unschedule_cpu(l);
	l->l_mutex = &unruntime_lock;
	l->l_stat = LSSTOP;

	/*
	 * Check special conditions:
	 *  1) do we need to free the lwp which just unscheduled?
	 *     (locking order: lwp0, cpu)
	 *  2) do we want to clear curlwp for the current host thread?
	 */
	if (__predict_false(l->l_flag & LW_WEXIT)) {
		lwp0busy();

		/* Now that we have lwp0, we can schedule a CPU again */
		rump_schedule_cpu(l);

		/* switch to lwp0.  this frees the old thread */
		KASSERT(l->l_flag & LW_WEXIT);
		rump_lwproc_switch(&lwp0);

		/* release lwp0 */
		rump_unschedule_cpu(&lwp0);
		lwp0.l_mutex = &unruntime_lock;
		lwp0.l_pflag &= ~LP_RUNNING;
		lwp0rele();
		rump_lwproc_curlwp_clear(&lwp0);

	} else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) {
		rump_lwproc_curlwp_clear(l);
		l->l_flag &= ~LW_RUMP_CLEAR;
	}
}

void
rump_unschedule_cpu(struct lwp *l)
{

	rump_unschedule_cpu_interlock(l, NULL);
}

void
rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
{

	if ((l->l_pflag & LP_INTR) == 0)
		rump_softint_run(l->l_cpu);
	rump_unschedule_cpu1(l, interlock);
}

void
rump_unschedule_cpu1(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;

	ci = l->l_cpu;
	ci->ci_curlwp = ci->ci_onproc = NULL;
	rcpu = cpuinfo_to_rumpcpu(ci);

	KASSERT(rcpu->rcpu_ci == ci);

	/*
	 * Make sure all stores are seen before the CPU release.  This
	 * is relevant only in the non-fastpath scheduling case, but
	 * we don't know here if that's going to happen, so we need to
	 * expect the worst.
	 *
	 * If the scheduler interlock was requested by the caller, we
	 * need to obtain it before we release the CPU.  Otherwise, we
	 * risk a race condition where another thread is scheduled onto
	 * the rump kernel CPU before our current thread can grab the
	 * interlock.
	 */
	if (interlock == rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	else
		membar_exit();

	/* Release the CPU. */
	old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);

	/* No waiters?  No problems.  We're outta here. */
	if (old == RCPULWP_BUSY) {
		return;
	}

	KASSERT(old == RCPULWP_WANTED);

	/*
	 * Ok, things weren't so snappy.
	 *
	 * Snailpath: take lock and signal anyone waiting for this CPU.
	 */

	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	if (rcpu->rcpu_wanted)
		rumpuser_cv_broadcast(rcpu->rcpu_cv);
	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_exit(rcpu->rcpu_mtx);
}
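
/*
 * Summary of the rcpu_prevlwp handshake used by the acquire/release
 * paths above: the field holds either the lwp which last ran on the
 * CPU (CPU free), RCPULWP_BUSY (CPU in use, nobody flagged as waiting),
 * or RCPULWP_WANTED (a slowpath acquire is in progress or waiters may
 * exist).  The fastpath acquire CASes "previous lwp" -> BUSY; the
 * slowpath swaps in WANTED and then either claims a free CPU, migrates
 * once, or sleeps on rcpu_cv.  Release swaps the owning lwp back in
 * and, if WANTED was found, takes rcpu_mtx and broadcasts rcpu_cv to
 * any waiters.
 */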

/* Give up and retake CPU (perhaps a different one) */
void
yield()
{
	struct lwp *l = curlwp;
	int nlocks;

	KERNEL_UNLOCK_ALL(l, &nlocks);
	rump_unschedule_cpu(l);
	rump_schedule_cpu(l);
	KERNEL_LOCK(nlocks, l);
}

void
preempt()
{

	yield();
}

bool
kpreempt(uintptr_t where)
{

	return false;
}

/*
 * There is no kernel thread preemption in rump currently.  But call
 * the implementing macros anyway in case they grow some side-effects
 * down the road.
 */
void
kpreempt_disable(void)
{

	KPREEMPT_DISABLE(curlwp);
}

void
kpreempt_enable(void)
{

	KPREEMPT_ENABLE(curlwp);
}

bool
kpreempt_disabled(void)
{
#if 0
	const lwp_t *l = curlwp;

	return l->l_nopreempt != 0 || l->l_stat == LSZOMB ||
	    (l->l_flag & LW_IDLE) != 0 || cpu_kpreempt_disabled();
#endif
	/* XXX: emulate cpu_kpreempt_disabled() */
	return true;
}

void
suspendsched(void)
{

	/*
	 * Could wait until everyone is out and block further entries,
	 * but skip that for now.
	 */
}

void
sched_nice(struct proc *p, int level)
{

	/* nothing to do for now */
}

void
setrunnable(struct lwp *l)
{

	sched_enqueue(l);
}

void
sched_enqueue(struct lwp *l)
{

	rump_thread_allow(l);
}

void
sched_resched_cpu(struct cpu_info *ci, pri_t pri, bool unlock)
{

}

void
sched_resched_lwp(struct lwp *l, bool unlock)
{

}

void
sched_dequeue(struct lwp *l)
{

	panic("sched_dequeue not implemented");
}

void
preempt_point(void)
{

}

bool
preempt_needed(void)
{

	return false;
}
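
/*
 * Illustrative sketch (not part of the original file): with only a few
 * virtual CPUs configured, a long-running rump kernel thread can use
 * yield() above to periodically release its CPU and let other lwps
 * run.  The function name is hypothetical.
 */
#if 0
static void
example_worker(void *arg)
{

	for (;;) {
		/* ... perform one batch of work ... */
		yield();	/* release and re-acquire a rump CPU */
	}
}
#endif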