/* $NetBSD: scheduler.c,v 1.51 2020/03/14 18:08:39 ad Exp $ */

/*
 * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.51 2020/03/14 18:08:39 ad Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/systm.h>

#include <rump-sys/kern.h>

#include <rump/rumpuser.h>

static struct rumpcpu {
        /* needed in fastpath */
        struct cpu_info *rcpu_ci;
        void *rcpu_prevlwp;

        /* needed in slowpath */
        struct rumpuser_mtx *rcpu_mtx;
        struct rumpuser_cv *rcpu_cv;
        int rcpu_wanted;

        /* offset 20 (P=4) or 36 (P=8) here */

        /*
         * Some stats.  Not really that necessary, but we should
         * have room.  Note that these overflow quite fast, so need
         * to be collected often.
         */
        unsigned int rcpu_fastpath;
        unsigned int rcpu_slowpath;
        unsigned int rcpu_migrated;

        /* offset 32 (P=4) or 50 (P=8) */

        int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
} rcpu_storage[MAXCPUS];
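
/*
 * The zero-length rcpu_align[] member above rounds each rcpu_storage[]
 * slot up to CACHE_LINE_SIZE so that two virtual CPUs never share a
 * cache line.  For illustration only (the names and line size below are
 * invented and this is not part of the build), the same layout can be
 * expressed with C11 alignment:
 *
 *        #include <stdalign.h>
 *
 *        #define EXAMPLE_CACHE_LINE 64
 *
 *        struct example_cpu {
 *                alignas(EXAMPLE_CACHE_LINE) void *ec_prevowner;
 *                unsigned int ec_fastpath;
 *        };
 *        static struct example_cpu example_cpus[8];
 *
 * Aligning the first member forces sizeof(struct example_cpu) to a
 * multiple of the line size, so per-CPU stores made by different host
 * threads do not false-share a cache line.
 */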

static inline struct rumpcpu *
cpuinfo_to_rumpcpu(struct cpu_info *ci)
{

        return &rcpu_storage[cpu_index(ci)];
}

struct cpu_info rump_bootcpu;

#define RCPULWP_BUSY    ((void *)-1)
#define RCPULWP_WANTED  ((void *)-2)

static struct rumpuser_mtx *lwp0mtx;
static struct rumpuser_cv *lwp0cv;
static unsigned nextcpu;

kmutex_t unruntime_lock; /* unruntime lwp lock.  practically unused */

static bool lwp0isbusy = false;

/*
 * Keep some stats.
 *
 * Keeping track of them is not really critical for speed, unless the
 * stats happen to end up on a different cache line than the hot
 * scheduler data (CACHE_LINE_SIZE is really just a coarse estimate),
 * so default to the performant case (i.e. no stats).
 */
#ifdef RUMPSCHED_STATS
#define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++;
#define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++;
#define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++;
#else
#define SCHED_FASTPATH(rcpu)
#define SCHED_SLOWPATH(rcpu)
#define SCHED_MIGRATED(rcpu)
#endif

struct cpu_info *
cpu_lookup(u_int index)
{

        return rcpu_storage[index].rcpu_ci;
}

static inline struct rumpcpu *
getnextcpu(void)
{
        unsigned newcpu;

        newcpu = atomic_inc_uint_nv(&nextcpu);
        if (__predict_false(ncpu > UINT_MAX/2))
                atomic_and_uint(&nextcpu, 0);
        newcpu = newcpu % ncpu;

        return &rcpu_storage[newcpu];
}

/* this could/should be mi_attach_cpu? */
void
rump_cpus_bootstrap(int *nump)
{
        int num = *nump;

        if (num > MAXCPUS) {
                aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) "
                    "available (adjusted)\n", num, MAXCPUS);
                num = MAXCPUS;
        }

        cpu_setmodel("rumpcore (virtual)");

        mi_cpu_init();

        /* attach first cpu for bootstrap */
        rump_cpu_attach(&rump_bootcpu);
        ncpu = 1;
        *nump = num;
}

void
rump_scheduler_init(int numcpu)
{
        struct rumpcpu *rcpu;
        struct cpu_info *ci;
        int i;

        rumpuser_mutex_init(&lwp0mtx, RUMPUSER_MTX_SPIN);
        rumpuser_cv_init(&lwp0cv);
        for (i = 0; i < numcpu; i++) {
                if (i == 0) {
                        ci = &rump_bootcpu;
                } else {
                        ci = kmem_zalloc(sizeof(*ci), KM_SLEEP);
                        ci->ci_index = i;
                }

                rcpu = &rcpu_storage[i];
                rcpu->rcpu_ci = ci;
                rcpu->rcpu_wanted = 0;
                rumpuser_cv_init(&rcpu->rcpu_cv);
                rumpuser_mutex_init(&rcpu->rcpu_mtx, RUMPUSER_MTX_SPIN);

                ci->ci_schedstate.spc_mutex =
                    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
                ci->ci_schedstate.spc_flags = SPCF_RUNNING;
        }

        mutex_init(&unruntime_lock, MUTEX_DEFAULT, IPL_SCHED);
}

/*
 * condvar ops using scheduler lock as the rumpuser interlock.
 */
void
rump_schedlock_cv_wait(struct rumpuser_cv *cv)
{
        struct lwp *l = curlwp;
        struct rumpcpu *rcpu = cpuinfo_to_rumpcpu(l->l_cpu);

        /* mutex will be taken and released in cpu schedule/unschedule */
        rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
}

int
rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
{
        struct lwp *l = curlwp;
        struct rumpcpu *rcpu = cpuinfo_to_rumpcpu(l->l_cpu);

        /* mutex will be taken and released in cpu schedule/unschedule */
        return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
            ts->tv_sec, ts->tv_nsec);
}

static void
lwp0busy(void)
{

        /* busy lwp0 */
        KASSERT(curlwp == NULL || curlwp->l_stat != LSONPROC);
        rumpuser_mutex_enter_nowrap(lwp0mtx);
        while (lwp0isbusy)
                rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
        lwp0isbusy = true;
        rumpuser_mutex_exit(lwp0mtx);
}

static void
lwp0rele(void)
{

        rumpuser_mutex_enter_nowrap(lwp0mtx);
        KASSERT(lwp0isbusy == true);
        lwp0isbusy = false;
        rumpuser_cv_signal(lwp0cv);
        rumpuser_mutex_exit(lwp0mtx);
}
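
/*
 * lwp0busy()/lwp0rele() above are a plain mutex + condvar + flag
 * construction, i.e. a binary semaphore guarding the lwp0 context.
 * A user-space analogue with POSIX threads, for illustration only
 * (the names are invented and this is not rump code):
 *
 *        #include <pthread.h>
 *        #include <stdbool.h>
 *
 *        static pthread_mutex_t busy_mtx = PTHREAD_MUTEX_INITIALIZER;
 *        static pthread_cond_t busy_cv = PTHREAD_COND_INITIALIZER;
 *        static bool busy;
 *
 *        static void
 *        resource_acquire(void)
 *        {
 *                pthread_mutex_lock(&busy_mtx);
 *                while (busy)
 *                        pthread_cond_wait(&busy_cv, &busy_mtx);
 *                busy = true;
 *                pthread_mutex_unlock(&busy_mtx);
 *        }
 *
 *        static void
 *        resource_release(void)
 *        {
 *                pthread_mutex_lock(&busy_mtx);
 *                busy = false;
 *                pthread_cond_signal(&busy_cv);
 *                pthread_mutex_unlock(&busy_mtx);
 *        }
 *
 * The while loop, rather than a plain if, protects against spurious
 * wakeups, exactly like the loop around rumpuser_cv_wait_nowrap() in
 * lwp0busy().
 */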

/*
 * rump_schedule: ensure that the calling host thread has a valid lwp context.
 * i.e. ensure that curlwp != NULL.  Also, ensure that there is
 * a 1:1 mapping between the lwp and rump kernel cpu.
 */
void
rump_schedule()
{
        struct lwp *l;

        /*
         * If there is no dedicated lwp, allocate a temp one and
         * set it to be free'd upon unschedule().  Use lwp0 context
         * for reserving the necessary resources.  Don't optimize
         * for this case -- anyone who cares about performance will
         * start a real thread.
         */
        if (__predict_true((l = curlwp) != NULL)) {
                rump_schedule_cpu(l);
                LWP_CACHE_CREDS(l, l->l_proc);
        } else {
                lwp0busy();

                /* schedule cpu and use lwp0 */
                rump_schedule_cpu(&lwp0);
                rump_lwproc_curlwp_set(&lwp0);

                /* allocate thread, switch to it, and release lwp0 */
                l = rump__lwproc_alloclwp(initproc);
                rump_lwproc_switch(l);
                lwp0rele();

                /*
                 * mark new thread dead-on-unschedule.  this
                 * means that we'll be running with l_refcnt == 0.
                 * relax, it's fine.
                 */
                rump_lwproc_releaselwp();
        }
}

void
rump_schedule_cpu(struct lwp *l)
{

        rump_schedule_cpu_interlock(l, NULL);
}

/*
 * Schedule a CPU.  This optimizes for the case where we schedule
 * the same thread often, and we have nCPU >= nFrequently-Running-Thread
 * (where CPU is virtual rump cpu, not host CPU).
 */
void
rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
{
        struct rumpcpu *rcpu;
        struct cpu_info *ci;
        void *old;
        bool domigrate;
        bool bound = l->l_pflag & LP_BOUND;

        l->l_stat = LSRUN;

        /*
         * First, try fastpath: if we were the previous user of the
         * CPU, everything is in order cachewise and we can just
         * proceed to use it.
         *
         * If we are a different thread (i.e. CAS fails), we must go
         * through a memory barrier to ensure we get a truthful
         * view of the world.
         */

        KASSERT(l->l_target_cpu != NULL);
        rcpu = cpuinfo_to_rumpcpu(l->l_target_cpu);
        if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
                if (interlock == rcpu->rcpu_mtx)
                        rumpuser_mutex_exit(rcpu->rcpu_mtx);
                SCHED_FASTPATH(rcpu);
                /* jones, you're the man */
                goto fastlane;
        }

        /*
         * Else, it's the slowpath for us.  First, determine if we
         * can migrate.
         */
        if (ncpu == 1)
                domigrate = false;
        else
                domigrate = true;

        /* Take lock.  This acts as a load barrier too. */
        if (interlock != rcpu->rcpu_mtx)
                rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);

        for (;;) {
                SCHED_SLOWPATH(rcpu);
                old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);

                /* CPU is free? */
                if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
                        if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
                            RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
                                break;
                        }
                }

                /*
                 * Do we want to migrate once?
                 * This may need a slightly better algorithm, or we
                 * might cache pingpong eternally for non-frequent
                 * threads.
                 */
                if (domigrate && !bound) {
                        domigrate = false;
                        SCHED_MIGRATED(rcpu);
                        rumpuser_mutex_exit(rcpu->rcpu_mtx);
                        rcpu = getnextcpu();
                        rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
                        continue;
                }

                /* Want CPU, wait until it's released and retry */
                rcpu->rcpu_wanted++;
                rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
                rcpu->rcpu_wanted--;
        }
        rumpuser_mutex_exit(rcpu->rcpu_mtx);

 fastlane:
        ci = rcpu->rcpu_ci;
        l->l_cpu = l->l_target_cpu = ci;
        l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
        l->l_ncsw++;
        l->l_stat = LSONPROC;

        /*
         * No interrupts, so ci_curlwp === cpu_onproc.
         * Okay, we could make an attempt to not set cpu_onproc
         * in the case that an interrupt is scheduled immediately
         * after a user proc, but leave that for later.
         */
        ci->ci_curlwp = ci->ci_onproc = l;
}
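
/*
 * The fastpath in rump_schedule_cpu_interlock() above is a single
 * compare-and-swap on rcpu_prevlwp: "if the previous user of this CPU
 * was me, mark it busy and run".  A stripped-down model using C11
 * atomics, for illustration only (the names are invented and this is
 * not rump code):
 *
 *        #include <stdatomic.h>
 *        #include <stdbool.h>
 *
 *        #define MODEL_BUSY ((void *)-1)
 *
 *        static _Atomic(void *) prev_owner;
 *
 *        static bool
 *        model_try_fast_acquire(void *self)
 *        {
 *                void *expected = self;
 *
 *                return atomic_compare_exchange_strong(&prev_owner,
 *                    &expected, MODEL_BUSY);
 *        }
 *
 * If the exchange fails, the caller falls back to the mutex/condvar
 * slowpath, just as the real code does under rcpu_mtx.
 */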

void
rump_unschedule()
{
        struct lwp *l = curlwp;
#ifdef DIAGNOSTIC
        int nlock;

        KERNEL_UNLOCK_ALL(l, &nlock);
        KASSERT(nlock == 0);
#endif

        KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
        rump_unschedule_cpu(l);
        l->l_mutex = &unruntime_lock;
        l->l_stat = LSSTOP;

        /*
         * Check special conditions:
         *  1) do we need to free the lwp which just unscheduled?
         *     (locking order: lwp0, cpu)
         *  2) do we want to clear curlwp for the current host thread
         */
        if (__predict_false(l->l_flag & LW_WEXIT)) {
                lwp0busy();

                /* Now that we have lwp0, we can schedule a CPU again */
                rump_schedule_cpu(l);

                /* switch to lwp0.  this frees the old thread */
                KASSERT(l->l_flag & LW_WEXIT);
                rump_lwproc_switch(&lwp0);

                /* release lwp0 */
                rump_unschedule_cpu(&lwp0);
                lwp0.l_mutex = &unruntime_lock;
                lwp0.l_pflag &= ~LP_RUNNING;
                lwp0rele();
                rump_lwproc_curlwp_clear(&lwp0);

        } else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) {
                rump_lwproc_curlwp_clear(l);
                l->l_flag &= ~LW_RUMP_CLEAR;
        }
}

void
rump_unschedule_cpu(struct lwp *l)
{

        rump_unschedule_cpu_interlock(l, NULL);
}

void
rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
{

        if ((l->l_pflag & LP_INTR) == 0)
                rump_softint_run(l->l_cpu);
        rump_unschedule_cpu1(l, interlock);
}

void
rump_unschedule_cpu1(struct lwp *l, void *interlock)
{
        struct rumpcpu *rcpu;
        struct cpu_info *ci;
        void *old;

        ci = l->l_cpu;
        ci->ci_curlwp = ci->ci_onproc = NULL;
        rcpu = cpuinfo_to_rumpcpu(ci);

        KASSERT(rcpu->rcpu_ci == ci);

        /*
         * Make sure all stores are seen before the CPU release.  This
         * is relevant only in the non-fastpath scheduling case, but
         * we don't know here if that's going to happen, so need to
         * expect the worst.
         *
         * If the scheduler interlock was requested by the caller, we
         * need to obtain it before we release the CPU.  Otherwise, we
         * risk a race condition where another thread is scheduled
         * onto the rump kernel CPU before our current thread can
         * grab the interlock.
         */
        if (interlock == rcpu->rcpu_mtx)
                rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
        else
                membar_exit();

        /* Release the CPU. */
        old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);

        /* No waiters?  No problems.  We're outta here. */
        if (old == RCPULWP_BUSY) {
                return;
        }

        KASSERT(old == RCPULWP_WANTED);

        /*
         * Ok, things weren't so snappy.
         *
         * Snailpath: take lock and signal anyone waiting for this CPU.
         */

        if (interlock != rcpu->rcpu_mtx)
                rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
        if (rcpu->rcpu_wanted)
                rumpuser_cv_broadcast(rcpu->rcpu_cv);
        if (interlock != rcpu->rcpu_mtx)
                rumpuser_mutex_exit(rcpu->rcpu_mtx);
}
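
/*
 * The release in rump_unschedule_cpu1() above is the mirror image of
 * the fastpath acquire: swap our own lwp pointer back into
 * rcpu_prevlwp and, if the old value says somebody is waiting, take
 * the lock and broadcast.  Continuing the C11 model sketched after
 * rump_schedule_cpu_interlock(), for illustration only (the names are
 * invented and this is not rump code):
 *
 *        #define MODEL_WANTED ((void *)-2)
 *
 *        static void
 *        model_release(void *self)
 *        {
 *                void *old;
 *
 *                old = atomic_exchange(&prev_owner, self);
 *                if (old == MODEL_WANTED)
 *                        model_wake_waiters();
 *        }
 *
 * model_wake_waiters() stands for the lock-and-broadcast snailpath.
 * The C11 exchange is sequentially consistent by default and already
 * orders the preceding stores; the real code gets the equivalent
 * ordering from membar_exit(), or from holding rcpu_mtx.
 */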

/* Give up and retake CPU (perhaps a different one) */
void
yield()
{
        struct lwp *l = curlwp;
        int nlocks;

        KERNEL_UNLOCK_ALL(l, &nlocks);
        rump_unschedule_cpu(l);
        rump_schedule_cpu(l);
        KERNEL_LOCK(nlocks, l);
}

void
preempt()
{

        yield();
}

bool
kpreempt(uintptr_t where)
{

        return false;
}

/*
 * There is no kernel thread preemption in rump currently.  But call
 * the implementing macros anyway in case they grow some side-effects
 * down the road.
 */
void
kpreempt_disable(void)
{

        KPREEMPT_DISABLE(curlwp);
}

void
kpreempt_enable(void)
{

        KPREEMPT_ENABLE(curlwp);
}

bool
kpreempt_disabled(void)
{
#if 0
        const lwp_t *l = curlwp;

        return l->l_nopreempt != 0 || l->l_stat == LSZOMB ||
            (l->l_flag & LW_IDLE) != 0 || cpu_kpreempt_disabled();
#endif
        /* XXX: emulate cpu_kpreempt_disabled() */
        return true;
}

void
suspendsched(void)
{

        /*
         * Could wait until everyone is out and block further entries,
         * but skip that for now.
         */
}

void
sched_nice(struct proc *p, int level)
{

        /* nothing to do for now */
}

void
setrunnable(struct lwp *l)
{

        sched_enqueue(l);
}

void
sched_enqueue(struct lwp *l)
{

        rump_thread_allow(l);
}

void
sched_resched_cpu(struct cpu_info *ci, pri_t pri, bool unlock)
{

}

void
sched_resched_lwp(struct lwp *l, bool unlock)
{

}

void
sched_dequeue(struct lwp *l)
{

        panic("sched_dequeue not implemented");
}

void
preempt_point(void)
{

}

bool
preempt_needed(void)
{

        return false;
}
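
/*
 * Usage summary (illustrative, not a definitive interface contract):
 * a host thread entering the rump kernel brackets the kernel work with
 * the scheduler entry points defined in this file, roughly
 *
 *        rump_schedule();
 *        ...run kernel code with a valid curlwp and a rump CPU...
 *        rump_unschedule();
 *
 * and may call yield() in between to give the virtual CPU up
 * temporarily, e.g. inside a long-running loop.
 */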