/*	$NetBSD: scheduler.c,v 1.24 2011/01/11 10:49:20 pooka Exp $	*/

/*
 * Copyright (c) 2010 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.24 2011/01/11 10:49:20 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/systm.h>

#include <rump/rumpuser.h>

#include "rump_private.h"

static struct cpu_info rump_cpus[MAXCPUS];
static struct rumpcpu {
	/* needed in fastpath */
	struct cpu_info *rcpu_ci;
	void *rcpu_prevlwp;

	/* needed in slowpath */
	struct rumpuser_mtx *rcpu_mtx;
	struct rumpuser_cv *rcpu_cv;
	int rcpu_wanted;

	/* offset 20 (P=4) or 36 (P=8) here */

	/*
	 * Some stats.  Not really that necessary, but we should
	 * have room.  Note that these overflow quite fast, so need
	 * to be collected often.
	 */
	unsigned int rcpu_fastpath;
	unsigned int rcpu_slowpath;
	unsigned int rcpu_migrated;

	/* offset 32 (P=4) or 50 (P=8) */

	int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
} rcpu_storage[MAXCPUS];
struct cpu_info *rump_cpu = &rump_cpus[0];
int ncpu;

#define RCPULWP_BUSY	((void *)-1)
#define RCPULWP_WANTED	((void *)-2)

static struct rumpuser_mtx *lwp0mtx;
static struct rumpuser_cv *lwp0cv;
static unsigned nextcpu;

static bool lwp0isbusy = false;

/*
 * Keep some stats.
 *
 * Keeping track of them is not really critical for speed, unless
 * stats happen to be on a different cache line (CACHE_LINE_SIZE is
 * really just a coarse estimate), so default for the performant case
 * (i.e. no stats).
 */
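/*
 * With RUMPSCHED_STATS defined at build time, the SCHED_* macros below
 * bump the per-CPU counters declared in struct rumpcpu; otherwise they
 * expand to nothing.  For example, in the stats case SCHED_FASTPATH(rcpu)
 * is simply:
 *
 *	rcpu->rcpu_fastpath++;
 */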
#ifdef RUMPSCHED_STATS
#define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++;
#define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++;
#define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++;
#else
#define SCHED_FASTPATH(rcpu)
#define SCHED_SLOWPATH(rcpu)
#define SCHED_MIGRATED(rcpu)
#endif

struct cpu_info *
cpu_lookup(u_int index)
{

	return &rump_cpus[index];
}

static inline struct rumpcpu *
getnextcpu(void)
{
	unsigned newcpu;

	newcpu = atomic_inc_uint_nv(&nextcpu);
	if (__predict_false(ncpu > UINT_MAX/2))
		atomic_and_uint(&nextcpu, 0);
	newcpu = newcpu % ncpu;

	return &rcpu_storage[newcpu];
}

/* this could/should be mi_attach_cpu? */
void
rump_cpus_bootstrap(int *nump)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int num = *nump;
	int i;

	if (num > MAXCPUS) {
		aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) "
		    "available (adjusted)\n", num, MAXCPUS);
		num = MAXCPUS;
	}

	for (i = 0; i < num; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		ci->ci_index = i;
	}

	/* attach first cpu for bootstrap */
	rump_cpu_attach(&rump_cpus[0]);
	ncpu = 1;
	*nump = num;
}

void
rump_scheduler_init(int numcpu)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	rumpuser_mutex_init(&lwp0mtx);
	rumpuser_cv_init(&lwp0cv);
	for (i = 0; i < numcpu; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		rcpu->rcpu_ci = ci;
		ci->ci_schedstate.spc_mutex =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
		ci->ci_schedstate.spc_flags = SPCF_RUNNING;
		rcpu->rcpu_wanted = 0;
		rumpuser_cv_init(&rcpu->rcpu_cv);
		rumpuser_mutex_init(&rcpu->rcpu_mtx);
	}
}

/*
 * condvar ops using scheduler lock as the rumpuser interlock.
 */
void
rump_schedlock_cv_wait(struct rumpuser_cv *cv)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
}

int
rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
	    ts->tv_sec, ts->tv_nsec);
}

static void
lwp0busy(void)
{

	/* busy lwp0 */
	KASSERT(curlwp == NULL || curlwp->l_cpu == NULL);
	rumpuser_mutex_enter_nowrap(lwp0mtx);
	while (lwp0isbusy)
		rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
	lwp0isbusy = true;
	rumpuser_mutex_exit(lwp0mtx);
}

static void
lwp0rele(void)
{

	rumpuser_mutex_enter_nowrap(lwp0mtx);
	KASSERT(lwp0isbusy == true);
	lwp0isbusy = false;
	rumpuser_cv_signal(lwp0cv);
	rumpuser_mutex_exit(lwp0mtx);
}
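/*
 * Usage sketch (illustrative only, hypothetical caller): a host thread
 * entering the rump kernel brackets the excursion with a schedule /
 * unschedule pair, which grants and later releases a virtual CPU:
 *
 *	rump_schedule();
 *	... call into rump kernel code with a virtual CPU held ...
 *	rump_unschedule();
 *
 * A thread without a dedicated lwp is given a temporary one, as
 * described in the comment inside rump_schedule() below.
 */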
void
rump_schedule()
{
	struct lwp *l;

	/*
	 * If there is no dedicated lwp, allocate a temp one and
	 * set it to be free'd upon unschedule().  Use lwp0 context
	 * for reserving the necessary resources.  Don't optimize
	 * for this case -- anyone who cares about performance will
	 * start a real thread.
	 */
	if (__predict_true((l = rumpuser_get_curlwp()) != NULL)) {
		rump_schedule_cpu(l);
		LWP_CACHE_CREDS(l, l->l_proc);
	} else {
		lwp0busy();

		/* schedule cpu and use lwp0 */
		rump_schedule_cpu(&lwp0);
		rumpuser_set_curlwp(&lwp0);

		/* allocate thread, switch to it, and release lwp0 */
		l = rump__lwproc_alloclwp(initproc);
		rump_lwproc_switch(l);
		lwp0rele();

		/*
		 * mark new thread dead-on-unschedule.  this
		 * means that we'll be running with l_refcnt == 0.
		 * relax, it's fine.
		 */
		rump_lwproc_releaselwp();
	}
}

void
rump_schedule_cpu(struct lwp *l)
{

	rump_schedule_cpu_interlock(l, NULL);
}
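/*
 * Ownership of a virtual CPU is tracked in rcpu_prevlwp, which holds
 * one of three kinds of values (summary of the code below):
 *
 *	lwp pointer	CPU is free; the pointed-to lwp used it last
 *	RCPULWP_BUSY	CPU is held by some lwp, no waiters known
 *	RCPULWP_WANTED	CPU is held and at least one thread is waiting
 *
 * The fastpath is a single CAS from "our lwp" to RCPULWP_BUSY; anything
 * else falls back to the mutex-protected slowpath.
 */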
/*
 * Schedule a CPU.  This optimizes for the case where we schedule
 * the same thread often, and we have nCPU >= nFrequently-Running-Thread
 * (where CPU is virtual rump cpu, not host CPU).
 */
void
rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	void *old;
	bool domigrate;
	bool bound = l->l_pflag & LP_BOUND;

	/*
	 * First, try fastpath: if we were the previous user of the
	 * CPU, everything is in order cachewise and we can just
	 * proceed to use it.
	 *
	 * If we are a different thread (i.e. CAS fails), we must go
	 * through a memory barrier to ensure we get a truthful
	 * view of the world.
	 */

	KASSERT(l->l_target_cpu != NULL);
	rcpu = &rcpu_storage[l->l_target_cpu-&rump_cpus[0]];
	if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
		if (__predict_true(interlock == rcpu->rcpu_mtx))
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
		SCHED_FASTPATH(rcpu);
		/* jones, you're the man */
		goto fastlane;
	}

	/*
	 * Else, it's the slowpath for us.  First, determine if we
	 * can migrate.
	 */
	if (ncpu == 1)
		domigrate = false;
	else
		domigrate = true;

	/* Take lock.  This acts as a load barrier too. */
	if (__predict_true(interlock != rcpu->rcpu_mtx))
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);

	for (;;) {
		SCHED_SLOWPATH(rcpu);
		old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);

		/* CPU is free? */
		if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
			if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
			    RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
				break;
			}
		}

		/*
		 * Do we want to migrate once?
		 * This may need a slightly better algorithm, or we
		 * might cache pingpong eternally for non-frequent
		 * threads.
		 */
		if (domigrate && !bound) {
			domigrate = false;
			SCHED_MIGRATED(rcpu);
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
			rcpu = getnextcpu();
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
			continue;
		}

		/* Want CPU, wait until it's released and retry */
		rcpu->rcpu_wanted++;
		rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
		rcpu->rcpu_wanted--;
	}
	rumpuser_mutex_exit(rcpu->rcpu_mtx);

 fastlane:
	l->l_cpu = l->l_target_cpu = rcpu->rcpu_ci;
	l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
	l->l_ncsw++;

	rcpu->rcpu_ci->ci_curlwp = l;
}
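/*
 * The unschedule path below mirrors the schedule path above: the
 * outgoing lwp publishes itself back into rcpu_prevlwp with an atomic
 * swap, preceded by membar_exit() so that its stores are visible to
 * whoever grabs the CPU next.  If a waiter has advertised itself with
 * RCPULWP_WANTED, the releasing side takes the snailpath and broadcasts
 * on rcpu_cv.
 */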
void
rump_unschedule()
{
	struct lwp *l = rumpuser_get_curlwp();
#ifdef DIAGNOSTIC
	int nlock;

	KERNEL_UNLOCK_ALL(l, &nlock);
	KASSERT(nlock == 0);
#endif

	KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
	rump_unschedule_cpu(l);
	l->l_mutex = NULL;

	/*
	 * Check special conditions:
	 *  1) do we need to free the lwp which just unscheduled?
	 *     (locking order: lwp0, cpu)
	 *  2) do we want to clear curlwp for the current host thread
	 */
	if (__predict_false(l->l_flag & LW_WEXIT)) {
		lwp0busy();

		/* Now that we have lwp0, we can schedule a CPU again */
		rump_schedule_cpu(l);

		/* switch to lwp0.  this frees the old thread */
		KASSERT(l->l_flag & LW_WEXIT);
		rump_lwproc_switch(&lwp0);

		/* release lwp0 */
		rump_unschedule_cpu(&lwp0);
		lwp0.l_mutex = NULL;
		lwp0.l_pflag &= ~LP_RUNNING;
		lwp0rele();
		rumpuser_set_curlwp(NULL);

	} else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) {
		rumpuser_set_curlwp(NULL);
		l->l_flag &= ~LW_RUMP_CLEAR;
	}
}

void
rump_unschedule_cpu(struct lwp *l)
{

	rump_unschedule_cpu_interlock(l, NULL);
}

void
rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
{

	if ((l->l_pflag & LP_INTR) == 0)
		rump_softint_run(l->l_cpu);
	rump_unschedule_cpu1(l, interlock);
}

void
rump_unschedule_cpu1(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;

	ci = l->l_cpu;
	ci->ci_curlwp = NULL;
	l->l_cpu = NULL;
	rcpu = &rcpu_storage[ci-&rump_cpus[0]];

	KASSERT(rcpu->rcpu_ci == ci);

	/*
	 * Make sure all stores are seen before the CPU release.  This
	 * is relevant only in the non-fastpath scheduling case, but
	 * we don't know here if that's going to happen, so need to
	 * expect the worst.
	 */
	membar_exit();

	/* Release the CPU. */
	old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);

	/* No waiters?  No problems.  We're outta here. */
	if (old == RCPULWP_BUSY) {
		/* Was the scheduler interlock requested? */
		if (__predict_false(interlock == rcpu->rcpu_mtx))
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
		return;
	}

	KASSERT(old == RCPULWP_WANTED);

	/*
	 * Ok, things weren't so snappy.
	 *
	 * Snailpath: take lock and signal anyone waiting for this CPU.
	 */

	rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	if (rcpu->rcpu_wanted)
		rumpuser_cv_broadcast(rcpu->rcpu_cv);

	if (__predict_true(interlock != rcpu->rcpu_mtx))
		rumpuser_mutex_exit(rcpu->rcpu_mtx);
}

/* Give up and retake CPU (perhaps a different one) */
void
yield()
{
	struct lwp *l = curlwp;
	int nlocks;

	KERNEL_UNLOCK_ALL(l, &nlocks);
	rump_unschedule_cpu(l);
	rump_schedule_cpu(l);
	KERNEL_LOCK(nlocks, l);
}

void
preempt()
{

	yield();
}

bool
kpreempt(uintptr_t where)
{

	return false;
}

/*
 * There is no kernel thread preemption in rump currently.  But call
 * the implementing macros anyway in case they grow some side-effects
 * down the road.
 */
void
kpreempt_disable(void)
{

	KPREEMPT_DISABLE(curlwp);
}

void
kpreempt_enable(void)
{

	KPREEMPT_ENABLE(curlwp);
}

void
suspendsched(void)
{

	/*
	 * Could wait until everyone is out and block further entries,
	 * but skip that for now.
	 */
}

void
sched_nice(struct proc *p, int level)
{

	/* nothing to do for now */
}
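/*
 * Illustrative note: since there is no in-kernel preemption, a rump
 * kernel thread which neither blocks nor exits holds its virtual CPU
 * until it gives it up explicitly.  A sketch only, with a hypothetical
 * work loop:
 *
 *	while (have_more_work())
 *		yield();
 */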