/*	$NetBSD: kern_runq.c,v 1.28 2009/12/30 23:49:59 rmind Exp $	*/

/*
 * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.28 2009/12/30 23:49:59 rmind Exp $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bitops.h>
#include <sys/cpu.h>
#include <sys/idle.h>
#include <sys/intr.h>
#include <sys/kmem.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/evcnt.h>

/*
 * Priority related definitions.
 */
#define	PRI_TS_COUNT	(NPRI_USER)
#define	PRI_RT_COUNT	(PRI_COUNT - PRI_TS_COUNT)
#define	PRI_HTS_RANGE	(PRI_TS_COUNT / 10)

#define	PRI_HIGHEST_TS	(MAXPRI_USER)

/*
 * Bits per map.
 */
#define	BITMAP_BITS	(32)
#define	BITMAP_SHIFT	(5)
#define	BITMAP_MSB	(0x80000000U)
#define	BITMAP_MASK	(BITMAP_BITS - 1)

/*
 * Structures, runqueue.
 */

const int	schedppq = 1;

typedef struct {
	TAILQ_HEAD(, lwp) q_head;
} queue_t;

typedef struct {
	/* Bitmap */
	uint32_t	r_bitmap[PRI_COUNT >> BITMAP_SHIFT];
	/* Counters */
	u_int		r_count;	/* Count of the threads */
	u_int		r_avgcount;	/* Average count of threads */
	u_int		r_mcount;	/* Count of migratable threads */
	/* Runqueues */
	queue_t		r_rt_queue[PRI_RT_COUNT];
	queue_t		r_ts_queue[PRI_TS_COUNT];
	/* Event counters */
	struct evcnt	r_ev_pull;
	struct evcnt	r_ev_push;
	struct evcnt	r_ev_stay;
	struct evcnt	r_ev_localize;
} runqueue_t;

static void *	sched_getrq(runqueue_t *, const pri_t);
#ifdef MULTIPROCESSOR
static lwp_t *	sched_catchlwp(struct cpu_info *);
static void	sched_balance(void *);
#endif
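
/*
 * Illustration of the bitmap indexing used below (a sketch only; it
 * assumes BITMAP_BITS = 32 and BITMAP_SHIFT = 5 as defined above): a
 * priority "prio" marks word prio >> BITMAP_SHIFT of r_bitmap, at bit
 * position BITMAP_MSB >> (prio & BITMAP_MASK).  For example, prio = 37
 * gives:
 *
 *	word = 37 >> 5 = 1
 *	bit  = 0x80000000U >> (37 & 31) = 0x04000000U
 *
 * Higher priorities within a word thus land on less significant bits, so
 * ffs() on a word finds the highest marked priority; sched_dequeue()
 * converts the result back with (i << BITMAP_SHIFT) + (BITMAP_BITS - q).
 */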

/*
 * Preemption control.
 */
int		sched_upreempt_pri = PRI_KERNEL;
#ifdef __HAVE_PREEMPTION
# ifdef DEBUG
int		sched_kpreempt_pri = 0;
# else
int		sched_kpreempt_pri = PRI_USER_RT;
# endif
#else
int		sched_kpreempt_pri = 1000;
#endif

/*
 * Migration and balancing.
 */
static u_int	cacheht_time;		/* Cache hotness time */
static u_int	min_catch;		/* Minimal LWP count for catching */
static u_int	balance_period;		/* Balance period */
static struct cpu_info *worker_ci;	/* Victim CPU */
#ifdef MULTIPROCESSOR
static struct callout balance_ch;	/* Callout of balancer */
#endif

void
runq_init(void)
{

	/* Balancing */
	worker_ci = curcpu();
	cacheht_time = mstohz(3);		/* ~3 ms */
	balance_period = mstohz(300);		/* ~300 ms */

	/* Minimal count of LWPs for catching */
	min_catch = 1;

	/* Initialize balancing callout and run it */
#ifdef MULTIPROCESSOR
	callout_init(&balance_ch, CALLOUT_MPSAFE);
	callout_setfunc(&balance_ch, sched_balance, NULL);
	callout_schedule(&balance_ch, balance_period);
#endif
}

void
sched_cpuattach(struct cpu_info *ci)
{
	runqueue_t *ci_rq;
	void *rq_ptr;
	u_int i, size;
	char *cpuname;

	if (ci->ci_schedstate.spc_lwplock == NULL) {
		ci->ci_schedstate.spc_lwplock =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
	}
	if (ci == lwp0.l_cpu) {
		/* Initialize the scheduler structure of the primary LWP */
		lwp0.l_mutex = ci->ci_schedstate.spc_lwplock;
	}
	if (ci->ci_schedstate.spc_mutex != NULL) {
		/* Already initialized. */
		return;
	}

	/* Allocate the run queue */
	size = roundup2(sizeof(runqueue_t), coherency_unit) + coherency_unit;
	rq_ptr = kmem_zalloc(size, KM_SLEEP);
	if (rq_ptr == NULL) {
		panic("sched_cpuattach: could not allocate the runqueue");
	}
	ci_rq = (void *)(roundup2((uintptr_t)(rq_ptr), coherency_unit));

	/* Initialize run queues */
	ci->ci_schedstate.spc_mutex =
	    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
	for (i = 0; i < PRI_RT_COUNT; i++)
		TAILQ_INIT(&ci_rq->r_rt_queue[i].q_head);
	for (i = 0; i < PRI_TS_COUNT; i++)
		TAILQ_INIT(&ci_rq->r_ts_queue[i].q_head);

	ci->ci_schedstate.spc_sched_info = ci_rq;

	cpuname = kmem_alloc(8, KM_SLEEP);
	snprintf(cpuname, 8, "cpu%d", cpu_index(ci));

	evcnt_attach_dynamic(&ci_rq->r_ev_pull, EVCNT_TYPE_MISC, NULL,
	   cpuname, "runqueue pull");
	evcnt_attach_dynamic(&ci_rq->r_ev_push, EVCNT_TYPE_MISC, NULL,
	   cpuname, "runqueue push");
	evcnt_attach_dynamic(&ci_rq->r_ev_stay, EVCNT_TYPE_MISC, NULL,
	   cpuname, "runqueue stay");
	evcnt_attach_dynamic(&ci_rq->r_ev_localize, EVCNT_TYPE_MISC, NULL,
	   cpuname, "runqueue localize");
}

/*
 * Control of the runqueue.
 */

static inline void *
sched_getrq(runqueue_t *ci_rq, const pri_t prio)
{

	KASSERT(prio < PRI_COUNT);
	return (prio <= PRI_HIGHEST_TS) ?
	    &ci_rq->r_ts_queue[prio].q_head :
	    &ci_rq->r_rt_queue[prio - PRI_HIGHEST_TS - 1].q_head;
}
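
/*
 * A worked example of the queue selection above (a sketch; it assumes
 * MAXPRI_USER = 63, which may differ between configurations): time-sharing
 * priorities 0..PRI_HIGHEST_TS index r_ts_queue directly, while anything
 * above is a real-time priority and indexes r_rt_queue relative to
 * PRI_HIGHEST_TS:
 *
 *	sched_getrq(ci_rq, 10) == &ci_rq->r_ts_queue[10].q_head
 *	sched_getrq(ci_rq, 64) == &ci_rq->r_rt_queue[0].q_head
 */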

void
sched_enqueue(struct lwp *l, bool swtch)
{
	runqueue_t *ci_rq;
	struct schedstate_percpu *spc;
	TAILQ_HEAD(, lwp) *q_head;
	const pri_t eprio = lwp_eprio(l);
	struct cpu_info *ci;
	int type;

	ci = l->l_cpu;
	spc = &ci->ci_schedstate;
	ci_rq = spc->spc_sched_info;
	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	/* Update the last run time on switch */
	if (__predict_true(swtch == true))
		l->l_rticksum += (hardclock_ticks - l->l_rticks);
	else if (l->l_rticks == 0)
		l->l_rticks = hardclock_ticks;

	/* Enqueue the thread */
	q_head = sched_getrq(ci_rq, eprio);
	if (TAILQ_EMPTY(q_head)) {
		u_int i;
		uint32_t q;

		/* Mark bit */
		i = eprio >> BITMAP_SHIFT;
		q = BITMAP_MSB >> (eprio & BITMAP_MASK);
		KASSERT((ci_rq->r_bitmap[i] & q) == 0);
		ci_rq->r_bitmap[i] |= q;
	}
	TAILQ_INSERT_TAIL(q_head, l, l_runq);
	ci_rq->r_count++;
	if ((l->l_pflag & LP_BOUND) == 0)
		ci_rq->r_mcount++;

	/*
	 * Update the value of highest priority in the runqueue,
	 * if priority of this thread is higher.
	 */
	if (eprio > spc->spc_maxpriority)
		spc->spc_maxpriority = eprio;

	sched_newts(l);

	/*
	 * Wake the chosen CPU or cause a preemption if the newly
	 * enqueued thread has higher priority.  Don't cause a
	 * preemption if the thread is yielding (swtch).
	 */
	if (!swtch && eprio > spc->spc_curpriority) {
		if (eprio >= sched_kpreempt_pri)
			type = RESCHED_KPREEMPT;
		else if (eprio >= sched_upreempt_pri)
			type = RESCHED_IMMED;
		else
			type = RESCHED_LAZY;
		cpu_need_resched(ci, type);
	}
}
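
/*
 * A sketch of the resched type chosen above, assuming the defaults set
 * earlier in this file (sched_upreempt_pri = PRI_KERNEL and, without
 * __HAVE_PREEMPTION, sched_kpreempt_pri = 1000):
 *
 *	eprio >= 1000        -> effectively never taken (no RESCHED_KPREEMPT)
 *	eprio >= PRI_KERNEL  -> RESCHED_IMMED (immediate preemption)
 *	otherwise            -> RESCHED_LAZY (preempt at the next opportunity)
 */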

void
sched_dequeue(struct lwp *l)
{
	runqueue_t *ci_rq;
	TAILQ_HEAD(, lwp) *q_head;
	struct schedstate_percpu *spc;
	const pri_t eprio = lwp_eprio(l);

	spc = &l->l_cpu->ci_schedstate;
	ci_rq = spc->spc_sched_info;
	KASSERT(lwp_locked(l, spc->spc_mutex));

	KASSERT(eprio <= spc->spc_maxpriority);
	KASSERT(ci_rq->r_bitmap[eprio >> BITMAP_SHIFT] != 0);
	KASSERT(ci_rq->r_count > 0);

	if (spc->spc_migrating == l)
		spc->spc_migrating = NULL;

	ci_rq->r_count--;
	if ((l->l_pflag & LP_BOUND) == 0)
		ci_rq->r_mcount--;

	q_head = sched_getrq(ci_rq, eprio);
	TAILQ_REMOVE(q_head, l, l_runq);
	if (TAILQ_EMPTY(q_head)) {
		u_int i;
		uint32_t q;

		/* Unmark bit */
		i = eprio >> BITMAP_SHIFT;
		q = BITMAP_MSB >> (eprio & BITMAP_MASK);
		KASSERT((ci_rq->r_bitmap[i] & q) != 0);
		ci_rq->r_bitmap[i] &= ~q;

		/*
		 * Update the value of highest priority in the runqueue, in
		 * case this was the last thread at the highest priority.
		 */
		if (eprio != spc->spc_maxpriority)
			return;

		do {
			if (ci_rq->r_bitmap[i] != 0) {
				q = ffs(ci_rq->r_bitmap[i]);
				spc->spc_maxpriority =
				    (i << BITMAP_SHIFT) + (BITMAP_BITS - q);
				return;
			}
		} while (i--);

		/* If not found - set the lowest value */
		spc->spc_maxpriority = 0;
	}
}

/*
 * Migration and balancing.
 */

#ifdef MULTIPROCESSOR

/* Estimate if LWP is cache-hot */
static inline bool
lwp_cache_hot(const struct lwp *l)
{

	if (__predict_false(l->l_slptime || l->l_rticks == 0))
		return false;

	return (hardclock_ticks - l->l_rticks <= cacheht_time);
}

/* Check if LWP can migrate to the chosen CPU */
static inline bool
sched_migratable(const struct lwp *l, struct cpu_info *ci)
{
	const struct schedstate_percpu *spc = &ci->ci_schedstate;
	KASSERT(lwp_locked(__UNCONST(l), NULL));

	/* CPU is offline */
	if (__predict_false(spc->spc_flags & SPCF_OFFLINE))
		return false;

	/* Affinity bind */
	if (__predict_false(l->l_flag & LW_AFFINITY))
		return kcpuset_isset(cpu_index(ci), l->l_affinity);

	/* Processor-set */
	return (spc->spc_psid == l->l_psid);
}

/*
 * Estimate whether the LWP should migrate to another CPU.
 * Return the chosen CPU, if migration is needed.
 */
struct cpu_info *
sched_takecpu(struct lwp *l)
{
	struct cpu_info *ci, *tci, *first, *next;
	struct schedstate_percpu *spc;
	runqueue_t *ci_rq, *ici_rq;
	pri_t eprio, lpri, pri;

	KASSERT(lwp_locked(l, NULL));

	/* If thread is strictly bound, do not estimate other CPUs */
	ci = l->l_cpu;
	if (l->l_pflag & LP_BOUND)
		return ci;

	spc = &ci->ci_schedstate;
	ci_rq = spc->spc_sched_info;

	/* Make sure that thread is in appropriate processor-set */
	if (__predict_true(spc->spc_psid == l->l_psid)) {
		/* If this thread's CPU is idle - run there */
		if (ci_rq->r_count == 0) {
			ci_rq->r_ev_stay.ev_count++;
			return ci;
		}
		/* Stay if thread is cache-hot */
		eprio = lwp_eprio(l);
		if (__predict_true(l->l_stat != LSIDL) &&
		    lwp_cache_hot(l) && eprio >= spc->spc_curpriority) {
			ci_rq->r_ev_stay.ev_count++;
			return ci;
		}
	} else {
		eprio = lwp_eprio(l);
	}

	/* Run on the current CPU if the thread's priority is higher */
	ci = curcpu();
	spc = &ci->ci_schedstate;
	if (eprio > spc->spc_curpriority && sched_migratable(l, ci)) {
		ci_rq = spc->spc_sched_info;
		ci_rq->r_ev_localize.ev_count++;
		return ci;
	}

	/*
	 * Look for the CPU with the lowest priority thread.  In case of
	 * equal priority, choose the CPU with the fewest threads.
	 */
	first = l->l_cpu;
	ci = first;
	tci = first;
	lpri = PRI_COUNT;
	do {
		next = CIRCLEQ_LOOP_NEXT(&cpu_queue, ci, ci_data.cpu_qchain);
		spc = &ci->ci_schedstate;
		ici_rq = spc->spc_sched_info;
		pri = max(spc->spc_curpriority, spc->spc_maxpriority);
		if (pri > lpri)
			continue;

		if (pri == lpri && ci_rq->r_count < ici_rq->r_count)
			continue;

		if (!sched_migratable(l, ci))
			continue;

		lpri = pri;
		tci = ci;
		ci_rq = ici_rq;
	} while (ci = next, ci != first);

	ci_rq = tci->ci_schedstate.spc_sched_info;
	ci_rq->r_ev_push.ev_count++;

	return tci;
}
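
/*
 * A small sketch of the selection loop above (illustrative values only):
 * consider two candidate CPUs, A with spc_curpriority 50 and B with
 * spc_curpriority 10, both with empty run queues (spc_maxpriority 0).
 * Then max(curpriority, maxpriority) is 50 for A and 10 for B, so B is
 * chosen; if both evaluated to the same value, the CPU with the smaller
 * r_count would be kept instead.
 */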

/*
 * Try to catch an LWP from the runqueue of another CPU.
 */
static struct lwp *
sched_catchlwp(struct cpu_info *ci)
{
	struct cpu_info *curci = curcpu();
	struct schedstate_percpu *spc, *curspc;
	TAILQ_HEAD(, lwp) *q_head;
	runqueue_t *ci_rq;
	struct lwp *l;

	curspc = &curci->ci_schedstate;
	spc = &ci->ci_schedstate;
	KASSERT(curspc->spc_psid == spc->spc_psid);

	ci_rq = spc->spc_sched_info;
	if (ci_rq->r_mcount < min_catch) {
		spc_unlock(ci);
		return NULL;
	}

	/* Take the highest priority thread */
	q_head = sched_getrq(ci_rq, spc->spc_maxpriority);
	l = TAILQ_FIRST(q_head);

	for (;;) {
		/* Check the first and subsequent entries in the queue */
		if (l == NULL) {
			break;
		}
		KASSERT(l->l_stat == LSRUN);

		/* Look for threads that are allowed to migrate */
		if ((l->l_pflag & LP_BOUND) || lwp_cache_hot(l) ||
		    !sched_migratable(l, curci)) {
			l = TAILQ_NEXT(l, l_runq);
			continue;
		}

		/* Grab the thread, and move it to the local run queue */
		sched_dequeue(l);

		/*
		 * If LWP is still context switching, we may need to
		 * spin-wait before changing its CPU.
		 */
		if (__predict_false(l->l_ctxswtch != 0)) {
			u_int count;
			count = SPINLOCK_BACKOFF_MIN;
			while (l->l_ctxswtch)
				SPINLOCK_BACKOFF(count);
		}
		l->l_cpu = curci;
		ci_rq->r_ev_pull.ev_count++;
		lwp_unlock_to(l, curspc->spc_mutex);
		sched_enqueue(l, false);
		return l;
	}
	spc_unlock(ci);

	return l;
}

/*
 * Periodic calculations for balancing.
 */
static void
sched_balance(void *nocallout)
{
	struct cpu_info *ci, *hci;
	runqueue_t *ci_rq;
	CPU_INFO_ITERATOR cii;
	u_int highest;

	hci = curcpu();
	highest = 0;

	/* Do lockless counting */
	for (CPU_INFO_FOREACH(cii, ci)) {
		ci_rq = ci->ci_schedstate.spc_sched_info;

		/* Average count of the threads */
		ci_rq->r_avgcount = (ci_rq->r_avgcount + ci_rq->r_mcount) >> 1;

		/* Look for the CPU with the highest average */
		if (ci_rq->r_avgcount > highest) {
			hci = ci;
			highest = ci_rq->r_avgcount;
		}
	}

	/* Update the worker */
	worker_ci = hci;

	if (nocallout == NULL)
		callout_schedule(&balance_ch, balance_period);
}
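
/*
 * The average above is a simple exponentially decaying mean of the
 * migratable-thread count.  For illustration (assumed values): with
 * r_mcount steady at 8 and r_avgcount starting at 0, successive balance
 * periods yield 4, 6, 7, 7, ...; a brief spike in r_mcount therefore
 * only shifts the average gradually.
 */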

/*
 * Called from each CPU's idle loop.
 */
void
sched_idle(void)
{
	struct cpu_info *ci = curcpu(), *tci = NULL;
	struct schedstate_percpu *spc, *tspc;
	runqueue_t *ci_rq;
	bool dlock = false;

	/* Check if there is a migrating LWP */
	spc = &ci->ci_schedstate;
	if (spc->spc_migrating == NULL)
		goto no_migration;

	spc_lock(ci);
	for (;;) {
		struct lwp *l;

		l = spc->spc_migrating;
		if (l == NULL)
			break;

		/*
		 * If this is a second attempt, and the target CPU has
		 * changed, drop the old lock.
		 */
		if (dlock == true && tci != l->l_target_cpu) {
			KASSERT(tci != NULL);
			spc_unlock(tci);
			dlock = false;
		}

		/*
		 * Nothing to do if the destination has changed to the
		 * local CPU, or the migration was done by another CPU.
		 */
		tci = l->l_target_cpu;
		if (tci == NULL || tci == ci) {
			spc->spc_migrating = NULL;
			l->l_target_cpu = NULL;
			break;
		}
		tspc = &tci->ci_schedstate;

		/*
		 * Double-lock the runqueues.
		 * We do that only once.
		 */
		if (dlock == false) {
			dlock = true;
			if (ci < tci) {
				spc_lock(tci);
			} else if (!mutex_tryenter(tspc->spc_mutex)) {
				spc_unlock(ci);
				spc_lock(tci);
				spc_lock(ci);
				/* Check the situation again.. */
				continue;
			}
		}

		/* Migrate the thread */
		KASSERT(l->l_stat == LSRUN);
		spc->spc_migrating = NULL;
		l->l_target_cpu = NULL;
		sched_dequeue(l);
		l->l_cpu = tci;
		lwp_setlock(l, tspc->spc_mutex);
		sched_enqueue(l, false);
		break;
	}
	if (dlock == true) {
		KASSERT(tci != NULL);
		spc_unlock(tci);
	}
	spc_unlock(ci);

no_migration:
	ci_rq = spc->spc_sched_info;
	if ((spc->spc_flags & SPCF_OFFLINE) != 0 || ci_rq->r_count != 0) {
		return;
	}

	/* Reset the counter, and call the balancer */
	ci_rq->r_avgcount = 0;
	sched_balance(ci);
	tci = worker_ci;
	tspc = &tci->ci_schedstate;
	if (ci == tci || spc->spc_psid != tspc->spc_psid)
		return;
	spc_dlock(ci, tci);
	(void)sched_catchlwp(tci);
	spc_unlock(ci);
}

#else

struct cpu_info *
sched_takecpu(struct lwp *l)
{

	return l->l_cpu;
}

void
sched_idle(void)
{

}
#endif	/* MULTIPROCESSOR */

/*
 * Scheduling statistics and balancing.
 */
void
sched_lwp_stats(struct lwp *l)
{
	int batch;

	KASSERT(lwp_locked(l, NULL));

	/* Update sleep time */
	if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
	    l->l_stat == LSSUSPENDED)
		l->l_slptime++;

	/*
	 * Mark the thread as CPU-bound if its accumulated run time exceeds
	 * its accumulated sleep time.  Only treat it as a batch thread if
	 * it was already CPU-bound on the previous check.
	 */
	batch = (l->l_rticksum > l->l_slpticksum);
	if (batch != 0) {
		if ((l->l_flag & LW_BATCH) == 0)
			batch = 0;
		l->l_flag |= LW_BATCH;
	} else
		l->l_flag &= ~LW_BATCH;

	/*
	 * If the thread is CPU-bound and never sleeps, it would occupy the
	 * CPU indefinitely.  In that case reset the last-sleep value and
	 * check it again later; if it is still zero, choose a migration
	 * target and clear the batch flag.
	 */
	if (batch && (l->l_slptime + l->l_slpticksum) == 0) {
		if (l->l_slpticks == 0) {
			if (l->l_target_cpu == NULL &&
			    (l->l_stat == LSRUN || l->l_stat == LSONPROC)) {
				struct cpu_info *ci = sched_takecpu(l);
				l->l_target_cpu = (ci != l->l_cpu) ? ci : NULL;
			}
			l->l_flag &= ~LW_BATCH;
		} else {
			l->l_slpticks = 0;
		}
	}

	/* Reset the time sums */
	l->l_slpticksum = 0;
	l->l_rticksum = 0;

	/* Scheduler-specific hook */
	sched_pstats_hook(l, batch);
}
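
/*
 * A sketch of the batch heuristic above: on the first interval in which a
 * thread's run time exceeds its sleep time, LW_BATCH is set but "batch"
 * remains zero for that call; only if the thread is still CPU-bound at the
 * next sched_lwp_stats() invocation is it treated as a batch thread, and
 * only after it also stops sleeping entirely is a migration target chosen.
 */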

/*
 * Scheduler mill.
 */
struct lwp *
sched_nextlwp(void)
{
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc;
	TAILQ_HEAD(, lwp) *q_head;
	runqueue_t *ci_rq;
	struct lwp *l;

	/* Return to the idle LWP if there is a migrating thread */
	spc = &ci->ci_schedstate;
	if (__predict_false(spc->spc_migrating != NULL))
		return NULL;
	ci_rq = spc->spc_sched_info;

#ifdef MULTIPROCESSOR
	/* If runqueue is empty, try to catch some thread from another CPU */
	if (__predict_false(ci_rq->r_count == 0)) {
		struct schedstate_percpu *cspc;
		struct cpu_info *cci;

		/* Offline CPUs should not perform this, however */
		if (__predict_false(spc->spc_flags & SPCF_OFFLINE))
			return NULL;

		/* Reset the counter, and call the balancer */
		ci_rq->r_avgcount = 0;
		sched_balance(ci);
		cci = worker_ci;
		cspc = &cci->ci_schedstate;
		if (ci == cci || spc->spc_psid != cspc->spc_psid ||
		    !mutex_tryenter(cci->ci_schedstate.spc_mutex))
			return NULL;
		return sched_catchlwp(cci);
	}
#else
	if (__predict_false(ci_rq->r_count == 0))
		return NULL;
#endif

	/* Take the highest priority thread */
	KASSERT(ci_rq->r_bitmap[spc->spc_maxpriority >> BITMAP_SHIFT]);
	q_head = sched_getrq(ci_rq, spc->spc_maxpriority);
	l = TAILQ_FIRST(q_head);
	KASSERT(l != NULL);

	sched_oncpu(l);
	l->l_rticks = hardclock_ticks;

	return l;
}

bool
sched_curcpu_runnable_p(void)
{
	const struct cpu_info *ci;
	const struct schedstate_percpu *spc;
	const runqueue_t *ci_rq;
	bool rv;

	kpreempt_disable();
	ci = curcpu();
	spc = &ci->ci_schedstate;
	ci_rq = spc->spc_sched_info;

#ifndef __HAVE_FAST_SOFTINTS
	if (ci->ci_data.cpu_softints) {
		kpreempt_enable();
		return true;
	}
#endif

	rv = (ci_rq->r_count != 0) ? true : false;
	kpreempt_enable();

	return rv;
}

/*
 * Sysctl nodes and initialization.
 */

SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
{
	const struct sysctlnode *node = NULL;

	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "kern", NULL,
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_EOL);
	sysctl_createv(clog, 0, NULL, &node,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "sched",
		SYSCTL_DESCR("Scheduler options"),
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);

	if (node == NULL)
		return;

	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "cacheht_time",
		SYSCTL_DESCR("Cache hotness time (in ticks)"),
		NULL, 0, &cacheht_time, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "balance_period",
		SYSCTL_DESCR("Balance period (in ticks)"),
		NULL, 0, &balance_period, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "min_catch",
		SYSCTL_DESCR("Minimal count of threads for catching"),
		NULL, 0, &min_catch, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "timesoftints",
		SYSCTL_DESCR("Track CPU time for soft interrupts"),
		NULL, 0, &softint_timing, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "kpreempt_pri",
		SYSCTL_DESCR("Minimum priority to trigger kernel preemption"),
		NULL, 0, &sched_kpreempt_pri, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "upreempt_pri",
		SYSCTL_DESCR("Minimum priority to trigger user preemption"),
		NULL, 0, &sched_upreempt_pri, 0,
		CTL_CREATE, CTL_EOL);
}
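
/*
 * The tunables created above live under the kern.sched sysctl node, so
 * (as an illustrative example, not taken from this file) they can be
 * inspected and adjusted at run time with sysctl(8), e.g.:
 *
 *	# sysctl kern.sched.min_catch
 *	# sysctl -w kern.sched.balance_period=150
 */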

/*
 * Debugging.
 */

#ifdef DDB

void
sched_print_runqueue(void (*pr)(const char *, ...)
    __attribute__((__format__(__printf__,1,2))))
{
	runqueue_t *ci_rq;
	struct cpu_info *ci, *tci;
	struct schedstate_percpu *spc;
	struct lwp *l;
	struct proc *p;
	CPU_INFO_ITERATOR cii;

	for (CPU_INFO_FOREACH(cii, ci)) {
		int i;

		spc = &ci->ci_schedstate;
		ci_rq = spc->spc_sched_info;

		(*pr)("Run-queue (CPU = %u):\n", ci->ci_index);
		(*pr)(" pid.lid = %d.%d, r_count = %u, r_avgcount = %u, "
		    "maxpri = %d, mlwp = %p\n",
#ifdef MULTIPROCESSOR
		    ci->ci_curlwp->l_proc->p_pid, ci->ci_curlwp->l_lid,
#else
		    curlwp->l_proc->p_pid, curlwp->l_lid,
#endif
		    ci_rq->r_count, ci_rq->r_avgcount, spc->spc_maxpriority,
		    spc->spc_migrating);
		i = (PRI_COUNT >> BITMAP_SHIFT) - 1;
		do {
			uint32_t q;
			q = ci_rq->r_bitmap[i];
			(*pr)(" bitmap[%d] => [ %d (0x%x) ]\n", i, ffs(q), q);
		} while (i--);
	}

	(*pr)("   %5s %4s %4s %10s %3s %18s %4s %4s %s\n",
	    "LID", "PRI", "EPRI", "FL", "ST", "LWP", "CPU", "TCI", "LRTICKS");

	PROCLIST_FOREACH(p, &allproc) {
		if ((p->p_flag & PK_MARKER) != 0)
			continue;
		(*pr)(" /- %d (%s)\n", (int)p->p_pid, p->p_comm);
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			ci = l->l_cpu;
			tci = l->l_target_cpu;
			(*pr)(" | %5d %4u %4u 0x%8.8x %3s %18p %4u %4d %u\n",
			    (int)l->l_lid, l->l_priority, lwp_eprio(l),
			    l->l_flag, l->l_stat == LSRUN ? "RQ" :
			    (l->l_stat == LSSLEEP ? "SQ" : "-"),
			    l, ci->ci_index, (tci ? tci->ci_index : -1),
			    (u_int)(hardclock_ticks - l->l_rticks));
		}
	}
}

#endif