/*      $NetBSD: scheduler.c,v 1.51 2020/03/14 18:08:39 ad Exp $	*/

/*
 * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.51 2020/03/14 18:08:39 ad Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/systm.h>

#include <rump-sys/kern.h>

#include <rump/rumpuser.h>

static struct rumpcpu {
	/* needed in fastpath */
	struct cpu_info *rcpu_ci;
	void *rcpu_prevlwp;

	/* needed in slowpath */
	struct rumpuser_mtx *rcpu_mtx;
	struct rumpuser_cv *rcpu_cv;
	int rcpu_wanted;

	/* offset 20 (P=4) or 36 (P=8) here */

	/*
	 * Some stats.  Not really that necessary, but we should
	 * have room.  Note that these overflow quite fast, so need
	 * to be collected often.
	 */
	unsigned int rcpu_fastpath;
	unsigned int rcpu_slowpath;
	unsigned int rcpu_migrated;

	/* offset 32 (P=4) or 48 (P=8) */

	int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
} rcpu_storage[MAXCPUS];

static inline struct rumpcpu *
cpuinfo_to_rumpcpu(struct cpu_info *ci)
{

	return &rcpu_storage[cpu_index(ci)];
}

struct cpu_info rump_bootcpu;

#define RCPULWP_BUSY	((void *)-1)
#define RCPULWP_WANTED	((void *)-2)
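
/*
 * rcpu_prevlwp doubles as the CPU ownership word.  It holds the lwp
 * that last released the virtual CPU (or NULL early on), RCPULWP_BUSY
 * while the CPU is held, or RCPULWP_WANTED once a slowpath waiter has
 * announced itself and the releasing thread must wake it up.  The
 * schedule/unschedule paths below manipulate it only with atomic
 * cas/swap operations.
 */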

static struct rumpuser_mtx *lwp0mtx;
static struct rumpuser_cv *lwp0cv;
static unsigned nextcpu;

kmutex_t unruntime_lock; /* unruntime lwp lock.  practically unused */

static bool lwp0isbusy = false;

/*
 * Keep some stats.
 *
 * Keeping track of them is not really critical for speed, unless
 * stats happen to be on a different cache line (CACHE_LINE_SIZE is
 * really just a coarse estimate), so default to the performant case
 * (i.e. no stats).
 */
#ifdef RUMPSCHED_STATS
#define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++;
#define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++;
#define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++;
#else
#define SCHED_FASTPATH(rcpu)
#define SCHED_SLOWPATH(rcpu)
#define SCHED_MIGRATED(rcpu)
#endif
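
/*
 * Note: the counters above are updated only when the rump kernel is
 * built with RUMPSCHED_STATS defined (for example by adding it to the
 * component's CPPFLAGS; the exact build knob is an assumption here),
 * otherwise the SCHED_* macros compile to nothing.
 */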

struct cpu_info *
cpu_lookup(u_int index)
{

	return rcpu_storage[index].rcpu_ci;
}

/*
 * Round-robin pick of a migration target, used by the scheduling
 * slowpath below when the preferred virtual CPU is contended.
 */
static inline struct rumpcpu *
getnextcpu(void)
{
	unsigned newcpu;

	newcpu = atomic_inc_uint_nv(&nextcpu);
	/* reset the counter well before it can wrap */
	if (__predict_false(newcpu > UINT_MAX/2))
		atomic_and_uint(&nextcpu, 0);
	newcpu = newcpu % ncpu;

	return &rcpu_storage[newcpu];
}

/* this could/should be mi_attach_cpu? */
void
rump_cpus_bootstrap(int *nump)
{
	int num = *nump;

	if (num > MAXCPUS) {
		aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) "
		    "available (adjusted)\n", num, MAXCPUS);
		num = MAXCPUS;
	}

	cpu_setmodel("rumpcore (virtual)");

	mi_cpu_init();

	/* attach first cpu for bootstrap */
	rump_cpu_attach(&rump_bootcpu);
	ncpu = 1;
	*nump = num;
}
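
/*
 * Note that only the boot CPU is attached above; cpu_info structures
 * for the remaining virtual CPUs are allocated below in
 * rump_scheduler_init(), with the rest of CPU attachment happening
 * later in the rump kernel bootstrap sequence.
 */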

void
rump_scheduler_init(int numcpu)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	rumpuser_mutex_init(&lwp0mtx, RUMPUSER_MTX_SPIN);
	rumpuser_cv_init(&lwp0cv);
	for (i = 0; i < numcpu; i++) {
		if (i == 0) {
			ci = &rump_bootcpu;
		} else {
			ci = kmem_zalloc(sizeof(*ci), KM_SLEEP);
			ci->ci_index = i;
		}

		rcpu = &rcpu_storage[i];
		rcpu->rcpu_ci = ci;
		rcpu->rcpu_wanted = 0;
		rumpuser_cv_init(&rcpu->rcpu_cv);
		rumpuser_mutex_init(&rcpu->rcpu_mtx, RUMPUSER_MTX_SPIN);

		ci->ci_schedstate.spc_mutex =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
		ci->ci_schedstate.spc_flags = SPCF_RUNNING;
	}

	mutex_init(&unruntime_lock, MUTEX_DEFAULT, IPL_SCHED);
}

/*
 * condvar ops using scheduler lock as the rumpuser interlock.
 */
void
rump_schedlock_cv_wait(struct rumpuser_cv *cv)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = cpuinfo_to_rumpcpu(l->l_cpu);

	/* mutex will be taken and released in cpu schedule/unschedule */
	rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
}

int
rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = cpuinfo_to_rumpcpu(l->l_cpu);

	/* mutex will be taken and released in cpu schedule/unschedule */
	return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
	    ts->tv_sec, ts->tv_nsec);
}
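
/*
 * The waits above run with the virtual CPU's rcpu_mtx as the rumpuser
 * interlock: the hypercall layer unschedules the rump CPU before
 * blocking and reschedules it on wakeup via the *_interlock variants
 * below, which is why the mutex is "taken and released in cpu
 * schedule/unschedule".
 */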

static void
lwp0busy(void)
{

	/* busy lwp0 */
	KASSERT(curlwp == NULL || curlwp->l_stat != LSONPROC);
	rumpuser_mutex_enter_nowrap(lwp0mtx);
	while (lwp0isbusy)
		rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
	lwp0isbusy = true;
	rumpuser_mutex_exit(lwp0mtx);
}

static void
lwp0rele(void)
{

	rumpuser_mutex_enter_nowrap(lwp0mtx);
	KASSERT(lwp0isbusy == true);
	lwp0isbusy = false;
	rumpuser_cv_signal(lwp0cv);
	rumpuser_mutex_exit(lwp0mtx);
}

/*
 * rump_schedule: ensure that the calling host thread has a valid lwp
 * context, i.e. ensure that curlwp != NULL.  Also, ensure that there
 * is a 1:1 mapping between the lwp and the rump kernel cpu.
 */
void
rump_schedule()
{
	struct lwp *l;

	/*
	 * If there is no dedicated lwp, allocate a temp one and
	 * set it to be free'd upon unschedule().  Use lwp0 context
	 * for reserving the necessary resources.  Don't optimize
	 * for this case -- anyone who cares about performance will
	 * start a real thread.
	 */
	if (__predict_true((l = curlwp) != NULL)) {
		rump_schedule_cpu(l);
		LWP_CACHE_CREDS(l, l->l_proc);
	} else {
		lwp0busy();

		/* schedule cpu and use lwp0 */
		rump_schedule_cpu(&lwp0);
		rump_lwproc_curlwp_set(&lwp0);

		/* allocate thread, switch to it, and release lwp0 */
		l = rump__lwproc_alloclwp(initproc);
		rump_lwproc_switch(l);
		lwp0rele();

		/*
		 * mark new thread dead-on-unschedule.  this
		 * means that we'll be running with l_refcnt == 0.
		 * relax, it's fine.
		 */
		rump_lwproc_releaselwp();
	}
}
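
/*
 * A minimal usage sketch (illustrative only; the real callers are the
 * rump kernel entry points, e.g. the syscall wrappers).  A host thread
 * brackets work that needs kernel context like this:
 *
 *	rump_schedule();
 *	error = some_kernel_operation();	(hypothetical call)
 *	rump_unschedule();
 *
 * After rump_schedule() returns, curlwp is valid and the thread holds
 * a virtual CPU until the matching rump_unschedule().
 */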

void
rump_schedule_cpu(struct lwp *l)
{

	rump_schedule_cpu_interlock(l, NULL);
}

/*
 * Schedule a CPU.  This optimizes for the case where we schedule
 * the same thread often, and we have nCPU >= nFrequently-Running-Thread
 * (where CPU is virtual rump cpu, not host CPU).
 */
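/*
 * If "interlock" is this virtual CPU's rcpu_mtx, the caller already
 * holds that mutex (typically after a schedlock condvar wait) and we
 * take ownership of it here, dropping it once the CPU is ours.  Any
 * other value means no interlock is involved and the mutex is taken
 * and released locally as needed.
 */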
void
rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;
	bool domigrate;
	bool bound = l->l_pflag & LP_BOUND;

	l->l_stat = LSRUN;

	/*
	 * First, try fastpath: if we were the previous user of the
	 * CPU, everything is in order cachewise and we can just
	 * proceed to use it.
	 *
	 * If we are a different thread (i.e. CAS fails), we must go
	 * through a memory barrier to ensure we get a truthful
	 * view of the world.
	 */

	KASSERT(l->l_target_cpu != NULL);
	rcpu = cpuinfo_to_rumpcpu(l->l_target_cpu);
	if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
		if (interlock == rcpu->rcpu_mtx)
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
		SCHED_FASTPATH(rcpu);
		/* jones, you're the man */
		goto fastlane;
	}

	/*
	 * Else, it's the slowpath for us.  First, determine if we
	 * can migrate.
	 */
	if (ncpu == 1)
		domigrate = false;
	else
		domigrate = true;

	/* Take lock.  This acts as a load barrier too. */
	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);

	for (;;) {
		SCHED_SLOWPATH(rcpu);
		old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);

		/* CPU is free? */
		if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
			if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
			    RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
				break;
			}
		}

		/*
		 * Do we want to migrate once?
		 * This may need a slightly better algorithm, or we
		 * might cache pingpong eternally for non-frequent
		 * threads.
		 */
		if (domigrate && !bound) {
			domigrate = false;
			SCHED_MIGRATED(rcpu);
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
			rcpu = getnextcpu();
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
			continue;
		}

		/* Want CPU, wait until it's released and retry */
		rcpu->rcpu_wanted++;
		rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
		rcpu->rcpu_wanted--;
	}
	rumpuser_mutex_exit(rcpu->rcpu_mtx);

 fastlane:
	ci = rcpu->rcpu_ci;
	l->l_cpu = l->l_target_cpu = ci;
	l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
	l->l_ncsw++;
	l->l_stat = LSONPROC;

	/*
	 * No interrupts, so ci_curlwp === cpu_onproc.
	 * Okay, we could make an attempt to not set cpu_onproc
	 * in the case that an interrupt is scheduled immediately
	 * after a user proc, but leave that for later.
	 */
	ci->ci_curlwp = ci->ci_onproc = l;
}

void
rump_unschedule()
{
	struct lwp *l = curlwp;
#ifdef DIAGNOSTIC
	int nlock;

	KERNEL_UNLOCK_ALL(l, &nlock);
	KASSERT(nlock == 0);
#endif

	KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
	rump_unschedule_cpu(l);
	l->l_mutex = &unruntime_lock;
	l->l_stat = LSSTOP;

	/*
	 * Check special conditions:
	 *  1) do we need to free the lwp which just unscheduled?
	 *     (locking order: lwp0, cpu)
	 *  2) do we want to clear curlwp for the current host thread?
	 */
	if (__predict_false(l->l_flag & LW_WEXIT)) {
		lwp0busy();

		/* Now that we have lwp0, we can schedule a CPU again */
		rump_schedule_cpu(l);

		/* switch to lwp0.  this frees the old thread */
		KASSERT(l->l_flag & LW_WEXIT);
		rump_lwproc_switch(&lwp0);

		/* release lwp0 */
		rump_unschedule_cpu(&lwp0);
		lwp0.l_mutex = &unruntime_lock;
		lwp0.l_pflag &= ~LP_RUNNING;
		lwp0rele();
		rump_lwproc_curlwp_clear(&lwp0);

	} else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) {
		rump_lwproc_curlwp_clear(l);
		l->l_flag &= ~LW_RUMP_CLEAR;
	}
}

void
rump_unschedule_cpu(struct lwp *l)
{

	rump_unschedule_cpu_interlock(l, NULL);
}

void
rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
{

	if ((l->l_pflag & LP_INTR) == 0)
		rump_softint_run(l->l_cpu);
	rump_unschedule_cpu1(l, interlock);
}

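/*
 * Release the rump CPU.  If the caller passes the virtual CPU's
 * rcpu_mtx as the interlock, the mutex is acquired here and handed
 * back to the caller still held (matching the schedlock condvar
 * protocol); otherwise it is only taken briefly on the slowpath.
 */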
void
rump_unschedule_cpu1(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;

	ci = l->l_cpu;
	ci->ci_curlwp = ci->ci_onproc = NULL;
	rcpu = cpuinfo_to_rumpcpu(ci);

	KASSERT(rcpu->rcpu_ci == ci);

	/*
	 * Make sure all stores are seen before the CPU release.  This
	 * is relevant only in the non-fastpath scheduling case, but
	 * we don't know here if that's going to happen, so need to
	 * expect the worst.
	 *
	 * If the scheduler interlock was requested by the caller, we
	 * need to obtain it before we release the CPU.  Otherwise, we
	 * risk a race condition where another thread is scheduled onto
	 * the rump kernel CPU before our current thread can grab the
	 * interlock.
	 */
	if (interlock == rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	else
		membar_exit();

	/* Release the CPU. */
	old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);

	/* No waiters?  No problems.  We're outta here. */
	if (old == RCPULWP_BUSY) {
		return;
	}

	KASSERT(old == RCPULWP_WANTED);

	/*
	 * Ok, things weren't so snappy.
	 *
	 * Snailpath: take lock and signal anyone waiting for this CPU.
	 */

	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	if (rcpu->rcpu_wanted)
		rumpuser_cv_broadcast(rcpu->rcpu_cv);
	if (interlock != rcpu->rcpu_mtx)
		rumpuser_mutex_exit(rcpu->rcpu_mtx);
}

/* Give up and retake CPU (perhaps a different one) */
void
yield()
{
	struct lwp *l = curlwp;
	int nlocks;

	KERNEL_UNLOCK_ALL(l, &nlocks);
	rump_unschedule_cpu(l);
	rump_schedule_cpu(l);
	KERNEL_LOCK(nlocks, l);
}

void
preempt()
{

	yield();
}

bool
kpreempt(uintptr_t where)
{

	return false;
}

/*
 * There is no kernel thread preemption in rump currently.  But call
 * the implementing macros anyway in case they grow some side-effects
 * down the road.
 */
void
kpreempt_disable(void)
{

	KPREEMPT_DISABLE(curlwp);
}

void
kpreempt_enable(void)
{

	KPREEMPT_ENABLE(curlwp);
}

bool
kpreempt_disabled(void)
{
#if 0
	const lwp_t *l = curlwp;

	return l->l_nopreempt != 0 || l->l_stat == LSZOMB ||
	    (l->l_flag & LW_IDLE) != 0 || cpu_kpreempt_disabled();
#endif
	/* XXX: emulate cpu_kpreempt_disabled() */
	return true;
}

void
suspendsched(void)
{

	/*
	 * Could wait until everyone is out and block further entries,
	 * but skip that for now.
	 */
}

void
sched_nice(struct proc *p, int level)
{

	/* nothing to do for now */
}

void
setrunnable(struct lwp *l)
{

	sched_enqueue(l);
}

void
sched_enqueue(struct lwp *l)
{

	rump_thread_allow(l);
}

void
sched_resched_cpu(struct cpu_info *ci, pri_t pri, bool unlock)
{

}

void
sched_resched_lwp(struct lwp *l, bool unlock)
{

}

void
sched_dequeue(struct lwp *l)
{

	panic("sched_dequeue not implemented");
}

void
preempt_point(void)
{

}

bool
preempt_needed(void)
{

	return false;
}