/*      $NetBSD: scheduler.c,v 1.25 2011/01/28 16:58:28 pooka Exp $	*/

/*
 * Copyright (c) 2010 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.25 2011/01/28 16:58:28 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/systm.h>

#include <rump/rumpuser.h>

#include "rump_private.h"

static struct cpu_info rump_cpus[MAXCPUS];
static struct rumpcpu {
	/* needed in fastpath */
	struct cpu_info *rcpu_ci;
	void *rcpu_prevlwp;

	/* needed in slowpath */
	struct rumpuser_mtx *rcpu_mtx;
	struct rumpuser_cv *rcpu_cv;
	int rcpu_wanted;

	/* offset 20 (P=4) or 36 (P=8) here */

	/*
	 * Some stats.  Not really that necessary, but we should
	 * have room.  Note that these overflow quite fast, so need
	 * to be collected often.
	 */
	unsigned int rcpu_fastpath;
	unsigned int rcpu_slowpath;
	unsigned int rcpu_migrated;

	/* offset 32 (P=4) or 48 (P=8) */

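	/*
	 * The zero-length member below exists only for its alignment
	 * attribute: it rounds the size of struct rumpcpu up to a
	 * multiple of CACHE_LINE_SIZE so that the fastpath data of
	 * different virtual CPUs does not share cache lines.
	 */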
	int rcpu_align[0] __aligned(CACHE_LINE_SIZE);
} rcpu_storage[MAXCPUS];
struct cpu_info *rump_cpu = &rump_cpus[0];
int ncpu;

#define RCPULWP_BUSY	((void *)-1)
#define RCPULWP_WANTED	((void *)-2)
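
/*
 * rcpu_prevlwp doubles as the CPU ownership word:
 *
 *	NULL		CPU is free, no previous user cached
 *	lwp pointer	CPU is free and was last used by that lwp, which
 *			may reclaim it with a single CAS (the fastpath)
 *	RCPULWP_BUSY	CPU is currently owned by some lwp
 *	RCPULWP_WANTED	a waiter has announced itself; whoever releases
 *			the CPU must take the slowpath and wake it up
 */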

static struct rumpuser_mtx *lwp0mtx;
static struct rumpuser_cv *lwp0cv;
static unsigned nextcpu;

kmutex_t unruntime_lock; /* unruntime lwp lock.  practically unused */

static bool lwp0isbusy = false;

/*
 * Keep some stats.
 *
 * Keeping track of them is not really critical for speed, unless
 * the stats happen to be on a different cache line (CACHE_LINE_SIZE
 * is really just a coarse estimate), so default to the performant
 * case (i.e. no stats).
 */
#ifdef RUMPSCHED_STATS
#define SCHED_FASTPATH(rcpu) rcpu->rcpu_fastpath++;
#define SCHED_SLOWPATH(rcpu) rcpu->rcpu_slowpath++;
#define SCHED_MIGRATED(rcpu) rcpu->rcpu_migrated++;
#else
#define SCHED_FASTPATH(rcpu)
#define SCHED_SLOWPATH(rcpu)
#define SCHED_MIGRATED(rcpu)
#endif

struct cpu_info *
cpu_lookup(u_int index)
{

	return &rump_cpus[index];
}

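/*
 * Pick a CPU for a thread that wants to migrate.  This is a simple
 * free-running round-robin; distribution only needs to be roughly
 * fair, so the racy reset of nextcpu before it wraps is harmless.
 */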
static inline struct rumpcpu *
getnextcpu(void)
{
	unsigned newcpu;

	newcpu = atomic_inc_uint_nv(&nextcpu);
	if (__predict_false(newcpu > UINT_MAX/2))
		atomic_and_uint(&nextcpu, 0);
	newcpu = newcpu % ncpu;

	return &rcpu_storage[newcpu];
}

/* this could/should be mi_attach_cpu? */
void
rump_cpus_bootstrap(int *nump)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int num = *nump;
	int i;

	if (num > MAXCPUS) {
		aprint_verbose("CPU limit: %d wanted, %d (MAXCPUS) "
		    "available (adjusted)\n", num, MAXCPUS);
		num = MAXCPUS;
	}

	for (i = 0; i < num; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		ci->ci_index = i;
	}

	/* attach first cpu for bootstrap */
	rump_cpu_attach(&rump_cpus[0]);
	ncpu = 1;
	*nump = num;
}

void
rump_scheduler_init(int numcpu)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	int i;

	rumpuser_mutex_init(&lwp0mtx);
	rumpuser_cv_init(&lwp0cv);
	for (i = 0; i < numcpu; i++) {
		rcpu = &rcpu_storage[i];
		ci = &rump_cpus[i];
		rcpu->rcpu_ci = ci;
		ci->ci_schedstate.spc_mutex =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
		ci->ci_schedstate.spc_flags = SPCF_RUNNING;
		rcpu->rcpu_wanted = 0;
		rumpuser_cv_init(&rcpu->rcpu_cv);
		rumpuser_mutex_init(&rcpu->rcpu_mtx);
	}

	mutex_init(&unruntime_lock, MUTEX_DEFAULT, IPL_NONE);
}

/*
 * condvar ops using scheduler lock as the rumpuser interlock.
 */
void
rump_schedlock_cv_wait(struct rumpuser_cv *cv)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	rumpuser_cv_wait(cv, rcpu->rcpu_mtx);
}

int
rump_schedlock_cv_timedwait(struct rumpuser_cv *cv, const struct timespec *ts)
{
	struct lwp *l = curlwp;
	struct rumpcpu *rcpu = &rcpu_storage[l->l_cpu-&rump_cpus[0]];

	/* mutex will be taken and released in cpu schedule/unschedule */
	return rumpuser_cv_timedwait(cv, rcpu->rcpu_mtx,
	    ts->tv_sec, ts->tv_nsec);
}

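/*
 * lwp0 is a shared, temporary scheduling context: rump_schedule() uses
 * it to bootstrap an implicit thread and rump_unschedule() uses it to
 * reap an exiting one.  Only one host thread may borrow it at a time,
 * so access is serialized with lwp0mtx/lwp0cv.
 */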
static void
lwp0busy(void)
{

	/* busy lwp0 */
	KASSERT(curlwp == NULL || curlwp->l_stat != LSONPROC);
	rumpuser_mutex_enter_nowrap(lwp0mtx);
	while (lwp0isbusy)
		rumpuser_cv_wait_nowrap(lwp0cv, lwp0mtx);
	lwp0isbusy = true;
	rumpuser_mutex_exit(lwp0mtx);
}

static void
lwp0rele(void)
{

	rumpuser_mutex_enter_nowrap(lwp0mtx);
	KASSERT(lwp0isbusy == true);
	lwp0isbusy = false;
	rumpuser_cv_signal(lwp0cv);
	rumpuser_mutex_exit(lwp0mtx);
}

void
rump_schedule()
{
	struct lwp *l;

	/*
	 * If there is no dedicated lwp, allocate a temp one and
	 * set it to be free'd upon unschedule().  Use lwp0 context
	 * for reserving the necessary resources.  Don't optimize
	 * for this case -- anyone who cares about performance will
	 * start a real thread.
	 */
	if (__predict_true((l = rumpuser_get_curlwp()) != NULL)) {
		rump_schedule_cpu(l);
		LWP_CACHE_CREDS(l, l->l_proc);
	} else {
		lwp0busy();

		/* schedule cpu and use lwp0 */
		rump_schedule_cpu(&lwp0);
		rumpuser_set_curlwp(&lwp0);

		/* allocate thread, switch to it, and release lwp0 */
		l = rump__lwproc_alloclwp(initproc);
		rump_lwproc_switch(l);
		lwp0rele();

		/*
		 * mark new thread dead-on-unschedule.  this
		 * means that we'll be running with l_refcnt == 0.
		 * relax, it's fine.
		 */
		rump_lwproc_releaselwp();
	}
}

void
rump_schedule_cpu(struct lwp *l)
{

	rump_schedule_cpu_interlock(l, NULL);
}

/*
 * Schedule a CPU.  This optimizes for the case where we schedule
 * the same thread often, and we have nCPU >= nFrequently-Running-Thread
 * (where CPU is virtual rump cpu, not host CPU).
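 *
 * The interlock follows the convention of rump_schedlock_cv_wait():
 * if "interlock" is the target CPU's rcpu_mtx, the caller already
 * holds that mutex and we release it here, either immediately on the
 * fastpath or at the end of the slowpath.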
 */
void
rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	void *old;
	bool domigrate;
	bool bound = l->l_pflag & LP_BOUND;

	l->l_stat = LSRUN;

	/*
	 * First, try fastpath: if we were the previous user of the
	 * CPU, everything is in order cachewise and we can just
	 * proceed to use it.
	 *
	 * If we are a different thread (i.e. CAS fails), we must go
	 * through a memory barrier to ensure we get a truthful
	 * view of the world.
	 */

	KASSERT(l->l_target_cpu != NULL);
	rcpu = &rcpu_storage[l->l_target_cpu-&rump_cpus[0]];
	if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
		if (__predict_true(interlock == rcpu->rcpu_mtx))
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
		SCHED_FASTPATH(rcpu);
		/* jones, you're the man */
		goto fastlane;
	}

	/*
	 * Else, it's the slowpath for us.  First, determine if we
	 * can migrate.
	 */
	if (ncpu == 1)
		domigrate = false;
	else
		domigrate = true;

	/* Take lock.  This acts as a load barrier too. */
	if (__predict_true(interlock != rcpu->rcpu_mtx))
		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);

	for (;;) {
		SCHED_SLOWPATH(rcpu);
		old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, RCPULWP_WANTED);

		/* CPU is free? */
		if (old != RCPULWP_BUSY && old != RCPULWP_WANTED) {
			if (atomic_cas_ptr(&rcpu->rcpu_prevlwp,
			    RCPULWP_WANTED, RCPULWP_BUSY) == RCPULWP_WANTED) {
				break;
			}
		}

		/*
		 * Do we want to migrate once?
		 * This may need a slightly better algorithm, or we
		 * might cache pingpong eternally for non-frequent
		 * threads.
		 */
		if (domigrate && !bound) {
			domigrate = false;
			SCHED_MIGRATED(rcpu);
			rumpuser_mutex_exit(rcpu->rcpu_mtx);
			rcpu = getnextcpu();
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
			continue;
		}

		/* Want CPU, wait until it's released and retry */
		rcpu->rcpu_wanted++;
		rumpuser_cv_wait_nowrap(rcpu->rcpu_cv, rcpu->rcpu_mtx);
		rcpu->rcpu_wanted--;
	}
	rumpuser_mutex_exit(rcpu->rcpu_mtx);

 fastlane:
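	/*
	 * We now own the target CPU: bind the lwp to it, adopt the
	 * CPU's scheduler mutex as the lwp lock, account for the
	 * context switch and mark ourselves running.
	 */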
	l->l_cpu = l->l_target_cpu = rcpu->rcpu_ci;
	l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
	l->l_ncsw++;
	l->l_stat = LSONPROC;

	rcpu->rcpu_ci->ci_curlwp = l;
}

void
rump_unschedule()
{
	struct lwp *l = rumpuser_get_curlwp();
#ifdef DIAGNOSTIC
	int nlock;

	KERNEL_UNLOCK_ALL(l, &nlock);
	KASSERT(nlock == 0);
#endif

	KASSERT(l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex);
	rump_unschedule_cpu(l);
	l->l_mutex = &unruntime_lock;
	l->l_stat = LSSTOP;

	/*
	 * Check special conditions:
	 *  1) do we need to free the lwp which just unscheduled?
	 *     (locking order: lwp0, cpu)
	 *  2) do we want to clear curlwp for the current host thread
	 */
	if (__predict_false(l->l_flag & LW_WEXIT)) {
		lwp0busy();

		/* Now that we have lwp0, we can schedule a CPU again */
		rump_schedule_cpu(l);

		/* switch to lwp0.  this frees the old thread */
		KASSERT(l->l_flag & LW_WEXIT);
		rump_lwproc_switch(&lwp0);

		/* release lwp0 */
		rump_unschedule_cpu(&lwp0);
		lwp0.l_mutex = &unruntime_lock;
		lwp0.l_pflag &= ~LP_RUNNING;
		lwp0rele();
		rumpuser_set_curlwp(NULL);

	} else if (__predict_false(l->l_flag & LW_RUMP_CLEAR)) {
		rumpuser_set_curlwp(NULL);
		l->l_flag &= ~LW_RUMP_CLEAR;
	}
}

void
rump_unschedule_cpu(struct lwp *l)
{

	rump_unschedule_cpu_interlock(l, NULL);
}

void
rump_unschedule_cpu_interlock(struct lwp *l, void *interlock)
{

	if ((l->l_pflag & LP_INTR) == 0)
		rump_softint_run(l->l_cpu);
	rump_unschedule_cpu1(l, interlock);
}

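/*
 * Release the CPU.  The interlock convention mirrors
 * rump_schedule_cpu_interlock(): if "interlock" is this CPU's
 * rcpu_mtx, we return with that mutex held, which is what allows the
 * scheduler lock to double as a condvar interlock (see
 * rump_schedlock_cv_wait() above).
 */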
void
rump_unschedule_cpu1(struct lwp *l, void *interlock)
{
	struct rumpcpu *rcpu;
	struct cpu_info *ci;
	void *old;

	ci = l->l_cpu;
	ci->ci_curlwp = NULL;
	rcpu = &rcpu_storage[ci-&rump_cpus[0]];

	KASSERT(rcpu->rcpu_ci == ci);

	/*
	 * Make sure all stores are seen before the CPU release.  This
	 * is relevant only in the non-fastpath scheduling case, but
	 * we don't know here if that's going to happen, so need to
	 * expect the worst.
	 */
	membar_exit();

	/* Release the CPU. */
	old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);

	/* No waiters?  No problems.  We're outta here. */
	if (old == RCPULWP_BUSY) {
		/* Was the scheduler interlock requested? */
		if (__predict_false(interlock == rcpu->rcpu_mtx))
			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
		return;
	}

	KASSERT(old == RCPULWP_WANTED);

	/*
	 * Ok, things weren't so snappy.
	 *
	 * Snailpath: take lock and signal anyone waiting for this CPU.
	 */

	rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
	if (rcpu->rcpu_wanted)
		rumpuser_cv_broadcast(rcpu->rcpu_cv);

	if (__predict_true(interlock != rcpu->rcpu_mtx))
		rumpuser_mutex_exit(rcpu->rcpu_mtx);
}

/* Give up and retake CPU (perhaps a different one) */
void
yield()
{
	struct lwp *l = curlwp;
	int nlocks;

	KERNEL_UNLOCK_ALL(l, &nlocks);
	rump_unschedule_cpu(l);
	rump_schedule_cpu(l);
	KERNEL_LOCK(nlocks, l);
}

void
preempt()
{

	yield();
}

bool
kpreempt(uintptr_t where)
{

	return false;
}

/*
 * There is no kernel thread preemption in rump currently.  But call
 * the implementing macros anyway in case they grow some side-effects
 * down the road.
 */
void
kpreempt_disable(void)
{

	KPREEMPT_DISABLE(curlwp);
}

void
kpreempt_enable(void)
{

	KPREEMPT_ENABLE(curlwp);
}

void
suspendsched(void)
{

	/*
	 * Could wait until everyone is out and block further entries,
	 * but skip that for now.
	 */
}

void
sched_nice(struct proc *p, int level)
{

	/* nothing to do for now */
}
527