xref: /openbsd-src/sys/kern/kern_clockintr.c (revision 949c1c4ec8cc03255798b09f6078e1d0aed70a6a)
1*949c1c4eSmiod /* $OpenBSD: kern_clockintr.c,v 1.71 2024/11/07 16:02:29 miod Exp $ */
2329e3480Scheloha /*
3329e3480Scheloha  * Copyright (c) 2003 Dale Rahn <drahn@openbsd.org>
4329e3480Scheloha  * Copyright (c) 2020 Mark Kettenis <kettenis@openbsd.org>
52caeedbfScheloha  * Copyright (c) 2020-2024 Scott Cheloha <cheloha@openbsd.org>
6329e3480Scheloha  *
7329e3480Scheloha  * Permission to use, copy, modify, and distribute this software for any
8329e3480Scheloha  * purpose with or without fee is hereby granted, provided that the above
9329e3480Scheloha  * copyright notice and this permission notice appear in all copies.
10329e3480Scheloha  *
11329e3480Scheloha  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12329e3480Scheloha  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13329e3480Scheloha  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14329e3480Scheloha  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15329e3480Scheloha  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16329e3480Scheloha  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17329e3480Scheloha  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18329e3480Scheloha  */
19329e3480Scheloha 
20329e3480Scheloha #include <sys/param.h>
21329e3480Scheloha #include <sys/systm.h>
22329e3480Scheloha #include <sys/atomic.h>
23329e3480Scheloha #include <sys/clockintr.h>
24329e3480Scheloha #include <sys/kernel.h>
25ace67ae8Scheloha #include <sys/malloc.h>
26329e3480Scheloha #include <sys/mutex.h>
2796496668Scheloha #include <sys/resourcevar.h>
28f95e5436Scheloha #include <sys/queue.h>
2996496668Scheloha #include <sys/sched.h>
30329e3480Scheloha #include <sys/stdint.h>
31329e3480Scheloha #include <sys/sysctl.h>
32329e3480Scheloha #include <sys/time.h>
33329e3480Scheloha 
3434cee562Scheloha void clockintr_cancel_locked(struct clockintr *);
35106c68c4Scheloha void clockintr_hardclock(struct clockrequest *, void *, void *);
367f1ddf07Scheloha void clockintr_schedule_locked(struct clockintr *, uint64_t);
37c737cf90Scheloha void clockqueue_intrclock_install(struct clockqueue *,
38d3ae44b7Scheloha     const struct intrclock *);
39c737cf90Scheloha void clockqueue_intrclock_reprogram(struct clockqueue *);
40c737cf90Scheloha uint64_t clockqueue_next(const struct clockqueue *);
41c737cf90Scheloha void clockqueue_pend_delete(struct clockqueue *, struct clockintr *);
42c737cf90Scheloha void clockqueue_pend_insert(struct clockqueue *, struct clockintr *,
439bc72213Scheloha     uint64_t);
44799b5cc6Scheloha void intrclock_rearm(struct intrclock *, uint64_t);
45799b5cc6Scheloha void intrclock_trigger(struct intrclock *);
46329e3480Scheloha uint64_t nsec_advance(uint64_t *, uint64_t, uint64_t);
47329e3480Scheloha 
48329e3480Scheloha /*
49329e3480Scheloha  * Ready the calling CPU for clockintr_dispatch().  If this is our
50329e3480Scheloha  * first time here, install the intrclock, if any, and set necessary
51329e3480Scheloha  * flags.  Advance the schedule as needed.
52329e3480Scheloha  */
53329e3480Scheloha void
54329e3480Scheloha clockintr_cpu_init(const struct intrclock *ic)
55329e3480Scheloha {
56a3d0b92bScheloha 	uint64_t multiplier = 0;
57329e3480Scheloha 	struct cpu_info *ci = curcpu();
58c737cf90Scheloha 	struct clockqueue *cq = &ci->ci_queue;
5996496668Scheloha 	struct schedstate_percpu *spc = &ci->ci_schedstate;
605d423768Scheloha 	int reset_cq_intrclock = 0;
61329e3480Scheloha 
62d3ae44b7Scheloha 	if (ic != NULL)
63d3ae44b7Scheloha 		clockqueue_intrclock_install(cq, ic);
64bdd1445dScheloha 
65c737cf90Scheloha 	/* TODO: Remove this from struct clockqueue. */
668434116fScheloha 	if (CPU_IS_PRIMARY(ci) && cq->cq_hardclock.cl_expiration == 0) {
671d970828Scheloha 		clockintr_bind(&cq->cq_hardclock, ci, clockintr_hardclock,
68a3464c93Scheloha 		    NULL);
699a4914a6Scheloha 	}
70329e3480Scheloha 
71329e3480Scheloha 	/*
725d423768Scheloha 	 * Mask CQ_INTRCLOCK while we're advancing the internal clock
735d423768Scheloha 	 * interrupts.  We don't want the intrclock to fire until this
745d423768Scheloha 	 * thread reaches clockintr_trigger().
755d423768Scheloha 	 */
765d423768Scheloha 	if (ISSET(cq->cq_flags, CQ_INTRCLOCK)) {
775d423768Scheloha 		CLR(cq->cq_flags, CQ_INTRCLOCK);
785d423768Scheloha 		reset_cq_intrclock = 1;
795d423768Scheloha 	}
805d423768Scheloha 
815d423768Scheloha 	/*
82329e3480Scheloha 	 * Until we understand scheduler lock contention better, stagger
83329e3480Scheloha 	 * the hardclock and statclock so they don't all happen at once.
84329e3480Scheloha 	 * If we have no intrclock it doesn't matter, we have no control
85329e3480Scheloha 	 * anyway.  The primary CPU's starting offset is always zero, so
86ace67ae8Scheloha 	 * leave the multiplier zero.
87329e3480Scheloha 	 */
88fa0435f2Scheloha 	if (!CPU_IS_PRIMARY(ci) && reset_cq_intrclock)
89329e3480Scheloha 		multiplier = CPU_INFO_UNIT(ci);
	/*
	 * NOTE(review): the clockintr_stagger() calls below use MAXCPUS
	 * as the denominator and KASSERT(numer < denom), so this relies
	 * on CPU_INFO_UNIT() being less than MAXCPUS — TODO confirm.
	 */
90329e3480Scheloha 
91329e3480Scheloha 	/*
92329e3480Scheloha 	 * The first time we do this, the primary CPU cannot skip any
93329e3480Scheloha 	 * hardclocks.  We can skip hardclocks on subsequent calls because
94329e3480Scheloha 	 * the global tick value is advanced during inittodr(9) on our
95329e3480Scheloha 	 * behalf.
96329e3480Scheloha 	 */
972ee1c8ebScheloha 	if (CPU_IS_PRIMARY(ci)) {
981d970828Scheloha 		if (cq->cq_hardclock.cl_expiration == 0)
991d970828Scheloha 			clockintr_schedule(&cq->cq_hardclock, 0);
1002ee1c8ebScheloha 		else
1011d970828Scheloha 			clockintr_advance(&cq->cq_hardclock, hardclock_period);
1022ee1c8ebScheloha 	}
103329e3480Scheloha 
104329e3480Scheloha 	/*
1053611b9b9Scheloha 	 * We can always advance the statclock.  There is no reason to
1063611b9b9Scheloha 	 * stagger a randomized statclock.
107329e3480Scheloha 	 */
108b3ef18bdScheloha 	if (!statclock_is_randomized) {
1091d970828Scheloha 		if (spc->spc_statclock.cl_expiration == 0) {
1101d970828Scheloha 			clockintr_stagger(&spc->spc_statclock, statclock_avg,
111a3d0b92bScheloha 			    multiplier, MAXCPUS);
112a3d0b92bScheloha 		}
113a3d0b92bScheloha 	}
	/* Advance happens unconditionally, staggered or not. */
1141d970828Scheloha 	clockintr_advance(&spc->spc_statclock, statclock_avg);
115329e3480Scheloha 
11696496668Scheloha 	/*
11796496668Scheloha 	 * XXX Need to find a better place to do this.  We can't do it in
11896496668Scheloha 	 * sched_init_cpu() because initclocks() runs after it.
11996496668Scheloha 	 */
1201d970828Scheloha 	if (spc->spc_itimer.cl_expiration == 0) {
1211d970828Scheloha 		clockintr_stagger(&spc->spc_itimer, hardclock_period,
12244e0cbf2Scheloha 		    multiplier, MAXCPUS);
12344e0cbf2Scheloha 	}
1241d970828Scheloha 	if (spc->spc_profclock.cl_expiration == 0) {
1251d970828Scheloha 		clockintr_stagger(&spc->spc_profclock, profclock_period,
12696496668Scheloha 		    multiplier, MAXCPUS);
12796496668Scheloha 	}
1281d970828Scheloha 	if (spc->spc_roundrobin.cl_expiration == 0) {
1291d970828Scheloha 		clockintr_stagger(&spc->spc_roundrobin, hardclock_period,
1309ac452c7Scheloha 		    multiplier, MAXCPUS);
1319ac452c7Scheloha 	}
	/*
	 * NOTE(review): only the roundrobin clock is advanced here;
	 * spc_itimer and spc_profclock are presumably started elsewhere
	 * (on demand) — verify against their callers.
	 */
1321d970828Scheloha 	clockintr_advance(&spc->spc_roundrobin, roundrobin_period);
13396496668Scheloha 
	/* Re-expose the intrclock masked above; caller triggers it next. */
1345d423768Scheloha 	if (reset_cq_intrclock)
1355d423768Scheloha 		SET(cq->cq_flags, CQ_INTRCLOCK);
136329e3480Scheloha }
137329e3480Scheloha 
138329e3480Scheloha /*
139329e3480Scheloha  * If we have an intrclock, trigger it to start the dispatch cycle.
140329e3480Scheloha  */
141329e3480Scheloha void
142329e3480Scheloha clockintr_trigger(void)
143329e3480Scheloha {
144c737cf90Scheloha 	struct clockqueue *cq = &curcpu()->ci_queue;
145329e3480Scheloha 
146a866e893Scheloha 	KASSERT(ISSET(cq->cq_flags, CQ_INIT));
147329e3480Scheloha 
148a866e893Scheloha 	if (ISSET(cq->cq_flags, CQ_INTRCLOCK))
149329e3480Scheloha 		intrclock_trigger(&cq->cq_intrclock);
150329e3480Scheloha }
151329e3480Scheloha 
152329e3480Scheloha /*
153329e3480Scheloha  * Run all expired events scheduled on the calling CPU.
154329e3480Scheloha  */
155329e3480Scheloha int
156329e3480Scheloha clockintr_dispatch(void *frame)
157329e3480Scheloha {
158ace67ae8Scheloha 	uint64_t lateness, run = 0, start;
159329e3480Scheloha 	struct cpu_info *ci = curcpu();
160106c68c4Scheloha 	struct clockintr *cl;
161c737cf90Scheloha 	struct clockqueue *cq = &ci->ci_queue;
162106c68c4Scheloha 	struct clockrequest *request = &cq->cq_request;
163106c68c4Scheloha 	void *arg;
164106c68c4Scheloha 	void (*func)(struct clockrequest *, void *, void *);
1658a8b133bScheloha 	uint32_t ogen;
166329e3480Scheloha 
	/* cq_dispatch guards against re-entering the dispatch loop. */
167329e3480Scheloha 	if (cq->cq_dispatch != 0)
168329e3480Scheloha 		panic("%s: recursive dispatch", __func__);
169329e3480Scheloha 	cq->cq_dispatch = 1;
170329e3480Scheloha 
171329e3480Scheloha 	splassert(IPL_CLOCK);
172a866e893Scheloha 	KASSERT(ISSET(cq->cq_flags, CQ_INIT));
173329e3480Scheloha 
1742137d3d7Scheloha 	mtx_enter(&cq->cq_mtx);
1752137d3d7Scheloha 
176329e3480Scheloha 	/*
177f95e5436Scheloha 	 * If nothing is scheduled or we arrived too early, we have
178f95e5436Scheloha 	 * nothing to do.
179329e3480Scheloha 	 */
180329e3480Scheloha 	start = nsecuptime();
	/* cq_uptime is the timestamp callbacks see via clockrequest_advance(). */
181ace67ae8Scheloha 	cq->cq_uptime = start;
182f95e5436Scheloha 	if (TAILQ_EMPTY(&cq->cq_pend))
183f95e5436Scheloha 		goto stats;
184f95e5436Scheloha 	if (cq->cq_uptime < clockqueue_next(cq))
185f95e5436Scheloha 		goto rearm;
186f95e5436Scheloha 	lateness = start - clockqueue_next(cq);
187329e3480Scheloha 
188329e3480Scheloha 	/*
189329e3480Scheloha 	 * Dispatch expired events.
190329e3480Scheloha 	 */
191f95e5436Scheloha 	for (;;) {
192f95e5436Scheloha 		cl = TAILQ_FIRST(&cq->cq_pend);
193f95e5436Scheloha 		if (cl == NULL)
194f95e5436Scheloha 			break;
195f95e5436Scheloha 		if (cq->cq_uptime < cl->cl_expiration) {
196f95e5436Scheloha 			/* Double-check the time before giving up. */
197ace67ae8Scheloha 			cq->cq_uptime = nsecuptime();
198f95e5436Scheloha 			if (cq->cq_uptime < cl->cl_expiration)
199f95e5436Scheloha 				break;
200f95e5436Scheloha 		}
20100ef33eaScheloha 
20200ef33eaScheloha 		/*
203106c68c4Scheloha 		 * This clockintr has expired.  Execute it.
20400ef33eaScheloha 		 */
	 	/*
		 * Snapshot arg/func before dropping the mutex: the
		 * callback runs unlocked so cl may be rescheduled or
		 * unbound concurrently.
		 */
20583ca3f0fScheloha 		clockqueue_pend_delete(cq, cl);
206106c68c4Scheloha 		request->cr_expiration = cl->cl_expiration;
207106c68c4Scheloha 		arg = cl->cl_arg;
208106c68c4Scheloha 		func = cl->cl_func;
209f95e5436Scheloha 		cq->cq_running = cl;
2102137d3d7Scheloha 		mtx_leave(&cq->cq_mtx);
211f95e5436Scheloha 
212106c68c4Scheloha 		func(request, frame, arg);
213f95e5436Scheloha 
2142137d3d7Scheloha 		mtx_enter(&cq->cq_mtx);
215f95e5436Scheloha 		cq->cq_running = NULL;
		/*
		 * CQ_IGNORE_REQUEST is set by clockintr_cancel()/
		 * clockintr_schedule() racing with the callback: the
		 * caller's action supersedes the callback's request.
		 */
21689b5be12Scheloha 		if (ISSET(cq->cq_flags, CQ_IGNORE_REQUEST)) {
21789b5be12Scheloha 			CLR(cq->cq_flags, CQ_IGNORE_REQUEST);
218106c68c4Scheloha 			CLR(request->cr_flags, CR_RESCHEDULE);
21958898dd5Scheloha 		}
220106c68c4Scheloha 		if (ISSET(request->cr_flags, CR_RESCHEDULE)) {
221106c68c4Scheloha 			CLR(request->cr_flags, CR_RESCHEDULE);
222106c68c4Scheloha 			clockqueue_pend_insert(cq, cl, request->cr_expiration);
22358898dd5Scheloha 		}
		/* A clockintr_unbind(CL_BARRIER) waiter needs a wakeup. */
2240d53143dScheloha 		if (ISSET(cq->cq_flags, CQ_NEED_WAKEUP)) {
2250d53143dScheloha 			CLR(cq->cq_flags, CQ_NEED_WAKEUP);
2260d53143dScheloha 			mtx_leave(&cq->cq_mtx);
2270d53143dScheloha 			wakeup(&cq->cq_running);
2280d53143dScheloha 			mtx_enter(&cq->cq_mtx);
2290d53143dScheloha 		}
230f95e5436Scheloha 		run++;
231f95e5436Scheloha 	}
232329e3480Scheloha 
233329e3480Scheloha 	/*
234329e3480Scheloha 	 * Dispatch complete.
235329e3480Scheloha 	 */
236f95e5436Scheloha rearm:
237329e3480Scheloha 	/* Rearm the interrupt clock if we have one. */
238a866e893Scheloha 	if (ISSET(cq->cq_flags, CQ_INTRCLOCK)) {
239f95e5436Scheloha 		if (!TAILQ_EMPTY(&cq->cq_pend)) {
240f95e5436Scheloha 			intrclock_rearm(&cq->cq_intrclock,
241f95e5436Scheloha 			    clockqueue_next(cq) - cq->cq_uptime);
242f95e5436Scheloha 		}
243f95e5436Scheloha 	}
244f95e5436Scheloha stats:
	/*
	 * Update our stats.  cq_gen = 0 marks the update in progress so
	 * that sysctl_clockintr() retries its lockless read; the membars
	 * order the stat stores relative to the generation stores.
	 */
245329e3480Scheloha 	/* Update our stats. */
246329e3480Scheloha 	ogen = cq->cq_gen;
247329e3480Scheloha 	cq->cq_gen = 0;
248329e3480Scheloha 	membar_producer();
249ace67ae8Scheloha 	cq->cq_stat.cs_dispatched += cq->cq_uptime - start;
250329e3480Scheloha 	if (run > 0) {
251329e3480Scheloha 		cq->cq_stat.cs_lateness += lateness;
252329e3480Scheloha 		cq->cq_stat.cs_prompt++;
253329e3480Scheloha 		cq->cq_stat.cs_run += run;
254f95e5436Scheloha 	} else if (!TAILQ_EMPTY(&cq->cq_pend)) {
255329e3480Scheloha 		cq->cq_stat.cs_early++;
256f95e5436Scheloha 		cq->cq_stat.cs_earliness += clockqueue_next(cq) - cq->cq_uptime;
257f95e5436Scheloha 	} else
258f95e5436Scheloha 		cq->cq_stat.cs_spurious++;
259329e3480Scheloha 	membar_producer();
	/* MAX(1, ...) keeps the generation nonzero even on wraparound. */
260329e3480Scheloha 	cq->cq_gen = MAX(1, ogen + 1);
261329e3480Scheloha 
2622137d3d7Scheloha 	mtx_leave(&cq->cq_mtx);
2632137d3d7Scheloha 
264329e3480Scheloha 	if (cq->cq_dispatch != 1)
265329e3480Scheloha 		panic("%s: unexpected value: %u", __func__, cq->cq_dispatch);
266329e3480Scheloha 	cq->cq_dispatch = 0;
267329e3480Scheloha 
268329e3480Scheloha 	return run > 0;
269329e3480Scheloha }
270329e3480Scheloha 
271ace67ae8Scheloha uint64_t
272ace67ae8Scheloha clockintr_advance(struct clockintr *cl, uint64_t period)
273ace67ae8Scheloha {
274f95e5436Scheloha 	uint64_t count, expiration;
275c737cf90Scheloha 	struct clockqueue *cq = cl->cl_queue;
276f95e5436Scheloha 
2772137d3d7Scheloha 	mtx_enter(&cq->cq_mtx);
278f95e5436Scheloha 	expiration = cl->cl_expiration;
27989af9044Scheloha 	count = nsec_advance(&expiration, period, nsecuptime());
280c7a8681cScheloha 	clockintr_schedule_locked(cl, expiration);
2812137d3d7Scheloha 	mtx_leave(&cq->cq_mtx);
282106c68c4Scheloha 
283f95e5436Scheloha 	return count;
284ace67ae8Scheloha }
285ace67ae8Scheloha 
286b441a2d8Scheloha uint64_t
287106c68c4Scheloha clockrequest_advance(struct clockrequest *cr, uint64_t period)
288106c68c4Scheloha {
289c737cf90Scheloha 	struct clockqueue *cq = cr->cr_queue;
290106c68c4Scheloha 
291106c68c4Scheloha 	KASSERT(cr == &cq->cq_request);
292106c68c4Scheloha 
293106c68c4Scheloha 	SET(cr->cr_flags, CR_RESCHEDULE);
294106c68c4Scheloha 	return nsec_advance(&cr->cr_expiration, period, cq->cq_uptime);
295106c68c4Scheloha }
296106c68c4Scheloha 
297106c68c4Scheloha uint64_t
298106c68c4Scheloha clockrequest_advance_random(struct clockrequest *cr, uint64_t min,
299106c68c4Scheloha     uint32_t mask)
300b441a2d8Scheloha {
301b441a2d8Scheloha 	uint64_t count = 0;
302c737cf90Scheloha 	struct clockqueue *cq = cr->cr_queue;
303b441a2d8Scheloha 	uint32_t off;
304b441a2d8Scheloha 
305106c68c4Scheloha 	KASSERT(cr == &cq->cq_request);
306b441a2d8Scheloha 
307106c68c4Scheloha 	while (cr->cr_expiration <= cq->cq_uptime) {
308b441a2d8Scheloha 		while ((off = (random() & mask)) == 0)
309b441a2d8Scheloha 			continue;
310106c68c4Scheloha 		cr->cr_expiration += min + off;
311b441a2d8Scheloha 		count++;
312b441a2d8Scheloha 	}
313106c68c4Scheloha 	SET(cr->cr_flags, CR_RESCHEDULE);
314b441a2d8Scheloha 	return count;
315b441a2d8Scheloha }
316b441a2d8Scheloha 
317a7d6b88aScheloha void
3189d1f3b84Scheloha clockintr_cancel(struct clockintr *cl)
3199d1f3b84Scheloha {
320c737cf90Scheloha 	struct clockqueue *cq = cl->cl_queue;
3219d1f3b84Scheloha 
3229d1f3b84Scheloha 	mtx_enter(&cq->cq_mtx);
32334cee562Scheloha 	clockintr_cancel_locked(cl);
32434cee562Scheloha 	mtx_leave(&cq->cq_mtx);
32534cee562Scheloha }
32634cee562Scheloha 
32734cee562Scheloha void
32834cee562Scheloha clockintr_cancel_locked(struct clockintr *cl)
32934cee562Scheloha {
330c737cf90Scheloha 	struct clockqueue *cq = cl->cl_queue;
33134cee562Scheloha 	int was_next;
33234cee562Scheloha 
33334cee562Scheloha 	MUTEX_ASSERT_LOCKED(&cq->cq_mtx);
33434cee562Scheloha 
335768b2f7bScheloha 	if (ISSET(cl->cl_flags, CLST_PENDING)) {
336768b2f7bScheloha 		was_next = cl == TAILQ_FIRST(&cq->cq_pend);
33783ca3f0fScheloha 		clockqueue_pend_delete(cq, cl);
338768b2f7bScheloha 		if (ISSET(cq->cq_flags, CQ_INTRCLOCK)) {
339768b2f7bScheloha 			if (was_next && !TAILQ_EMPTY(&cq->cq_pend)) {
340768b2f7bScheloha 				if (cq == &curcpu()->ci_queue)
3412b39aa20Scheloha 					clockqueue_intrclock_reprogram(cq);
342768b2f7bScheloha 			}
343768b2f7bScheloha 		}
344768b2f7bScheloha 	}
34558898dd5Scheloha 	if (cl == cq->cq_running)
34689b5be12Scheloha 		SET(cq->cq_flags, CQ_IGNORE_REQUEST);
3479d1f3b84Scheloha }
3489d1f3b84Scheloha 
3491d970828Scheloha void
3501d970828Scheloha clockintr_bind(struct clockintr *cl, struct cpu_info *ci,
351106c68c4Scheloha     void (*func)(struct clockrequest *, void *, void *), void *arg)
352ace67ae8Scheloha {
353c737cf90Scheloha 	struct clockqueue *cq = &ci->ci_queue;
354ace67ae8Scheloha 
3550d53143dScheloha 	splassert(IPL_NONE);
3560d53143dScheloha 	KASSERT(cl->cl_queue == NULL);
3570d53143dScheloha 
3580d53143dScheloha 	mtx_enter(&cq->cq_mtx);
359a3464c93Scheloha 	cl->cl_arg = arg;
360ace67ae8Scheloha 	cl->cl_func = func;
361ace67ae8Scheloha 	cl->cl_queue = cq;
3620d53143dScheloha 	TAILQ_INSERT_TAIL(&cq->cq_all, cl, cl_alink);
3630d53143dScheloha 	mtx_leave(&cq->cq_mtx);
3640d53143dScheloha }
3650d53143dScheloha 
3660d53143dScheloha void
3670d53143dScheloha clockintr_unbind(struct clockintr *cl, uint32_t flags)
3680d53143dScheloha {
369c737cf90Scheloha 	struct clockqueue *cq = cl->cl_queue;
3700d53143dScheloha 
3710d53143dScheloha 	KASSERT(!ISSET(flags, ~CL_FLAG_MASK));
3722137d3d7Scheloha 
3732137d3d7Scheloha 	mtx_enter(&cq->cq_mtx);
3740d53143dScheloha 
3750d53143dScheloha 	clockintr_cancel_locked(cl);
3760d53143dScheloha 
	/* Detach cl from the queue entirely, not just the pending list. */
3770d53143dScheloha 	cl->cl_arg = NULL;
3780d53143dScheloha 	cl->cl_func = NULL;
3790d53143dScheloha 	cl->cl_queue = NULL;
3800d53143dScheloha 	TAILQ_REMOVE(&cq->cq_all, cl, cl_alink);
3810d53143dScheloha 
	/*
	 * With CL_BARRIER, if cl's callback is running right now, sleep
	 * until the dispatch loop finishes it: dispatch sees
	 * CQ_NEED_WAKEUP and calls wakeup(&cq->cq_running).  PNORELOCK
	 * means msleep_nsec() drops cq_mtx and does not retake it, so
	 * both branches exit with the mutex released.
	 */
3820d53143dScheloha 	if (ISSET(flags, CL_BARRIER) && cl == cq->cq_running) {
3830d53143dScheloha 		SET(cq->cq_flags, CQ_NEED_WAKEUP);
3840d53143dScheloha 		msleep_nsec(&cq->cq_running, &cq->cq_mtx, PWAIT | PNORELOCK,
3850d53143dScheloha 		    "clkbar", INFSLP);
3860d53143dScheloha 	} else
3870d53143dScheloha 		mtx_leave(&cq->cq_mtx);
3880d53143dScheloha }
389ace67ae8Scheloha 
390ace67ae8Scheloha void
391ace67ae8Scheloha clockintr_schedule(struct clockintr *cl, uint64_t expiration)
392ace67ae8Scheloha {
393c737cf90Scheloha 	struct clockqueue *cq = cl->cl_queue;
3942137d3d7Scheloha 
3952137d3d7Scheloha 	mtx_enter(&cq->cq_mtx);
3967f1ddf07Scheloha 	clockintr_schedule_locked(cl, expiration);
3977f1ddf07Scheloha 	mtx_leave(&cq->cq_mtx);
3987f1ddf07Scheloha }
3997f1ddf07Scheloha 
4007f1ddf07Scheloha void
4017f1ddf07Scheloha clockintr_schedule_locked(struct clockintr *cl, uint64_t expiration)
4027f1ddf07Scheloha {
403c737cf90Scheloha 	struct clockqueue *cq = cl->cl_queue;
4047f1ddf07Scheloha 
4057f1ddf07Scheloha 	MUTEX_ASSERT_LOCKED(&cq->cq_mtx);
4067f1ddf07Scheloha 
407a7d6b88aScheloha 	if (ISSET(cl->cl_flags, CLST_PENDING))
40883ca3f0fScheloha 		clockqueue_pend_delete(cq, cl);
4099bc72213Scheloha 	clockqueue_pend_insert(cq, cl, expiration);
410768b2f7bScheloha 	if (ISSET(cq->cq_flags, CQ_INTRCLOCK)) {
411768b2f7bScheloha 		if (cl == TAILQ_FIRST(&cq->cq_pend)) {
412768b2f7bScheloha 			if (cq == &curcpu()->ci_queue)
4132b39aa20Scheloha 				clockqueue_intrclock_reprogram(cq);
414768b2f7bScheloha 		}
415768b2f7bScheloha 	}
41658898dd5Scheloha 	if (cl == cq->cq_running)
41789b5be12Scheloha 		SET(cq->cq_flags, CQ_IGNORE_REQUEST);
4182137d3d7Scheloha }
4192137d3d7Scheloha 
4202137d3d7Scheloha void
421cbdb8ca3Scheloha clockintr_stagger(struct clockintr *cl, uint64_t period, uint32_t numer,
422cbdb8ca3Scheloha     uint32_t denom)
423f289141eScheloha {
424c737cf90Scheloha 	struct clockqueue *cq = cl->cl_queue;
425f289141eScheloha 
426cbdb8ca3Scheloha 	KASSERT(numer < denom);
427f289141eScheloha 
428f289141eScheloha 	mtx_enter(&cq->cq_mtx);
429f289141eScheloha 	if (ISSET(cl->cl_flags, CLST_PENDING))
430f289141eScheloha 		panic("%s: clock interrupt pending", __func__);
431cbdb8ca3Scheloha 	cl->cl_expiration = period / denom * numer;
432f289141eScheloha 	mtx_leave(&cq->cq_mtx);
433f289141eScheloha }
434f289141eScheloha 
435ace67ae8Scheloha void
436106c68c4Scheloha clockintr_hardclock(struct clockrequest *cr, void *frame, void *arg)
437ace67ae8Scheloha {
438ace67ae8Scheloha 	uint64_t count, i;
439ace67ae8Scheloha 
440106c68c4Scheloha 	count = clockrequest_advance(cr, hardclock_period);
441ace67ae8Scheloha 	for (i = 0; i < count; i++)
442ace67ae8Scheloha 		hardclock(frame);
443ace67ae8Scheloha }
444ace67ae8Scheloha 
445ace67ae8Scheloha void
446c737cf90Scheloha clockqueue_init(struct clockqueue *cq)
447d972f9c4Scheloha {
448d972f9c4Scheloha 	if (ISSET(cq->cq_flags, CQ_INIT))
449d972f9c4Scheloha 		return;
450d972f9c4Scheloha 
451106c68c4Scheloha 	cq->cq_request.cr_queue = cq;
452d972f9c4Scheloha 	mtx_init(&cq->cq_mtx, IPL_CLOCK);
4539c0655baScheloha 	TAILQ_INIT(&cq->cq_all);
454d972f9c4Scheloha 	TAILQ_INIT(&cq->cq_pend);
455d972f9c4Scheloha 	cq->cq_gen = 1;
456d972f9c4Scheloha 	SET(cq->cq_flags, CQ_INIT);
457d972f9c4Scheloha }
458d972f9c4Scheloha 
459d3ae44b7Scheloha void
460c737cf90Scheloha clockqueue_intrclock_install(struct clockqueue *cq,
461d3ae44b7Scheloha     const struct intrclock *ic)
462d3ae44b7Scheloha {
463d3ae44b7Scheloha 	mtx_enter(&cq->cq_mtx);
464d3ae44b7Scheloha 	if (!ISSET(cq->cq_flags, CQ_INTRCLOCK)) {
465d3ae44b7Scheloha 		cq->cq_intrclock = *ic;
466d3ae44b7Scheloha 		SET(cq->cq_flags, CQ_INTRCLOCK);
467d3ae44b7Scheloha 	}
468d3ae44b7Scheloha 	mtx_leave(&cq->cq_mtx);
469d3ae44b7Scheloha }
470d3ae44b7Scheloha 
471f95e5436Scheloha uint64_t
472c737cf90Scheloha clockqueue_next(const struct clockqueue *cq)
473f95e5436Scheloha {
4742137d3d7Scheloha 	MUTEX_ASSERT_LOCKED(&cq->cq_mtx);
475f95e5436Scheloha 	return TAILQ_FIRST(&cq->cq_pend)->cl_expiration;
476f95e5436Scheloha }
477f95e5436Scheloha 
478768b2f7bScheloha void
479c737cf90Scheloha clockqueue_pend_delete(struct clockqueue *cq, struct clockintr *cl)
48083ca3f0fScheloha {
48183ca3f0fScheloha 	MUTEX_ASSERT_LOCKED(&cq->cq_mtx);
48283ca3f0fScheloha 	KASSERT(ISSET(cl->cl_flags, CLST_PENDING));
48383ca3f0fScheloha 
48483ca3f0fScheloha 	TAILQ_REMOVE(&cq->cq_pend, cl, cl_plink);
48583ca3f0fScheloha 	CLR(cl->cl_flags, CLST_PENDING);
48683ca3f0fScheloha }
48783ca3f0fScheloha 
4889bc72213Scheloha void
489c737cf90Scheloha clockqueue_pend_insert(struct clockqueue *cq, struct clockintr *cl,
4909bc72213Scheloha     uint64_t expiration)
4919bc72213Scheloha {
4929bc72213Scheloha 	struct clockintr *elm;
4939bc72213Scheloha 
4949bc72213Scheloha 	MUTEX_ASSERT_LOCKED(&cq->cq_mtx);
4959bc72213Scheloha 	KASSERT(!ISSET(cl->cl_flags, CLST_PENDING));
4969bc72213Scheloha 
4979bc72213Scheloha 	cl->cl_expiration = expiration;
4989bc72213Scheloha 	TAILQ_FOREACH(elm, &cq->cq_pend, cl_plink) {
4999bc72213Scheloha 		if (cl->cl_expiration < elm->cl_expiration)
5009bc72213Scheloha 			break;
5019bc72213Scheloha 	}
5029bc72213Scheloha 	if (elm == NULL)
5039bc72213Scheloha 		TAILQ_INSERT_TAIL(&cq->cq_pend, cl, cl_plink);
5049bc72213Scheloha 	else
5059bc72213Scheloha 		TAILQ_INSERT_BEFORE(elm, cl, cl_plink);
5069bc72213Scheloha 	SET(cl->cl_flags, CLST_PENDING);
5079bc72213Scheloha }
5089bc72213Scheloha 
50983ca3f0fScheloha void
510c737cf90Scheloha clockqueue_intrclock_reprogram(struct clockqueue *cq)
511768b2f7bScheloha {
512768b2f7bScheloha 	uint64_t exp, now;
513768b2f7bScheloha 
514768b2f7bScheloha 	MUTEX_ASSERT_LOCKED(&cq->cq_mtx);
515768b2f7bScheloha 	KASSERT(ISSET(cq->cq_flags, CQ_INTRCLOCK));
516768b2f7bScheloha 
517768b2f7bScheloha 	exp = clockqueue_next(cq);
518768b2f7bScheloha 	now = nsecuptime();
519768b2f7bScheloha 	if (now < exp)
520768b2f7bScheloha 		intrclock_rearm(&cq->cq_intrclock, exp - now);
521768b2f7bScheloha 	else
522768b2f7bScheloha 		intrclock_trigger(&cq->cq_intrclock);
523768b2f7bScheloha }
524768b2f7bScheloha 
525799b5cc6Scheloha void
526799b5cc6Scheloha intrclock_rearm(struct intrclock *ic, uint64_t nsecs)
527799b5cc6Scheloha {
528799b5cc6Scheloha 	ic->ic_rearm(ic->ic_cookie, nsecs);
529799b5cc6Scheloha }
530799b5cc6Scheloha 
531799b5cc6Scheloha void
532799b5cc6Scheloha intrclock_trigger(struct intrclock *ic)
533799b5cc6Scheloha {
534799b5cc6Scheloha 	ic->ic_trigger(ic->ic_cookie);
535799b5cc6Scheloha }
536799b5cc6Scheloha 
537329e3480Scheloha /*
538329e3480Scheloha  * Advance *next in increments of period until it exceeds now.
539329e3480Scheloha  * Returns the number of increments *next was advanced.
540329e3480Scheloha  *
541329e3480Scheloha  * We check the common cases first to avoid division if possible.
542329e3480Scheloha  * This does no overflow checking.
543329e3480Scheloha  */
/*
 * Step *next forward in multiples of period until it exceeds now.
 * Returns the number of periods added.  The zero- and one-step cases
 * are handled first to avoid the division.  No overflow checking.
 */
uint64_t
nsec_advance(uint64_t *next, uint64_t period, uint64_t now)
{
	uint64_t steps;

	if (*next > now)
		return 0;

	if (now < *next + period) {
		*next += period;
		return 1;
	}

	steps = (now - *next) / period + 1;
	*next += period * steps;
	return steps;
}
561329e3480Scheloha 
562329e3480Scheloha int
563329e3480Scheloha sysctl_clockintr(int *name, u_int namelen, void *oldp, size_t *oldlenp,
564329e3480Scheloha     void *newp, size_t newlen)
565329e3480Scheloha {
5662329db47Scheloha 	struct clockintr_stat sum, tmp;
567c737cf90Scheloha 	struct clockqueue *cq;
568329e3480Scheloha 	struct cpu_info *ci;
569329e3480Scheloha 	CPU_INFO_ITERATOR cii;
5708a8b133bScheloha 	uint32_t gen;
571329e3480Scheloha 
572329e3480Scheloha 	if (namelen != 1)
573329e3480Scheloha 		return ENOTDIR;
574329e3480Scheloha 
575329e3480Scheloha 	switch (name[0]) {
576329e3480Scheloha 	case KERN_CLOCKINTR_STATS:
5772329db47Scheloha 		memset(&sum, 0, sizeof sum);
578329e3480Scheloha 		CPU_INFO_FOREACH(cii, ci) {
579329e3480Scheloha 			cq = &ci->ci_queue;
580a866e893Scheloha 			if (!ISSET(cq->cq_flags, CQ_INIT))
581329e3480Scheloha 				continue;
			/*
			 * Lockless generation-counted read, paired with
			 * the writer in clockintr_dispatch(): gen == 0
			 * means an update is in progress, and a changed
			 * gen means the snapshot was torn; retry until
			 * a stable copy is read.
			 */
582329e3480Scheloha 			do {
583329e3480Scheloha 				gen = cq->cq_gen;
584329e3480Scheloha 				membar_consumer();
585329e3480Scheloha 				tmp = cq->cq_stat;
586329e3480Scheloha 				membar_consumer();
587329e3480Scheloha 			} while (gen == 0 || gen != cq->cq_gen);
588329e3480Scheloha 			sum.cs_dispatched += tmp.cs_dispatched;
589329e3480Scheloha 			sum.cs_early += tmp.cs_early;
590329e3480Scheloha 			sum.cs_earliness += tmp.cs_earliness;
591329e3480Scheloha 			sum.cs_lateness += tmp.cs_lateness;
592329e3480Scheloha 			sum.cs_prompt += tmp.cs_prompt;
593329e3480Scheloha 			sum.cs_run += tmp.cs_run;
594f95e5436Scheloha 			sum.cs_spurious += tmp.cs_spurious;
595329e3480Scheloha 		}
596329e3480Scheloha 		return sysctl_rdstruct(oldp, oldlenp, newp, &sum, sizeof sum);
597329e3480Scheloha 	default:
598329e3480Scheloha 		break;
599329e3480Scheloha 	}
600329e3480Scheloha 
601329e3480Scheloha 	return EINVAL;
602329e3480Scheloha }
603329e3480Scheloha 
604329e3480Scheloha #ifdef DDB
605329e3480Scheloha 
606329e3480Scheloha #include <machine/db_machdep.h>
607329e3480Scheloha 
608329e3480Scheloha #include <ddb/db_interface.h>
609329e3480Scheloha #include <ddb/db_output.h>
610329e3480Scheloha #include <ddb/db_sym.h>
611329e3480Scheloha 
61237084734Scheloha void db_show_clockintr(const struct clockintr *, const char *, u_int);
613329e3480Scheloha void db_show_clockintr_cpu(struct cpu_info *);
614329e3480Scheloha 
615329e3480Scheloha void
616329e3480Scheloha db_show_all_clockintr(db_expr_t addr, int haddr, db_expr_t count, char *modif)
617329e3480Scheloha {
618329e3480Scheloha 	struct timespec now;
619329e3480Scheloha 	struct cpu_info *ci;
620329e3480Scheloha 	CPU_INFO_ITERATOR cii;
6213973774fScheloha 	int width = sizeof(long) * 2 + 2;	/* +2 for "0x" prefix */
622329e3480Scheloha 
623329e3480Scheloha 	nanouptime(&now);
624329e3480Scheloha 	db_printf("%20s\n", "UPTIME");
625329e3480Scheloha 	db_printf("%10lld.%09ld\n", now.tv_sec, now.tv_nsec);
626329e3480Scheloha 	db_printf("\n");
6273973774fScheloha 	db_printf("%20s  %5s  %3s  %*s  %s\n",
6283973774fScheloha 	    "EXPIRATION", "STATE", "CPU", width, "ARG", "NAME");
629329e3480Scheloha 	CPU_INFO_FOREACH(cii, ci) {
630a866e893Scheloha 		if (ISSET(ci->ci_queue.cq_flags, CQ_INIT))
631329e3480Scheloha 			db_show_clockintr_cpu(ci);
632329e3480Scheloha 	}
633329e3480Scheloha }
634329e3480Scheloha 
635329e3480Scheloha void
636329e3480Scheloha db_show_clockintr_cpu(struct cpu_info *ci)
637329e3480Scheloha {
638f95e5436Scheloha 	struct clockintr *elm;
639c737cf90Scheloha 	struct clockqueue *cq = &ci->ci_queue;
640329e3480Scheloha 	u_int cpu = CPU_INFO_UNIT(ci);
641329e3480Scheloha 
642f95e5436Scheloha 	if (cq->cq_running != NULL)
64337084734Scheloha 		db_show_clockintr(cq->cq_running, "run", cpu);
644f95e5436Scheloha 	TAILQ_FOREACH(elm, &cq->cq_pend, cl_plink)
64537084734Scheloha 		db_show_clockintr(elm, "pend", cpu);
6469c0655baScheloha 	TAILQ_FOREACH(elm, &cq->cq_all, cl_alink) {
647f95e5436Scheloha 		if (!ISSET(elm->cl_flags, CLST_PENDING))
64878c44e9aScheloha 			db_show_clockintr(elm, "idle", cpu);
649f95e5436Scheloha 	}
650329e3480Scheloha }
651329e3480Scheloha 
652329e3480Scheloha void
65337084734Scheloha db_show_clockintr(const struct clockintr *cl, const char *state, u_int cpu)
654329e3480Scheloha {
655329e3480Scheloha 	struct timespec ts;
656*949c1c4eSmiod 	const char *name;
657ace67ae8Scheloha 	db_expr_t offset;
6583973774fScheloha 	int width = sizeof(long) * 2;
659329e3480Scheloha 
660ace67ae8Scheloha 	NSEC_TO_TIMESPEC(cl->cl_expiration, &ts);
661ace67ae8Scheloha 	db_find_sym_and_offset((vaddr_t)cl->cl_func, &name, &offset);
662ace67ae8Scheloha 	if (name == NULL)
663ace67ae8Scheloha 		name = "?";
6643973774fScheloha 	db_printf("%10lld.%09ld  %5s  %3u  0x%0*lx  %s\n",
6653973774fScheloha 	    ts.tv_sec, ts.tv_nsec, state, cpu,
6663973774fScheloha 	    width, (unsigned long)cl->cl_arg, name);
667329e3480Scheloha }
668329e3480Scheloha 
669329e3480Scheloha #endif /* DDB */
670