/*	$NetBSD: kern_time.c,v 1.211 2021/04/03 12:57:21 simonb Exp $	*/

/*-
 * Copyright (c) 2000, 2004, 2005, 2007, 2008, 2009, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Christopher G. Demetriou, by Andrew Doran, and by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_time.c	8.4 (Berkeley) 5/26/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_time.c,v 1.211 2021/04/03 12:57:21 simonb Exp $");

#include <sys/param.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/timetc.h>
#include <sys/timex.h>
#include <sys/kauth.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/cpu.h>
#include <sys/kmem.h>	/* explicit for kmem_alloc/kmem_zalloc/kmem_free */

kmutex_t	itimer_mutex __cacheline_aligned;	/* XXX static */
static struct itlist itimer_realtime_changed_notify;

static void	ptimer_intr(void *);
static void	*ptimer_sih __read_mostly;
static TAILQ_HEAD(, ptimer) ptimer_queue;

#define	CLOCK_VIRTUAL_P(clockid)	\
	((clockid) == CLOCK_VIRTUAL || (clockid) == CLOCK_PROF)

CTASSERT(ITIMER_REAL == CLOCK_REALTIME);
CTASSERT(ITIMER_VIRTUAL == CLOCK_VIRTUAL);
CTASSERT(ITIMER_PROF == CLOCK_PROF);
CTASSERT(ITIMER_MONOTONIC == CLOCK_MONOTONIC);

#define	DELAYTIMER_MAX	32

/*
 * Initialize timekeeping.
 */
void
time_init(void)
{

	mutex_init(&itimer_mutex, MUTEX_DEFAULT, IPL_SCHED);
	LIST_INIT(&itimer_realtime_changed_notify);

	TAILQ_INIT(&ptimer_queue);
	ptimer_sih = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE,
	    ptimer_intr, NULL);
}

/*
 * Check if the time will wrap if set to ts.
 *
 * ts - timespec describing the new time
 * delta - the delta between the current time and ts
 */
bool
time_wraps(struct timespec *ts, struct timespec *delta)
{

	/*
	 * Don't allow the time to be set forward so far it
	 * will wrap and become negative, thus allowing an
	 * attacker to bypass the next check below.  The
	 * cutoff is 1 year before rollover occurs, so even
	 * if the attacker uses adjtime(2) to move the time
	 * past the cutoff, it will take a very long time
	 * to get to the wrap point.
	 */
	if ((ts->tv_sec > LLONG_MAX - 365*24*60*60) ||
	    (delta->tv_sec < 0 || delta->tv_nsec < 0))
		return true;

	return false;
}
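
/*
 * Worked example (added commentary, not from the original source):
 * with ts->tv_sec = LLONG_MAX - 86400 (one day short of tv_sec
 * rollover), the first test fires because that value exceeds
 * LLONG_MAX - 365*24*60*60, so the set is refused a full year before
 * the counter could actually wrap; a delta with a negative component
 * is reported as wrapping as well.
 */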

/*
 * itimer_lock:
 *
 *	Acquire the interval timer data lock.
 */
void
itimer_lock(void)
{
	mutex_spin_enter(&itimer_mutex);
}

/*
 * itimer_unlock:
 *
 *	Release the interval timer data lock.
 */
void
itimer_unlock(void)
{
	mutex_spin_exit(&itimer_mutex);
}

/*
 * itimer_lock_held:
 *
 *	Check that the interval timer lock is held for diagnostic
 *	assertions.
 */
inline bool __diagused
itimer_lock_held(void)
{
	return mutex_owned(&itimer_mutex);
}

/*
 * Time of day and interval timer support.
 *
 * These routines provide the kernel entry points to get and set
 * the time-of-day and per-process interval timers.  Subroutines
 * here provide support for adding and subtracting timeval structures
 * and decrementing interval timers, optionally reloading the interval
 * timers when they expire.
 */

/* This function is used by clock_settime and settimeofday */
static int
settime1(struct proc *p, const struct timespec *ts, bool check_kauth)
{
	struct timespec delta, now;

	/*
	 * The time being set to an unreasonable value will cause
	 * unreasonable system behaviour.
	 */
	if (ts->tv_sec < 0 || ts->tv_sec > (1LL << 36))
		return (EINVAL);

	nanotime(&now);
	timespecsub(ts, &now, &delta);

	if (check_kauth && kauth_authorize_system(kauth_cred_get(),
	    KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_SYSTEM, __UNCONST(ts),
	    &delta, KAUTH_ARG(check_kauth ? false : true)) != 0) {
		return (EPERM);
	}

#ifdef notyet
	if ((delta.tv_sec < 86400) && securelevel > 0) { /* XXX elad - notyet */
		return (EPERM);
	}
#endif

	tc_setclock(ts);

	resettodr();

	/*
	 * Notify pending CLOCK_REALTIME timers about the real time change.
	 * There may be inactive timers on this list, but this happens
	 * comparatively less often than timers firing, and so it's better
	 * to put the extra checks here than to complicate the other code
	 * path.
	 */
	struct itimer *it;
	itimer_lock();
	LIST_FOREACH(it, &itimer_realtime_changed_notify, it_rtchgq) {
		KASSERT(it->it_ops->ito_realtime_changed != NULL);
		if (timespecisset(&it->it_time.it_value)) {
			(*it->it_ops->ito_realtime_changed)(it);
		}
	}
	itimer_unlock();

	return (0);
}

int
settime(struct proc *p, struct timespec *ts)
{
	return (settime1(p, ts, true));
}

/* ARGSUSED */
int
sys___clock_gettime50(struct lwp *l,
    const struct sys___clock_gettime50_args *uap, register_t *retval)
{
	/* {
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec *) tp;
	} */
	int error;
	struct timespec ats;

	error = clock_gettime1(SCARG(uap, clock_id), &ats);
	if (error != 0)
		return error;

	return copyout(&ats, SCARG(uap, tp), sizeof(ats));
}

/* ARGSUSED */
int
sys___clock_settime50(struct lwp *l,
    const struct sys___clock_settime50_args *uap, register_t *retval)
{
	/* {
		syscallarg(clockid_t) clock_id;
		syscallarg(const struct timespec *) tp;
	} */
	int error;
	struct timespec ats;

	if ((error = copyin(SCARG(uap, tp), &ats, sizeof(ats))) != 0)
		return error;

	return clock_settime1(l->l_proc, SCARG(uap, clock_id), &ats, true);
}


int
clock_settime1(struct proc *p, clockid_t clock_id, const struct timespec *tp,
    bool check_kauth)
{
	int error;

	if (tp->tv_nsec < 0 || tp->tv_nsec >= 1000000000L)
		return EINVAL;

	switch (clock_id) {
	case CLOCK_REALTIME:
		if ((error = settime1(p, tp, check_kauth)) != 0)
			return (error);
		break;
	case CLOCK_MONOTONIC:
		return (EINVAL);	/* read-only clock */
	default:
		return (EINVAL);
	}

	return 0;
}

int
sys___clock_getres50(struct lwp *l, const struct sys___clock_getres50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec *) tp;
	} */
	struct timespec ts;
	int error;

	if ((error = clock_getres1(SCARG(uap, clock_id), &ts)) != 0)
		return error;

	if (SCARG(uap, tp))
		error = copyout(&ts, SCARG(uap, tp), sizeof(ts));

	return error;
}

int
clock_getres1(clockid_t clock_id, struct timespec *ts)
{

	switch (clock_id) {
	case CLOCK_REALTIME:
	case CLOCK_MONOTONIC:
		ts->tv_sec = 0;
		if (tc_getfrequency() > 1000000000)
			ts->tv_nsec = 1;
		else
			ts->tv_nsec = 1000000000 / tc_getfrequency();
		break;
	default:
		return EINVAL;
	}

	return 0;
}
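
/*
 * Worked example (added commentary, not from the original source): for
 * a timecounter running at 1 MHz, tc_getfrequency() == 1000000, so the
 * reported resolution is 1000000000 / 1000000 = 1000 ns.  A counter
 * faster than 1 GHz is clamped to 1 ns, the finest resolution that
 * tv_nsec can express.
 */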

/* ARGSUSED */
int
sys___nanosleep50(struct lwp *l, const struct sys___nanosleep50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct timespec *) rqtp;
		syscallarg(struct timespec *) rmtp;
	} */
	struct timespec rmt, rqt;
	int error, error1;

	error = copyin(SCARG(uap, rqtp), &rqt, sizeof(struct timespec));
	if (error)
		return (error);

	error = nanosleep1(l, CLOCK_MONOTONIC, 0, &rqt,
	    SCARG(uap, rmtp) ? &rmt : NULL);
	if (SCARG(uap, rmtp) == NULL || (error != 0 && error != EINTR))
		return error;

	error1 = copyout(&rmt, SCARG(uap, rmtp), sizeof(rmt));
	return error1 ? error1 : error;
}

/* ARGSUSED */
int
sys_clock_nanosleep(struct lwp *l, const struct sys_clock_nanosleep_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(clockid_t) clock_id;
		syscallarg(int) flags;
		syscallarg(struct timespec *) rqtp;
		syscallarg(struct timespec *) rmtp;
	} */
	struct timespec rmt, rqt;
	int error, error1;

	error = copyin(SCARG(uap, rqtp), &rqt, sizeof(struct timespec));
	if (error)
		goto out;

	error = nanosleep1(l, SCARG(uap, clock_id), SCARG(uap, flags), &rqt,
	    SCARG(uap, rmtp) ? &rmt : NULL);
	if (SCARG(uap, rmtp) == NULL || (error != 0 && error != EINTR))
		goto out;

	if ((SCARG(uap, flags) & TIMER_ABSTIME) == 0 &&
	    (error1 = copyout(&rmt, SCARG(uap, rmtp), sizeof(rmt))) != 0)
		error = error1;
out:
	*retval = error;
	return 0;
}

int
nanosleep1(struct lwp *l, clockid_t clock_id, int flags, struct timespec *rqt,
    struct timespec *rmt)
{
	struct timespec rmtstart;
	int error, timo;

	if ((error = ts2timo(clock_id, flags, rqt, &timo, &rmtstart)) != 0) {
		if (error == ETIMEDOUT) {
			error = 0;
			if (rmt != NULL)
				rmt->tv_sec = rmt->tv_nsec = 0;
		}
		return error;
	}

	/*
	 * Avoid inadvertently sleeping forever
	 */
	if (timo == 0)
		timo = 1;
again:
	error = kpause("nanoslp", true, timo, NULL);
	if (error == EWOULDBLOCK)
		error = 0;
	if (rmt != NULL || error == 0) {
		struct timespec rmtend;
		struct timespec t0;
		struct timespec *t;
		int err;

		err = clock_gettime1(clock_id, &rmtend);
		if (err != 0)
			return err;

		t = (rmt != NULL) ? rmt : &t0;
		if (flags & TIMER_ABSTIME) {
			timespecsub(rqt, &rmtend, t);
		} else {
			timespecsub(&rmtend, &rmtstart, t);
			timespecsub(rqt, t, t);
		}
		if (t->tv_sec < 0)
			timespecclear(t);
		if (error == 0) {
			timo = tstohz(t);
			if (timo > 0)
				goto again;
		}
	}

	if (error == ERESTART)
		error = EINTR;

	return error;
}
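
/*
 * Illustrative sketch (added commentary, not from the original source):
 * for a relative sleep, the remaining time is rqt - (now - start).  If
 * a thread asks for a 5 s sleep and is interrupted by a signal after
 * 2 s, then rmtend - rmtstart == 2 s and rqt - 2 s == 3 s is reported
 * back in rmt.  For TIMER_ABSTIME sleeps the remainder is simply
 * rqt - now, and in both cases it is clamped to zero rather than going
 * negative.
 */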

int
sys_clock_getcpuclockid2(struct lwp *l,
    const struct sys_clock_getcpuclockid2_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(idtype_t) idtype;
		syscallarg(id_t) id;
		syscallarg(clockid_t *) clock_id;
	} */
	pid_t pid;
	lwpid_t lid;
	clockid_t clock_id;
	id_t id = SCARG(uap, id);

	switch (SCARG(uap, idtype)) {
	case P_PID:
		pid = id == 0 ? l->l_proc->p_pid : id;
		clock_id = CLOCK_PROCESS_CPUTIME_ID | pid;
		break;
	case P_LWPID:
		lid = id == 0 ? l->l_lid : id;
		clock_id = CLOCK_THREAD_CPUTIME_ID | lid;
		break;
	default:
		return EINVAL;
	}
	return copyout(&clock_id, SCARG(uap, clock_id), sizeof(clock_id));
}
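
/*
 * Example (added commentary, not from the original source): the
 * returned clock id encodes the target in its low bits, e.g. for
 * pid 123:
 *
 *	clockid_t cid = CLOCK_PROCESS_CPUTIME_ID | 123;
 *	clock_gettime(cid, &ts);	// CPU time consumed by pid 123
 *
 * Passing id == 0 means "the calling process/LWP".
 */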

/* ARGSUSED */
int
sys___gettimeofday50(struct lwp *l, const struct sys___gettimeofday50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct timeval *) tp;
		syscallarg(void *) tzp;		really "struct timezone *";
	} */
	struct timeval atv;
	int error = 0;
	struct timezone tzfake;

	if (SCARG(uap, tp)) {
		memset(&atv, 0, sizeof(atv));
		microtime(&atv);
		error = copyout(&atv, SCARG(uap, tp), sizeof(atv));
		if (error)
			return (error);
	}
	if (SCARG(uap, tzp)) {
		/*
		 * NetBSD has no kernel notion of time zone, so we just
		 * fake up a timezone struct and return it if demanded.
		 */
		tzfake.tz_minuteswest = 0;
		tzfake.tz_dsttime = 0;
		error = copyout(&tzfake, SCARG(uap, tzp), sizeof(tzfake));
	}
	return (error);
}

/* ARGSUSED */
int
sys___settimeofday50(struct lwp *l, const struct sys___settimeofday50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const struct timeval *) tv;
		syscallarg(const void *) tzp; really "const struct timezone *";
	} */

	return settimeofday1(SCARG(uap, tv), true, SCARG(uap, tzp), l, true);
}

int
settimeofday1(const struct timeval *utv, bool userspace,
    const void *utzp, struct lwp *l, bool check_kauth)
{
	struct timeval atv;
	struct timespec ts;
	int error;

	/* Verify all parameters before changing time. */

	/*
	 * NetBSD has no kernel notion of time zone, and only an
	 * obsolete program would try to set it, so we log a warning.
	 */
	if (utzp)
		log(LOG_WARNING, "pid %d attempted to set the "
		    "(obsolete) kernel time zone\n", l->l_proc->p_pid);

	if (utv == NULL)
		return 0;

	if (userspace) {
		if ((error = copyin(utv, &atv, sizeof(atv))) != 0)
			return error;
		utv = &atv;
	}

	if (utv->tv_usec < 0 || utv->tv_usec >= 1000000)
		return EINVAL;

	TIMEVAL_TO_TIMESPEC(utv, &ts);
	return settime1(l->l_proc, &ts, check_kauth);
}

int	time_adjusted;			/* set if an adjustment is made */

/* ARGSUSED */
int
sys___adjtime50(struct lwp *l, const struct sys___adjtime50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const struct timeval *) delta;
		syscallarg(struct timeval *) olddelta;
	} */
	int error;
	struct timeval atv, oldatv;

	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_TIME,
	    KAUTH_REQ_SYSTEM_TIME_ADJTIME, NULL, NULL, NULL)) != 0)
		return error;

	if (SCARG(uap, delta)) {
		error = copyin(SCARG(uap, delta), &atv,
		    sizeof(*SCARG(uap, delta)));
		if (error)
			return (error);
	}
	adjtime1(SCARG(uap, delta) ? &atv : NULL,
	    SCARG(uap, olddelta) ? &oldatv : NULL, l->l_proc);
	if (SCARG(uap, olddelta))
		error = copyout(&oldatv, SCARG(uap, olddelta),
		    sizeof(*SCARG(uap, olddelta)));
	return error;
}

void
adjtime1(const struct timeval *delta, struct timeval *olddelta, struct proc *p)
{
	extern int64_t time_adjtime;  /* in kern_ntptime.c */

	if (olddelta) {
		memset(olddelta, 0, sizeof(*olddelta));
		mutex_spin_enter(&timecounter_lock);
		olddelta->tv_sec = time_adjtime / 1000000;
		olddelta->tv_usec = time_adjtime % 1000000;
		if (olddelta->tv_usec < 0) {
			olddelta->tv_usec += 1000000;
			olddelta->tv_sec--;
		}
		mutex_spin_exit(&timecounter_lock);
	}

	if (delta) {
		mutex_spin_enter(&timecounter_lock);
		time_adjtime = delta->tv_sec * 1000000 + delta->tv_usec;

		if (time_adjtime) {
			/* We need to save the system time during shutdown */
			time_adjusted |= 1;
		}
		mutex_spin_exit(&timecounter_lock);
	}
}
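
/*
 * Worked example (added commentary, not from the original source):
 * time_adjtime holds the outstanding correction in microseconds, so a
 * delta of { .tv_sec = 1, .tv_usec = 500000 } stores 1500000.  Reading
 * it back, a negative value such as -1500000 first splits (with C
 * truncating division) into tv_sec = -1, tv_usec = -500000, and the
 * normalization above turns that into tv_sec = -2, tv_usec = 500000,
 * keeping tv_usec in the conventional [0, 1000000) range.
 */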

/*
 * Interval timer support.
 *
 * The itimer_*() routines provide generic support for interval timers,
 * both real (CLOCK_REALTIME, CLOCK_MONOTONIC), and virtual (CLOCK_VIRTUAL,
 * CLOCK_PROF).
 *
 * Real timers keep their deadline as an absolute time, and are fired
 * by a callout.  Virtual timers are kept as a linked-list of deltas,
 * and are processed by hardclock().
 *
 * Because the real time timer callout may be delayed in real time due
 * to interrupt processing on the system, it is possible for the real
 * time timeout routine (itimer_callout()) to run after its deadline
 * has passed.  It does not suffice, therefore, to reload the real timer
 * .it_value from the timer's .it_interval.  Rather, we compute the next
 * deadline in absolute time based on the current time and the
 * .it_interval value, and report any overruns.
 *
 * Note that while the virtual timers are supported in a generic fashion
 * here, they only (currently) make sense as per-process timers, and thus
 * only really work for that case.
 */

/*
 * itimer_init:
 *
 *	Initialize the common data for an interval timer.
 */
void
itimer_init(struct itimer * const it, const struct itimer_ops * const ops,
    clockid_t const id, struct itlist * const itl)
{

	KASSERT(itimer_lock_held());
	KASSERT(ops != NULL);

	timespecclear(&it->it_time.it_value);
	it->it_ops = ops;
	it->it_clockid = id;
	it->it_overruns = 0;
	it->it_dying = false;
	if (!CLOCK_VIRTUAL_P(id)) {
		KASSERT(itl == NULL);
		callout_init(&it->it_ch, CALLOUT_MPSAFE);
		if (id == CLOCK_REALTIME && ops->ito_realtime_changed != NULL) {
			LIST_INSERT_HEAD(&itimer_realtime_changed_notify,
			    it, it_rtchgq);
		}
	} else {
		KASSERT(itl != NULL);
		it->it_vlist = itl;
		it->it_active = false;
	}
}

/*
 * itimer_poison:
 *
 *	Poison an interval timer, preventing it from being scheduled
 *	or processed, in preparation for freeing the timer.
 */
void
itimer_poison(struct itimer * const it)
{

	KASSERT(itimer_lock_held());

	it->it_dying = true;

	/*
	 * For non-virtual timers, stop the callout, or wait for it to
	 * run if it has already fired.  It cannot restart again after
	 * this point: the callout won't restart itself when dying, no
	 * other users holding the lock can restart it, and any other
	 * users waiting for callout_halt concurrently (itimer_settime)
	 * will restart from the top.
	 */
	if (!CLOCK_VIRTUAL_P(it->it_clockid)) {
		callout_halt(&it->it_ch, &itimer_mutex);
		if (it->it_clockid == CLOCK_REALTIME &&
		    it->it_ops->ito_realtime_changed != NULL) {
			LIST_REMOVE(it, it_rtchgq);
		}
	}
}

/*
 * itimer_fini:
 *
 *	Release resources used by an interval timer.
 *
 *	N.B. itimer_lock must be held on entry, and is released on exit.
 */
void
itimer_fini(struct itimer * const it)
{

	KASSERT(itimer_lock_held());

	/* All done with the global state. */
	itimer_unlock();

	/* Destroy the callout, if needed. */
	if (!CLOCK_VIRTUAL_P(it->it_clockid))
		callout_destroy(&it->it_ch);
}

/*
 * itimer_decr:
 *
 *	Decrement an interval timer by a specified number of nanoseconds,
 *	which must be less than a second, i.e. < 1000000000.  If the timer
 *	expires, then reload it.  In this case, carry over (nsec - old value)
 *	to reduce the value reloaded into the timer so that the timer does
 *	not drift.  This routine assumes that it is called in a context where
 *	the timers on which it is operating cannot change in value.
 *
 *	Returns true if the timer has expired.
 */
static bool
itimer_decr(struct itimer *it, int nsec)
{
	struct itimerspec *itp;
	int error __diagused;

	KASSERT(itimer_lock_held());
	KASSERT(CLOCK_VIRTUAL_P(it->it_clockid));

	itp = &it->it_time;
	if (itp->it_value.tv_nsec < nsec) {
		if (itp->it_value.tv_sec == 0) {
			/* expired, and already in next interval */
			nsec -= itp->it_value.tv_nsec;
			goto expire;
		}
		itp->it_value.tv_nsec += 1000000000;
		itp->it_value.tv_sec--;
	}
	itp->it_value.tv_nsec -= nsec;
	nsec = 0;
	if (timespecisset(&itp->it_value))
		return false;
	/* expired, exactly at end of interval */
 expire:
	if (timespecisset(&itp->it_interval)) {
		itp->it_value = itp->it_interval;
		itp->it_value.tv_nsec -= nsec;
		if (itp->it_value.tv_nsec < 0) {
			itp->it_value.tv_nsec += 1000000000;
			itp->it_value.tv_sec--;
		}
		error = itimer_settime(it);
		KASSERT(error == 0); /* virtual, never fails */
	} else
		itp->it_value.tv_nsec = 0;		/* sec is already 0 */
	return true;
}
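
/*
 * Worked example (added commentary, not from the original source):
 * with a 10 ms interval and 2 ms left in it_value, a decrement of
 * nsec = 10 ms overshoots by 8 ms.  The "expire" path reloads it_value
 * with it_interval minus that carry, i.e. 10 ms - 8 ms = 2 ms, so the
 * next expiry stays aligned with the original period instead of
 * drifting late by the overshoot.
 */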

static void itimer_callout(void *);

/*
 * itimer_arm_real:
 *
 *	Arm a non-virtual timer.
 */
static void
itimer_arm_real(struct itimer * const it)
{
	/*
	 * Don't need to check tshzto() return value, here.
	 * callout_reset() does it for us.
	 */
	callout_reset(&it->it_ch,
	    (it->it_clockid == CLOCK_MONOTONIC
		? tshztoup(&it->it_time.it_value)
		: tshzto(&it->it_time.it_value)),
	    itimer_callout, it);
}

/*
 * itimer_callout:
 *
 *	Callout to expire a non-virtual timer.  Queue it up for processing,
 *	and then reload, if it is configured to do so.
 *
 *	N.B. A delay in processing this callout causes multiple
 *	SIGALRM calls to be compressed into one.
 */
static void
itimer_callout(void *arg)
{
	uint64_t last_val, next_val, interval, now_ns;
	struct timespec now, next;
	struct itimer * const it = arg;
	int backwards;

	itimer_lock();
	(*it->it_ops->ito_fire)(it);

	if (!timespecisset(&it->it_time.it_interval)) {
		timespecclear(&it->it_time.it_value);
		itimer_unlock();
		return;
	}

	if (it->it_clockid == CLOCK_MONOTONIC) {
		getnanouptime(&now);
	} else {
		getnanotime(&now);
	}
	backwards = (timespeccmp(&it->it_time.it_value, &now, >));
	timespecadd(&it->it_time.it_value, &it->it_time.it_interval, &next);
	/* Handle the easy case of non-overflown timers first. */
	if (!backwards && timespeccmp(&next, &now, >)) {
		it->it_time.it_value = next;
	} else {
		now_ns = timespec2ns(&now);
		last_val = timespec2ns(&it->it_time.it_value);
		interval = timespec2ns(&it->it_time.it_interval);

		next_val = now_ns +
		    (now_ns - last_val + interval - 1) % interval;

		if (backwards)
			next_val += interval;
		else
			it->it_overruns += (now_ns - last_val) / interval;

		it->it_time.it_value.tv_sec = next_val / 1000000000;
		it->it_time.it_value.tv_nsec = next_val % 1000000000;
	}

	/*
	 * Reset the callout, if it's not going away.
	 */
	if (!it->it_dying)
		itimer_arm_real(it);
	itimer_unlock();
}
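
/*
 * Worked example (added commentary, not from the original source):
 * suppose a 10 ms interval timer had its deadline at t = 100 ms but the
 * callout only ran at t = 125 ms.  The slow path above charges
 * (125 - 100) / 10 = 2 overruns and picks a new deadline no earlier
 * than the time the callout actually ran, rather than naively adding
 * one interval to the stale deadline, which would still lie in the
 * past.
 */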

/*
 * itimer_settime:
 *
 *	Set up the given interval timer. The value in it->it_time.it_value
 *	is taken to be an absolute time for CLOCK_REALTIME/CLOCK_MONOTONIC
 *	timers and a relative time for CLOCK_VIRTUAL/CLOCK_PROF timers.
 *
 *	If the callout had already fired but not yet run, fails with
 *	ERESTART -- caller must restart from the top to look up a timer.
 */
int
itimer_settime(struct itimer *it)
{
	struct itimer *itn, *pitn;
	struct itlist *itl;

	KASSERT(itimer_lock_held());

	if (!CLOCK_VIRTUAL_P(it->it_clockid)) {
		/*
		 * Try to stop the callout.  However, if it had already
		 * fired, we have to drop the lock to wait for it, so
		 * the world may have changed and the timer may not be
		 * there any more.  In that case, tell the caller to
		 * start over from the top.
		 */
		if (callout_halt(&it->it_ch, &itimer_mutex))
			return ERESTART;

		/* Now we can touch it and start it up again. */
		if (timespecisset(&it->it_time.it_value))
			itimer_arm_real(it);
	} else {
		if (it->it_active) {
			itn = LIST_NEXT(it, it_list);
			LIST_REMOVE(it, it_list);
			for ( ; itn; itn = LIST_NEXT(itn, it_list))
				timespecadd(&it->it_time.it_value,
				    &itn->it_time.it_value,
				    &itn->it_time.it_value);
		}
		if (timespecisset(&it->it_time.it_value)) {
			itl = it->it_vlist;
			for (itn = LIST_FIRST(itl), pitn = NULL;
			     itn && timespeccmp(&it->it_time.it_value,
				 &itn->it_time.it_value, >);
			     pitn = itn, itn = LIST_NEXT(itn, it_list))
				timespecsub(&it->it_time.it_value,
				    &itn->it_time.it_value,
				    &it->it_time.it_value);

			if (pitn)
				LIST_INSERT_AFTER(pitn, it, it_list);
			else
				LIST_INSERT_HEAD(itl, it, it_list);

			for ( ; itn ; itn = LIST_NEXT(itn, it_list))
				timespecsub(&itn->it_time.it_value,
				    &it->it_time.it_value,
				    &itn->it_time.it_value);

			it->it_active = true;
		} else {
			it->it_active = false;
		}
	}

	/* Success!  */
	return 0;
}
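
/*
 * Worked example of the delta list (added commentary, not from the
 * original source): a virtual timer list holding absolute remainders
 * of 3 ms, 5 ms and 9 ms is stored as the deltas 3, 2, 4.  Inserting a
 * new timer with 6 ms remaining walks past the 3 and 2 entries
 * (reducing 6 to 1), links it in with a delta of 1, and shrinks the
 * following entry's delta from 4 to 3, so the stored deltas become
 * 3, 2, 1, 3 (absolute remainders 3, 5, 6, 9).
 */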

/*
 * itimer_gettime:
 *
 *	Return the remaining time of an interval timer.
 */
void
itimer_gettime(const struct itimer *it, struct itimerspec *aits)
{
	struct timespec now;
	struct itimer *itn;

	KASSERT(itimer_lock_held());

	*aits = it->it_time;
	if (!CLOCK_VIRTUAL_P(it->it_clockid)) {
		/*
		 * Convert from absolute to relative time in .it_value
		 * part of real time timer.  If time for real time
		 * timer has passed return 0, else return difference
		 * between current time and time for the timer to go
		 * off.
		 */
		if (timespecisset(&aits->it_value)) {
			if (it->it_clockid == CLOCK_REALTIME) {
				getnanotime(&now);
			} else { /* CLOCK_MONOTONIC */
				getnanouptime(&now);
			}
			if (timespeccmp(&aits->it_value, &now, <))
				timespecclear(&aits->it_value);
			else
				timespecsub(&aits->it_value, &now,
				    &aits->it_value);
		}
	} else if (it->it_active) {
		for (itn = LIST_FIRST(it->it_vlist); itn && itn != it;
		     itn = LIST_NEXT(itn, it_list))
			timespecadd(&aits->it_value,
			    &itn->it_time.it_value, &aits->it_value);
		KASSERT(itn != NULL); /* it should be findable on the list */
	} else
		timespecclear(&aits->it_value);
}

/*
 * Per-process timer support.
 *
 * Both the BSD getitimer() family and the POSIX timer_*() family of
 * routines are supported.
 *
 * All timers are kept in an array pointed to by p_timers, which is
 * allocated on demand - many processes don't use timers at all. The
 * first four elements in this array are reserved for the BSD timers:
 * element 0 is ITIMER_REAL, element 1 is ITIMER_VIRTUAL, element
 * 2 is ITIMER_PROF, and element 3 is ITIMER_MONOTONIC. The rest may be
 * allocated by the timer_create() syscall.
 *
 * These timers are a "sub-class" of interval timer.
 */

/*
 * ptimer_free:
 *
 *	Free the per-process timer at the specified index.
 */
static void
ptimer_free(struct ptimers *pts, int index)
{
	struct itimer *it;
	struct ptimer *pt;

	KASSERT(itimer_lock_held());

	it = pts->pts_timers[index];
	pt = container_of(it, struct ptimer, pt_itimer);
	pts->pts_timers[index] = NULL;
	itimer_poison(it);

	/*
	 * Remove it from the queue to be signalled.  Must be done
	 * after itimer is poisoned, because we may have had to wait
	 * for the callout to complete.
	 */
	if (pt->pt_queued) {
		TAILQ_REMOVE(&ptimer_queue, pt, pt_chain);
		pt->pt_queued = false;
	}

	itimer_fini(it);	/* releases itimer_lock */
	kmem_free(pt, sizeof(*pt));
}

/*
 * ptimers_alloc:
 *
 *	Allocate a ptimers for the specified process.
 */
static struct ptimers *
ptimers_alloc(struct proc *p)
{
	struct ptimers *pts;
	int i;

	pts = kmem_alloc(sizeof(*pts), KM_SLEEP);
	LIST_INIT(&pts->pts_virtual);
	LIST_INIT(&pts->pts_prof);
	for (i = 0; i < TIMER_MAX; i++)
		pts->pts_timers[i] = NULL;
	itimer_lock();
	if (p->p_timers == NULL) {
		p->p_timers = pts;
		itimer_unlock();
		return pts;
	}
	itimer_unlock();
	kmem_free(pts, sizeof(*pts));
	return p->p_timers;
}

/*
 * ptimers_free:
 *
 *	Clean up the per-process timers. If "which" is set to TIMERS_ALL,
 *	then clean up all timers and free all the data structures. If
 *	"which" is set to TIMERS_POSIX, only clean up the timers allocated
 *	by timer_create(), not the BSD setitimer() timers, and only free the
 *	structure if none of those remain.
 *
 *	This function is exported because it is needed in the exec and
 *	exit code paths.
 */
void
ptimers_free(struct proc *p, int which)
{
	struct ptimers *pts;
	struct itimer *itn;
	struct timespec ts;
	int i;

	if (p->p_timers == NULL)
		return;

	pts = p->p_timers;
	itimer_lock();
	if (which == TIMERS_ALL) {
		p->p_timers = NULL;
		i = 0;
	} else {
		timespecclear(&ts);
		for (itn = LIST_FIRST(&pts->pts_virtual);
		     itn && itn != pts->pts_timers[ITIMER_VIRTUAL];
		     itn = LIST_NEXT(itn, it_list)) {
			KASSERT(itn->it_clockid == CLOCK_VIRTUAL);
			timespecadd(&ts, &itn->it_time.it_value, &ts);
		}
		LIST_FIRST(&pts->pts_virtual) = NULL;
		if (itn) {
			KASSERT(itn->it_clockid == CLOCK_VIRTUAL);
			timespecadd(&ts, &itn->it_time.it_value,
			    &itn->it_time.it_value);
			LIST_INSERT_HEAD(&pts->pts_virtual, itn, it_list);
		}
		timespecclear(&ts);
		for (itn = LIST_FIRST(&pts->pts_prof);
		     itn && itn != pts->pts_timers[ITIMER_PROF];
		     itn = LIST_NEXT(itn, it_list)) {
			KASSERT(itn->it_clockid == CLOCK_PROF);
			timespecadd(&ts, &itn->it_time.it_value, &ts);
		}
		LIST_FIRST(&pts->pts_prof) = NULL;
		if (itn) {
			KASSERT(itn->it_clockid == CLOCK_PROF);
			timespecadd(&ts, &itn->it_time.it_value,
			    &itn->it_time.it_value);
			LIST_INSERT_HEAD(&pts->pts_prof, itn, it_list);
		}
		i = TIMER_MIN;
	}
	for ( ; i < TIMER_MAX; i++) {
		if (pts->pts_timers[i] != NULL) {
			/* Free the timer and release the lock.  */
			ptimer_free(pts, i);
			/* Reacquire the lock for the next one.  */
			itimer_lock();
		}
	}
	if (pts->pts_timers[0] == NULL && pts->pts_timers[1] == NULL &&
	    pts->pts_timers[2] == NULL && pts->pts_timers[3] == NULL) {
		p->p_timers = NULL;
		itimer_unlock();
		kmem_free(pts, sizeof(*pts));
	} else
		itimer_unlock();
}

/*
 * ptimer_fire:
 *
 *	Fire a per-process timer.
 */
static void
ptimer_fire(struct itimer *it)
{
	struct ptimer *pt = container_of(it, struct ptimer, pt_itimer);

	KASSERT(itimer_lock_held());

	/*
	 * XXX Can overrun, but we don't do signal queueing yet, anyway.
	 * XXX Relying on the clock interrupt is stupid.
	 */
	if (pt->pt_ev.sigev_notify != SIGEV_SIGNAL) {
		return;
	}

	if (!pt->pt_queued) {
		TAILQ_INSERT_TAIL(&ptimer_queue, pt, pt_chain);
		pt->pt_queued = true;
		softint_schedule(ptimer_sih);
	}
}

/*
 * Operations vector for per-process timers (BSD and POSIX).
 */
static const struct itimer_ops ptimer_itimer_ops = {
	.ito_fire = ptimer_fire,
};

/*
 * sys_timer_create:
 *
 *	System call to create a POSIX timer.
 */
int
sys_timer_create(struct lwp *l, const struct sys_timer_create_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(clockid_t) clock_id;
		syscallarg(struct sigevent *) evp;
		syscallarg(timer_t *) timerid;
	} */

	return timer_create1(SCARG(uap, timerid), SCARG(uap, clock_id),
	    SCARG(uap, evp), copyin, l);
}

int
timer_create1(timer_t *tid, clockid_t id, struct sigevent *evp,
    copyin_t fetch_event, struct lwp *l)
{
	int error;
	timer_t timerid;
	struct itlist *itl;
	struct ptimers *pts;
	struct ptimer *pt;
	struct proc *p;

	p = l->l_proc;

	if ((u_int)id > CLOCK_MONOTONIC)
		return (EINVAL);

	if ((pts = p->p_timers) == NULL)
		pts = ptimers_alloc(p);

	pt = kmem_zalloc(sizeof(*pt), KM_SLEEP);
	if (evp != NULL) {
		if (((error =
		    (*fetch_event)(evp, &pt->pt_ev, sizeof(pt->pt_ev))) != 0) ||
		    ((pt->pt_ev.sigev_notify < SIGEV_NONE) ||
			(pt->pt_ev.sigev_notify > SIGEV_SA)) ||
			(pt->pt_ev.sigev_notify == SIGEV_SIGNAL &&
			 (pt->pt_ev.sigev_signo <= 0 ||
			  pt->pt_ev.sigev_signo >= NSIG))) {
			kmem_free(pt, sizeof(*pt));
			return (error ? error : EINVAL);
		}
	}

	/* Find a free timer slot, skipping those reserved for setitimer(). */
	itimer_lock();
	for (timerid = TIMER_MIN; timerid < TIMER_MAX; timerid++)
		if (pts->pts_timers[timerid] == NULL)
			break;
	if (timerid == TIMER_MAX) {
		itimer_unlock();
		kmem_free(pt, sizeof(*pt));
		return EAGAIN;
	}
	if (evp == NULL) {
		pt->pt_ev.sigev_notify = SIGEV_SIGNAL;
		switch (id) {
		case CLOCK_REALTIME:
		case CLOCK_MONOTONIC:
			pt->pt_ev.sigev_signo = SIGALRM;
			break;
		case CLOCK_VIRTUAL:
			pt->pt_ev.sigev_signo = SIGVTALRM;
			break;
		case CLOCK_PROF:
			pt->pt_ev.sigev_signo = SIGPROF;
			break;
		}
		pt->pt_ev.sigev_value.sival_int = timerid;
	}

	switch (id) {
	case CLOCK_VIRTUAL:
		itl = &pts->pts_virtual;
		break;
	case CLOCK_PROF:
		itl = &pts->pts_prof;
		break;
	default:
		itl = NULL;
	}

	itimer_init(&pt->pt_itimer, &ptimer_itimer_ops, id, itl);
	pt->pt_proc = p;
	pt->pt_poverruns = 0;
	pt->pt_entry = timerid;
	pt->pt_queued = false;

	pts->pts_timers[timerid] = &pt->pt_itimer;
	itimer_unlock();

	return copyout(&timerid, tid, sizeof(timerid));
}
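
/*
 * Userland usage sketch (illustrative only, not part of this file):
 *
 *	struct sigevent ev;
 *	timer_t tid;
 *
 *	memset(&ev, 0, sizeof(ev));
 *	ev.sigev_notify = SIGEV_SIGNAL;
 *	ev.sigev_signo = SIGUSR1;
 *	ev.sigev_value.sival_int = 42;
 *	if (timer_create(CLOCK_MONOTONIC, &ev, &tid) == -1)
 *		err(1, "timer_create");
 *
 * With evp == NULL the code above picks defaults instead: SIGEV_SIGNAL
 * with SIGALRM (or SIGVTALRM/SIGPROF for the virtual clocks) and the
 * timer id as the signal value.
 */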

/*
 * sys_timer_delete:
 *
 *	System call to delete a POSIX timer.
 */
int
sys_timer_delete(struct lwp *l, const struct sys_timer_delete_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(timer_t) timerid;
	} */
	struct proc *p = l->l_proc;
	timer_t timerid;
	struct ptimers *pts;
	struct itimer *it, *itn;

	timerid = SCARG(uap, timerid);
	pts = p->p_timers;

	if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
		return (EINVAL);

	itimer_lock();
	if ((it = pts->pts_timers[timerid]) == NULL) {
		itimer_unlock();
		return (EINVAL);
	}

	if (CLOCK_VIRTUAL_P(it->it_clockid)) {
		if (it->it_active) {
			itn = LIST_NEXT(it, it_list);
			LIST_REMOVE(it, it_list);
			for ( ; itn; itn = LIST_NEXT(itn, it_list))
				timespecadd(&it->it_time.it_value,
				    &itn->it_time.it_value,
				    &itn->it_time.it_value);
			it->it_active = false;
		}
	}

	/* Free the timer and release the lock.  */
	ptimer_free(pts, timerid);

	return (0);
}

/*
 * sys___timer_settime50:
 *
 *	System call to set/arm a POSIX timer.
 */
int
sys___timer_settime50(struct lwp *l,
    const struct sys___timer_settime50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(timer_t) timerid;
		syscallarg(int) flags;
		syscallarg(const struct itimerspec *) value;
		syscallarg(struct itimerspec *) ovalue;
	} */
	int error;
	struct itimerspec value, ovalue, *ovp = NULL;

	if ((error = copyin(SCARG(uap, value), &value,
	    sizeof(struct itimerspec))) != 0)
		return (error);

	if (SCARG(uap, ovalue))
		ovp = &ovalue;

	if ((error = dotimer_settime(SCARG(uap, timerid), &value, ovp,
	    SCARG(uap, flags), l->l_proc)) != 0)
		return error;

	if (ovp)
		return copyout(&ovalue, SCARG(uap, ovalue),
		    sizeof(struct itimerspec));
	return 0;
}

int
dotimer_settime(int timerid, struct itimerspec *value,
    struct itimerspec *ovalue, int flags, struct proc *p)
{
	struct timespec now;
	struct itimerspec val, oval;
	struct ptimers *pts;
	struct itimer *it;
	int error;

	pts = p->p_timers;

	if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
		return EINVAL;
	val = *value;
	if ((error = itimespecfix(&val.it_value)) != 0 ||
	    (error = itimespecfix(&val.it_interval)) != 0)
		return error;

	itimer_lock();
 restart:
	if ((it = pts->pts_timers[timerid]) == NULL) {
		itimer_unlock();
		return EINVAL;
	}

	oval = it->it_time;
	it->it_time = val;

	/*
	 * If we've been passed a relative time for a realtime timer,
	 * convert it to absolute; if an absolute time for a virtual
	 * timer, convert it to relative and make sure we don't set it
	 * to zero, which would cancel the timer, or let it go
	 * negative, which would confuse the comparison tests.
	 */
	if (timespecisset(&it->it_time.it_value)) {
		if (!CLOCK_VIRTUAL_P(it->it_clockid)) {
			if ((flags & TIMER_ABSTIME) == 0) {
				if (it->it_clockid == CLOCK_REALTIME) {
					getnanotime(&now);
				} else { /* CLOCK_MONOTONIC */
					getnanouptime(&now);
				}
				timespecadd(&it->it_time.it_value, &now,
				    &it->it_time.it_value);
			}
		} else {
			if ((flags & TIMER_ABSTIME) != 0) {
				getnanotime(&now);
				timespecsub(&it->it_time.it_value, &now,
				    &it->it_time.it_value);
				if (!timespecisset(&it->it_time.it_value) ||
				    it->it_time.it_value.tv_sec < 0) {
					it->it_time.it_value.tv_sec = 0;
					it->it_time.it_value.tv_nsec = 1;
				}
			}
		}
	}

	error = itimer_settime(it);
	if (error == ERESTART) {
		KASSERT(!CLOCK_VIRTUAL_P(it->it_clockid));
		goto restart;
	}
	KASSERT(error == 0);
	itimer_unlock();

	if (ovalue)
		*ovalue = oval;

	return (0);
}
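
/*
 * Example of the conversion above (added commentary, not from the
 * original source): arming a CLOCK_MONOTONIC timer with a relative
 * it_value of 2 s at uptime 100 s stores an absolute deadline of
 * 102 s.  Conversely, an absolute deadline handed to a virtual timer
 * is turned back into a relative remainder; a deadline already in the
 * past is clamped to 1 ns so the timer still fires immediately instead
 * of being silently cancelled.
 */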

/*
 * sys___timer_gettime50:
 *
 *	System call to return the time remaining until a POSIX timer fires.
 */
int
sys___timer_gettime50(struct lwp *l,
    const struct sys___timer_gettime50_args *uap, register_t *retval)
{
	/* {
		syscallarg(timer_t) timerid;
		syscallarg(struct itimerspec *) value;
	} */
	struct itimerspec its;
	int error;

	if ((error = dotimer_gettime(SCARG(uap, timerid), l->l_proc,
	    &its)) != 0)
		return error;

	return copyout(&its, SCARG(uap, value), sizeof(its));
}

int
dotimer_gettime(int timerid, struct proc *p, struct itimerspec *its)
{
	struct itimer *it;
	struct ptimers *pts;

	pts = p->p_timers;
	if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
		return (EINVAL);
	itimer_lock();
	if ((it = pts->pts_timers[timerid]) == NULL) {
		itimer_unlock();
		return (EINVAL);
	}
	itimer_gettime(it, its);
	itimer_unlock();

	return 0;
}

/*
 * sys_timer_getoverrun:
 *
 *	System call to return the number of times a POSIX timer has
 *	expired while a notification was already pending.  The counter
 *	is reset when a timer expires and a notification can be posted.
 */
int
sys_timer_getoverrun(struct lwp *l, const struct sys_timer_getoverrun_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(timer_t) timerid;
	} */
	struct proc *p = l->l_proc;
	struct ptimers *pts;
	int timerid;
	struct itimer *it;
	struct ptimer *pt;

	timerid = SCARG(uap, timerid);

	pts = p->p_timers;
	if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
		return (EINVAL);
	itimer_lock();
	if ((it = pts->pts_timers[timerid]) == NULL) {
		itimer_unlock();
		return (EINVAL);
	}
	pt = container_of(it, struct ptimer, pt_itimer);
	*retval = pt->pt_poverruns;
	if (*retval >= DELAYTIMER_MAX)
		*retval = DELAYTIMER_MAX;
	itimer_unlock();

	return (0);
}

/*
 * sys___getitimer50:
 *
 *	System call to get the time remaining before a BSD timer fires.
 */
int
sys___getitimer50(struct lwp *l, const struct sys___getitimer50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) which;
		syscallarg(struct itimerval *) itv;
	} */
	struct proc *p = l->l_proc;
	struct itimerval aitv;
	int error;

	memset(&aitv, 0, sizeof(aitv));
	error = dogetitimer(p, SCARG(uap, which), &aitv);
	if (error)
		return error;
	return (copyout(&aitv, SCARG(uap, itv), sizeof(struct itimerval)));
}

int
dogetitimer(struct proc *p, int which, struct itimerval *itvp)
{
	struct ptimers *pts;
	struct itimer *it;
	struct itimerspec its;

	if ((u_int)which > ITIMER_MONOTONIC)
		return (EINVAL);

	itimer_lock();
	pts = p->p_timers;
	if (pts == NULL || (it = pts->pts_timers[which]) == NULL) {
		timerclear(&itvp->it_value);
		timerclear(&itvp->it_interval);
	} else {
		itimer_gettime(it, &its);
		TIMESPEC_TO_TIMEVAL(&itvp->it_value, &its.it_value);
		TIMESPEC_TO_TIMEVAL(&itvp->it_interval, &its.it_interval);
	}
	itimer_unlock();

	return 0;
}

/*
 * sys___setitimer50:
 *
 *	System call to set/arm a BSD timer.
 */
int
sys___setitimer50(struct lwp *l, const struct sys___setitimer50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) which;
		syscallarg(const struct itimerval *) itv;
		syscallarg(struct itimerval *) oitv;
	} */
	struct proc *p = l->l_proc;
	int which = SCARG(uap, which);
	struct sys___getitimer50_args getargs;
	const struct itimerval *itvp;
	struct itimerval aitv;
	int error;

	itvp = SCARG(uap, itv);
	if (itvp &&
	    (error = copyin(itvp, &aitv, sizeof(struct itimerval))) != 0)
		return (error);
	if (SCARG(uap, oitv) != NULL) {
		SCARG(&getargs, which) = which;
		SCARG(&getargs, itv) = SCARG(uap, oitv);
		if ((error = sys___getitimer50(l, &getargs, retval)) != 0)
			return (error);
	}
	if (itvp == NULL)
		return (0);

	return dosetitimer(p, which, &aitv);
}

int
dosetitimer(struct proc *p, int which, struct itimerval *itvp)
{
	struct timespec now;
	struct ptimers *pts;
	struct ptimer *spare;
	struct itimer *it;
	struct itlist *itl;
	int error;

	if ((u_int)which > ITIMER_MONOTONIC)
		return (EINVAL);
	if (itimerfix(&itvp->it_value) || itimerfix(&itvp->it_interval))
		return (EINVAL);

	/*
	 * Don't bother allocating data structures if the process just
	 * wants to clear the timer.
	 */
	spare = NULL;
	pts = p->p_timers;
 retry:
	if (!timerisset(&itvp->it_value) && (pts == NULL ||
	    pts->pts_timers[which] == NULL))
		return (0);
	if (pts == NULL)
		pts = ptimers_alloc(p);
	itimer_lock();
 restart:
	it = pts->pts_timers[which];
	if (it == NULL) {
		struct ptimer *pt;

		if (spare == NULL) {
			itimer_unlock();
			spare = kmem_zalloc(sizeof(*spare), KM_SLEEP);
			goto retry;
		}
		pt = spare;
		spare = NULL;

		it = &pt->pt_itimer;
		pt->pt_ev.sigev_notify = SIGEV_SIGNAL;
		pt->pt_ev.sigev_value.sival_int = which;

		switch (which) {
		case ITIMER_REAL:
		case ITIMER_MONOTONIC:
			itl = NULL;
			pt->pt_ev.sigev_signo = SIGALRM;
			break;
		case ITIMER_VIRTUAL:
			itl = &pts->pts_virtual;
			pt->pt_ev.sigev_signo = SIGVTALRM;
			break;
		case ITIMER_PROF:
			itl = &pts->pts_prof;
			pt->pt_ev.sigev_signo = SIGPROF;
			break;
		default:
			panic("%s: can't happen %d", __func__, which);
		}
		itimer_init(it, &ptimer_itimer_ops, which, itl);
		pt->pt_proc = p;
		pt->pt_entry = which;

		pts->pts_timers[which] = it;
	}

	TIMEVAL_TO_TIMESPEC(&itvp->it_value, &it->it_time.it_value);
	TIMEVAL_TO_TIMESPEC(&itvp->it_interval, &it->it_time.it_interval);

	if (timespecisset(&it->it_time.it_value)) {
		/* Convert to absolute time */
		/* XXX need to wrap in splclock for timecounters case? */
		switch (which) {
		case ITIMER_REAL:
			getnanotime(&now);
			timespecadd(&it->it_time.it_value, &now,
			    &it->it_time.it_value);
			break;
		case ITIMER_MONOTONIC:
			getnanouptime(&now);
			timespecadd(&it->it_time.it_value, &now,
			    &it->it_time.it_value);
			break;
		default:
			break;
		}
	}
	error = itimer_settime(it);
	if (error == ERESTART) {
		KASSERT(!CLOCK_VIRTUAL_P(it->it_clockid));
		goto restart;
	}
	KASSERT(error == 0);
	itimer_unlock();
	if (spare != NULL)
		kmem_free(spare, sizeof(*spare));

	return (0);
}
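
/*
 * Userland usage sketch (illustrative only, not part of this file):
 *
 *	struct itimerval itv;
 *
 *	itv.it_value.tv_sec = 1;	// first expiry after 1 s
 *	itv.it_value.tv_usec = 0;
 *	itv.it_interval.tv_sec = 0;	// then every 250 ms
 *	itv.it_interval.tv_usec = 250000;
 *	if (setitimer(ITIMER_REAL, &itv, NULL) == -1)
 *		err(1, "setitimer");
 *
 * Each expiry delivers SIGALRM.  The "spare" dance above exists so the
 * kernel can do the sleeping allocation without holding the interval
 * timer spin lock.
 */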

/*
 * ptimer_tick:
 *
 *	Called from hardclock() to decrement per-process virtual timers.
 */
void
ptimer_tick(lwp_t *l, bool user)
{
	struct ptimers *pts;
	struct itimer *it;
	proc_t *p;

	p = l->l_proc;
	if (p->p_timers == NULL)
		return;

	itimer_lock();
	if ((pts = l->l_proc->p_timers) != NULL) {
		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		if (user && (it = LIST_FIRST(&pts->pts_virtual)) != NULL)
			if (itimer_decr(it, tick * 1000))
				(*it->it_ops->ito_fire)(it);
		if ((it = LIST_FIRST(&pts->pts_prof)) != NULL)
			if (itimer_decr(it, tick * 1000))
				(*it->it_ops->ito_fire)(it);
	}
	itimer_unlock();
}
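
/*
 * Note on units (added commentary, not from the original source):
 * "tick" is the length of one hardclock() period in microseconds, so
 * tick * 1000 converts it to the nanoseconds that itimer_decr()
 * expects; with the traditional hz = 100 that is 10000 us = 10000000
 * ns per tick.  Only the head of each delta list needs decrementing,
 * since every other entry is stored relative to its predecessor.
 */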

/*
 * ptimer_intr:
 *
 *	Software interrupt handler for processing per-process
 *	timer expiration.
 */
static void
ptimer_intr(void *cookie)
{
	ksiginfo_t ksi;
	struct itimer *it;
	struct ptimer *pt;
	proc_t *p;

	mutex_enter(&proc_lock);
	itimer_lock();
	while ((pt = TAILQ_FIRST(&ptimer_queue)) != NULL) {
		it = &pt->pt_itimer;

		TAILQ_REMOVE(&ptimer_queue, pt, pt_chain);
		KASSERT(pt->pt_queued);
		pt->pt_queued = false;

		p = pt->pt_proc;
		if (p->p_timers == NULL) {
			/* Process is dying. */
			continue;
		}
		if (pt->pt_ev.sigev_notify != SIGEV_SIGNAL) {
			continue;
		}
		if (sigismember(&p->p_sigpend.sp_set, pt->pt_ev.sigev_signo)) {
			it->it_overruns++;
			continue;
		}

		KSI_INIT(&ksi);
		ksi.ksi_signo = pt->pt_ev.sigev_signo;
		ksi.ksi_code = SI_TIMER;
		ksi.ksi_value = pt->pt_ev.sigev_value;
		pt->pt_poverruns = it->it_overruns;
		it->it_overruns = 0;
		itimer_unlock();
		kpsignal(p, &ksi, NULL);
		itimer_lock();
	}
	itimer_unlock();
	mutex_exit(&proc_lock);
}