xref: /netbsd-src/sys/kern/sys_timerfd.c (revision fb5eed702691094bd687fbf1ded189c87457cd35)
1 /*	$NetBSD: sys_timerfd.c,v 1.6 2021/09/27 00:40:49 thorpej Exp $	*/
2 
3 /*-
4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: sys_timerfd.c,v 1.6 2021/09/27 00:40:49 thorpej Exp $");
34 
35 /*
36  * timerfd
37  *
38  * Timerfd objects are similar to POSIX timers, except they are associated
39  * with a file descriptor rather than a process.  Timerfd objects are
40  * created with the timerfd_create(2) system call, similar to timer_create(2).
41  * The timerfd analogues for timer_gettime(2) and timer_settime(2) are
42  * timerfd_gettime(2) and timerfd_settime(2), respectively.
43  *
44  * When a timerfd object's timer fires, an internal counter is incremented.
45  * When this counter is non-zero, the descriptor associated with the timerfd
46  * object is "readable".  Note that this is slightly different than the
47  * POSIX timer "overrun" counter, which only increments if the timer fires
48  * again while the notification signal is already pending.  Thus, we are
49  * responsible for incrementing the "overrun" counter each time the timerfd
50  * timer fires.
51  *
52  * This implementation is API compatible with the Linux timerfd interface.
53  */
54 
55 #include <sys/param.h>
56 #include <sys/types.h>
57 #include <sys/condvar.h>
58 #include <sys/file.h>
59 #include <sys/filedesc.h>
60 #include <sys/kauth.h>
61 #include <sys/mutex.h>
62 #include <sys/poll.h>
63 #include <sys/proc.h>
64 #include <sys/select.h>
65 #include <sys/stat.h>
66 #include <sys/syscallargs.h>
67 #include <sys/timerfd.h>
68 #include <sys/uio.h>
69 
70 /* N.B. all timerfd state is protected by itimer_lock() */
71 struct timerfd {
72 	struct itimer	tfd_itimer;
73 	kcondvar_t	tfd_read_wait;
74 	kcondvar_t	tfd_restart_wait;
75 	struct selinfo	tfd_read_sel;
76 	int64_t		tfd_nwaiters;
77 	bool		tfd_cancel_on_set;
78 	bool		tfd_cancelled;
79 	bool		tfd_restarting;
80 
81 	/*
82 	 * Information kept for stat(2).
83 	 */
84 	struct timespec tfd_btime;	/* time created */
85 	struct timespec	tfd_mtime;	/* last timerfd_settime() */
86 	struct timespec	tfd_atime;	/* last read */
87 };
88 
89 static void	timerfd_wake(struct timerfd *);
90 
91 static inline uint64_t
92 timerfd_fire_count(const struct timerfd * const tfd)
93 {
94 	return (unsigned int)tfd->tfd_itimer.it_overruns;
95 }
96 
97 static inline bool
98 timerfd_is_readable(const struct timerfd * const tfd)
99 {
100 	return tfd->tfd_itimer.it_overruns != 0 || tfd->tfd_cancelled;
101 }
102 
103 /*
104  * timerfd_fire:
105  *
106  *	Called when the timerfd's timer fires.
107  *
108  *	Called from a callout with itimer lock held.
109  */
110 static void
111 timerfd_fire(struct itimer * const it)
112 {
113 	struct timerfd * const tfd =
114 	    container_of(it, struct timerfd, tfd_itimer);
115 
116 	it->it_overruns++;
117 	timerfd_wake(tfd);
118 }
119 
120 /*
121  * timerfd_realtime_changed:
122  *
123  *	Called when CLOCK_REALTIME is changed with clock_settime()
124  *	or settimeofday().
125  *
126  *	Called with itimer lock held.
127  */
128 static void
129 timerfd_realtime_changed(struct itimer * const it)
130 {
131 	struct timerfd * const tfd =
132 	    container_of(it, struct timerfd, tfd_itimer);
133 
134 	/* Should only be called when timer is armed. */
135 	KASSERT(timespecisset(&it->it_time.it_value));
136 
137 	if (tfd->tfd_cancel_on_set) {
138 		tfd->tfd_cancelled = true;
139 		timerfd_wake(tfd);
140 	}
141 }
142 
143 static const struct itimer_ops timerfd_itimer_monotonic_ops = {
144 	.ito_fire = timerfd_fire,
145 };
146 
147 static const struct itimer_ops timerfd_itimer_realtime_ops = {
148 	.ito_fire = timerfd_fire,
149 	.ito_realtime_changed = timerfd_realtime_changed,
150 };
151 
152 /*
153  * timerfd_create:
154  *
155  *	Create a timerfd object.
156  */
157 static struct timerfd *
158 timerfd_create(clockid_t const clock_id, int const flags)
159 {
160 	struct timerfd * const tfd = kmem_zalloc(sizeof(*tfd), KM_SLEEP);
161 
162 	KASSERT(clock_id == CLOCK_REALTIME || clock_id == CLOCK_MONOTONIC);
163 
164 	cv_init(&tfd->tfd_read_wait, "tfdread");
165 	cv_init(&tfd->tfd_restart_wait, "tfdrstrt");
166 	selinit(&tfd->tfd_read_sel);
167 	getnanotime(&tfd->tfd_btime);
168 
169 	/* Caller deals with TFD_CLOEXEC and TFD_NONBLOCK. */
170 
171 	itimer_lock();
172 	itimer_init(&tfd->tfd_itimer,
173 	    clock_id == CLOCK_REALTIME ? &timerfd_itimer_realtime_ops
174 				       : &timerfd_itimer_monotonic_ops,
175 	    clock_id, NULL);
176 	itimer_unlock();
177 
178 	return tfd;
179 }
180 
181 /*
182  * timerfd_destroy:
183  *
184  *	Destroy a timerfd object.
185  */
186 static void
187 timerfd_destroy(struct timerfd * const tfd)
188 {
189 
190 	KASSERT(tfd->tfd_nwaiters == 0);
191 	KASSERT(tfd->tfd_restarting == false);
192 
193 	itimer_lock();
194 	itimer_poison(&tfd->tfd_itimer);
195 	itimer_fini(&tfd->tfd_itimer);	/* drops itimer lock */
196 
197 	cv_destroy(&tfd->tfd_read_wait);
198 	cv_destroy(&tfd->tfd_restart_wait);
199 
200 	seldestroy(&tfd->tfd_read_sel);
201 
202 	kmem_free(tfd, sizeof(*tfd));
203 }
204 
205 /*
206  * timerfd_wait:
207  *
208  *	Block on a timerfd.  Handles non-blocking, as well as
209  *	the restart cases.
210  */
211 static int
212 timerfd_wait(struct timerfd * const tfd, int const fflag)
213 {
214 	extern kmutex_t	itimer_mutex;	/* XXX */
215 	int error;
216 
217 	if (fflag & FNONBLOCK) {
218 		return EAGAIN;
219 	}
220 
221 	/*
222 	 * We're going to block.  If there is a restart in-progress,
223 	 * wait for that to complete first.
224 	 */
225 	while (tfd->tfd_restarting) {
226 		cv_wait(&tfd->tfd_restart_wait, &itimer_mutex);
227 	}
228 
229 	tfd->tfd_nwaiters++;
230 	KASSERT(tfd->tfd_nwaiters > 0);
231 	error = cv_wait_sig(&tfd->tfd_read_wait, &itimer_mutex);
232 	tfd->tfd_nwaiters--;
233 	KASSERT(tfd->tfd_nwaiters >= 0);
234 
235 	/*
236 	 * If a restart was triggered while we were asleep, we need
237 	 * to return ERESTART if no other error was returned.  If we
238 	 * are the last waiter coming out of the restart drain, clear
239 	 * the condition.
240 	 */
241 	if (tfd->tfd_restarting) {
242 		if (error == 0) {
243 			error = ERESTART;
244 		}
245 		if (tfd->tfd_nwaiters == 0) {
246 			tfd->tfd_restarting = false;
247 			cv_broadcast(&tfd->tfd_restart_wait);
248 		}
249 	}
250 
251 	return error;
252 }
253 
254 /*
255  * timerfd_wake:
256  *
257  *	Wake LWPs blocked on a timerfd.
258  */
259 static void
260 timerfd_wake(struct timerfd * const tfd)
261 {
262 
263 	if (tfd->tfd_nwaiters) {
264 		cv_broadcast(&tfd->tfd_read_wait);
265 	}
266 	selnotify(&tfd->tfd_read_sel, POLLIN | POLLRDNORM, NOTE_SUBMIT);
267 }
268 
269 /*
270  * timerfd file operations
271  */
272 
273 static int
274 timerfd_fop_read(file_t * const fp, off_t * const offset,
275     struct uio * const uio, kauth_cred_t const cred, int const flags)
276 {
277 	struct timerfd * const tfd = fp->f_timerfd;
278 	struct itimer * const it = &tfd->tfd_itimer;
279 	int const fflag = fp->f_flag;
280 	uint64_t return_value;
281 	int error;
282 
283 	if (uio->uio_resid < sizeof(uint64_t)) {
284 		return EINVAL;
285 	}
286 
287 	itimer_lock();
288 
289 	while (!timerfd_is_readable(tfd)) {
290 		if ((error = timerfd_wait(tfd, fflag)) != 0) {
291 			itimer_unlock();
292 			return error;
293 		}
294 	}
295 
296 	if (tfd->tfd_cancelled) {
297 		itimer_unlock();
298 		return ECANCELED;
299 	}
300 
301 	return_value = timerfd_fire_count(tfd);
302 	it->it_overruns = 0;
303 
304 	getnanotime(&tfd->tfd_atime);
305 
306 	itimer_unlock();
307 
308 	error = uiomove(&return_value, sizeof(return_value), uio);
309 
310 	return error;
311 }
312 
313 static int
314 timerfd_fop_ioctl(file_t * const fp, unsigned long const cmd, void * const data)
315 {
316 	struct timerfd * const tfd = fp->f_timerfd;
317 	int error = 0;
318 
319 	switch (cmd) {
320 	case TFD_IOC_SET_TICKS: {
321 		const uint64_t * const new_ticksp = data;
322 		if (*new_ticksp > INT_MAX) {
323 			return EINVAL;
324 		}
325 		itimer_lock();
326 		tfd->tfd_itimer.it_overruns = (int)*new_ticksp;
327 		itimer_unlock();
328 		break;
329 	    }
330 
331 	default:
332 		error = EPASSTHROUGH;
333 	}
334 
335 	return error;
336 }
337 
338 static int
339 timerfd_fop_poll(file_t * const fp, int const events)
340 {
341 	struct timerfd * const tfd = fp->f_timerfd;
342 	int revents = events & (POLLOUT | POLLWRNORM);
343 
344 	if (events & (POLLIN | POLLRDNORM)) {
345 		itimer_lock();
346 		if (timerfd_is_readable(tfd)) {
347 			revents |= events & (POLLIN | POLLRDNORM);
348 		} else {
349 			selrecord(curlwp, &tfd->tfd_read_sel);
350 		}
351 		itimer_unlock();
352 	}
353 
354 	return revents;
355 }
356 
357 static int
358 timerfd_fop_stat(file_t * const fp, struct stat * const st)
359 {
360 	struct timerfd * const tfd = fp->f_timerfd;
361 
362 	memset(st, 0, sizeof(*st));
363 
364 	itimer_lock();
365 	st->st_size = (off_t)timerfd_fire_count(tfd);
366 	st->st_atimespec = tfd->tfd_atime;
367 	st->st_mtimespec = tfd->tfd_mtime;
368 	itimer_unlock();
369 
370 	st->st_blksize = sizeof(uint64_t);
371 	st->st_mode = S_IFIFO | S_IRUSR | S_IWUSR;
372 	st->st_blocks = 1;
373 	st->st_birthtimespec = tfd->tfd_btime;
374 	st->st_ctimespec = st->st_mtimespec;
375 	st->st_uid = kauth_cred_geteuid(fp->f_cred);
376 	st->st_gid = kauth_cred_getegid(fp->f_cred);
377 
378 	return 0;
379 }
380 
381 static int
382 timerfd_fop_close(file_t * const fp)
383 {
384 	struct timerfd * const tfd = fp->f_timerfd;
385 
386 	fp->f_timerfd = NULL;
387 	timerfd_destroy(tfd);
388 
389 	return 0;
390 }
391 
392 static void
393 timerfd_filt_read_detach(struct knote * const kn)
394 {
395 	struct timerfd * const tfd = ((file_t *)kn->kn_obj)->f_timerfd;
396 
397 	itimer_lock();
398 	KASSERT(kn->kn_hook == tfd);
399 	selremove_knote(&tfd->tfd_read_sel, kn);
400 	itimer_unlock();
401 }
402 
403 static int
404 timerfd_filt_read(struct knote * const kn, long const hint)
405 {
406 	struct timerfd * const tfd = ((file_t *)kn->kn_obj)->f_timerfd;
407 	int rv;
408 
409 	if (hint & NOTE_SUBMIT) {
410 		KASSERT(itimer_lock_held());
411 	} else {
412 		itimer_lock();
413 	}
414 
415 	kn->kn_data = (int64_t)timerfd_fire_count(tfd);
416 	rv = kn->kn_data != 0;
417 
418 	if ((hint & NOTE_SUBMIT) == 0) {
419 		itimer_unlock();
420 	}
421 
422 	return rv;
423 }
424 
425 static const struct filterops timerfd_read_filterops = {
426 	.f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
427 	.f_detach = timerfd_filt_read_detach,
428 	.f_event = timerfd_filt_read,
429 };
430 
431 static int
432 timerfd_fop_kqfilter(file_t * const fp, struct knote * const kn)
433 {
434 	struct timerfd * const tfd = ((file_t *)kn->kn_obj)->f_timerfd;
435 	struct selinfo *sel;
436 
437 	switch (kn->kn_filter) {
438 	case EVFILT_READ:
439 		sel = &tfd->tfd_read_sel;
440 		kn->kn_fop = &timerfd_read_filterops;
441 		break;
442 
443 	default:
444 		return EINVAL;
445 	}
446 
447 	kn->kn_hook = tfd;
448 
449 	itimer_lock();
450 	selrecord_knote(sel, kn);
451 	itimer_unlock();
452 
453 	return 0;
454 }
455 
456 static void
457 timerfd_fop_restart(file_t * const fp)
458 {
459 	struct timerfd * const tfd = fp->f_timerfd;
460 
461 	/*
462 	 * Unblock blocked reads in order to allow close() to complete.
463 	 * System calls return ERESTART so that the fd is revalidated.
464 	 */
465 
466 	itimer_lock();
467 
468 	if (tfd->tfd_nwaiters != 0) {
469 		tfd->tfd_restarting = true;
470 		cv_broadcast(&tfd->tfd_read_wait);
471 	}
472 
473 	itimer_unlock();
474 }
475 
476 static const struct fileops timerfd_fileops = {
477 	.fo_name = "timerfd",
478 	.fo_read = timerfd_fop_read,
479 	.fo_write = fbadop_write,
480 	.fo_ioctl = timerfd_fop_ioctl,
481 	.fo_fcntl = fnullop_fcntl,
482 	.fo_poll = timerfd_fop_poll,
483 	.fo_stat = timerfd_fop_stat,
484 	.fo_close = timerfd_fop_close,
485 	.fo_kqfilter = timerfd_fop_kqfilter,
486 	.fo_restart = timerfd_fop_restart,
487 };
488 
489 /*
490  * timerfd_create(2) system call
491  */
492 int
493 do_timerfd_create(struct lwp * const l, clockid_t const clock_id,
494     int const flags, register_t *retval)
495 {
496 	file_t *fp;
497 	int fd, error;
498 
499 	if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) {
500 		return EINVAL;
501 	}
502 
503 	switch (clock_id) {
504 	case CLOCK_REALTIME:
505 	case CLOCK_MONOTONIC:
506 		/* allowed */
507 		break;
508 
509 	default:
510 		return EINVAL;
511 	}
512 
513 	if ((error = fd_allocfile(&fp, &fd)) != 0) {
514 		return error;
515 	}
516 
517 	fp->f_flag = FREAD;
518 	if (flags & TFD_NONBLOCK) {
519 		fp->f_flag |= FNONBLOCK;
520 	}
521 	fp->f_type = DTYPE_TIMERFD;
522 	fp->f_ops = &timerfd_fileops;
523 	fp->f_timerfd = timerfd_create(clock_id, flags);
524 	fd_set_exclose(l, fd, !!(flags & TFD_CLOEXEC));
525 	fd_affix(curproc, fp, fd);
526 
527 	*retval = fd;
528 	return 0;
529 }
530 
531 int
532 sys_timerfd_create(struct lwp *l, const struct sys_timerfd_create_args *uap,
533     register_t *retval)
534 {
535 	/* {
536 		syscallarg(clockid_t) clock_id;
537 		syscallarg(int) flags;
538 	} */
539 
540 	return do_timerfd_create(l, SCARG(uap, clock_id), SCARG(uap, flags),
541 	    retval);
542 }
543 
544 /*
545  * timerfd_gettime(2) system call.
546  */
547 int
548 do_timerfd_gettime(struct lwp *l, int fd, struct itimerspec *curr_value,
549     register_t *retval)
550 {
551 	file_t *fp;
552 
553 	if ((fp = fd_getfile(fd)) == NULL) {
554 		return EBADF;
555 	}
556 
557 	if (fp->f_ops != &timerfd_fileops) {
558 		fd_putfile(fd);
559 		return EINVAL;
560 	}
561 
562 	struct timerfd * const tfd = fp->f_timerfd;
563 	itimer_lock();
564 	itimer_gettime(&tfd->tfd_itimer, curr_value);
565 	itimer_unlock();
566 
567 	fd_putfile(fd);
568 	return 0;
569 }
570 
571 int
572 sys_timerfd_gettime(struct lwp *l, const struct sys_timerfd_gettime_args *uap,
573     register_t *retval)
574 {
575 	/* {
576 		syscallarg(int) fd;
577 		syscallarg(struct itimerspec *) curr_value;
578 	} */
579 
580 	struct itimerspec oits;
581 	int error;
582 
583 	error = do_timerfd_gettime(l, SCARG(uap, fd), &oits, retval);
584 	if (error == 0) {
585 		error = copyout(&oits, SCARG(uap, curr_value), sizeof(oits));
586 	}
587 	return error;
588 }
589 
590 /*
591  * timerfd_settime(2) system call.
592  */
593 int
594 do_timerfd_settime(struct lwp *l, int fd, int flags,
595     const struct itimerspec *new_value, struct itimerspec *old_value,
596     register_t *retval)
597 {
598 	file_t *fp;
599 	int error;
600 
601 	if (flags & ~(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)) {
602 		return EINVAL;
603 	}
604 
605 	if ((fp = fd_getfile(fd)) == NULL) {
606 		return EBADF;
607 	}
608 
609 	if (fp->f_ops != &timerfd_fileops) {
610 		fd_putfile(fd);
611 		return EINVAL;
612 	}
613 
614 	struct timerfd * const tfd = fp->f_timerfd;
615 	struct itimer * const it = &tfd->tfd_itimer;
616 
617 	itimer_lock();
618 
619  restart:
620 	if (old_value != NULL) {
621 		*old_value = it->it_time;
622 	}
623 	it->it_time = *new_value;
624 
625 	/*
626 	 * If we've been passed a relative value, convert it to an
627 	 * absolute, as that's what the itimer facility expects for
628 	 * non-virtual timers.  Also ensure that this doesn't set it
629 	 * to zero or lets it go negative.
630 	 * XXXJRT re-factor.
631 	 */
632 	if (timespecisset(&it->it_time.it_value) &&
633 	    (flags & TFD_TIMER_ABSTIME) == 0) {
634 		struct timespec now;
635 		if (it->it_clockid == CLOCK_REALTIME) {
636 			getnanotime(&now);
637 		} else { /* CLOCK_MONOTONIC */
638 			getnanouptime(&now);
639 		}
640 		timespecadd(&it->it_time.it_value, &now,
641 		    &it->it_time.it_value);
642 	}
643 
644 	error = itimer_settime(it);
645 	if (error == ERESTART) {
646 		goto restart;
647 	}
648 	KASSERT(error == 0);
649 
650 	/* Reset the expirations counter. */
651 	it->it_overruns = 0;
652 
653 	if (it->it_clockid == CLOCK_REALTIME) {
654 		tfd->tfd_cancelled = false;
655 		tfd->tfd_cancel_on_set = !!(flags & TFD_TIMER_CANCEL_ON_SET);
656 	}
657 
658 	getnanotime(&tfd->tfd_mtime);
659 	itimer_unlock();
660 
661 	fd_putfile(fd);
662 	return error;
663 }
664 
665 int
666 sys_timerfd_settime(struct lwp *l, const struct sys_timerfd_settime_args *uap,
667     register_t *retval)
668 {
669 	/* {
670 		syscallarg(int) fd;
671 		syscallarg(int) flags;
672 		syscallarg(const struct itimerspec *) new_value;
673 		syscallarg(struct itimerspec *) old_value;
674 	} */
675 
676 	struct itimerspec nits, oits, *oitsp = NULL;
677 	int error;
678 
679 	error = copyin(SCARG(uap, new_value), &nits, sizeof(nits));
680 	if (error) {
681 		return error;
682 	}
683 
684 	if (SCARG(uap, old_value) != NULL) {
685 		oitsp = &oits;
686 	}
687 
688 	error = do_timerfd_settime(l, SCARG(uap, fd), SCARG(uap, flags),
689 	    &nits, oitsp, retval);
690 	if (error == 0 && oitsp != NULL) {
691 		error = copyout(oitsp, SCARG(uap, old_value), sizeof(*oitsp));
692 	}
693 	return error;
694 }
695