xref: /openbsd-src/sys/kern/sys_futex.c (revision 3b07b489473d508aa238393c6ea8b51dc122ebfe)
1 /*	$OpenBSD: sys_futex.c,v 1.5 2017/12/19 16:41:43 deraadt Exp $ */
2 
3 /*
4  * Copyright (c) 2016-2017 Martin Pieuchot
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/proc.h>
22 #include <sys/kernel.h>
23 #include <sys/mount.h>
24 #include <sys/syscallargs.h>
25 #include <sys/pool.h>
26 #include <sys/time.h>
27 #include <sys/rwlock.h>
28 #include <sys/futex.h>
29 
30 #ifdef KTRACE
31 #include <sys/ktrace.h>
32 #endif
33 
/*
 * Atomicity is only needed on MULTIPROCESSOR kernels.  Fall back on
 * copyin(9) until non-MULTIPROCESSOR architectures have a copyin32(9)
 * implementation.
 */
#ifndef MULTIPROCESSOR
/* Read one aligned 32-bit word from userspace via plain copyin(9). */
#define copyin32(uaddr, kaddr)	copyin((uaddr), (kaddr), sizeof(uint32_t))
#endif
42 
/*
 * Kernel representation of a futex.
 *
 * One instance exists per (process, userspace address) pair; it is
 * created on demand by futex_get(FT_CREATE) and freed by futex_put()
 * when the last reference goes away.
 */
struct futex {
	LIST_ENTRY(futex)	 ft_list;	/* list of all futexes */
	TAILQ_HEAD(, proc)	 ft_threads;	/* sleeping queue */
	uint32_t		*ft_uaddr;	/* userspace address */
	pid_t			 ft_pid;	/* process identifier */
	unsigned int		 ft_refcnt;	/* # of references */
};
53 
/* Syscall helpers, one per futex(2) operation. */
int		 futex_wait(uint32_t *, uint32_t, const struct timespec *);
int		 futex_wake(uint32_t *, uint32_t);
int		 futex_requeue(uint32_t *, uint32_t, uint32_t *, uint32_t);

/* Flags for futex_get(). */
#define FT_CREATE	0x1	/* Create a futex if it doesn't exist. */

struct futex	*futex_get(uint32_t *, int);
void		 futex_put(struct futex *);
64 
/*
 * The global futex lock serializes futex(2) calls such that no wakeup
 * event is lost, and protects the global list of all futexes and their
 * states.
 */
struct rwlock			ftlock = RWLOCK_INITIALIZER("futex");
static LIST_HEAD(, futex)	ftlist;		/* all futexes, guarded by ftlock */
struct pool			ftpool;		/* backing storage for struct futex */
73 
74 
/*
 * One-time initialization of the pool backing struct futex
 * allocations.  PR_RWLOCK is required because pool_get() may be
 * called while holding ``ftlock''.
 */
void
futex_init(void)
{
	pool_init(&ftpool, sizeof(struct futex), 0, IPL_NONE,
	    PR_WAITOK | PR_RWLOCK, "futexpl", NULL);
}
81 
/*
 * futex(2) system call entry point.
 *
 * Dispatches on ``op''.  The result of the operation — an errno-style
 * value for FUTEX_WAIT, a thread count for FUTEX_WAKE/FUTEX_REQUEUE,
 * or ENOSYS for unknown ops — is delivered to userland through
 * ``retval''; the syscall itself always returns 0.
 */
int
sys_futex(struct proc *p, void *v, register_t *retval)
{
	struct sys_futex_args /* {
		syscallarg(uint32_t *) f;
		syscallarg(int) op;
		syscallarg(int) val;
		syscallarg(const struct timespec *) timeout;
		syscallarg(uint32_t *) g;
	} */ *uap = v;
	uint32_t *uaddr = SCARG(uap, f);
	int op = SCARG(uap, op);
	uint32_t val = SCARG(uap, val);
	const struct timespec *timeout = SCARG(uap, timeout);
	void *g = SCARG(uap, g);

	switch (op) {
	case FUTEX_WAIT:
		/*
		 * NOTE(review): only the sleeping path additionally
		 * takes the kernel lock — presumably needed around
		 * rwsleep(9); confirm before restructuring.
		 */
		KERNEL_LOCK();
		rw_enter_write(&ftlock);
		*retval = futex_wait(uaddr, val, timeout);
		rw_exit_write(&ftlock);
		KERNEL_UNLOCK();
		break;
	case FUTEX_WAKE:
		rw_enter_write(&ftlock);
		*retval = futex_wake(uaddr, val);
		rw_exit_write(&ftlock);
		break;
	case FUTEX_REQUEUE:
		/*
		 * For requeue the ``timeout'' slot carries the integer
		 * requeue limit (``val2''), hence the cast.
		 */
		rw_enter_write(&ftlock);
		*retval = futex_requeue(uaddr, val, g, (unsigned long)timeout);
		rw_exit_write(&ftlock);
		break;
	default:
		*retval = ENOSYS;
		break;
	}

	return 0;
}
123 
124 /*
125  * Return an existing futex matching userspace address ``uaddr''.
126  *
127  * If such futex does not exist and FT_CREATE is given, create it.
128  */
129 struct futex *
130 futex_get(uint32_t *uaddr, int flag)
131 {
132 	struct proc *p = curproc;
133 	struct futex *f;
134 
135 	rw_assert_wrlock(&ftlock);
136 
137 	LIST_FOREACH(f, &ftlist, ft_list) {
138 		if (f->ft_uaddr == uaddr && f->ft_pid == p->p_p->ps_pid) {
139 			f->ft_refcnt++;
140 			break;
141 		}
142 	}
143 
144 	if ((f == NULL) && (flag & FT_CREATE)) {
145 		/*
146 		 * We rely on the rwlock to ensure that no other thread
147 		 * create the same futex.
148 		 */
149 		f = pool_get(&ftpool, PR_WAITOK);
150 		TAILQ_INIT(&f->ft_threads);
151 		f->ft_uaddr = uaddr;
152 		f->ft_pid = p->p_p->ps_pid;
153 		f->ft_refcnt = 1;
154 		LIST_INSERT_HEAD(&ftlist, f, ft_list);
155 	}
156 
157 	return f;
158 }
159 
160 /*
161  * Release a given futex.
162  */
163 void
164 futex_put(struct futex *f)
165 {
166 	rw_assert_wrlock(&ftlock);
167 
168 	KASSERT(f->ft_refcnt > 0);
169 
170 	--f->ft_refcnt;
171 	if (f->ft_refcnt == 0) {
172 		KASSERT(TAILQ_EMPTY(&f->ft_threads));
173 		LIST_REMOVE(f, ft_list);
174 		pool_put(&ftpool, f);
175 	}
176 }
177 
/*
 * Put the current thread on the sleep queue of the futex at address
 * ``uaddr''.  Let it sleep for the specified ``timeout'' time, or
 * indefinitely if the argument is NULL.
 *
 * Returns 0 on wakeup, EAGAIN if ``*uaddr'' no longer contains
 * ``val'', ETIMEDOUT if the timeout expired, EINTR if interrupted by
 * a signal, or an error from copyin(9).  Called and returns with
 * ``ftlock'' write-held.
 */
int
futex_wait(uint32_t *uaddr, uint32_t val, const struct timespec *timeout)
{
	struct proc *p = curproc;
	struct futex *f;
	uint64_t to_ticks = 0;		/* 0 = sleep without timeout */
	uint32_t cval;
	int error;

	/*
	 * After reading the value a race is still possible but
	 * we deal with it by serializing all futex syscalls.
	 */
	rw_assert_wrlock(&ftlock);

	/*
	 * Read user space futex value
	 */
	if ((error = copyin32(uaddr, &cval)))
		return error;

	/* If the value changed, stop here. */
	if (cval != val)
		return EAGAIN;

	if (timeout != NULL) {
		struct timespec ts;

		if ((error = copyin(timeout, &ts, sizeof(ts))))
			return error;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrabstimespec(p, timeout);
#endif
		/* Convert to ticks, rounding the nanoseconds up. */
		to_ticks = (uint64_t)hz * ts.tv_sec +
		    (ts.tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1;
		if (to_ticks > INT_MAX)
			to_ticks = INT_MAX;
	}

	/*
	 * Enqueue ourself.  The reference gained here is owned by the
	 * sleeping thread (tracked via p->p_futex) and is released
	 * either below or by futex_requeue() when it dequeues us.
	 */
	f = futex_get(uaddr, FT_CREATE);
	TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link);
	p->p_futex = f;

	/* rwsleep(9) releases ``ftlock'' while asleep and retakes it. */
	error = rwsleep(p, &ftlock, PUSER|PCATCH, "fsleep", (int)to_ticks);
	if (error == ERESTART)
		error = EINTR;
	else if (error == EWOULDBLOCK) {
		/*
		 * A race occurred between a wakeup and a timeout: a
		 * waker already cleared p_futex, so report success.
		 */
		if (p->p_futex == NULL)
			error = 0;
		else
			error = ETIMEDOUT;
	}

	/* Remove ourself if we haven't been awaken. */
	if ((f = p->p_futex) != NULL) {
		p->p_futex = NULL;
		TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
		futex_put(f);
	}

	return error;
}
247 
248 /*
249  * Wakeup at most ``n'' sibling threads sleeping on a futex at address
250  * ``uaddr'' and requeue at most ``m'' sibling threads on a futex at
251  * address ``uaddr2''.
252  */
253 int
254 futex_requeue(uint32_t *uaddr, uint32_t n, uint32_t *uaddr2, uint32_t m)
255 {
256 	struct futex *f, *g;
257 	struct proc *p;
258 	uint32_t count = 0;
259 
260 	rw_assert_wrlock(&ftlock);
261 
262 	f = futex_get(uaddr, 0);
263 	if (f == NULL)
264 		return 0;
265 
266 	while ((p = TAILQ_FIRST(&f->ft_threads)) != NULL && (count < (n + m))) {
267 		p->p_futex = NULL;
268 		TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
269 		futex_put(f);
270 
271 		if (count < n) {
272 			wakeup_one(p);
273 		} else if (uaddr2 != NULL) {
274 			g = futex_get(uaddr2, FT_CREATE);
275 			TAILQ_INSERT_TAIL(&g->ft_threads, p, p_fut_link);
276 			p->p_futex = g;
277 		}
278 		count++;
279 	}
280 
281 	futex_put(f);
282 
283 	return count;
284 }
285 
286 /*
287  * Wakeup at most ``n'' sibling threads sleeping on a futex at address
288  * ``uaddr''.
289  */
290 int
291 futex_wake(uint32_t *uaddr, uint32_t n)
292 {
293 	return futex_requeue(uaddr, n, NULL, 0);
294 }
295