xref: /openbsd-src/sys/kern/sys_futex.c (revision 41daea4c2ff14977f73b99b147b1609bb011a79f)
1 /*	$OpenBSD: sys_futex.c,v 1.2 2017/04/30 10:10:21 mpi Exp $ */
2 
3 /*
4  * Copyright (c) 2016-2017 Martin Pieuchot
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/proc.h>
22 #include <sys/kernel.h>
23 #include <sys/mount.h>
24 #include <sys/syscallargs.h>
25 #include <sys/pool.h>
26 #include <sys/time.h>
27 #include <sys/rwlock.h>
28 #include <sys/futex.h>
29 
30 #ifdef KTRACE
31 #include <sys/ktrace.h>
32 #endif
33 
34 /*
35  * Kernel representation of a futex.
36  */
37 struct futex {
38 	LIST_ENTRY(futex)	 ft_list;	/* list of all futexes */
39 	TAILQ_HEAD(, proc)	 ft_threads;	/* sleeping queue */
40 	uint32_t		*ft_uaddr;	/* userspace address */
41 	pid_t			 ft_pid;	/* process identifier */
42 	unsigned int		 ft_refcnt;	/* # of references */
43 };
44 
45 /* Syscall helpers. */
46 int		 futex_wait(uint32_t *, uint32_t, const struct timespec *);
47 int		 futex_wake(uint32_t *, uint32_t);
48 int		 futex_requeue(uint32_t *, uint32_t, uint32_t *, uint32_t);
49 
50 /* Flags for futex_get(). */
51 #define FT_CREATE	0x1	/* Create a futex if it doesn't exist. */
52 
53 struct futex	*futex_get(uint32_t *, int);
54 void		 futex_put(struct futex *);
55 
56 /*
57  * The global futex lock serialize futex(2) calls such that no wakeup
58  * event are lost, protect the global list of all futexes and their
59  * states.
60  */
61 struct rwlock			ftlock = RWLOCK_INITIALIZER("futex");
62 static LIST_HEAD(, futex)	ftlist;
63 struct pool			ftpool;
64 
65 
66 void
67 futex_init(void)
68 {
69 	pool_init(&ftpool, sizeof(struct futex), 0, IPL_NONE, 0, "futexpl",
70 	    NULL);
71 }
72 
73 int
74 sys_futex(struct proc *p, void *v, register_t *retval)
75 {
76 	struct sys_futex_args /* {
77 		syscallarg(uint32_t *) f;
78 		syscallarg(int) op;
79 		syscallarg(inr) val;
80 		syscallarg(const struct timespec *) timeout;
81 		syscallarg(uint32_t *) g;
82 	} */ *uap = v;
83 	uint32_t *uaddr = SCARG(uap, f);
84 	int op = SCARG(uap, op);
85 	uint32_t val = SCARG(uap, val);
86 	const struct timespec *timeout = SCARG(uap, timeout);
87 	void *g = SCARG(uap, g);
88 
89 	switch (op) {
90 	case FUTEX_WAIT:
91 		KERNEL_LOCK();
92 		rw_enter_write(&ftlock);
93 		*retval = futex_wait(uaddr, val, timeout);
94 		rw_exit_write(&ftlock);
95 		KERNEL_UNLOCK();
96 		break;
97 	case FUTEX_WAKE:
98 		rw_enter_write(&ftlock);
99 		*retval = futex_wake(uaddr, val);
100 		rw_exit_write(&ftlock);
101 		break;
102 	case FUTEX_REQUEUE:
103 		rw_enter_write(&ftlock);
104 		*retval = futex_requeue(uaddr, val, g, (unsigned long)timeout);
105 		rw_exit_write(&ftlock);
106 		break;
107 	default:
108 		*retval = ENOSYS;
109 		break;
110 	}
111 
112 	return 0;
113 }
114 
115 /*
116  * Return an existing futex matching userspace address ``uaddr''.
117  *
118  * If such futex does not exist and FT_CREATE is given, create it.
119  */
120 struct futex *
121 futex_get(uint32_t *uaddr, int flag)
122 {
123 	struct futex *f;
124 
125 	rw_assert_wrlock(&ftlock);
126 
127 	LIST_FOREACH(f, &ftlist, ft_list) {
128 		if (f->ft_uaddr == uaddr && f->ft_pid == curproc->p_p->ps_pid) {
129 			f->ft_refcnt++;
130 			break;
131 		}
132 	}
133 
134 	if ((f == NULL) && (flag & FT_CREATE)) {
135 		/*
136 		 * We rely on the rwlock to ensure that no other thread
137 		 * create the same futex.
138 		 */
139 		f = pool_get(&ftpool, PR_WAITOK);
140 		TAILQ_INIT(&f->ft_threads);
141 		f->ft_uaddr = uaddr;
142 		f->ft_pid = curproc->p_p->ps_pid;
143 		f->ft_refcnt = 1;
144 		LIST_INSERT_HEAD(&ftlist, f, ft_list);
145 	}
146 
147 	return f;
148 }
149 
150 /*
151  * Release a given futex.
152  */
153 void
154 futex_put(struct futex *f)
155 {
156 	rw_assert_wrlock(&ftlock);
157 
158 	KASSERT(f->ft_refcnt > 0);
159 
160 	--f->ft_refcnt;
161 	if (f->ft_refcnt == 0) {
162 		KASSERT(TAILQ_EMPTY(&f->ft_threads));
163 		LIST_REMOVE(f, ft_list);
164 		pool_put(&ftpool, f);
165 	}
166 }
167 
168 /*
169  * Put the current thread on the sleep queue of the futex at address
170  * ``uaddr''.  Let it sleep for the specified ``timeout'' time, or
171  * indefinitly if the argument is NULL.
172  */
173 int
174 futex_wait(uint32_t *uaddr, uint32_t val, const struct timespec *timeout)
175 {
176 	struct proc *p = curproc;
177 	struct futex *f;
178 	uint64_t to_ticks = 0;
179 	uint32_t cval;
180 	int error;
181 
182 	/*
183 	 * After reading the value a race is still possible but
184 	 * we deal with it by serializing all futex syscalls.
185 	 */
186 	rw_assert_wrlock(&ftlock);
187 
188 	/*
189 	 * Read user space futex value
190 	 *
191 	 * XXX copyin(9) is not guaranteed to be atomic.
192 	 */
193 	if ((error = copyin(uaddr, &cval, sizeof(cval))))
194 		return error;
195 
196 	/* If the value changed, stop here. */
197 	if (cval != val)
198 		return EAGAIN;
199 
200 	if (timeout != NULL) {
201 		struct timespec ts;
202 
203 		if ((error = copyin(timeout, &ts, sizeof(ts))))
204 			return error;
205 #ifdef KTRACE
206 		if (KTRPOINT(p, KTR_STRUCT))
207 			ktrabstimespec(p, timeout);
208 #endif
209 		to_ticks = (uint64_t)hz * ts.tv_sec +
210 		    (ts.tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1;
211 		if (to_ticks > INT_MAX)
212 			to_ticks = INT_MAX;
213 	}
214 
215 	f = futex_get(uaddr, FT_CREATE);
216 	TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link);
217 	p->p_futex = f;
218 
219 	error = rwsleep(p, &ftlock, PUSER|PCATCH, "fsleep", (int)to_ticks);
220 	if (error == ERESTART)
221 		error = EINTR;
222 	else if (error == EWOULDBLOCK) {
223 		/* A race occured between a wakeup and a timeout. */
224 		if (p->p_futex == NULL)
225 			error = 0;
226 		else
227 			error = ETIMEDOUT;
228 	}
229 
230 	/* Remove ourself if we haven't been awaken. */
231 	if ((f = p->p_futex) != NULL) {
232 		p->p_futex = NULL;
233 		TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
234 		futex_put(f);
235 	}
236 
237 	return error;
238 }
239 
240 /*
241  * Wakeup at most ``n'' sibling threads sleeping on a futex at address
242  * ``uaddr'' and requeue at most ``m'' sibling threads on a futex at
243  * address ``uaddr2''.
244  */
245 int
246 futex_requeue(uint32_t *uaddr, uint32_t n, uint32_t *uaddr2, uint32_t m)
247 {
248 	struct futex *f, *g;
249 	struct proc *p;
250 	uint32_t count = 0;
251 
252 	rw_assert_wrlock(&ftlock);
253 
254 	f = futex_get(uaddr, 0);
255 	if (f == NULL)
256 		return 0;
257 
258 	while ((p = TAILQ_FIRST(&f->ft_threads)) != NULL && (count < (n + m))) {
259 		p->p_futex = NULL;
260 		TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
261 		futex_put(f);
262 
263 		if (count < n) {
264 			wakeup_one(p);
265 		} else if (uaddr2 != NULL) {
266 			g = futex_get(uaddr2, FT_CREATE);
267 			TAILQ_INSERT_TAIL(&g->ft_threads, p, p_fut_link);
268 			p->p_futex = g;
269 		}
270 		count++;
271 	}
272 
273 	futex_put(f);
274 
275 	return count;
276 }
277 
278 /*
279  * Wakeup at most ``n'' sibling threads sleeping on a futex at address
280  * ``uaddr''.
281  */
282 int
283 futex_wake(uint32_t *uaddr, uint32_t n)
284 {
285 	return futex_requeue(uaddr, n, NULL, 0);
286 }
287