xref: /openbsd-src/sys/kern/sys_futex.c (revision 5a38ef86d0b61900239c7913d24a05e7b88a58f0)
1 /*	$OpenBSD: sys_futex.c,v 1.20 2021/11/22 14:57:17 visa Exp $ */
2 
3 /*
4  * Copyright (c) 2016-2017 Martin Pieuchot
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/proc.h>
22 #include <sys/kernel.h>
23 #include <sys/mount.h>
24 #include <sys/syscallargs.h>
25 #include <sys/pool.h>
26 #include <sys/time.h>
27 #include <sys/rwlock.h>
28 #include <sys/futex.h>
29 
30 #ifdef KTRACE
31 #include <sys/ktrace.h>
32 #endif
33 
34 #include <uvm/uvm.h>
35 
36 /*
37  * Atomicity is only needed on MULTIPROCESSOR kernels.  Fall back on
38  * copyin(9) until non-MULTIPROCESSOR architectures have a copyin32(9)
39  * implementation.
40  */
41 #ifndef MULTIPROCESSOR
42 #define copyin32(uaddr, kaddr)	copyin((uaddr), (kaddr), sizeof(uint32_t))
43 #endif
44 
45 /*
46  * Kernel representation of a futex.
47  */
48 struct futex {
49 	LIST_ENTRY(futex)	 ft_list;	/* list of all futexes */
50 	TAILQ_HEAD(, proc)	 ft_threads;	/* sleeping queue */
51 	struct uvm_object	*ft_obj;	/* UVM object */
52 	struct vm_amap		*ft_amap;	/* UVM amap */
53 	voff_t			 ft_off;	/* UVM offset */
54 	unsigned int		 ft_refcnt;	/* # of references */
55 };
56 
57 /* Syscall helpers. */
58 int	 futex_wait(uint32_t *, uint32_t, const struct timespec *, int);
59 int	 futex_wake(uint32_t *, uint32_t, int);
60 int	 futex_requeue(uint32_t *, uint32_t, uint32_t *, uint32_t, int);
61 
62 /* Flags for futex_get(). */
63 #define FT_CREATE	0x1	/* Create a futex if it doesn't exist. */
64 #define FT_PRIVATE	0x2	/* Futex is process-private. */
65 
66 struct futex *futex_get(uint32_t *, int);
67 void	 futex_put(struct futex *);
68 
69 /*
70  * The global futex lock serializes futex(2) calls so that no wakeup
71  * event is lost, and protects all futex lists and futex states.
72  */
73 struct rwlock			ftlock = RWLOCK_INITIALIZER("futex");
74 static struct futex_list	ftlist_shared =
75 				    LIST_HEAD_INITIALIZER(ftlist_shared);
76 struct pool			ftpool;
77 
78 
79 void
80 futex_init(void)
81 {
82 	pool_init(&ftpool, sizeof(struct futex), 0, IPL_NONE,
83 	    PR_WAITOK | PR_RWLOCK, "futexpl", NULL);
84 }
85 
86 int
87 sys_futex(struct proc *p, void *v, register_t *retval)
88 {
89 	struct sys_futex_args /* {
90 		syscallarg(uint32_t *) f;
91 		syscallarg(int) op;
92 		syscallarg(inr) val;
93 		syscallarg(const struct timespec *) timeout;
94 		syscallarg(uint32_t *) g;
95 	} */ *uap = v;
96 	uint32_t *uaddr = SCARG(uap, f);
97 	int op = SCARG(uap, op);
98 	uint32_t val = SCARG(uap, val);
99 	const struct timespec *timeout = SCARG(uap, timeout);
100 	void *g = SCARG(uap, g);
101 	int flags = 0;
102 	int error = 0;
103 
104 	if (op & FUTEX_PRIVATE_FLAG)
105 		flags |= FT_PRIVATE;
106 
107 	rw_enter_write(&ftlock);
108 	switch (op) {
109 	case FUTEX_WAIT:
110 	case FUTEX_WAIT_PRIVATE:
111 		error = futex_wait(uaddr, val, timeout, flags);
112 		break;
113 	case FUTEX_WAKE:
114 	case FUTEX_WAKE_PRIVATE:
115 		*retval = futex_wake(uaddr, val, flags);
116 		break;
117 	case FUTEX_REQUEUE:
118 	case FUTEX_REQUEUE_PRIVATE:
119 		*retval = futex_requeue(uaddr, val, g, (u_long)timeout, flags);
120 		break;
121 	default:
122 		error = ENOSYS;
123 		break;
124 	}
125 	rw_exit_write(&ftlock);
126 
127 	return error;
128 }
129 
130 /*
131  * Return an existing futex matching userspace address ``uaddr''.
132  *
133  * If such futex does not exist and FT_CREATE is given, create it.
134  */
135 struct futex *
136 futex_get(uint32_t *uaddr, int flags)
137 {
138 	struct proc *p = curproc;
139 	vm_map_t map = &p->p_vmspace->vm_map;
140 	vm_map_entry_t entry;
141 	struct uvm_object *obj = NULL;
142 	struct vm_amap *amap = NULL;
143 	voff_t off = (vaddr_t)uaddr;
144 	struct futex *f;
145 	struct futex_list *ftlist = &p->p_p->ps_ftlist;
146 
147 	rw_assert_wrlock(&ftlock);
148 
149 	if (!(flags & FT_PRIVATE)) {
150 		vm_map_lock_read(map);
151 		if (uvm_map_lookup_entry(map, (vaddr_t)uaddr, &entry) &&
152 		    entry->inheritance == MAP_INHERIT_SHARE) {
153 			if (UVM_ET_ISOBJ(entry)) {
154 				ftlist = &ftlist_shared;
155 				obj = entry->object.uvm_obj;
156 				off = entry->offset +
157 				    ((vaddr_t)uaddr - entry->start);
158 			} else if (entry->aref.ar_amap) {
159 				ftlist = &ftlist_shared;
160 				amap = entry->aref.ar_amap;
161 				off = ptoa(entry->aref.ar_pageoff) +
162 				    ((vaddr_t)uaddr - entry->start);
163 			}
164 		}
165 		vm_map_unlock_read(map);
166 	}
167 
168 	LIST_FOREACH(f, ftlist, ft_list) {
169 		if (f->ft_obj == obj && f->ft_amap == amap &&
170 		    f->ft_off == off) {
171 			f->ft_refcnt++;
172 			break;
173 		}
174 	}
175 
176 	if ((f == NULL) && (flags & FT_CREATE)) {
177 		/*
178 		 * We rely on the rwlock to ensure that no other thread
179 		 * create the same futex.
180 		 */
181 		f = pool_get(&ftpool, PR_WAITOK);
182 		TAILQ_INIT(&f->ft_threads);
183 		f->ft_obj = obj;
184 		f->ft_amap = amap;
185 		f->ft_off = off;
186 		f->ft_refcnt = 1;
187 		LIST_INSERT_HEAD(ftlist, f, ft_list);
188 	}
189 
190 	return f;
191 }
192 
193 /*
194  * Release a given futex.
195  */
196 void
197 futex_put(struct futex *f)
198 {
199 	rw_assert_wrlock(&ftlock);
200 
201 	KASSERT(f->ft_refcnt > 0);
202 
203 	--f->ft_refcnt;
204 	if (f->ft_refcnt == 0) {
205 		KASSERT(TAILQ_EMPTY(&f->ft_threads));
206 		LIST_REMOVE(f, ft_list);
207 		pool_put(&ftpool, f);
208 	}
209 }
210 
211 /*
212  * Put the current thread on the sleep queue of the futex at address
213  * ``uaddr''.  Let it sleep for the specified ``timeout'' time, or
214  * indefinitely if the argument is NULL.
215  */
216 int
217 futex_wait(uint32_t *uaddr, uint32_t val, const struct timespec *timeout,
218     int flags)
219 {
220 	struct proc *p = curproc;
221 	struct futex *f;
222 	uint64_t nsecs = INFSLP;
223 	uint32_t cval;
224 	int error;
225 
226 	/*
227 	 * After reading the value a race is still possible but
228 	 * we deal with it by serializing all futex syscalls.
229 	 */
230 	rw_assert_wrlock(&ftlock);
231 
232 	/*
233 	 * Read user space futex value
234 	 */
235 	if ((error = copyin32(uaddr, &cval)))
236 		return error;
237 
238 	/* If the value changed, stop here. */
239 	if (cval != val)
240 		return EAGAIN;
241 
242 	if (timeout != NULL) {
243 		struct timespec ts;
244 
245 		if ((error = copyin(timeout, &ts, sizeof(ts))))
246 			return error;
247 #ifdef KTRACE
248 		if (KTRPOINT(p, KTR_STRUCT))
249 			ktrreltimespec(p, &ts);
250 #endif
251 		if (ts.tv_sec < 0 || !timespecisvalid(&ts))
252 			return EINVAL;
253 		nsecs = MAX(1, MIN(TIMESPEC_TO_NSEC(&ts), MAXTSLP));
254 	}
255 
256 	f = futex_get(uaddr, flags | FT_CREATE);
257 	TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link);
258 	p->p_futex = f;
259 
260 	error = rwsleep_nsec(p, &ftlock, PWAIT|PCATCH, "fsleep", nsecs);
261 	if (error == ERESTART)
262 		error = ECANCELED;
263 	else if (error == EWOULDBLOCK) {
264 		/* A race occurred between a wakeup and a timeout. */
265 		if (p->p_futex == NULL)
266 			error = 0;
267 		else
268 			error = ETIMEDOUT;
269 	}
270 
271 	/* Remove ourself if we haven't been awaken. */
272 	if ((f = p->p_futex) != NULL) {
273 		p->p_futex = NULL;
274 		TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
275 		futex_put(f);
276 	}
277 
278 	return error;
279 }
280 
281 /*
282  * Wakeup at most ``n'' sibling threads sleeping on a futex at address
283  * ``uaddr'' and requeue at most ``m'' sibling threads on a futex at
284  * address ``uaddr2''.
285  */
286 int
287 futex_requeue(uint32_t *uaddr, uint32_t n, uint32_t *uaddr2, uint32_t m,
288     int flags)
289 {
290 	struct futex *f, *g;
291 	struct proc *p;
292 	uint32_t count = 0;
293 
294 	rw_assert_wrlock(&ftlock);
295 
296 	f = futex_get(uaddr, flags);
297 	if (f == NULL)
298 		return 0;
299 
300 	while ((p = TAILQ_FIRST(&f->ft_threads)) != NULL && (count < (n + m))) {
301 		p->p_futex = NULL;
302 		TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
303 		futex_put(f);
304 
305 		if (count < n) {
306 			wakeup_one(p);
307 		} else if (uaddr2 != NULL) {
308 			g = futex_get(uaddr2, FT_CREATE);
309 			TAILQ_INSERT_TAIL(&g->ft_threads, p, p_fut_link);
310 			p->p_futex = g;
311 		}
312 		count++;
313 	}
314 
315 	futex_put(f);
316 
317 	return count;
318 }
319 
320 /*
321  * Wakeup at most ``n'' sibling threads sleeping on a futex at address
322  * ``uaddr''.
323  */
324 int
325 futex_wake(uint32_t *uaddr, uint32_t n, int flags)
326 {
327 	return futex_requeue(uaddr, n, NULL, 0, flags);
328 }
329