1 /* $OpenBSD: sys_futex.c,v 1.5 2017/12/19 16:41:43 deraadt Exp $ */ 2 3 /* 4 * Copyright (c) 2016-2017 Martin Pieuchot 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/param.h> 20 #include <sys/systm.h> 21 #include <sys/proc.h> 22 #include <sys/kernel.h> 23 #include <sys/mount.h> 24 #include <sys/syscallargs.h> 25 #include <sys/pool.h> 26 #include <sys/time.h> 27 #include <sys/rwlock.h> 28 #include <sys/futex.h> 29 30 #ifdef KTRACE 31 #include <sys/ktrace.h> 32 #endif 33 34 /* 35 * Atomicity is only needed on MULTIPROCESSOR kernels. Fall back on 36 * copyin(9) until non-MULTIPROCESSOR architectures have a copyin32(9) 37 * implementation. 38 */ 39 #ifndef MULTIPROCESSOR 40 #define copyin32(uaddr, kaddr) copyin((uaddr), (kaddr), sizeof(uint32_t)) 41 #endif 42 43 /* 44 * Kernel representation of a futex. 45 */ 46 struct futex { 47 LIST_ENTRY(futex) ft_list; /* list of all futexes */ 48 TAILQ_HEAD(, proc) ft_threads; /* sleeping queue */ 49 uint32_t *ft_uaddr; /* userspace address */ 50 pid_t ft_pid; /* process identifier */ 51 unsigned int ft_refcnt; /* # of references */ 52 }; 53 54 /* Syscall helpers. 
*/ 55 int futex_wait(uint32_t *, uint32_t, const struct timespec *); 56 int futex_wake(uint32_t *, uint32_t); 57 int futex_requeue(uint32_t *, uint32_t, uint32_t *, uint32_t); 58 59 /* Flags for futex_get(). */ 60 #define FT_CREATE 0x1 /* Create a futex if it doesn't exist. */ 61 62 struct futex *futex_get(uint32_t *, int); 63 void futex_put(struct futex *); 64 65 /* 66 * The global futex lock serialize futex(2) calls such that no wakeup 67 * event are lost, protect the global list of all futexes and their 68 * states. 69 */ 70 struct rwlock ftlock = RWLOCK_INITIALIZER("futex"); 71 static LIST_HEAD(, futex) ftlist; 72 struct pool ftpool; 73 74 75 void 76 futex_init(void) 77 { 78 pool_init(&ftpool, sizeof(struct futex), 0, IPL_NONE, 79 PR_WAITOK | PR_RWLOCK, "futexpl", NULL); 80 } 81 82 int 83 sys_futex(struct proc *p, void *v, register_t *retval) 84 { 85 struct sys_futex_args /* { 86 syscallarg(uint32_t *) f; 87 syscallarg(int) op; 88 syscallarg(inr) val; 89 syscallarg(const struct timespec *) timeout; 90 syscallarg(uint32_t *) g; 91 } */ *uap = v; 92 uint32_t *uaddr = SCARG(uap, f); 93 int op = SCARG(uap, op); 94 uint32_t val = SCARG(uap, val); 95 const struct timespec *timeout = SCARG(uap, timeout); 96 void *g = SCARG(uap, g); 97 98 switch (op) { 99 case FUTEX_WAIT: 100 KERNEL_LOCK(); 101 rw_enter_write(&ftlock); 102 *retval = futex_wait(uaddr, val, timeout); 103 rw_exit_write(&ftlock); 104 KERNEL_UNLOCK(); 105 break; 106 case FUTEX_WAKE: 107 rw_enter_write(&ftlock); 108 *retval = futex_wake(uaddr, val); 109 rw_exit_write(&ftlock); 110 break; 111 case FUTEX_REQUEUE: 112 rw_enter_write(&ftlock); 113 *retval = futex_requeue(uaddr, val, g, (unsigned long)timeout); 114 rw_exit_write(&ftlock); 115 break; 116 default: 117 *retval = ENOSYS; 118 break; 119 } 120 121 return 0; 122 } 123 124 /* 125 * Return an existing futex matching userspace address ``uaddr''. 126 * 127 * If such futex does not exist and FT_CREATE is given, create it. 
 */
struct futex *
futex_get(uint32_t *uaddr, int flag)
{
	struct proc *p = curproc;
	struct futex *f;

	rw_assert_wrlock(&ftlock);

	/* Futexes are per-process: match on both address and pid. */
	LIST_FOREACH(f, &ftlist, ft_list) {
		if (f->ft_uaddr == uaddr && f->ft_pid == p->p_p->ps_pid) {
			f->ft_refcnt++;
			break;
		}
	}

	if ((f == NULL) && (flag & FT_CREATE)) {
		/*
		 * We rely on the rwlock to ensure that no other thread
		 * create the same futex.
		 */
		f = pool_get(&ftpool, PR_WAITOK);
		TAILQ_INIT(&f->ft_threads);
		f->ft_uaddr = uaddr;
		f->ft_pid = p->p_p->ps_pid;
		f->ft_refcnt = 1;
		LIST_INSERT_HEAD(&ftlist, f, ft_list);
	}

	return f;
}

/*
 * Release a given futex.  Frees it once the last reference is dropped;
 * at that point no thread may still be queued on it.
 */
void
futex_put(struct futex *f)
{
	rw_assert_wrlock(&ftlock);

	KASSERT(f->ft_refcnt > 0);

	--f->ft_refcnt;
	if (f->ft_refcnt == 0) {
		KASSERT(TAILQ_EMPTY(&f->ft_threads));
		LIST_REMOVE(f, ft_list);
		pool_put(&ftpool, f);
	}
}

/*
 * Put the current thread on the sleep queue of the futex at address
 * ``uaddr''.  Let it sleep for the specified ``timeout'' time, or
 * indefinitely if the argument is NULL.
 *
 * Returns 0 on wakeup, EAGAIN if *uaddr no longer equals ``val'',
 * EINTR if interrupted, ETIMEDOUT on timeout, or a copyin(9) error.
 */
int
futex_wait(uint32_t *uaddr, uint32_t val, const struct timespec *timeout)
{
	struct proc *p = curproc;
	struct futex *f;
	uint64_t to_ticks = 0;
	uint32_t cval;
	int error;

	/*
	 * After reading the value a race is still possible but
	 * we deal with it by serializing all futex syscalls.
	 */
	rw_assert_wrlock(&ftlock);

	/*
	 * Read user space futex value
	 */
	if ((error = copyin32(uaddr, &cval)))
		return error;

	/* If the value changed, stop here. */
	if (cval != val)
		return EAGAIN;

	if (timeout != NULL) {
		struct timespec ts;

		if ((error = copyin(timeout, &ts, sizeof(ts))))
			return error;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrabstimespec(p, timeout);
#endif
		/* Round the timeout up to whole ticks; clamp for tsleep. */
		to_ticks = (uint64_t)hz * ts.tv_sec +
		    (ts.tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1;
		if (to_ticks > INT_MAX)
			to_ticks = INT_MAX;
	}

	/* Queue ourself on the futex before releasing the lock in rwsleep. */
	f = futex_get(uaddr, FT_CREATE);
	TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link);
	p->p_futex = f;

	error = rwsleep(p, &ftlock, PUSER|PCATCH, "fsleep", (int)to_ticks);
	if (error == ERESTART)
		error = EINTR;
	else if (error == EWOULDBLOCK) {
		/* A race occurred between a wakeup and a timeout. */
		if (p->p_futex == NULL)
			error = 0;
		else
			error = ETIMEDOUT;
	}

	/* Remove ourself if we haven't been awaken. */
	if ((f = p->p_futex) != NULL) {
		p->p_futex = NULL;
		TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
		futex_put(f);
	}

	return error;
}

/*
 * Wakeup at most ``n'' sibling threads sleeping on a futex at address
 * ``uaddr'' and requeue at most ``m'' sibling threads on a futex at
 * address ``uaddr2''.
252 */ 253 int 254 futex_requeue(uint32_t *uaddr, uint32_t n, uint32_t *uaddr2, uint32_t m) 255 { 256 struct futex *f, *g; 257 struct proc *p; 258 uint32_t count = 0; 259 260 rw_assert_wrlock(&ftlock); 261 262 f = futex_get(uaddr, 0); 263 if (f == NULL) 264 return 0; 265 266 while ((p = TAILQ_FIRST(&f->ft_threads)) != NULL && (count < (n + m))) { 267 p->p_futex = NULL; 268 TAILQ_REMOVE(&f->ft_threads, p, p_fut_link); 269 futex_put(f); 270 271 if (count < n) { 272 wakeup_one(p); 273 } else if (uaddr2 != NULL) { 274 g = futex_get(uaddr2, FT_CREATE); 275 TAILQ_INSERT_TAIL(&g->ft_threads, p, p_fut_link); 276 p->p_futex = g; 277 } 278 count++; 279 } 280 281 futex_put(f); 282 283 return count; 284 } 285 286 /* 287 * Wakeup at most ``n'' sibling threads sleeping on a futex at address 288 * ``uaddr''. 289 */ 290 int 291 futex_wake(uint32_t *uaddr, uint32_t n) 292 { 293 return futex_requeue(uaddr, n, NULL, 0); 294 } 295