1 /* $OpenBSD: sys_futex.c,v 1.2 2017/04/30 10:10:21 mpi Exp $ */ 2 3 /* 4 * Copyright (c) 2016-2017 Martin Pieuchot 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/param.h> 20 #include <sys/systm.h> 21 #include <sys/proc.h> 22 #include <sys/kernel.h> 23 #include <sys/mount.h> 24 #include <sys/syscallargs.h> 25 #include <sys/pool.h> 26 #include <sys/time.h> 27 #include <sys/rwlock.h> 28 #include <sys/futex.h> 29 30 #ifdef KTRACE 31 #include <sys/ktrace.h> 32 #endif 33 34 /* 35 * Kernel representation of a futex. 36 */ 37 struct futex { 38 LIST_ENTRY(futex) ft_list; /* list of all futexes */ 39 TAILQ_HEAD(, proc) ft_threads; /* sleeping queue */ 40 uint32_t *ft_uaddr; /* userspace address */ 41 pid_t ft_pid; /* process identifier */ 42 unsigned int ft_refcnt; /* # of references */ 43 }; 44 45 /* Syscall helpers. */ 46 int futex_wait(uint32_t *, uint32_t, const struct timespec *); 47 int futex_wake(uint32_t *, uint32_t); 48 int futex_requeue(uint32_t *, uint32_t, uint32_t *, uint32_t); 49 50 /* Flags for futex_get(). */ 51 #define FT_CREATE 0x1 /* Create a futex if it doesn't exist. */ 52 53 struct futex *futex_get(uint32_t *, int); 54 void futex_put(struct futex *); 55 56 /* 57 * The global futex lock serialize futex(2) calls such that no wakeup 58 * event are lost, protect the global list of all futexes and their 59 * states. 60 */ 61 struct rwlock ftlock = RWLOCK_INITIALIZER("futex"); 62 static LIST_HEAD(, futex) ftlist; 63 struct pool ftpool; 64 65 66 void 67 futex_init(void) 68 { 69 pool_init(&ftpool, sizeof(struct futex), 0, IPL_NONE, 0, "futexpl", 70 NULL); 71 } 72 73 int 74 sys_futex(struct proc *p, void *v, register_t *retval) 75 { 76 struct sys_futex_args /* { 77 syscallarg(uint32_t *) f; 78 syscallarg(int) op; 79 syscallarg(inr) val; 80 syscallarg(const struct timespec *) timeout; 81 syscallarg(uint32_t *) g; 82 } */ *uap = v; 83 uint32_t *uaddr = SCARG(uap, f); 84 int op = SCARG(uap, op); 85 uint32_t val = SCARG(uap, val); 86 const struct timespec *timeout = SCARG(uap, timeout); 87 void *g = SCARG(uap, g); 88 89 switch (op) { 90 case FUTEX_WAIT: 91 KERNEL_LOCK(); 92 rw_enter_write(&ftlock); 93 *retval = futex_wait(uaddr, val, timeout); 94 rw_exit_write(&ftlock); 95 KERNEL_UNLOCK(); 96 break; 97 case FUTEX_WAKE: 98 rw_enter_write(&ftlock); 99 *retval = futex_wake(uaddr, val); 100 rw_exit_write(&ftlock); 101 break; 102 case FUTEX_REQUEUE: 103 rw_enter_write(&ftlock); 104 *retval = futex_requeue(uaddr, val, g, (unsigned long)timeout); 105 rw_exit_write(&ftlock); 106 break; 107 default: 108 *retval = ENOSYS; 109 break; 110 } 111 112 return 0; 113 } 114 115 /* 116 * Return an existing futex matching userspace address ``uaddr''. 117 * 118 * If such futex does not exist and FT_CREATE is given, create it. 119 */ 120 struct futex * 121 futex_get(uint32_t *uaddr, int flag) 122 { 123 struct futex *f; 124 125 rw_assert_wrlock(&ftlock); 126 127 LIST_FOREACH(f, &ftlist, ft_list) { 128 if (f->ft_uaddr == uaddr && f->ft_pid == curproc->p_p->ps_pid) { 129 f->ft_refcnt++; 130 break; 131 } 132 } 133 134 if ((f == NULL) && (flag & FT_CREATE)) { 135 /* 136 * We rely on the rwlock to ensure that no other thread 137 * create the same futex. 138 */ 139 f = pool_get(&ftpool, PR_WAITOK); 140 TAILQ_INIT(&f->ft_threads); 141 f->ft_uaddr = uaddr; 142 f->ft_pid = curproc->p_p->ps_pid; 143 f->ft_refcnt = 1; 144 LIST_INSERT_HEAD(&ftlist, f, ft_list); 145 } 146 147 return f; 148 } 149 150 /* 151 * Release a given futex. 152 */ 153 void 154 futex_put(struct futex *f) 155 { 156 rw_assert_wrlock(&ftlock); 157 158 KASSERT(f->ft_refcnt > 0); 159 160 --f->ft_refcnt; 161 if (f->ft_refcnt == 0) { 162 KASSERT(TAILQ_EMPTY(&f->ft_threads)); 163 LIST_REMOVE(f, ft_list); 164 pool_put(&ftpool, f); 165 } 166 } 167 168 /* 169 * Put the current thread on the sleep queue of the futex at address 170 * ``uaddr''. Let it sleep for the specified ``timeout'' time, or 171 * indefinitly if the argument is NULL. 172 */ 173 int 174 futex_wait(uint32_t *uaddr, uint32_t val, const struct timespec *timeout) 175 { 176 struct proc *p = curproc; 177 struct futex *f; 178 uint64_t to_ticks = 0; 179 uint32_t cval; 180 int error; 181 182 /* 183 * After reading the value a race is still possible but 184 * we deal with it by serializing all futex syscalls. 185 */ 186 rw_assert_wrlock(&ftlock); 187 188 /* 189 * Read user space futex value 190 * 191 * XXX copyin(9) is not guaranteed to be atomic. 192 */ 193 if ((error = copyin(uaddr, &cval, sizeof(cval)))) 194 return error; 195 196 /* If the value changed, stop here. */ 197 if (cval != val) 198 return EAGAIN; 199 200 if (timeout != NULL) { 201 struct timespec ts; 202 203 if ((error = copyin(timeout, &ts, sizeof(ts)))) 204 return error; 205 #ifdef KTRACE 206 if (KTRPOINT(p, KTR_STRUCT)) 207 ktrabstimespec(p, timeout); 208 #endif 209 to_ticks = (uint64_t)hz * ts.tv_sec + 210 (ts.tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1; 211 if (to_ticks > INT_MAX) 212 to_ticks = INT_MAX; 213 } 214 215 f = futex_get(uaddr, FT_CREATE); 216 TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link); 217 p->p_futex = f; 218 219 error = rwsleep(p, &ftlock, PUSER|PCATCH, "fsleep", (int)to_ticks); 220 if (error == ERESTART) 221 error = EINTR; 222 else if (error == EWOULDBLOCK) { 223 /* A race occured between a wakeup and a timeout. */ 224 if (p->p_futex == NULL) 225 error = 0; 226 else 227 error = ETIMEDOUT; 228 } 229 230 /* Remove ourself if we haven't been awaken. */ 231 if ((f = p->p_futex) != NULL) { 232 p->p_futex = NULL; 233 TAILQ_REMOVE(&f->ft_threads, p, p_fut_link); 234 futex_put(f); 235 } 236 237 return error; 238 } 239 240 /* 241 * Wakeup at most ``n'' sibling threads sleeping on a futex at address 242 * ``uaddr'' and requeue at most ``m'' sibling threads on a futex at 243 * address ``uaddr2''. 244 */ 245 int 246 futex_requeue(uint32_t *uaddr, uint32_t n, uint32_t *uaddr2, uint32_t m) 247 { 248 struct futex *f, *g; 249 struct proc *p; 250 uint32_t count = 0; 251 252 rw_assert_wrlock(&ftlock); 253 254 f = futex_get(uaddr, 0); 255 if (f == NULL) 256 return 0; 257 258 while ((p = TAILQ_FIRST(&f->ft_threads)) != NULL && (count < (n + m))) { 259 p->p_futex = NULL; 260 TAILQ_REMOVE(&f->ft_threads, p, p_fut_link); 261 futex_put(f); 262 263 if (count < n) { 264 wakeup_one(p); 265 } else if (uaddr2 != NULL) { 266 g = futex_get(uaddr2, FT_CREATE); 267 TAILQ_INSERT_TAIL(&g->ft_threads, p, p_fut_link); 268 p->p_futex = g; 269 } 270 count++; 271 } 272 273 futex_put(f); 274 275 return count; 276 } 277 278 /* 279 * Wakeup at most ``n'' sibling threads sleeping on a futex at address 280 * ``uaddr''. 281 */ 282 int 283 futex_wake(uint32_t *uaddr, uint32_t n) 284 { 285 return futex_requeue(uaddr, n, NULL, 0); 286 } 287