1 /* $OpenBSD: sys_futex.c,v 1.21 2022/08/14 01:58:28 jsg Exp $ */ 2 3 /* 4 * Copyright (c) 2016-2017 Martin Pieuchot 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/param.h> 20 #include <sys/systm.h> 21 #include <sys/proc.h> 22 #include <sys/mount.h> 23 #include <sys/syscallargs.h> 24 #include <sys/pool.h> 25 #include <sys/time.h> 26 #include <sys/rwlock.h> 27 #include <sys/futex.h> 28 29 #ifdef KTRACE 30 #include <sys/ktrace.h> 31 #endif 32 33 #include <uvm/uvm.h> 34 35 /* 36 * Atomicity is only needed on MULTIPROCESSOR kernels. Fall back on 37 * copyin(9) until non-MULTIPROCESSOR architectures have a copyin32(9) 38 * implementation. 39 */ 40 #ifndef MULTIPROCESSOR 41 #define copyin32(uaddr, kaddr) copyin((uaddr), (kaddr), sizeof(uint32_t)) 42 #endif 43 44 /* 45 * Kernel representation of a futex. 46 */ 47 struct futex { 48 LIST_ENTRY(futex) ft_list; /* list of all futexes */ 49 TAILQ_HEAD(, proc) ft_threads; /* sleeping queue */ 50 struct uvm_object *ft_obj; /* UVM object */ 51 struct vm_amap *ft_amap; /* UVM amap */ 52 voff_t ft_off; /* UVM offset */ 53 unsigned int ft_refcnt; /* # of references */ 54 }; 55 56 /* Syscall helpers. */ 57 int futex_wait(uint32_t *, uint32_t, const struct timespec *, int); 58 int futex_wake(uint32_t *, uint32_t, int); 59 int futex_requeue(uint32_t *, uint32_t, uint32_t *, uint32_t, int); 60 61 /* Flags for futex_get(). */ 62 #define FT_CREATE 0x1 /* Create a futex if it doesn't exist. */ 63 #define FT_PRIVATE 0x2 /* Futex is process-private. */ 64 65 struct futex *futex_get(uint32_t *, int); 66 void futex_put(struct futex *); 67 68 /* 69 * The global futex lock serializes futex(2) calls so that no wakeup 70 * event is lost, and protects all futex lists and futex states. 71 */ 72 struct rwlock ftlock = RWLOCK_INITIALIZER("futex"); 73 static struct futex_list ftlist_shared = 74 LIST_HEAD_INITIALIZER(ftlist_shared); 75 struct pool ftpool; 76 77 78 void 79 futex_init(void) 80 { 81 pool_init(&ftpool, sizeof(struct futex), 0, IPL_NONE, 82 PR_WAITOK | PR_RWLOCK, "futexpl", NULL); 83 } 84 85 int 86 sys_futex(struct proc *p, void *v, register_t *retval) 87 { 88 struct sys_futex_args /* { 89 syscallarg(uint32_t *) f; 90 syscallarg(int) op; 91 syscallarg(inr) val; 92 syscallarg(const struct timespec *) timeout; 93 syscallarg(uint32_t *) g; 94 } */ *uap = v; 95 uint32_t *uaddr = SCARG(uap, f); 96 int op = SCARG(uap, op); 97 uint32_t val = SCARG(uap, val); 98 const struct timespec *timeout = SCARG(uap, timeout); 99 void *g = SCARG(uap, g); 100 int flags = 0; 101 int error = 0; 102 103 if (op & FUTEX_PRIVATE_FLAG) 104 flags |= FT_PRIVATE; 105 106 rw_enter_write(&ftlock); 107 switch (op) { 108 case FUTEX_WAIT: 109 case FUTEX_WAIT_PRIVATE: 110 error = futex_wait(uaddr, val, timeout, flags); 111 break; 112 case FUTEX_WAKE: 113 case FUTEX_WAKE_PRIVATE: 114 *retval = futex_wake(uaddr, val, flags); 115 break; 116 case FUTEX_REQUEUE: 117 case FUTEX_REQUEUE_PRIVATE: 118 *retval = futex_requeue(uaddr, val, g, (u_long)timeout, flags); 119 break; 120 default: 121 error = ENOSYS; 122 break; 123 } 124 rw_exit_write(&ftlock); 125 126 return error; 127 } 128 129 /* 130 * Return an existing futex matching userspace address ``uaddr''. 131 * 132 * If such futex does not exist and FT_CREATE is given, create it. 133 */ 134 struct futex * 135 futex_get(uint32_t *uaddr, int flags) 136 { 137 struct proc *p = curproc; 138 vm_map_t map = &p->p_vmspace->vm_map; 139 vm_map_entry_t entry; 140 struct uvm_object *obj = NULL; 141 struct vm_amap *amap = NULL; 142 voff_t off = (vaddr_t)uaddr; 143 struct futex *f; 144 struct futex_list *ftlist = &p->p_p->ps_ftlist; 145 146 rw_assert_wrlock(&ftlock); 147 148 if (!(flags & FT_PRIVATE)) { 149 vm_map_lock_read(map); 150 if (uvm_map_lookup_entry(map, (vaddr_t)uaddr, &entry) && 151 entry->inheritance == MAP_INHERIT_SHARE) { 152 if (UVM_ET_ISOBJ(entry)) { 153 ftlist = &ftlist_shared; 154 obj = entry->object.uvm_obj; 155 off = entry->offset + 156 ((vaddr_t)uaddr - entry->start); 157 } else if (entry->aref.ar_amap) { 158 ftlist = &ftlist_shared; 159 amap = entry->aref.ar_amap; 160 off = ptoa(entry->aref.ar_pageoff) + 161 ((vaddr_t)uaddr - entry->start); 162 } 163 } 164 vm_map_unlock_read(map); 165 } 166 167 LIST_FOREACH(f, ftlist, ft_list) { 168 if (f->ft_obj == obj && f->ft_amap == amap && 169 f->ft_off == off) { 170 f->ft_refcnt++; 171 break; 172 } 173 } 174 175 if ((f == NULL) && (flags & FT_CREATE)) { 176 /* 177 * We rely on the rwlock to ensure that no other thread 178 * create the same futex. 179 */ 180 f = pool_get(&ftpool, PR_WAITOK); 181 TAILQ_INIT(&f->ft_threads); 182 f->ft_obj = obj; 183 f->ft_amap = amap; 184 f->ft_off = off; 185 f->ft_refcnt = 1; 186 LIST_INSERT_HEAD(ftlist, f, ft_list); 187 } 188 189 return f; 190 } 191 192 /* 193 * Release a given futex. 194 */ 195 void 196 futex_put(struct futex *f) 197 { 198 rw_assert_wrlock(&ftlock); 199 200 KASSERT(f->ft_refcnt > 0); 201 202 --f->ft_refcnt; 203 if (f->ft_refcnt == 0) { 204 KASSERT(TAILQ_EMPTY(&f->ft_threads)); 205 LIST_REMOVE(f, ft_list); 206 pool_put(&ftpool, f); 207 } 208 } 209 210 /* 211 * Put the current thread on the sleep queue of the futex at address 212 * ``uaddr''. Let it sleep for the specified ``timeout'' time, or 213 * indefinitely if the argument is NULL. 214 */ 215 int 216 futex_wait(uint32_t *uaddr, uint32_t val, const struct timespec *timeout, 217 int flags) 218 { 219 struct proc *p = curproc; 220 struct futex *f; 221 uint64_t nsecs = INFSLP; 222 uint32_t cval; 223 int error; 224 225 /* 226 * After reading the value a race is still possible but 227 * we deal with it by serializing all futex syscalls. 228 */ 229 rw_assert_wrlock(&ftlock); 230 231 /* 232 * Read user space futex value 233 */ 234 if ((error = copyin32(uaddr, &cval))) 235 return error; 236 237 /* If the value changed, stop here. */ 238 if (cval != val) 239 return EAGAIN; 240 241 if (timeout != NULL) { 242 struct timespec ts; 243 244 if ((error = copyin(timeout, &ts, sizeof(ts)))) 245 return error; 246 #ifdef KTRACE 247 if (KTRPOINT(p, KTR_STRUCT)) 248 ktrreltimespec(p, &ts); 249 #endif 250 if (ts.tv_sec < 0 || !timespecisvalid(&ts)) 251 return EINVAL; 252 nsecs = MAX(1, MIN(TIMESPEC_TO_NSEC(&ts), MAXTSLP)); 253 } 254 255 f = futex_get(uaddr, flags | FT_CREATE); 256 TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link); 257 p->p_futex = f; 258 259 error = rwsleep_nsec(p, &ftlock, PWAIT|PCATCH, "fsleep", nsecs); 260 if (error == ERESTART) 261 error = ECANCELED; 262 else if (error == EWOULDBLOCK) { 263 /* A race occurred between a wakeup and a timeout. */ 264 if (p->p_futex == NULL) 265 error = 0; 266 else 267 error = ETIMEDOUT; 268 } 269 270 /* Remove ourself if we haven't been awaken. */ 271 if ((f = p->p_futex) != NULL) { 272 p->p_futex = NULL; 273 TAILQ_REMOVE(&f->ft_threads, p, p_fut_link); 274 futex_put(f); 275 } 276 277 return error; 278 } 279 280 /* 281 * Wakeup at most ``n'' sibling threads sleeping on a futex at address 282 * ``uaddr'' and requeue at most ``m'' sibling threads on a futex at 283 * address ``uaddr2''. 284 */ 285 int 286 futex_requeue(uint32_t *uaddr, uint32_t n, uint32_t *uaddr2, uint32_t m, 287 int flags) 288 { 289 struct futex *f, *g; 290 struct proc *p; 291 uint32_t count = 0; 292 293 rw_assert_wrlock(&ftlock); 294 295 f = futex_get(uaddr, flags); 296 if (f == NULL) 297 return 0; 298 299 while ((p = TAILQ_FIRST(&f->ft_threads)) != NULL && (count < (n + m))) { 300 p->p_futex = NULL; 301 TAILQ_REMOVE(&f->ft_threads, p, p_fut_link); 302 futex_put(f); 303 304 if (count < n) { 305 wakeup_one(p); 306 } else if (uaddr2 != NULL) { 307 g = futex_get(uaddr2, FT_CREATE); 308 TAILQ_INSERT_TAIL(&g->ft_threads, p, p_fut_link); 309 p->p_futex = g; 310 } 311 count++; 312 } 313 314 futex_put(f); 315 316 return count; 317 } 318 319 /* 320 * Wakeup at most ``n'' sibling threads sleeping on a futex at address 321 * ``uaddr''. 322 */ 323 int 324 futex_wake(uint32_t *uaddr, uint32_t n, int flags) 325 { 326 return futex_requeue(uaddr, n, NULL, 0, flags); 327 } 328