/*	$OpenBSD: sys_futex.c,v 1.20 2021/11/22 14:57:17 visa Exp $ */

/*
 * Copyright (c) 2016-2017 Martin Pieuchot
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>
#include <sys/time.h>
#include <sys/rwlock.h>
#include <sys/futex.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <uvm/uvm.h>

/*
 * Atomicity is only needed on MULTIPROCESSOR kernels.  Fall back on
 * copyin(9) until non-MULTIPROCESSOR architectures have a copyin32(9)
 * implementation.
 */
#ifndef MULTIPROCESSOR
#define copyin32(uaddr, kaddr)	copyin((uaddr), (kaddr), sizeof(uint32_t))
#endif

/*
 * Kernel representation of a futex.
 */
struct futex {
	LIST_ENTRY(futex)	 ft_list;	/* list of all futexes */
	TAILQ_HEAD(, proc)	 ft_threads;	/* sleeping queue */
	struct uvm_object	*ft_obj;	/* UVM object */
	struct vm_amap		*ft_amap;	/* UVM amap */
	voff_t			 ft_off;	/* UVM offset */
	unsigned int		 ft_refcnt;	/* # of references */
};

/* Syscall helpers. */
int	futex_wait(uint32_t *, uint32_t, const struct timespec *, int);
int	futex_wake(uint32_t *, uint32_t, int);
int	futex_requeue(uint32_t *, uint32_t, uint32_t *, uint32_t, int);

/* Flags for futex_get(). */
#define FT_CREATE	0x1	/* Create a futex if it doesn't exist. */
#define FT_PRIVATE	0x2	/* Futex is process-private. */

struct futex *futex_get(uint32_t *, int);
void	 futex_put(struct futex *);

/*
 * The global futex lock serializes futex(2) calls so that no wakeup
 * event is lost, and protects all futex lists and futex states.
 */
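/*
 * For illustration only: the serialization above is what lets a
 * userland waiter use the usual check-then-sleep protocol without
 * losing a wakeup.  A minimal sketch, assuming the futex(2) stub
 * declared for userland and a hypothetical shared word ``word'':
 *
 *	volatile uint32_t word = 1;
 *
 *	// Waiter: sleep only while the word still holds the value it
 *	// just observed; the kernel rechecks the word under ftlock,
 *	// so a concurrent change yields EAGAIN instead of a hang.
 *	while (word == 1)
 *		futex(&word, FUTEX_WAIT, 1, NULL, NULL);
 *
 *	// Waker: publish the new value first, then wake one sleeper.
 *	word = 0;
 *	futex(&word, FUTEX_WAKE, 1, NULL, NULL);
 */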
struct rwlock ftlock = RWLOCK_INITIALIZER("futex");
static struct futex_list ftlist_shared =
	LIST_HEAD_INITIALIZER(ftlist_shared);
struct pool ftpool;


void
futex_init(void)
{
	pool_init(&ftpool, sizeof(struct futex), 0, IPL_NONE,
	    PR_WAITOK | PR_RWLOCK, "futexpl", NULL);
}

int
sys_futex(struct proc *p, void *v, register_t *retval)
{
	struct sys_futex_args /* {
		syscallarg(uint32_t *) f;
		syscallarg(int) op;
		syscallarg(uint32_t) val;
		syscallarg(const struct timespec *) timeout;
		syscallarg(uint32_t *) g;
	} */ *uap = v;
	uint32_t *uaddr = SCARG(uap, f);
	int op = SCARG(uap, op);
	uint32_t val = SCARG(uap, val);
	const struct timespec *timeout = SCARG(uap, timeout);
	void *g = SCARG(uap, g);
	int flags = 0;
	int error = 0;

	if (op & FUTEX_PRIVATE_FLAG)
		flags |= FT_PRIVATE;

	rw_enter_write(&ftlock);
	switch (op) {
	case FUTEX_WAIT:
	case FUTEX_WAIT_PRIVATE:
		error = futex_wait(uaddr, val, timeout, flags);
		break;
	case FUTEX_WAKE:
	case FUTEX_WAKE_PRIVATE:
		*retval = futex_wake(uaddr, val, flags);
		break;
	case FUTEX_REQUEUE:
	case FUTEX_REQUEUE_PRIVATE:
		*retval = futex_requeue(uaddr, val, g, (u_long)timeout, flags);
		break;
	default:
		error = ENOSYS;
		break;
	}
	rw_exit_write(&ftlock);

	return error;
}

/*
 * Return an existing futex matching the userspace address ``uaddr''.
 *
 * If such a futex does not exist and FT_CREATE is given, create it.
 */
struct futex *
futex_get(uint32_t *uaddr, int flags)
{
	struct proc *p = curproc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;
	struct uvm_object *obj = NULL;
	struct vm_amap *amap = NULL;
	voff_t off = (vaddr_t)uaddr;
	struct futex *f;
	struct futex_list *ftlist = &p->p_p->ps_ftlist;

	rw_assert_wrlock(&ftlock);

	if (!(flags & FT_PRIVATE)) {
		vm_map_lock_read(map);
		if (uvm_map_lookup_entry(map, (vaddr_t)uaddr, &entry) &&
		    entry->inheritance == MAP_INHERIT_SHARE) {
			if (UVM_ET_ISOBJ(entry)) {
				ftlist = &ftlist_shared;
				obj = entry->object.uvm_obj;
				off = entry->offset +
				    ((vaddr_t)uaddr - entry->start);
			} else if (entry->aref.ar_amap) {
				ftlist = &ftlist_shared;
				amap = entry->aref.ar_amap;
				off = ptoa(entry->aref.ar_pageoff) +
				    ((vaddr_t)uaddr - entry->start);
			}
		}
		vm_map_unlock_read(map);
	}

	LIST_FOREACH(f, ftlist, ft_list) {
		if (f->ft_obj == obj && f->ft_amap == amap &&
		    f->ft_off == off) {
			f->ft_refcnt++;
			break;
		}
	}

	if ((f == NULL) && (flags & FT_CREATE)) {
		/*
		 * We rely on the rwlock to ensure that no other thread
		 * creates the same futex.
		 */
		f = pool_get(&ftpool, PR_WAITOK);
		TAILQ_INIT(&f->ft_threads);
		f->ft_obj = obj;
		f->ft_amap = amap;
		f->ft_off = off;
		f->ft_refcnt = 1;
		LIST_INSERT_HEAD(ftlist, f, ft_list);
	}

	return f;
}

/*
 * Release a given futex.
 */
void
futex_put(struct futex *f)
{
	rw_assert_wrlock(&ftlock);

	KASSERT(f->ft_refcnt > 0);

	--f->ft_refcnt;
	if (f->ft_refcnt == 0) {
		KASSERT(TAILQ_EMPTY(&f->ft_threads));
		LIST_REMOVE(f, ft_list);
		pool_put(&ftpool, f);
	}
}
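/*
 * A note on futex identity as implemented by futex_get() above:
 * process-private futexes are keyed by the raw userspace virtual
 * address on the per-process ps_ftlist, while addresses backed by a
 * shared mapping are translated to an (object or amap, offset) pair
 * kept on the global ftlist_shared list.  For example, two processes
 * mapping the same shared object at different virtual addresses
 * should resolve to the same struct futex, so a wake issued by one
 * process can find a waiter queued by the other.
 */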
/*
 * Put the current thread on the sleep queue of the futex at address
 * ``uaddr''.  Let it sleep for the specified ``timeout'' time, or
 * indefinitely if the argument is NULL.
 */
int
futex_wait(uint32_t *uaddr, uint32_t val, const struct timespec *timeout,
    int flags)
{
	struct proc *p = curproc;
	struct futex *f;
	uint64_t nsecs = INFSLP;
	uint32_t cval;
	int error;

	/*
	 * After reading the value a race is still possible but
	 * we deal with it by serializing all futex syscalls.
	 */
	rw_assert_wrlock(&ftlock);

	/*
	 * Read the userspace futex value.
	 */
	if ((error = copyin32(uaddr, &cval)))
		return error;

	/* If the value changed, stop here. */
	if (cval != val)
		return EAGAIN;

	if (timeout != NULL) {
		struct timespec ts;

		if ((error = copyin(timeout, &ts, sizeof(ts))))
			return error;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrreltimespec(p, &ts);
#endif
		if (ts.tv_sec < 0 || !timespecisvalid(&ts))
			return EINVAL;
		nsecs = MAX(1, MIN(TIMESPEC_TO_NSEC(&ts), MAXTSLP));
	}

	f = futex_get(uaddr, flags | FT_CREATE);
	TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link);
	p->p_futex = f;

	error = rwsleep_nsec(p, &ftlock, PWAIT|PCATCH, "fsleep", nsecs);
	if (error == ERESTART)
		error = ECANCELED;
	else if (error == EWOULDBLOCK) {
		/* A race occurred between a wakeup and a timeout. */
		if (p->p_futex == NULL)
			error = 0;
		else
			error = ETIMEDOUT;
	}

	/* Remove ourselves if we haven't been awakened. */
	if ((f = p->p_futex) != NULL) {
		p->p_futex = NULL;
		TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
		futex_put(f);
	}

	return error;
}

/*
 * Wake up at most ``n'' sibling threads sleeping on a futex at address
 * ``uaddr'' and requeue at most ``m'' sibling threads on a futex at
 * address ``uaddr2''.
 */
int
futex_requeue(uint32_t *uaddr, uint32_t n, uint32_t *uaddr2, uint32_t m,
    int flags)
{
	struct futex *f, *g;
	struct proc *p;
	uint32_t count = 0;

	rw_assert_wrlock(&ftlock);

	f = futex_get(uaddr, flags);
	if (f == NULL)
		return 0;

	while ((p = TAILQ_FIRST(&f->ft_threads)) != NULL && (count < (n + m))) {
		p->p_futex = NULL;
		TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
		futex_put(f);

		if (count < n) {
			wakeup_one(p);
		} else if (uaddr2 != NULL) {
			g = futex_get(uaddr2, FT_CREATE);
			TAILQ_INSERT_TAIL(&g->ft_threads, p, p_fut_link);
			p->p_futex = g;
		}
		count++;
	}

	futex_put(f);

	return count;
}

/*
 * Wake up at most ``n'' sibling threads sleeping on a futex at address
 * ``uaddr''.
 */
int
futex_wake(uint32_t *uaddr, uint32_t n, int flags)
{
	return futex_requeue(uaddr, n, NULL, 0, flags);
}
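/*
 * The wake/requeue split in futex_requeue() means one call services
 * at most ``n + m'' waiters: the first ``n'' threads taken off the
 * queue are woken, the next ``m'' are moved onto the futex at
 * ``uaddr2'' without being woken.  As a sketch of the intended use,
 * a condition-variable broadcast can wake a single waiter (n = 1)
 * and requeue the remaining ones onto the mutex word (a large m), so
 * they are released one at a time as the mutex is handed over rather
 * than stampeding.  futex_wake() is simply the degenerate case with
 * no requeue target.
 */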