1 /* $NetBSD: kern_condvar.c,v 1.30 2011/07/27 14:35:33 uebayasi Exp $ */ 2 3 /*- 4 * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Kernel condition variable implementation. 34 */ 35 36 #include <sys/cdefs.h> 37 __KERNEL_RCSID(0, "$NetBSD: kern_condvar.c,v 1.30 2011/07/27 14:35:33 uebayasi Exp $"); 38 39 #include <sys/param.h> 40 #include <sys/proc.h> 41 #include <sys/sched.h> 42 #include <sys/systm.h> 43 #include <sys/condvar.h> 44 #include <sys/sleepq.h> 45 #include <sys/lockdebug.h> 46 #include <sys/cpu.h> 47 48 /* 49 * Accessors for the private contents of the kcondvar_t data type. 50 * 51 * cv_opaque[0] sleepq... 52 * cv_opaque[1] ...pointers 53 * cv_opaque[2] description for ps(1) 54 * 55 * cv_opaque[0..1] is protected by the interlock passed to cv_wait() (enqueue 56 * only), and the sleep queue lock acquired with sleeptab_lookup() (enqueue 57 * and dequeue). 58 * 59 * cv_opaque[2] (the wmesg) is static and does not change throughout the life 60 * of the CV. 61 */ 62 #define CV_SLEEPQ(cv) ((sleepq_t *)(cv)->cv_opaque) 63 #define CV_WMESG(cv) ((const char *)(cv)->cv_opaque[2]) 64 #define CV_SET_WMESG(cv, v) (cv)->cv_opaque[2] = __UNCONST(v) 65 66 #define CV_DEBUG_P(cv) (CV_WMESG(cv) != nodebug) 67 #define CV_RA ((uintptr_t)__builtin_return_address(0)) 68 69 static void cv_unsleep(lwp_t *, bool); 70 static void cv_wakeup_one(kcondvar_t *); 71 static void cv_wakeup_all(kcondvar_t *); 72 73 static syncobj_t cv_syncobj = { 74 SOBJ_SLEEPQ_SORTED, 75 cv_unsleep, 76 sleepq_changepri, 77 sleepq_lendpri, 78 syncobj_noowner, 79 }; 80 81 lockops_t cv_lockops = { 82 "Condition variable", 83 LOCKOPS_CV, 84 NULL 85 }; 86 87 static const char deadcv[] = "deadcv"; 88 static const char nodebug[] = "nodebug"; 89 90 /* 91 * cv_init: 92 * 93 * Initialize a condition variable for use. 94 */ 95 void 96 cv_init(kcondvar_t *cv, const char *wmesg) 97 { 98 #ifdef LOCKDEBUG 99 bool dodebug; 100 101 dodebug = LOCKDEBUG_ALLOC(cv, &cv_lockops, 102 (uintptr_t)__builtin_return_address(0)); 103 if (!dodebug) { 104 /* XXX This will break vfs_lockf. */ 105 wmesg = nodebug; 106 } 107 #endif 108 KASSERT(wmesg != NULL); 109 CV_SET_WMESG(cv, wmesg); 110 sleepq_init(CV_SLEEPQ(cv)); 111 } 112 113 /* 114 * cv_destroy: 115 * 116 * Tear down a condition variable. 117 */ 118 void 119 cv_destroy(kcondvar_t *cv) 120 { 121 122 LOCKDEBUG_FREE(CV_DEBUG_P(cv), cv); 123 #ifdef DIAGNOSTIC 124 KASSERT(cv_is_valid(cv)); 125 CV_SET_WMESG(cv, deadcv); 126 #endif 127 } 128 129 /* 130 * cv_enter: 131 * 132 * Look up and lock the sleep queue corresponding to the given 133 * condition variable, and increment the number of waiters. 134 */ 135 static inline void 136 cv_enter(kcondvar_t *cv, kmutex_t *mtx, lwp_t *l) 137 { 138 sleepq_t *sq; 139 kmutex_t *mp; 140 141 KASSERT(cv_is_valid(cv)); 142 KASSERT(!cpu_intr_p()); 143 KASSERT((l->l_pflag & LP_INTR) == 0 || panicstr != NULL); 144 145 LOCKDEBUG_LOCKED(CV_DEBUG_P(cv), cv, mtx, CV_RA, 0); 146 147 l->l_kpriority = true; 148 mp = sleepq_hashlock(cv); 149 sq = CV_SLEEPQ(cv); 150 sleepq_enter(sq, l, mp); 151 sleepq_enqueue(sq, cv, CV_WMESG(cv), &cv_syncobj); 152 mutex_exit(mtx); 153 KASSERT(cv_has_waiters(cv)); 154 } 155 156 /* 157 * cv_exit: 158 * 159 * After resuming execution, check to see if we have been restarted 160 * as a result of cv_signal(). If we have, but cannot take the 161 * wakeup (because of eg a pending Unix signal or timeout) then try 162 * to ensure that another LWP sees it. This is necessary because 163 * there may be multiple waiters, and at least one should take the 164 * wakeup if possible. 165 */ 166 static inline int 167 cv_exit(kcondvar_t *cv, kmutex_t *mtx, lwp_t *l, const int error) 168 { 169 170 mutex_enter(mtx); 171 if (__predict_false(error != 0)) 172 cv_signal(cv); 173 174 LOCKDEBUG_UNLOCKED(CV_DEBUG_P(cv), cv, CV_RA, 0); 175 KASSERT(cv_is_valid(cv)); 176 177 return error; 178 } 179 180 /* 181 * cv_unsleep: 182 * 183 * Remove an LWP from the condition variable and sleep queue. This 184 * is called when the LWP has not been awoken normally but instead 185 * interrupted: for example, when a signal is received. Must be 186 * called with the LWP locked, and must return it unlocked. 187 */ 188 static void 189 cv_unsleep(lwp_t *l, bool cleanup) 190 { 191 kcondvar_t *cv; 192 193 cv = (kcondvar_t *)(uintptr_t)l->l_wchan; 194 195 KASSERT(l->l_wchan == (wchan_t)cv); 196 KASSERT(l->l_sleepq == CV_SLEEPQ(cv)); 197 KASSERT(cv_is_valid(cv)); 198 KASSERT(cv_has_waiters(cv)); 199 200 sleepq_unsleep(l, cleanup); 201 } 202 203 /* 204 * cv_wait: 205 * 206 * Wait non-interruptably on a condition variable until awoken. 207 */ 208 void 209 cv_wait(kcondvar_t *cv, kmutex_t *mtx) 210 { 211 lwp_t *l = curlwp; 212 213 KASSERT(mutex_owned(mtx)); 214 215 cv_enter(cv, mtx, l); 216 (void)sleepq_block(0, false); 217 (void)cv_exit(cv, mtx, l, 0); 218 } 219 220 /* 221 * cv_wait_sig: 222 * 223 * Wait on a condition variable until a awoken or a signal is received. 224 * Will also return early if the process is exiting. Returns zero if 225 * awoken normally, ERESTART if a signal was received and the system 226 * call is restartable, or EINTR otherwise. 227 */ 228 int 229 cv_wait_sig(kcondvar_t *cv, kmutex_t *mtx) 230 { 231 lwp_t *l = curlwp; 232 int error; 233 234 KASSERT(mutex_owned(mtx)); 235 236 cv_enter(cv, mtx, l); 237 error = sleepq_block(0, true); 238 return cv_exit(cv, mtx, l, error); 239 } 240 241 /* 242 * cv_timedwait: 243 * 244 * Wait on a condition variable until awoken or the specified timeout 245 * expires. Returns zero if awoken normally or EWOULDBLOCK if the 246 * timeout expired. 247 */ 248 int 249 cv_timedwait(kcondvar_t *cv, kmutex_t *mtx, int timo) 250 { 251 lwp_t *l = curlwp; 252 int error; 253 254 KASSERT(mutex_owned(mtx)); 255 256 cv_enter(cv, mtx, l); 257 error = sleepq_block(timo, false); 258 return cv_exit(cv, mtx, l, error); 259 } 260 261 /* 262 * cv_timedwait_sig: 263 * 264 * Wait on a condition variable until a timeout expires, awoken or a 265 * signal is received. Will also return early if the process is 266 * exiting. Returns zero if awoken normally, EWOULDBLOCK if the 267 * timeout expires, ERESTART if a signal was received and the system 268 * call is restartable, or EINTR otherwise. 269 */ 270 int 271 cv_timedwait_sig(kcondvar_t *cv, kmutex_t *mtx, int timo) 272 { 273 lwp_t *l = curlwp; 274 int error; 275 276 KASSERT(mutex_owned(mtx)); 277 278 cv_enter(cv, mtx, l); 279 error = sleepq_block(timo, true); 280 return cv_exit(cv, mtx, l, error); 281 } 282 283 /* 284 * cv_signal: 285 * 286 * Wake the highest priority LWP waiting on a condition variable. 287 * Must be called with the interlocking mutex held. 288 */ 289 void 290 cv_signal(kcondvar_t *cv) 291 { 292 293 /* LOCKDEBUG_WAKEUP(CV_DEBUG_P(cv), cv, CV_RA); */ 294 KASSERT(cv_is_valid(cv)); 295 296 if (__predict_false(!TAILQ_EMPTY(CV_SLEEPQ(cv)))) 297 cv_wakeup_one(cv); 298 } 299 300 static void __noinline 301 cv_wakeup_one(kcondvar_t *cv) 302 { 303 sleepq_t *sq; 304 kmutex_t *mp; 305 lwp_t *l; 306 307 KASSERT(cv_is_valid(cv)); 308 309 mp = sleepq_hashlock(cv); 310 sq = CV_SLEEPQ(cv); 311 l = TAILQ_FIRST(sq); 312 if (l == NULL) { 313 mutex_spin_exit(mp); 314 return; 315 } 316 KASSERT(l->l_sleepq == sq); 317 KASSERT(l->l_mutex == mp); 318 KASSERT(l->l_wchan == cv); 319 sleepq_remove(sq, l); 320 mutex_spin_exit(mp); 321 322 KASSERT(cv_is_valid(cv)); 323 } 324 325 /* 326 * cv_broadcast: 327 * 328 * Wake all LWPs waiting on a condition variable. Must be called 329 * with the interlocking mutex held. 330 */ 331 void 332 cv_broadcast(kcondvar_t *cv) 333 { 334 335 /* LOCKDEBUG_WAKEUP(CV_DEBUG_P(cv), cv, CV_RA); */ 336 KASSERT(cv_is_valid(cv)); 337 338 if (__predict_false(!TAILQ_EMPTY(CV_SLEEPQ(cv)))) 339 cv_wakeup_all(cv); 340 } 341 342 static void __noinline 343 cv_wakeup_all(kcondvar_t *cv) 344 { 345 sleepq_t *sq; 346 kmutex_t *mp; 347 lwp_t *l, *next; 348 349 KASSERT(cv_is_valid(cv)); 350 351 mp = sleepq_hashlock(cv); 352 sq = CV_SLEEPQ(cv); 353 for (l = TAILQ_FIRST(sq); l != NULL; l = next) { 354 KASSERT(l->l_sleepq == sq); 355 KASSERT(l->l_mutex == mp); 356 KASSERT(l->l_wchan == cv); 357 next = TAILQ_NEXT(l, l_sleepchain); 358 sleepq_remove(sq, l); 359 } 360 mutex_spin_exit(mp); 361 362 KASSERT(cv_is_valid(cv)); 363 } 364 365 /* 366 * cv_has_waiters: 367 * 368 * For diagnostic assertions: return non-zero if a condition 369 * variable has waiters. 370 */ 371 bool 372 cv_has_waiters(kcondvar_t *cv) 373 { 374 375 return !TAILQ_EMPTY(CV_SLEEPQ(cv)); 376 } 377 378 /* 379 * cv_is_valid: 380 * 381 * For diagnostic assertions: return non-zero if a condition 382 * variable appears to be valid. No locks need be held. 383 */ 384 bool 385 cv_is_valid(kcondvar_t *cv) 386 { 387 388 return CV_WMESG(cv) != deadcv && CV_WMESG(cv) != NULL; 389 } 390