/*	$OpenBSD: kern_smr.c,v 1.12 2021/07/06 09:34:07 kettenis Exp $	*/

/*
 * Copyright (c) 2019-2020 Visa Hankala
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/smr.h>
#include <sys/time.h>
#include <sys/witness.h>

#include <machine/cpu.h>

#define SMR_PAUSE	100		/* pause between rounds in msec */
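/*
 * Overview (editorial summary of the code below): this file implements
 * the reclamation side of safe memory reclamation (SMR).  Readers
 * bracket accesses to SMR-protected data with smr_read_enter() and
 * smr_read_leave().  Writers unlink an object and hand it to
 * smr_call_impl(), which queues it on the calling CPU's local list
 * (spc_deferred).  smr_dispatch() flushes the local list to the
 * system-wide list (smr_deferred), typically when the CPU goes idle.
 * The smr kernel thread then waits for a grace period, i.e. until every
 * running CPU has passed through a quiescent state, before invoking the
 * deferred callbacks; by then no reader can still hold a reference to
 * the queued objects.
 */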
void	smr_dispatch(struct schedstate_percpu *);
void	smr_grace_wait(void);
void	smr_thread(void *);
void	smr_wakeup(void *);

struct mutex		smr_lock = MUTEX_INITIALIZER(IPL_HIGH);
struct smr_entry_list	smr_deferred;
struct timeout		smr_wakeup_tmo;
unsigned int		smr_expedite;
unsigned int		smr_ndeferred;
unsigned char		smr_grace_period;

#ifdef WITNESS
static const char smr_lock_name[] = "smr";
struct lock_object smr_lock_obj = {
	.lo_name = smr_lock_name,
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_type smr_lock_type = {
	.lt_name = smr_lock_name
};
#endif

static inline int
smr_cpu_is_idle(struct cpu_info *ci)
{
	return ci->ci_curproc == ci->ci_schedstate.spc_idleproc;
}

void
smr_startup(void)
{
	SIMPLEQ_INIT(&smr_deferred);
	WITNESS_INIT(&smr_lock_obj, &smr_lock_type);
	timeout_set(&smr_wakeup_tmo, smr_wakeup, NULL);
}

void
smr_startup_thread(void)
{
	if (kthread_create(smr_thread, NULL, NULL, "smr") != 0)
		panic("could not create smr thread");
}

struct timeval smr_logintvl = { 300, 0 };

void
smr_thread(void *arg)
{
	struct timeval elapsed, end, loglast, start;
	struct smr_entry_list deferred;
	struct smr_entry *smr;

	KERNEL_ASSERT_LOCKED();
	KERNEL_UNLOCK();

	memset(&loglast, 0, sizeof(loglast));
	SIMPLEQ_INIT(&deferred);

	for (;;) {
		mtx_enter(&smr_lock);
		if (smr_ndeferred == 0) {
			while (smr_ndeferred == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "bored", INFSLP);
		} else {
			if (smr_expedite == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "pause", MSEC_TO_NSEC(SMR_PAUSE));
		}

		SIMPLEQ_CONCAT(&deferred, &smr_deferred);
		smr_ndeferred = 0;
		smr_expedite = 0;
		mtx_leave(&smr_lock);

		getmicrouptime(&start);

		smr_grace_wait();

		WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&smr_lock_obj, 0);

		while ((smr = SIMPLEQ_FIRST(&deferred)) != NULL) {
			SIMPLEQ_REMOVE_HEAD(&deferred, smr_list);
			smr->smr_func(smr->smr_arg);
		}

		WITNESS_UNLOCK(&smr_lock_obj, 0);

		getmicrouptime(&end);
		timersub(&end, &start, &elapsed);
		if (elapsed.tv_sec >= 5 &&
		    ratecheck(&loglast, &smr_logintvl))
			printf("smr: dispatch took %ld seconds\n",
			    (long)elapsed.tv_sec);
	}
}

/*
 * Announce the next grace period and wait until all CPUs have entered it
 * by crossing a quiescent state.
 */
void
smr_grace_wait(void)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	unsigned char smrgp;

	smrgp = READ_ONCE(smr_grace_period) + 1;
	WRITE_ONCE(smr_grace_period, smrgp);

	curcpu()->ci_schedstate.spc_smrgp = smrgp;

	CPU_INFO_FOREACH(cii, ci) {
		if (!CPU_IS_RUNNING(ci))
			continue;
		if (READ_ONCE(ci->ci_schedstate.spc_smrgp) == smrgp)
			continue;
		/*
		 * Force this thread to run on ci.  Getting scheduled
		 * there implies that ci has passed through the scheduler
		 * and updated its view of the grace period.
		 */
		sched_peg_curproc(ci);
		KASSERT(ci->ci_schedstate.spc_smrgp == smrgp);
	}
	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
#endif /* MULTIPROCESSOR */
}

void
smr_wakeup(void *arg)
{
	wakeup(&smr_ndeferred);
}

void
smr_read_enter(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	spc->spc_smrdepth++;
#endif
}

void
smr_read_leave(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	KASSERT(spc->spc_smrdepth > 0);
	spc->spc_smrdepth--;
#endif
}
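/*
 * Reader-side usage, as an illustrative sketch only (struct foo, the
 * foo_list head and frob() are made up; SMR_PTR_GET() is the accessor
 * from <sys/smr.h>):
 *
 *	struct foo *f;
 *
 *	smr_read_enter();
 *	f = SMR_PTR_GET(&foo_list);
 *	if (f != NULL)
 *		frob(f);
 *	smr_read_leave();
 *
 * The critical section must not sleep.  An object observed inside the
 * section stays valid until smr_read_leave() because writers defer
 * freeing until a full grace period has elapsed.
 */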
/*
 * Move SMR entries from the local queue to the system-wide queue.
 */
void
smr_dispatch(struct schedstate_percpu *spc)
{
	int expedite = 0, wake = 0;

	mtx_enter(&smr_lock);
	if (smr_ndeferred == 0)
		wake = 1;
	SIMPLEQ_CONCAT(&smr_deferred, &spc->spc_deferred);
	smr_ndeferred += spc->spc_ndeferred;
	spc->spc_ndeferred = 0;
	smr_expedite |= spc->spc_smrexpedite;
	spc->spc_smrexpedite = 0;
	expedite = smr_expedite;
	mtx_leave(&smr_lock);

	if (expedite)
		wakeup(&smr_ndeferred);
	else if (wake)
		timeout_add_msec(&smr_wakeup_tmo, SMR_PAUSE);
}

/*
 * Signal that the current CPU is in a quiescent state.
 */
void
smr_idle(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	unsigned char smrgp;

	SMR_ASSERT_NONCRITICAL();

	if (spc->spc_ndeferred > 0)
		smr_dispatch(spc);

	/*
	 * Update this CPU's view of the system's grace period.
	 * The update must become visible after any preceding reads
	 * of SMR-protected data.
	 */
	smrgp = READ_ONCE(smr_grace_period);
	if (__predict_false(spc->spc_smrgp != smrgp)) {
		membar_exit();
		WRITE_ONCE(spc->spc_smrgp, smrgp);
	}
}

void
smr_call_impl(struct smr_entry *smr, void (*func)(void *), void *arg,
    int expedite)
{
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int s;

	KASSERT(smr->smr_func == NULL);

	smr->smr_func = func;
	smr->smr_arg = arg;

	s = splhigh();
	SIMPLEQ_INSERT_TAIL(&spc->spc_deferred, smr, smr_list);
	spc->spc_ndeferred++;
	spc->spc_smrexpedite |= expedite;
	splx(s);

	/*
	 * If this call was made from an interrupt context that
	 * preempted idle state, dispatch the local queue to the shared
	 * queue immediately.
	 * The entries would linger in the local queue for a long time
	 * if the CPU went to sleep without calling smr_idle().
	 */
	if (smr_cpu_is_idle(ci))
		smr_dispatch(spc);
}

void
smr_barrier_func(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

void
smr_barrier_impl(int expedite)
{
	struct cond c = COND_INITIALIZER();
	struct smr_entry smr;

	if (panicstr != NULL || db_active)
		return;

	WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);

	smr_init(&smr);
	smr_call_impl(&smr, smr_barrier_func, &c, expedite);
	cond_wait(&c, "smrbar");
}
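/*
 * Writer-side usage, as an illustrative sketch only (struct foo, its
 * list lock and foo_free_cb() are made up; smr_call() and smr_barrier()
 * are the <sys/smr.h> wrappers around smr_call_impl() and
 * smr_barrier_impl() above):
 *
 *	struct foo {
 *		struct smr_entry	f_smr;
 *		...
 *	};
 *
 *	void
 *	foo_free_cb(void *arg)
 *	{
 *		struct foo *f = arg;
 *
 *		free(f, M_DEVBUF, sizeof(*f));
 *	}
 *
 *	// With the list lock held, unlink f so that new readers
 *	// cannot find it, then defer the actual free:
 *	smr_init(&f->f_smr);
 *	smr_call(&f->f_smr, foo_free_cb, f);
 *
 * smr_init() must precede smr_call() because smr_call_impl() asserts
 * that the entry is not already pending.  Alternatively, smr_barrier()
 * sleeps until every reader that might still see the unlinked object
 * has left its critical section, after which f may be freed directly.
 */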