/*	$OpenBSD: kern_smr.c,v 1.9 2020/12/25 12:49:31 visa Exp $	*/

/*
 * Copyright (c) 2019-2020 Visa Hankala
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/smr.h>
#include <sys/time.h>
#include <sys/witness.h>

#include <machine/cpu.h>

#define SMR_PAUSE	100		/* pause between rounds in msec */

void	smr_dispatch(struct schedstate_percpu *);
void	smr_grace_wait(void);
void	smr_thread(void *);
void	smr_wakeup(void *);

struct mutex		smr_lock = MUTEX_INITIALIZER(IPL_HIGH);
struct smr_entry_list	smr_deferred;
struct timeout		smr_wakeup_tmo;
unsigned int		smr_expedite;
unsigned int		smr_ndeferred;
unsigned char		smr_grace_period;

#ifdef WITNESS
static const char smr_lock_name[] = "smr";
struct lock_object smr_lock_obj = {
	.lo_name = smr_lock_name,
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_type smr_lock_type = {
	.lt_name = smr_lock_name
};
#endif

static inline int
smr_cpu_is_idle(struct cpu_info *ci)
{
	return ci->ci_curproc == ci->ci_schedstate.spc_idleproc;
}

void
smr_startup(void)
{
	SIMPLEQ_INIT(&smr_deferred);
	WITNESS_INIT(&smr_lock_obj, &smr_lock_type);
	timeout_set(&smr_wakeup_tmo, smr_wakeup, NULL);
}

void
smr_startup_thread(void)
{
	if (kthread_create(smr_thread, NULL, NULL, "smr") != 0)
		panic("could not create smr thread");
}

/* Log the long-dispatch warning at most once per interval. */
struct timeval smr_logintvl = { 300, 0 };

void
smr_thread(void *arg)
{
	struct timeval elapsed, end, loglast, start;
	struct smr_entry_list deferred;
	struct smr_entry *smr;

	KERNEL_ASSERT_LOCKED();
	KERNEL_UNLOCK();

	memset(&loglast, 0, sizeof(loglast));
	SIMPLEQ_INIT(&deferred);

	for (;;) {
		mtx_enter(&smr_lock);
		if (smr_ndeferred == 0) {
			while (smr_ndeferred == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "bored", INFSLP);
		} else {
			if (smr_expedite == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "pause", MSEC_TO_NSEC(SMR_PAUSE));
		}

		SIMPLEQ_CONCAT(&deferred, &smr_deferred);
		smr_ndeferred = 0;
		smr_expedite = 0;
		mtx_leave(&smr_lock);

		getmicrouptime(&start);

		smr_grace_wait();

		WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&smr_lock_obj, 0);

		while ((smr = SIMPLEQ_FIRST(&deferred)) != NULL) {
			SIMPLEQ_REMOVE_HEAD(&deferred, smr_list);
			smr->smr_func(smr->smr_arg);
		}

		WITNESS_UNLOCK(&smr_lock_obj, 0);

		getmicrouptime(&end);
		timersub(&end, &start, &elapsed);
		if (elapsed.tv_sec >= 5 &&
		    ratecheck(&loglast, &smr_logintvl))
			printf("smr: dispatch took %ld seconds\n",
			    (long)elapsed.tv_sec);
	}
}

/*
 * Announce the next grace period and wait until all CPUs have entered it
 * by crossing a quiescent state.
 */
void
smr_grace_wait(void)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	unsigned char smrgp;

	smrgp = READ_ONCE(smr_grace_period) + 1;
	WRITE_ONCE(smr_grace_period, smrgp);

	curcpu()->ci_schedstate.spc_smrgp = smrgp;

	CPU_INFO_FOREACH(cii, ci) {
		if (READ_ONCE(ci->ci_schedstate.spc_smrgp) == smrgp)
			continue;
		sched_peg_curproc(ci);
		KASSERT(ci->ci_schedstate.spc_smrgp == smrgp);
	}
	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
#endif /* MULTIPROCESSOR */
}

void
smr_wakeup(void *arg)
{
	wakeup(&smr_ndeferred);
}

void
smr_read_enter(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	spc->spc_smrdepth++;
#endif
}

void
smr_read_leave(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	KASSERT(spc->spc_smrdepth > 0);
	spc->spc_smrdepth--;
#endif
}
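/*
 * Usage sketch (illustrative only, not part of this file's call graph):
 * a lockless lookup in an SMR-protected singly linked list.  A reader
 * must not sleep between smr_read_enter() and smr_read_leave().  The
 * struct foo, foo_list and foo_lookup() names are hypothetical;
 * SMR_PTR_GET() is assumed from <sys/smr.h>.
 */
#if 0
struct foo {
	struct foo	*f_next;	/* SMR-protected link */
	int		 f_key;
};

struct foo	*foo_list;		/* SMR-protected list head */

int
foo_lookup(int key)
{
	struct foo *f;
	int found = 0;

	smr_read_enter();
	for (f = SMR_PTR_GET(&foo_list); f != NULL;
	    f = SMR_PTR_GET(&f->f_next)) {
		if (f->f_key == key) {
			found = 1;
			break;
		}
	}
	smr_read_leave();

	return found;
}
#endif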
/*
 * Move SMR entries from the local queue to the system-wide queue.
 */
void
smr_dispatch(struct schedstate_percpu *spc)
{
	int expedite = 0, wake = 0;

	mtx_enter(&smr_lock);
	if (smr_ndeferred == 0)
		wake = 1;
	SIMPLEQ_CONCAT(&smr_deferred, &spc->spc_deferred);
	smr_ndeferred += spc->spc_ndeferred;
	spc->spc_ndeferred = 0;
	smr_expedite |= spc->spc_smrexpedite;
	spc->spc_smrexpedite = 0;
	expedite = smr_expedite;
	mtx_leave(&smr_lock);

	if (expedite)
		wakeup(&smr_ndeferred);
	else if (wake)
		timeout_add_msec(&smr_wakeup_tmo, SMR_PAUSE);
}

/*
 * Signal that the current CPU is in a quiescent state.
 */
void
smr_idle(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	unsigned char smrgp;

	SMR_ASSERT_NONCRITICAL();

	if (spc->spc_ndeferred > 0)
		smr_dispatch(spc);

	/*
	 * Update this CPU's view of the system's grace period.
	 * The update must become visible after any preceding reads
	 * of SMR-protected data.
	 */
	smrgp = READ_ONCE(smr_grace_period);
	if (__predict_false(spc->spc_smrgp != smrgp)) {
		membar_exit();
		WRITE_ONCE(spc->spc_smrgp, smrgp);
	}
}

void
smr_call_impl(struct smr_entry *smr, void (*func)(void *), void *arg,
    int expedite)
{
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int s;

	KASSERT(smr->smr_func == NULL);

	smr->smr_func = func;
	smr->smr_arg = arg;

	s = splhigh();
	SIMPLEQ_INSERT_TAIL(&spc->spc_deferred, smr, smr_list);
	spc->spc_ndeferred++;
	spc->spc_smrexpedite |= expedite;
	splx(s);

	/*
	 * If this call was made from an interrupt context that
	 * preempted idle state, dispatch the local queue to the shared
	 * queue immediately.
	 * The entries would otherwise linger in the local queue for a
	 * long time if the CPU went to sleep without calling smr_idle().
	 */
	if (smr_cpu_is_idle(ci))
		smr_dispatch(spc);
}
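/*
 * Usage sketch (illustrative only): deferring the reclamation of an
 * unlinked object.  smr_init() comes from <sys/smr.h>, and smr_call()
 * is assumed to be its wrapper around smr_call_impl() without
 * expediting.  struct bar, bar_free() and bar_remove() are
 * hypothetical; freeing with free(9) would also need <sys/malloc.h>.
 * The callback runs from the smr thread after every CPU has crossed a
 * quiescent state, so no reader can still hold a reference.
 */
#if 0
struct bar {
	struct smr_entry	 b_smr;
	struct bar		*b_next;
};

void
bar_free(void *arg)
{
	struct bar *b = arg;

	free(b, M_DEVBUF, sizeof(*b));
}

void
bar_remove(struct bar *b)
{
	/* Unlink b from the data structure first, then defer the free. */
	smr_init(&b->b_smr);
	smr_call(&b->b_smr, bar_free, b);
}
#endif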
void
smr_barrier_func(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

void
smr_barrier_impl(int expedite)
{
	struct cond c = COND_INITIALIZER();
	struct smr_entry smr;

	if (panicstr != NULL || db_active)
		return;

	WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);

	smr_init(&smr);
	smr_call_impl(&smr, smr_barrier_func, &c, expedite);
	cond_wait(&c, "smrbar");
}
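/*
 * Usage sketch (illustrative only): synchronous reclamation with a
 * barrier.  smr_barrier() and smr_flush() are assumed to wrap
 * smr_barrier_impl() without and with expediting, respectively, and
 * SMR_PTR_SET_LOCKED() is assumed to be the writer-side publish macro
 * from <sys/smr.h>.  struct baz, baz_ptr and baz_detach() are
 * hypothetical.
 */
#if 0
struct baz {
	int	 z_value;
};

struct baz	*baz_ptr;		/* SMR-protected pointer */

void
baz_detach(void)
{
	struct baz *bz = baz_ptr;

	/* Unpublish the object so that new readers cannot find it. */
	SMR_PTR_SET_LOCKED(&baz_ptr, NULL);

	/* Wait until all pre-existing readers are done with bz. */
	smr_barrier();

	free(bz, M_DEVBUF, sizeof(*bz));
}
#endif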