/*	$OpenBSD: kern_smr.c,v 1.15 2021/11/24 13:17:37 visa Exp $	*/

/*
 * Copyright (c) 2019-2020 Visa Hankala
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/smr.h>
#include <sys/time.h>
#include <sys/tracepoint.h>
#include <sys/witness.h>

#include <machine/cpu.h>

#define SMR_PAUSE	100		/* pause between rounds in msec */

void	smr_dispatch(struct schedstate_percpu *);
void	smr_grace_wait(void);
void	smr_thread(void *);
void	smr_wakeup(void *);

struct mutex		smr_lock = MUTEX_INITIALIZER(IPL_HIGH);
struct smr_entry_list	smr_deferred;	/* system-wide queue, under smr_lock */
struct timeout		smr_wakeup_tmo;
unsigned int		smr_expedite;	/* skip the pause before the next round */
unsigned int		smr_ndeferred;	/* number of entries on smr_deferred */
unsigned char		smr_grace_period; /* written only by the smr thread */

#ifdef WITNESS
static const char smr_lock_name[] = "smr";
struct lock_object smr_lock_obj = {
	.lo_name = smr_lock_name,
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_type smr_lock_type = {
	.lt_name = smr_lock_name
};
#endif

static inline int
smr_cpu_is_idle(struct cpu_info *ci)
{
	return ci->ci_curproc == ci->ci_schedstate.spc_idleproc;
}

void
smr_startup(void)
{
	SIMPLEQ_INIT(&smr_deferred);
	WITNESS_INIT(&smr_lock_obj, &smr_lock_type);
	timeout_set(&smr_wakeup_tmo, smr_wakeup, NULL);
}

void
smr_startup_thread(void)
{
	if (kthread_create(smr_thread, NULL, NULL, "smr") != 0)
		panic("could not create smr thread");
}

struct timeval smr_logintvl = { 300, 0 };

/*
 * Collect the deferred entries, wait out a grace period, and invoke
 * the callbacks.
 */
void
smr_thread(void *arg)
{
	struct timeval elapsed, end, loglast, start;
	struct smr_entry_list deferred;
	struct smr_entry *smr;
	unsigned long count;

	KERNEL_ASSERT_LOCKED();
	KERNEL_UNLOCK();

	memset(&loglast, 0, sizeof(loglast));
	SIMPLEQ_INIT(&deferred);

	for (;;) {
		mtx_enter(&smr_lock);
		if (smr_ndeferred == 0) {
			while (smr_ndeferred == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "bored", INFSLP);
		} else {
			if (smr_expedite == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "pause", MSEC_TO_NSEC(SMR_PAUSE));
		}

		SIMPLEQ_CONCAT(&deferred, &smr_deferred);
		smr_ndeferred = 0;
		smr_expedite = 0;
		mtx_leave(&smr_lock);

		getmicrouptime(&start);

		smr_grace_wait();

		WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&smr_lock_obj, 0);

		count = 0;
		while ((smr = SIMPLEQ_FIRST(&deferred)) != NULL) {
			SIMPLEQ_REMOVE_HEAD(&deferred, smr_list);
			TRACEPOINT(smr, called, smr->smr_func, smr->smr_arg);
			smr->smr_func(smr->smr_arg);
			count++;
		}

		WITNESS_UNLOCK(&smr_lock_obj, 0);

		getmicrouptime(&end);
		timersub(&end, &start, &elapsed);
		if (elapsed.tv_sec >= 2 &&
		    ratecheck(&loglast, &smr_logintvl)) {
			printf("smr: dispatch took %ld.%06lds\n",
			    (long)elapsed.tv_sec,
			    (long)elapsed.tv_usec);
		}
		TRACEPOINT(smr, thread, TIMEVAL_TO_NSEC(&elapsed), count);
	}
}

/*
 * Announce next grace period and wait until all CPUs have entered it
 * by crossing quiescent state.
 */
void
smr_grace_wait(void)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	unsigned char smrgp;

	smrgp = READ_ONCE(smr_grace_period) + 1;
	WRITE_ONCE(smr_grace_period, smrgp);

	curcpu()->ci_schedstate.spc_smrgp = smrgp;

	CPU_INFO_FOREACH(cii, ci) {
		if (!CPU_IS_RUNNING(ci))
			continue;
		if (READ_ONCE(ci->ci_schedstate.spc_smrgp) == smrgp)
			continue;
		/*
		 * Peg this thread onto the CPU.  By the time the thread
		 * gets to run there, the CPU has passed a quiescent
		 * state and caught up with the grace period.
		 */
		sched_peg_curproc(ci);
		KASSERT(ci->ci_schedstate.spc_smrgp == smrgp);
	}
	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
#endif /* MULTIPROCESSOR */
}

void
smr_wakeup(void *arg)
{
	TRACEPOINT(smr, wakeup, NULL);
	wakeup(&smr_ndeferred);
}

void
smr_read_enter(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	spc->spc_smrdepth++;
#endif
}

void
smr_read_leave(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	KASSERT(spc->spc_smrdepth > 0);
	spc->spc_smrdepth--;
#endif
}
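
/*
 * A minimal sketch of the read side that the machinery above serves,
 * assuming the SMR_PTR_GET() accessor from <sys/smr.h>; "head",
 * "struct foo" and foo_use() are hypothetical:
 *
 *	struct foo *f;
 *
 *	smr_read_enter();
 *	f = SMR_PTR_GET(&head);
 *	if (f != NULL)
 *		foo_use(f);
 *	smr_read_leave();
 *
 * The object that f points to stays valid until smr_read_leave();
 * smr_grace_wait() returns only after every section that was active
 * when the grace period was announced has exited.
 */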

/*
 * Move SMR entries from the local queue to the system-wide queue.
 */
void
smr_dispatch(struct schedstate_percpu *spc)
{
	int expedite = 0, wake = 0;

	mtx_enter(&smr_lock);
	if (smr_ndeferred == 0)
		wake = 1;
	SIMPLEQ_CONCAT(&smr_deferred, &spc->spc_deferred);
	smr_ndeferred += spc->spc_ndeferred;
	spc->spc_ndeferred = 0;
	smr_expedite |= spc->spc_smrexpedite;
	spc->spc_smrexpedite = 0;
	expedite = smr_expedite;
	mtx_leave(&smr_lock);

	/*
	 * Wake up the smr thread immediately if expedited processing
	 * was requested.  Otherwise, if the system-wide queue was empty,
	 * arm the timeout so that the thread runs after a pause.
	 */
	if (expedite)
		smr_wakeup(NULL);
	else if (wake)
		timeout_add_msec(&smr_wakeup_tmo, SMR_PAUSE);
}

/*
 * Signal that the current CPU is in quiescent state.
 */
void
smr_idle(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	unsigned char smrgp;

	SMR_ASSERT_NONCRITICAL();

	if (spc->spc_ndeferred > 0)
		smr_dispatch(spc);

	/*
	 * Update this CPU's view of the system's grace period.
	 * The update must become visible only after any preceding
	 * reads of SMR-protected data have completed.
	 */
	smrgp = READ_ONCE(smr_grace_period);
	if (__predict_false(spc->spc_smrgp != smrgp)) {
		membar_exit();
		WRITE_ONCE(spc->spc_smrgp, smrgp);
	}
}
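
/*
 * A sketch of the update side, assuming the smr_call() wrapper and the
 * SMR_PTR_GET_LOCKED()/SMR_PTR_SET_LOCKED() macros from <sys/smr.h>
 * (smr_call() is assumed to expand to smr_call_impl() with expedite
 * set to 0); "head", "struct foo" and foo_free() are hypothetical:
 *
 *	old = SMR_PTR_GET_LOCKED(&head);
 *	SMR_PTR_SET_LOCKED(&head, new);
 *	smr_init(&old->foo_smr);
 *	smr_call(&old->foo_smr, foo_free, old);
 *
 * foo_free() runs from smr_thread() once a full grace period has
 * passed, so no reader can still hold a reference to old.
 */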

void
smr_call_impl(struct smr_entry *smr, void (*func)(void *), void *arg,
    int expedite)
{
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int s;

	KASSERT(smr->smr_func == NULL);

	smr->smr_func = func;
	smr->smr_arg = arg;

	s = splhigh();
	SIMPLEQ_INSERT_TAIL(&spc->spc_deferred, smr, smr_list);
	spc->spc_ndeferred++;
	spc->spc_smrexpedite |= expedite;
	splx(s);
	TRACEPOINT(smr, call, func, arg, expedite);

	/*
	 * If this call was made from an interrupt context that
	 * preempted idle state, dispatch the local queue to the shared
	 * queue immediately.
	 * The entries would linger in the local queue for a long time
	 * if the CPU went to sleep without calling smr_idle().
	 */
	if (smr_cpu_is_idle(ci))
		smr_dispatch(spc);
}

void
smr_barrier_func(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

/*
 * Block until any SMR read-side sections that are active on entry
 * have exited, by waiting for a callback of our own to run.
 */
void
smr_barrier_impl(int expedite)
{
	struct cond c = COND_INITIALIZER();
	struct smr_entry smr;

	if (panicstr != NULL || db_active)
		return;

	WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);

	TRACEPOINT(smr, barrier_enter, expedite);
	smr_init(&smr);
	smr_call_impl(&smr, smr_barrier_func, &c, expedite);
	cond_wait(&c, "smrbar");
	TRACEPOINT(smr, barrier_exit, expedite);
}
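
/*
 * A usage sketch for the barrier, assuming the smr_barrier() wrapper
 * from <sys/smr.h> (assumed to call smr_barrier_impl() without
 * expediting; smr_flush() is assumed to be the expedited variant);
 * "head" and "old" are hypothetical:
 *
 *	SMR_PTR_SET_LOCKED(&head, NULL);
 *	smr_barrier();
 *	free(old, M_DEVBUF, sizeof(*old));
 *
 * After smr_barrier() returns, no reader can observe the old pointer
 * any longer, so the object can be freed synchronously.
 */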