/*	$OpenBSD: kern_smr.c,v 1.16 2022/08/14 01:58:27 jsg Exp $	*/

/*
 * Copyright (c) 2019-2020 Visa Hankala
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/smr.h>
#include <sys/time.h>
#include <sys/tracepoint.h>
#include <sys/witness.h>

#include <machine/cpu.h>

#define SMR_PAUSE	100		/* pause between rounds in msec */

void	smr_dispatch(struct schedstate_percpu *);
void	smr_grace_wait(void);
void	smr_thread(void *);
void	smr_wakeup(void *);

struct mutex		smr_lock = MUTEX_INITIALIZER(IPL_HIGH);
struct smr_entry_list	smr_deferred;
struct timeout		smr_wakeup_tmo;
unsigned int		smr_expedite;
unsigned int		smr_ndeferred;
unsigned char		smr_grace_period;

#ifdef WITNESS
static const char smr_lock_name[] = "smr";
struct lock_object smr_lock_obj = {
	.lo_name = smr_lock_name,
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_type smr_lock_type = {
	.lt_name = smr_lock_name
};
#endif

static inline int
smr_cpu_is_idle(struct cpu_info *ci)
{
	return ci->ci_curproc == ci->ci_schedstate.spc_idleproc;
}

void
smr_startup(void)
{
	SIMPLEQ_INIT(&smr_deferred);
	WITNESS_INIT(&smr_lock_obj, &smr_lock_type);
	timeout_set(&smr_wakeup_tmo, smr_wakeup, NULL);
}

void
smr_startup_thread(void)
{
	if (kthread_create(smr_thread, NULL, NULL, "smr") != 0)
		panic("could not create smr thread");
}

struct timeval smr_logintvl = { 300, 0 };

void
smr_thread(void *arg)
{
	struct timeval elapsed, end, loglast, start;
	struct smr_entry_list deferred;
	struct smr_entry *smr;
	unsigned long count;

	KERNEL_ASSERT_LOCKED();
	KERNEL_UNLOCK();

	memset(&loglast, 0, sizeof(loglast));
	SIMPLEQ_INIT(&deferred);

	for (;;) {
		mtx_enter(&smr_lock);
		if (smr_ndeferred == 0) {
			while (smr_ndeferred == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "bored", INFSLP);
		} else {
			if (smr_expedite == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "pause", MSEC_TO_NSEC(SMR_PAUSE));
		}

		SIMPLEQ_CONCAT(&deferred, &smr_deferred);
		smr_ndeferred = 0;
		smr_expedite = 0;
		mtx_leave(&smr_lock);

		getmicrouptime(&start);

		smr_grace_wait();

		WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&smr_lock_obj, 0);

		count = 0;
		while ((smr = SIMPLEQ_FIRST(&deferred)) != NULL) {
			SIMPLEQ_REMOVE_HEAD(&deferred, smr_list);
			TRACEPOINT(smr, called, smr->smr_func, smr->smr_arg);
			smr->smr_func(smr->smr_arg);
			count++;
		}

		WITNESS_UNLOCK(&smr_lock_obj, 0);

		getmicrouptime(&end);
		timersub(&end, &start, &elapsed);
		if (elapsed.tv_sec >= 2 &&
		    ratecheck(&loglast, &smr_logintvl)) {
			printf("smr: dispatch took %ld.%06lds\n",
			    (long)elapsed.tv_sec,
			    (long)elapsed.tv_usec);
		}
		TRACEPOINT(smr, thread, TIMEVAL_TO_NSEC(&elapsed), count);
	}
}

/*
 * Announce the next grace period and wait until all CPUs have entered it
 * by crossing a quiescent state.
 */
void
smr_grace_wait(void)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	unsigned char smrgp;

	smrgp = READ_ONCE(smr_grace_period) + 1;
	WRITE_ONCE(smr_grace_period, smrgp);

	curcpu()->ci_schedstate.spc_smrgp = smrgp;

	CPU_INFO_FOREACH(cii, ci) {
		if (!CPU_IS_RUNNING(ci))
			continue;
		if (READ_ONCE(ci->ci_schedstate.spc_smrgp) == smrgp)
			continue;
		sched_peg_curproc(ci);
		KASSERT(ci->ci_schedstate.spc_smrgp == smrgp);
	}
	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
#endif /* MULTIPROCESSOR */
}

void
smr_wakeup(void *arg)
{
	TRACEPOINT(smr, wakeup, NULL);
	wakeup(&smr_ndeferred);
}

void
smr_read_enter(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	spc->spc_smrdepth++;
#endif
}

void
smr_read_leave(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	KASSERT(spc->spc_smrdepth > 0);
	spc->spc_smrdepth--;
#endif
}

/*
 * Move SMR entries from the local queue to the system-wide queue.
 */
void
smr_dispatch(struct schedstate_percpu *spc)
{
	int expedite = 0, wake = 0;

	mtx_enter(&smr_lock);
	if (smr_ndeferred == 0)
		wake = 1;
	SIMPLEQ_CONCAT(&smr_deferred, &spc->spc_deferred);
	smr_ndeferred += spc->spc_ndeferred;
	spc->spc_ndeferred = 0;
	smr_expedite |= spc->spc_smrexpedite;
	spc->spc_smrexpedite = 0;
	expedite = smr_expedite;
	mtx_leave(&smr_lock);

	if (expedite)
		smr_wakeup(NULL);
	else if (wake)
		timeout_add_msec(&smr_wakeup_tmo, SMR_PAUSE);
}

/*
 * Signal that the current CPU is in a quiescent state.
 */
void
smr_idle(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	unsigned char smrgp;

	SMR_ASSERT_NONCRITICAL();

	if (spc->spc_ndeferred > 0)
		smr_dispatch(spc);

	/*
	 * Update this CPU's view of the system's grace period.
	 * The update must become visible after any preceding reads
	 * of SMR-protected data.
	 */
	smrgp = READ_ONCE(smr_grace_period);
	if (__predict_false(spc->spc_smrgp != smrgp)) {
		membar_exit();
		WRITE_ONCE(spc->spc_smrgp, smrgp);
	}
}

void
smr_call_impl(struct smr_entry *smr, void (*func)(void *), void *arg,
    int expedite)
{
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int s;

	KASSERT(smr->smr_func == NULL);

	smr->smr_func = func;
	smr->smr_arg = arg;

	s = splhigh();
	SIMPLEQ_INSERT_TAIL(&spc->spc_deferred, smr, smr_list);
	spc->spc_ndeferred++;
	spc->spc_smrexpedite |= expedite;
	splx(s);
	TRACEPOINT(smr, call, func, arg, expedite);

	/*
	 * If this call was made from an interrupt context that
	 * preempted idle state, dispatch the local queue to the shared
	 * queue immediately.
	 * The entries would otherwise linger in the local queue for a
	 * long time if the CPU went to sleep without calling smr_idle().
	 */
	if (smr_cpu_is_idle(ci))
		smr_dispatch(spc);
}

void
smr_barrier_func(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

void
smr_barrier_impl(int expedite)
{
	struct cond c = COND_INITIALIZER();
	struct smr_entry smr;

	if (panicstr != NULL || db_active)
		return;

	WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);

	TRACEPOINT(smr, barrier_enter, expedite);
	smr_init(&smr);
	smr_call_impl(&smr, smr_barrier_func, &c, expedite);
	cond_wait(&c, "smrbar");
	TRACEPOINT(smr, barrier_exit, expedite);
}
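
/*
 * Usage sketch (illustrative only, not compiled as part of this file):
 * the read side dereferences an SMR-protected pointer inside a critical
 * section bracketed by smr_read_enter()/smr_read_leave(), and the update
 * side publishes a new object and defers freeing the old one with
 * smr_call(), so reclamation happens only after every CPU has passed a
 * quiescent state.  The example_* names and the M_DEVBUF malloc type are
 * placeholders; the SMR primitives (smr_init, smr_call, SMR_PTR_GET,
 * SMR_PTR_SET_LOCKED) come from <sys/smr.h>.  A caller that needs to
 * wait synchronously for the grace period could use smr_barrier()
 * instead of smr_call().
 *
 *	struct example {
 *		int			e_value;
 *		struct smr_entry	e_smr;
 *	};
 *
 *	struct rwlock example_lock = RWLOCK_INITIALIZER("example");
 *	struct example *example_ptr;
 *
 *	void
 *	example_free(void *arg)
 *	{
 *		struct example *e = arg;
 *
 *		free(e, M_DEVBUF, sizeof(*e));
 *	}
 *
 *	int
 *	example_read(void)
 *	{
 *		struct example *e;
 *		int v = 0;
 *
 *		smr_read_enter();
 *		e = SMR_PTR_GET(&example_ptr);
 *		if (e != NULL)
 *			v = e->e_value;
 *		smr_read_leave();
 *		return v;
 *	}
 *
 *	void
 *	example_replace(struct example *new)
 *	{
 *		struct example *old;
 *
 *		rw_enter_write(&example_lock);
 *		old = example_ptr;
 *		SMR_PTR_SET_LOCKED(&example_ptr, new);
 *		rw_exit_write(&example_lock);
 *
 *		if (old != NULL) {
 *			smr_init(&old->e_smr);
 *			smr_call(&old->e_smr, example_free, old);
 *		}
 *	}
 */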